Merge tag 'ata-7.0-final' of git://git.kernel.org/pub/scm/linux/kernel/git/libata/linux

Pull ata fix from Niklas Cassel:

 - Add a quirk for JMicron JMB582/JMB585 AHCI controllers such that they
   only use 32-bit DMA addresses. While these controllers do report that
   they support 64-bit DMA addresses, a user reports that using 64-bit
   DMA addresses causes silent corruption even on modern x86 systems
   (Arthur)

* tag 'ata-7.0-final' of git://git.kernel.org/pub/scm/linux/kernel/git/libata/linux:
  ata: ahci: force 32-bit DMA for JMicron JMB582/JMB585
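For background, such a fix boils down to capping the device's DMA mask. A minimal sketch of the usual pattern for forcing 32-bit DMA on a PCI device follows; the function name is illustrative and not the actual ahci quirk plumbing::

    #include <linux/dma-mapping.h>
    #include <linux/pci.h>

    /*
     * Illustrative sketch: restrict a controller that falsely advertises
     * 64-bit DMA support to 32-bit DMA addresses. The DMA core will then
     * bounce-buffer any memory above 4 GiB as needed.
     */
    static int jmb58x_limit_dma(struct pci_dev *pdev)
    {
            return dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
    }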
diff --git a/.get_maintainer.ignore b/.get_maintainer.ignore index e8d2269..60b6b2a 100644 --- a/.get_maintainer.ignore +++ b/.get_maintainer.ignore
@@ -1,6 +1,7 @@ Alan Cox <alan@lxorguk.ukuu.org.uk> Alan Cox <root@hraefn.swansea.linux.org.uk> Alyssa Rosenzweig <alyssa@rosenzweig.io> +Askar Safin <safinaskar@gmail.com> Christoph Hellwig <hch@lst.de> Jeff Kirsher <jeffrey.t.kirsher@intel.com> Marc Gonzalez <marc.w.gonzalez@free.fr>
diff --git a/.mailmap b/.mailmap index e1cf6bb..2d04aeb 100644 --- a/.mailmap +++ b/.mailmap
@@ -210,10 +210,16 @@ Daniel Borkmann <daniel@iogearbox.net> <dborkmann@redhat.com> Daniel Borkmann <daniel@iogearbox.net> <dborkman@redhat.com> Daniel Borkmann <daniel@iogearbox.net> <dxchgb@gmail.com> +Daniel Lezcano <daniel.lezcano@kernel.org> <daniel.lezcano@linaro.org> +Daniel Lezcano <daniel.lezcano@kernel.org> <daniel.lezcano@free.fr> +Daniel Lezcano <daniel.lezcano@kernel.org> <daniel.lezcano@linexp.org> +Daniel Lezcano <daniel.lezcano@kernel.org> <dlezcano@fr.ibm.com> Daniel Thompson <danielt@kernel.org> <daniel.thompson@linaro.org> +Daniele Alessandrelli <daniele.alessandrelli@gmail.com> <daniele.alessandrelli@intel.com> Danilo Krummrich <dakr@kernel.org> <dakr@redhat.com> David Brownell <david-b@pacbell.net> David Collins <quic_collinsd@quicinc.com> <collinsd@codeaurora.org> +David Gow <david@davidgow.net> <davidgow@google.com> David Heidelberg <david@ixit.cz> <d.okias@gmail.com> David Hildenbrand <david@kernel.org> <david@redhat.com> David Rheinsberg <david@readahead.eu> <dh.herrmann@gmail.com> @@ -310,6 +316,7 @@ Hans Verkuil <hverkuil@kernel.org> <hansverk@cisco.com> Hao Ge <hao.ge@linux.dev> <gehao@kylinos.cn> Harry Yoo <harry.yoo@oracle.com> <42.hyeyoo@gmail.com> +Harry Yoo <harry@kernel.org> <harry.yoo@oracle.com> Heiko Carstens <hca@linux.ibm.com> <h.carstens@de.ibm.com> Heiko Carstens <hca@linux.ibm.com> <heiko.carstens@de.ibm.com> Heiko Stuebner <heiko@sntech.de> <heiko.stuebner@bqreaders.com> @@ -321,6 +328,7 @@ Herbert Xu <herbert@gondor.apana.org.au> Huacai Chen <chenhuacai@kernel.org> <chenhc@lemote.com> Huacai Chen <chenhuacai@kernel.org> <chenhuacai@loongson.cn> +Ignat Korchagin <ignat@linux.win> <ignat@cloudflare.com> Ike Panhc <ikepanhc@gmail.com> <ike.pan@canonical.com> J. Bruce Fields <bfields@fieldses.org> <bfields@redhat.com> J. 
Bruce Fields <bfields@fieldses.org> <bfields@citi.umich.edu> @@ -348,6 +356,7 @@ Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com> Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com> Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com> +Jason Xing <kerneljasonxing@gmail.com> <kernelxing@tencent.com> <javier@osg.samsung.com> <javier.martinez@collabora.co.uk> Javi Merino <javi.merino@kernel.org> <javi.merino@arm.com> Jayachandran C <c.jayachandran@gmail.com> <jayachandranc@netlogicmicro.com> @@ -396,6 +405,7 @@ Jisheng Zhang <jszhang@kernel.org> <jszhang@marvell.com> Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com> Jishnu Prakash <quic_jprakash@quicinc.com> <jprakash@codeaurora.org> +Joe Damato <joe@dama.to> <jdamato@fastly.com> Joel Granados <joel.granados@kernel.org> <j.granados@samsung.com> Johan Hovold <johan@kernel.org> <jhovold@gmail.com> Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com> @@ -490,7 +500,8 @@ Loic Poulain <loic.poulain@oss.qualcomm.com> <loic.poulain@linaro.org> Loic Poulain <loic.poulain@oss.qualcomm.com> <loic.poulain@intel.com> Lorenzo Pieralisi <lpieralisi@kernel.org> <lorenzo.pieralisi@arm.com> -Lorenzo Stoakes <lorenzo.stoakes@oracle.com> <lstoakes@gmail.com> +Lorenzo Stoakes <ljs@kernel.org> <lstoakes@gmail.com> +Lorenzo Stoakes <ljs@kernel.org> <lorenzo.stoakes@oracle.com> Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net> Luca Weiss <luca@lucaweiss.eu> <luca@z3ntu.xyz> Lucas De Marchi <demarchi@kernel.org> <lucas.demarchi@intel.com> @@ -577,6 +588,7 @@ Morten Welinder <welinder@anemone.rentec.com> Morten Welinder <welinder@darter.rentec.com> Morten Welinder <welinder@troll.com> +Muhammad Usama Anjum <usama.anjum@arm.com> <usama.anjum@collabora.com> Mukesh Ojha <quic_mojha@quicinc.com> <mojha@codeaurora.org> Muna Sinada <quic_msinada@quicinc.com> <msinada@codeaurora.org> Murali Nalajala <quic_mnalajal@quicinc.com> <mnalajal@codeaurora.org> @@ -876,6 +888,7 @@ Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com> Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com> Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@virtuozzo.com> +Vlastimil Babka <vbabka@kernel.org> <vbabka@suse.cz> WangYuli <wangyuli@aosc.io> <wangyl5933@chinaunicom.cn> WangYuli <wangyuli@aosc.io> <wangyuli@deepin.org> Weiwen Hu <huweiwen@linux.alibaba.com> <sehuww@mail.scut.edu.cn> @@ -890,7 +903,8 @@ Ying Huang <huang.ying.caritas@gmail.com> <ying.huang@intel.com> Yixun Lan <dlan@kernel.org> <dlan@gentoo.org> Yixun Lan <dlan@kernel.org> <yixun.lan@amlogic.com> -Yosry Ahmed <yosry.ahmed@linux.dev> <yosryahmed@google.com> +Yosry Ahmed <yosry@kernel.org> <yosryahmed@google.com> +Yosry Ahmed <yosry@kernel.org> <yosry.ahmed@linux.dev> Yu-Chun Lin <eleanor.lin@realtek.com> <eleanor15x@gmail.com> Yusuke Goda <goda.yusuke@renesas.com> Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com>
diff --git a/CREDITS b/CREDITS index d74c8b2..9091bac 100644 --- a/CREDITS +++ b/CREDITS
@@ -1242,6 +1242,10 @@ E: vfalico@gmail.com D: Co-maintainer and co-author of the network bonding driver. +N: Thomas Falcon +E: tlfalcon@linux.ibm.com +D: Initial author of the IBM ibmvnic network driver + N: János Farkas E: chexum@shadow.banki.hu D: romfs, various (mostly networking) fixes @@ -2415,6 +2419,10 @@ S: D53424 Remagen S: Germany +N: Jonathan Lemon +E: jonathan.lemon@gmail.com +D: OpenCompute PTP clock driver (ptp_ocp) + N: Colin Leroy E: colin@colino.net W: http://www.geekounet.org/
diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram index e538d485..64c0301 100644 --- a/Documentation/ABI/testing/sysfs-block-zram +++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -151,11 +151,11 @@ The algorithm_params file is write-only and is used to setup compression algorithm parameters. -What: /sys/block/zram<id>/writeback_compressed +What: /sys/block/zram<id>/compressed_writeback Date: Decemeber 2025 Contact: Richard Chang <richardycc@google.com> Description: - The writeback_compressed device atrribute toggles compressed + The compressed_writeback device attribute toggles compressed writeback feature. What: /sys/block/zram<id>/writeback_batch_size
diff --git a/Documentation/ABI/testing/sysfs-driver-uniwill-laptop b/Documentation/ABI/testing/sysfs-driver-uniwill-laptop index eaeb659..2df7079 100644 --- a/Documentation/ABI/testing/sysfs-driver-uniwill-laptop +++ b/Documentation/ABI/testing/sysfs-driver-uniwill-laptop
@@ -1,4 +1,4 @@ -What: /sys/bus/platform/devices/INOU0000:XX/fn_lock_toggle_enable +What: /sys/bus/platform/devices/INOU0000:XX/fn_lock Date: November 2025 KernelVersion: 6.19 Contact: Armin Wolf <W_Armin@gmx.de> @@ -8,15 +8,15 @@ Reading this file returns the current enable status of the FN lock functionality. -What: /sys/bus/platform/devices/INOU0000:XX/super_key_toggle_enable +What: /sys/bus/platform/devices/INOU0000:XX/super_key_enable Date: November 2025 KernelVersion: 6.19 Contact: Armin Wolf <W_Armin@gmx.de> Description: - Allows userspace applications to enable/disable the super key functionality - of the integrated keyboard by writing "1"/"0" into this file. + Allows userspace applications to enable/disable the super key of the integrated + keyboard by writing "1"/"0" into this file. - Reading this file returns the current enable status of the super key functionality. + Reading this file returns the current enable status of the super key. What: /sys/bus/platform/devices/INOU0000:XX/touchpad_toggle_enable Date: November 2025
diff --git a/Documentation/PCI/pcieaer-howto.rst b/Documentation/PCI/pcieaer-howto.rst index 3210c47..90fdfdd 100644 --- a/Documentation/PCI/pcieaer-howto.rst +++ b/Documentation/PCI/pcieaer-howto.rst
@@ -85,6 +85,16 @@ the error message to the Root Port. Please refer to PCIe specs for other fields. +The 'TLP Header' is the prefix/header of the TLP that caused the error +in raw hex format. To decode the TLP Header into human-readable form +one may use tlp-tool: + +https://github.com/mmpg-x86/tlp-tool + +Example usage:: + + curl -L https://git.kernel.org/linus/2ca1c94ce0b6 | tlp-tool --aer + AER Ratelimits --------------
diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst index 94bb7f2..451fa00 100644 --- a/Documentation/admin-guide/blockdev/zram.rst +++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -216,7 +216,7 @@ writeback_limit_enable RW show and set writeback_limit feature writeback_batch_size RW show and set maximum number of in-flight writeback operations -writeback_compressed RW show and set compressed writeback feature +compressed_writeback RW show and set compressed writeback feature comp_algorithm RW show and change the compression algorithm algorithm_params WO setup compression algorithm parameters compact WO trigger memory compaction @@ -439,11 +439,11 @@ By default zram stores written back pages in decompressed (raw) form, which means that writeback operation involves decompression of the page before writing it to the backing device. This behavior can be changed by enabling -`writeback_compressed` feature, which causes zram to write compressed pages +`compressed_writeback` feature, which causes zram to write compressed pages to the backing device, thus avoiding decompression overhead. To enable this feature, execute:: - $ echo yes > /sys/block/zramX/writeback_compressed + $ echo yes > /sys/block/zramX/compressed_writeback Note that this feature should be configured before the `zramX` device is initialized.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index cb850e5..03a5506 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -74,6 +74,7 @@ TPM TPM drivers are enabled. UMS USB Mass Storage support is enabled. USB USB support is enabled. + NVME NVMe support is enabled. USBHID USB Human Interface Device support is enabled. V4L Video For Linux support is enabled. VGA The VGA console has been enabled. @@ -4787,6 +4788,18 @@ This can be set from sysctl after boot. See Documentation/admin-guide/sysctl/vm.rst for details. + nvme.quirks= [NVME] A list of quirk entries to augment the built-in + nvme quirk list. List entries are separated by a + '-' character. + Each entry has the form VendorID:ProductID:quirk_names. + The IDs are 4-digit hex numbers and quirk_names is a + list of quirk names separated by commas. A quirk name + can be prefixed by '^', meaning that the specified + quirk must be disabled. + + Example: + nvme.quirks=7710:2267:bogus_nid,^identify_cns-9900:7711:broken_msi + ohci1394_dma=early [HW,EARLY] enable debugging via the ohci1394 driver. See Documentation/core-api/debugging-via-ohci1394.rst for more info. @@ -8183,6 +8196,9 @@ p = USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT (Reduce timeout of the SET_ADDRESS request from 5000 ms to 500 ms); + q = USB_QUIRK_FORCE_ONE_CONFIG (Device + claims zero configurations, + forcing to 1); Example: quirks=0781:5580:bk,0a5c:5834:gij usbhid.mousepoll=
diff --git a/Documentation/admin-guide/laptops/uniwill-laptop.rst b/Documentation/admin-guide/laptops/uniwill-laptop.rst index a16baf1..aff5f57 100644 --- a/Documentation/admin-guide/laptops/uniwill-laptop.rst +++ b/Documentation/admin-guide/laptops/uniwill-laptop.rst
@@ -24,7 +24,7 @@ The ``uniwill-laptop`` driver allows the user to enable/disable: - - the FN and super key lock functionality of the integrated keyboard + - the FN lock and super key of the integrated keyboard - the touchpad toggle functionality of the integrated touchpad See Documentation/ABI/testing/sysfs-driver-uniwill-laptop for details.
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index c105306..3b2ad61 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst
@@ -594,6 +594,9 @@ their sockets will only be able to connect within their own namespace. +The first write to ``child_ns_mode`` locks its value. Subsequent writes of the +same value succeed, but writing a different value returns ``-EBUSY``. + Changing ``child_ns_mode`` only affects namespaces created after the change; it does not modify the current namespace or any existing children.
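A minimal sketch of the userspace-visible locking behavior described above, assuming the sysctl is exposed at the usual path under /proc/sys (the exact path is an assumption, not taken from this patch)::

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Path is an assumption based on the usual net sysctl layout. */
    #define CHILD_NS_MODE "/proc/sys/net/core/child_ns_mode"

    static int set_mode(const char *val)
    {
            int fd = open(CHILD_NS_MODE, O_WRONLY);
            int ret = 0;

            if (fd < 0)
                    return -errno;
            if (write(fd, val, 1) < 0)
                    ret = -errno;
            close(fd);
            return ret;
    }

    int main(void)
    {
            set_mode("1");                  /* first write locks the value */
            set_mode("1");                  /* same value: succeeds        */
            if (set_mode("0") == -EBUSY)    /* different value is refused  */
                    fprintf(stderr, "child_ns_mode is locked\n");
            return 0;
    }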
diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst index 1d7bfad..123c846 100644 --- a/Documentation/core-api/dma-attributes.rst +++ b/Documentation/core-api/dma-attributes.rst
@@ -149,11 +149,33 @@ DMA_ATTR_MMIO will not perform any cache flushing. The address provided must never be mapped cacheable into the CPU. -DMA_ATTR_CPU_CACHE_CLEAN ------------------------- +DMA_ATTR_DEBUGGING_IGNORE_CACHELINES +------------------------------------ -This attribute indicates the CPU will not dirty any cacheline overlapping this -DMA_FROM_DEVICE/DMA_BIDIRECTIONAL buffer while it is mapped. This allows -multiple small buffers to safely share a cacheline without risk of data -corruption, suppressing DMA debug warnings about overlapping mappings. -All mappings sharing a cacheline should have this attribute. +This attribute indicates that CPU cache lines may overlap for buffers mapped +with DMA_FROM_DEVICE or DMA_BIDIRECTIONAL. + +Such overlap may occur when callers map multiple small buffers that reside +within the same cache line. In this case, callers must guarantee that the CPU +will not dirty these cache lines after the mappings are established. When this +condition is met, multiple buffers can safely share a cache line without risking +data corruption. + +All mappings that share a cache line must set this attribute to suppress DMA +debug warnings about overlapping mappings. + +DMA_ATTR_REQUIRE_COHERENT +------------------------- + +DMA mapping requests with the DMA_ATTR_REQUIRE_COHERENT attribute fail on any +system where SWIOTLB or cache management is required. This should only +be used to support uAPI designs that require continuous HW DMA +coherence with userspace processes, for example RDMA and DRM. At a +minimum the memory being mapped must be userspace memory from +pin_user_pages() or similar. + +Drivers should consider using dma_mmap_pages() instead of this +interface when building their uAPIs, when possible. + +It must never be used in an in-kernel driver that only works with +kernel memory.
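A hedged sketch of how a driver might pass the new attribute when mapping a pinned userspace page, per the constraints above (the helper name is illustrative; error handling is left to the caller)::

    #include <linux/dma-mapping.h>
    #include <linux/mm.h>

    /*
     * Illustrative sketch: map one page pinned via pin_user_pages(),
     * requiring that the mapping stays coherent, i.e. fail rather than
     * fall back to SWIOTLB or CPU cache maintenance.
     */
    static dma_addr_t map_pinned_page(struct device *dev, struct page *page)
    {
            dma_addr_t addr;

            addr = dma_map_page_attrs(dev, page, 0, PAGE_SIZE,
                                      DMA_BIDIRECTIONAL,
                                      DMA_ATTR_REQUIRE_COHERENT);
            /* Returns DMA_MAPPING_ERROR on systems that would need
             * SWIOTLB or cache management for this device. */
            return addr;
    }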
diff --git a/Documentation/dev-tools/kunit/run_wrapper.rst b/Documentation/dev-tools/kunit/run_wrapper.rst index 3c0b585..770bb09 100644 --- a/Documentation/dev-tools/kunit/run_wrapper.rst +++ b/Documentation/dev-tools/kunit/run_wrapper.rst
@@ -336,6 +336,8 @@ - ``--list_tests_attr``: If set, lists all tests that will be run and all of their attributes. +- ``--list_suites``: If set, lists all suites that will be run. + Command-line completion ==============================
diff --git a/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml b/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml index b90eec2..fe1272e 100644 --- a/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml +++ b/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml
@@ -66,7 +66,7 @@ required: - refresh-rate-hz -additionalProperties: false +unevaluatedProperties: false examples: - |
diff --git a/Documentation/devicetree/bindings/connector/usb-connector.yaml b/Documentation/devicetree/bindings/connector/usb-connector.yaml index 11e40d2..d97b29e 100644 --- a/Documentation/devicetree/bindings/connector/usb-connector.yaml +++ b/Documentation/devicetree/bindings/connector/usb-connector.yaml
@@ -301,6 +301,7 @@ maxItems: 4 dependencies: + pd-disable: [typec-power-opmode] sink-vdos-v1: [ sink-vdos ] sink-vdos: [ sink-vdos-v1 ]
diff --git a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml index ebda78d..02ddfaa 100644 --- a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml +++ b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml
@@ -253,7 +253,6 @@ enum: # these platforms support 2 streams MST on some interfaces, # others are SST only - - qcom,glymur-dp - qcom,sc8280xp-dp - qcom,x1e80100-dp then: @@ -310,6 +309,26 @@ minItems: 6 maxItems: 8 + - if: + properties: + compatible: + contains: + enum: + # these platforms support 2 streams MST on some interfaces, + # others are SST only, but all controllers have 4 ports + - qcom,glymur-dp + then: + properties: + reg: + minItems: 9 + maxItems: 9 + clocks: + minItems: 5 + maxItems: 6 + clocks-names: + minItems: 5 + maxItems: 6 + unevaluatedProperties: false examples:
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,glymur-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,glymur-mdss.yaml index 2329ed9..64dde43 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,glymur-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,glymur-mdss.yaml
@@ -176,13 +176,17 @@ }; }; - displayport-controller@ae90000 { + displayport-controller@af54000 { compatible = "qcom,glymur-dp"; - reg = <0xae90000 0x200>, - <0xae90200 0x200>, - <0xae90400 0x600>, - <0xae91000 0x400>, - <0xae91400 0x400>; + reg = <0xaf54000 0x200>, + <0xaf54200 0x200>, + <0xaf55000 0xc00>, + <0xaf56000 0x400>, + <0xaf57000 0x400>, + <0xaf58000 0x400>, + <0xaf59000 0x400>, + <0xaf5a000 0x600>, + <0xaf5b000 0x600>; interrupt-parent = <&mdss>; interrupts = <12>;
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml index f0cdb54..bb09ecd 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml
@@ -33,7 +33,7 @@ - const: core iommus: - maxItems: 2 + maxItems: 1 interconnects: items: @@ -107,8 +107,7 @@ interconnect-names = "mdp0-mem", "cpu-cfg"; - iommus = <&apps_smmu 0x420 0x2>, - <&apps_smmu 0x421 0x0>; + iommus = <&apps_smmu 0x420 0x2>; ranges; display-controller@5e01000 {
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml index d55fda9..a38c226 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml
@@ -10,7 +10,7 @@ - Krzysztof Kozlowski <krzk@kernel.org> description: - SM8650 MSM Mobile Display Subsystem(MDSS), which encapsulates sub-blocks like + SM8750 MSM Mobile Display Subsystem(MDSS), which encapsulates sub-blocks like DPU display controller, DSI and DP interfaces etc. $ref: /schemas/display/msm/mdss-common.yaml#
diff --git a/Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml b/Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml index 184432d..f42c546 100644 --- a/Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml +++ b/Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml
@@ -37,7 +37,7 @@ const: 2 "#interrupt-cells": - const: 1 + const: 2 ngpios: description: @@ -86,7 +86,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; - #interrupt-cells = <1>; + #interrupt-cells = <2>; interrupts = <53>, <53>, <53>, <53>, <53>, <53>, <53>, <53>, <53>, <53>, <53>, <53>,
diff --git a/Documentation/devicetree/bindings/hwmon/kontron,sl28cpld-hwmon.yaml b/Documentation/devicetree/bindings/hwmon/kontron,sl28cpld-hwmon.yaml index 966b221b..5803a17 100644 --- a/Documentation/devicetree/bindings/hwmon/kontron,sl28cpld-hwmon.yaml +++ b/Documentation/devicetree/bindings/hwmon/kontron,sl28cpld-hwmon.yaml
@@ -16,7 +16,6 @@ properties: compatible: enum: - - kontron,sa67mcu-hwmon - kontron,sl28cpld-fan reg:
diff --git a/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml b/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml index 9142001..082fdc2 100644 --- a/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml
@@ -7,7 +7,7 @@ title: Synopsys DesignWare APB I2C Controller maintainers: - - Jarkko Nikula <jarkko.nikula@linux.intel.com> + - Mika Westerberg <mika.westerberg@linux.intel.com> allOf: - $ref: /schemas/i2c/i2c-controller.yaml#
diff --git a/Documentation/devicetree/bindings/media/qcom,qcm2290-venus.yaml b/Documentation/devicetree/bindings/media/qcom,qcm2290-venus.yaml index 3f3ee82..7e6dc41 100644 --- a/Documentation/devicetree/bindings/media/qcom,qcm2290-venus.yaml +++ b/Documentation/devicetree/bindings/media/qcom,qcm2290-venus.yaml
@@ -42,7 +42,7 @@ - const: vcodec0_bus iommus: - maxItems: 5 + maxItems: 2 interconnects: maxItems: 2 @@ -102,10 +102,7 @@ memory-region = <&pil_video_mem>; iommus = <&apps_smmu 0x860 0x0>, - <&apps_smmu 0x880 0x0>, - <&apps_smmu 0x861 0x04>, - <&apps_smmu 0x863 0x0>, - <&apps_smmu 0x804 0xe0>; + <&apps_smmu 0x880 0x0>; interconnects = <&mmnrt_virt MASTER_VIDEO_P0 RPM_ALWAYS_TAG &bimc SLAVE_EBI1 RPM_ALWAYS_TAG>,
diff --git a/Documentation/devicetree/bindings/mtd/st,spear600-smi.yaml b/Documentation/devicetree/bindings/mtd/st,spear600-smi.yaml index 8fe27aa..e7385d9 100644 --- a/Documentation/devicetree/bindings/mtd/st,spear600-smi.yaml +++ b/Documentation/devicetree/bindings/mtd/st,spear600-smi.yaml
@@ -19,9 +19,6 @@ Flash sub nodes describe the memory range and optional per-flash properties. -allOf: - - $ref: mtd.yaml# - properties: compatible: const: st,spear600-smi @@ -42,14 +39,29 @@ $ref: /schemas/types.yaml#/definitions/uint32 description: Functional clock rate of the SMI controller in Hz. - st,smi-fast-mode: - type: boolean - description: Indicates that the attached flash supports fast read mode. +patternProperties: + "^flash@.*$": + $ref: /schemas/mtd/mtd.yaml# + + properties: + reg: + maxItems: 1 + + st,smi-fast-mode: + type: boolean + description: Indicates that the attached flash supports fast read mode. + + unevaluatedProperties: false + + required: + - reg required: - compatible - reg - clock-rate + - "#address-cells" + - "#size-cells" unevaluatedProperties: false @@ -64,7 +76,7 @@ interrupts = <12>; clock-rate = <50000000>; /* 50 MHz */ - flash@f8000000 { + flash@fc000000 { reg = <0xfc000000 0x1000>; st,smi-fast-mode; };
diff --git a/Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml b/Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml index ec0c216..6bcfff9 100644 --- a/Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml +++ b/Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml
@@ -87,6 +87,7 @@ allOf: - $ref: can-controller.yaml# + - $ref: /schemas/memory-controllers/mc-peripheral-props.yaml - if: properties: compatible:
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/fsl,mpc83xx.yaml b/Documentation/devicetree/bindings/powerpc/fsl/fsl,mpc83xx.yaml new file mode 100644 index 0000000..9e37d15 --- /dev/null +++ b/Documentation/devicetree/bindings/powerpc/fsl/fsl,mpc83xx.yaml
@@ -0,0 +1,93 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/powerpc/fsl/fsl,mpc83xx.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Freescale PowerQUICC II Pro (MPC83xx) platforms + +maintainers: + - J. Neuschäfer <j.ne@posteo.net> + +properties: + $nodename: + const: '/' + compatible: + oneOf: + - description: MPC83xx Reference Design Boards + items: + - enum: + - fsl,mpc8308rdb + - fsl,mpc8315erdb + - fsl,mpc8360rdk + - fsl,mpc8377rdb + - fsl,mpc8377wlan + - fsl,mpc8378rdb + - fsl,mpc8379rdb + + - description: MPC8313E Reference Design Board + items: + - const: MPC8313ERDB + - const: MPC831xRDB + - const: MPC83xxRDB + + - description: MPC8323E Reference Design Board + items: + - const: MPC8323ERDB + - const: MPC832xRDB + - const: MPC83xxRDB + + - description: MPC8349E-mITX(-GP) Reference Design Platform + items: + - enum: + - MPC8349EMITX + - MPC8349EMITXGP + - const: MPC834xMITX + - const: MPC83xxMITX + + - description: Keymile KMETER1 board + const: keymile,KMETER1 + + - description: MPC8308 P1M board + const: denx,mpc8308_p1m + +patternProperties: + "^soc@.*$": + type: object + properties: + compatible: + oneOf: + - items: + - enum: + - fsl,mpc8315-immr + - fsl,mpc8308-immr + - const: simple-bus + - items: + - const: fsl,mpc8360-immr + - const: fsl,immr + - const: fsl,soc + - const: simple-bus + - const: simple-bus + +additionalProperties: true + +examples: + - | + / { + compatible = "fsl,mpc8315erdb"; + model = "MPC8315E-RDB"; + #address-cells = <1>; + #size-cells = <1>; + + soc@e0000000 { + compatible = "fsl,mpc8315-immr", "simple-bus"; + reg = <0xe0000000 0x00000200>; + #address-cells = <1>; + #size-cells = <1>; + device_type = "soc"; + ranges = <0 0xe0000000 0x00100000>; + bus-frequency = <0>; + }; + }; + +...
diff --git a/Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml b/Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml index d6b3b5a..fe4ac93 100644 --- a/Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml
@@ -287,7 +287,7 @@ regulator-max-microvolt = <1700000>; }; mt6359_vrfck_1_ldo_reg: ldo_vrfck_1 { - regulator-name = "vrfck"; + regulator-name = "vrfck_1"; regulator-min-microvolt = <1240000>; regulator-max-microvolt = <1600000>; }; @@ -309,7 +309,7 @@ regulator-max-microvolt = <3300000>; }; mt6359_vemc_1_ldo_reg: ldo_vemc_1 { - regulator-name = "vemc"; + regulator-name = "vemc_1"; regulator-min-microvolt = <2500000>; regulator-max-microvolt = <3300000>; };
diff --git a/Documentation/devicetree/bindings/regulator/regulator.yaml b/Documentation/devicetree/bindings/regulator/regulator.yaml index 042e563..019aeb6 100644 --- a/Documentation/devicetree/bindings/regulator/regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/regulator.yaml
@@ -168,7 +168,7 @@ offset from voltage set to regulator. regulator-uv-protection-microvolt: - description: Set over under voltage protection limit. This is a limit where + description: Set under voltage protection limit. This is a limit where hardware performs emergency shutdown. Zero can be passed to disable protection and value '1' indicates that protection should be enabled but limit setting can be omitted. Limit is given as microvolt offset from @@ -182,7 +182,7 @@ is given as microvolt offset from voltage set to regulator. regulator-uv-warn-microvolt: - description: Set over under voltage warning limit. This is a limit where + description: Set under voltage warning limit. This is a limit where hardware is assumed still to be functional but approaching limit where it gets damaged. Recovery actions should be initiated. Zero can be passed to disable detection and value '1' indicates that detection should
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-graph-card.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-graph-card.yaml index da89523..92bc3ef 100644 --- a/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-graph-card.yaml +++ b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-graph-card.yaml
@@ -23,6 +23,7 @@ enum: - nvidia,tegra210-audio-graph-card - nvidia,tegra186-audio-graph-card + - nvidia,tegra238-audio-graph-card - nvidia,tegra264-audio-graph-card clocks:
diff --git a/Documentation/devicetree/bindings/sound/renesas,rz-ssi.yaml b/Documentation/devicetree/bindings/sound/renesas,rz-ssi.yaml index e4cdbf2..1394f78 100644 --- a/Documentation/devicetree/bindings/sound/renesas,rz-ssi.yaml +++ b/Documentation/devicetree/bindings/sound/renesas,rz-ssi.yaml
@@ -20,6 +20,7 @@ - renesas,r9a07g044-ssi # RZ/G2{L,LC} - renesas,r9a07g054-ssi # RZ/V2L - renesas,r9a08g045-ssi # RZ/G3S + - renesas,r9a08g046-ssi # RZ/G3L - const: renesas,rz-ssi reg:
diff --git a/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml b/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml index 56c755c..502907d 100644 --- a/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml +++ b/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml
@@ -33,6 +33,7 @@ - const: rockchip,rk3066-spdif - items: - enum: + - rockchip,rk3576-spdif - rockchip,rk3588-spdif - const: rockchip,rk3568-spdif
diff --git a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml index 4a7129d..551edf3 100644 --- a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml +++ b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml
@@ -164,7 +164,7 @@ properties: compatible: contains: - const: st,stm32mph7-sai + const: st,stm32h7-sai then: properties: clocks:
diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml index a606703..6af4ff2 100644 --- a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml +++ b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml
@@ -6,9 +6,6 @@ title: Allwinner A31 SPI Controller -allOf: - - $ref: spi-controller.yaml - maintainers: - Chen-Yu Tsai <wens@csie.org> - Maxime Ripard <mripard@kernel.org> @@ -82,11 +79,11 @@ spi-rx-bus-width: items: - - const: 1 + enum: [0, 1, 2, 4] spi-tx-bus-width: items: - - const: 1 + enum: [0, 1, 2, 4] required: - compatible @@ -95,6 +92,28 @@ - clocks - clock-names +allOf: + - $ref: spi-controller.yaml + - if: + not: + properties: + compatible: + contains: + enum: + - allwinner,sun50i-r329-spi + - allwinner,sun55i-a523-spi + then: + patternProperties: + "^.*@[0-9a-f]+": + properties: + spi-rx-bus-width: + items: + enum: [0, 1] + + spi-tx-bus-width: + items: + enum: [0, 1] + unevaluatedProperties: false examples:
diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml index 8183857..8ebebce 100644 --- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml +++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
@@ -26,21 +26,6 @@ properties: compatible: contains: - enum: - - baikal,bt1-sys-ssi - then: - properties: - mux-controls: - maxItems: 1 - required: - - mux-controls - else: - required: - - interrupts - - if: - properties: - compatible: - contains: const: amd,pensando-elba-spi then: required: @@ -75,10 +60,6 @@ const: intel,mountevans-imc-ssi - description: AMD Pensando Elba SoC SPI Controller const: amd,pensando-elba-spi - - description: Baikal-T1 SPI Controller - const: baikal,bt1-ssi - - description: Baikal-T1 System Boot SPI Controller - const: baikal,bt1-sys-ssi - description: Canaan Kendryte K210 SoS SPI Controller const: canaan,k210-spi - description: Renesas RZ/N1 SPI Controller @@ -170,6 +151,7 @@ - "#address-cells" - "#size-cells" - clocks + - interrupts examples: - | @@ -190,15 +172,4 @@ rx-sample-delay-ns = <7>; }; }; - - | - spi@1f040100 { - compatible = "baikal,bt1-sys-ssi"; - reg = <0x1f040100 0x900>, - <0x1c000000 0x1000000>; - #address-cells = <1>; - #size-cells = <0>; - mux-controls = <&boot_mux>; - clocks = <&ccu_sys>; - clock-names = "ssi_clk"; - }; ...
diff --git a/Documentation/driver-api/driver-model/binding.rst b/Documentation/driver-api/driver-model/binding.rst index d1d311a..fa0888c 100644 --- a/Documentation/driver-api/driver-model/binding.rst +++ b/Documentation/driver-api/driver-model/binding.rst
@@ -99,3 +99,51 @@ When a driver is removed, the list of devices that it supports is iterated over, and the driver's remove callback is called for each one. The device is removed from that list and the symlinks removed. + + +Driver Override +~~~~~~~~~~~~~~~ + +Userspace may override the standard matching by writing a driver name to +a device's ``driver_override`` sysfs attribute. When set, only a driver +whose name matches the override will be considered during binding. This +bypasses all bus-specific matching (OF, ACPI, ID tables, etc.). + +The override may be cleared by writing an empty string, which returns +the device to standard matching rules. Writing to ``driver_override`` +does not automatically unbind the device from its current driver or +make any attempt to load the specified driver. + +Buses opt into this mechanism by setting the ``driver_override`` flag in +their ``struct bus_type``:: + + const struct bus_type example_bus_type = { + ... + .driver_override = true, + }; + +When the flag is set, the driver core automatically creates the +``driver_override`` sysfs attribute for every device on that bus. + +The bus's ``match()`` callback should check the override before performing +its own matching, using ``device_match_driver_override()``:: + + static int example_match(struct device *dev, const struct device_driver *drv) + { + int ret; + + ret = device_match_driver_override(dev, drv); + if (ret >= 0) + return ret; + + /* Fall through to bus-specific matching... */ + } + +``device_match_driver_override()`` returns > 0 if the override matches +the given driver, 0 if the override is set but does not match, or < 0 if +no override is set at all. + +Additional helpers are available: + +- ``device_set_driver_override()`` - set or clear the override from kernel code. +- ``device_has_driver_override()`` - check whether an override is set.
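To complement the helpers listed above, a hedged sketch of setting an override from kernel code (the driver name and function are placeholders)::

    #include <linux/device.h>

    /*
     * Illustrative sketch: pin a device to the "example_driver" driver.
     * device_set_driver_override() copies the string; passing NULL
     * clears the override again.
     */
    static int example_pin_device(struct device *dev)
    {
            int ret = device_set_driver_override(dev, "example_driver");

            if (ret)
                    return ret;

            /* The override only affects future matching; a device that is
             * already bound must still be unbound and re-probed. */
            return 0;
    }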
diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index af5a69f..eb84651 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst
@@ -783,6 +783,56 @@ mounted with "uuid=on". +Durability and copy up +---------------------- + +The fsync(2) system call ensures that the data and metadata of a file +are safely written to the backing storage, which is expected to +guarantee the existence of the information after a system crash. + +Without an fsync(2) call, there is no guarantee that the observed +data after a system crash will be either the old or the new data, but +in practice, the observed data after a crash is often the old or new data +or a mix of both. + +When an overlayfs file is modified for the first time, copy up will +create a copy of the lower file and its parent directories in the upper +layer. Since the Linux filesystem API does not enforce any particular +ordering on storing changes without explicit fsync(2) calls, in case +of a system crash, the upper file could end up with no data at all +(i.e. zeros), which would be an unusual outcome. To avoid this, +overlayfs calls fsync(2) on the upper file before completing +data copy up with rename(2) or link(2) to make the copy up "atomic". + +By default, overlayfs does not explicitly call fsync(2) on copied up +directories or on metadata-only copy up, so it provides no guarantee that +the user's modifications are persisted unless the user calls fsync(2). +The fsync during copy up only guarantees that if a copy up is observed +after a crash, the observed data is not zeroes or intermediate values +from the copy up staging area. + +On traditional local filesystems with a single journal (e.g. ext4, xfs), +fsync on a file also persists the parent directory changes, because they +are usually modified in the same transaction, so metadata durability during +data copy up effectively comes for free. Overlayfs further limits risk by +disallowing network filesystems as upper layer. + +Overlayfs can be tuned to prefer performance or durability when storing +to the underlying upper layer. This is controlled by the "fsync" mount +option, which supports these values: + +- "auto": (default) + Call fsync(2) on upper file before completion of data copy up. + No explicit fsync(2) on directory or metadata-only copy up. +- "strict": + Call fsync(2) on upper file and directories before completion of any + copy up. +- "volatile": [*] + Prefer performance over durability (see `Volatile mount`_) + +[*] The mount option "volatile" is an alias for "fsync=volatile". + + Volatile mount --------------
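As a usage illustration of the mount option described above, a hedged sketch of requesting the stricter durability mode from userspace (all paths are placeholders)::

    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
            /* "fsync=strict" additionally fsyncs copied-up directories
             * before any copy up completes, per the option table above. */
            if (mount("overlay", "/mnt/merged", "overlay", 0,
                      "lowerdir=/lower,upperdir=/upper,workdir=/work,"
                      "fsync=strict"))
                    perror("mount");
            return 0;
    }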
diff --git a/Documentation/hwmon/adm1177.rst b/Documentation/hwmon/adm1177.rst index 1c85a2af..375f6d6 100644 --- a/Documentation/hwmon/adm1177.rst +++ b/Documentation/hwmon/adm1177.rst
@@ -27,10 +27,10 @@ Sysfs entries ------------- -The following attributes are supported. Current maxim attribute +The following attributes are supported. Current maximum attribute is read-write, all other attributes are read-only. -in0_input Measured voltage in microvolts. +in0_input Measured voltage in millivolts. -curr1_input Measured current in microamperes. -curr1_max_alarm Overcurrent alarm in microamperes. +curr1_input Measured current in milliamperes. +curr1_max Overcurrent shutdown threshold in milliamperes.
diff --git a/Documentation/hwmon/emc1403.rst b/Documentation/hwmon/emc1403.rst index 57f833b..77060d5 100644 --- a/Documentation/hwmon/emc1403.rst +++ b/Documentation/hwmon/emc1403.rst
@@ -57,7 +57,7 @@ - https://ww1.microchip.com/downloads/en/DeviceDoc/EMC1438%20DS%20Rev.%201.0%20(04-29-10).pdf Author: - Kalhan Trisal <kalhan.trisal@intel.com + Kalhan Trisal <kalhan.trisal@intel.com> Description
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst index d91dbb2..b2ca851 100644 --- a/Documentation/hwmon/index.rst +++ b/Documentation/hwmon/index.rst
@@ -220,7 +220,6 @@ q54sj108a2 qnap-mcu-hwmon raspberrypi-hwmon - sa67 sbrmi sbtsi_temp sch5627
diff --git a/Documentation/hwmon/peci-cputemp.rst b/Documentation/hwmon/peci-cputemp.rst index fe04222..266b62a 100644 --- a/Documentation/hwmon/peci-cputemp.rst +++ b/Documentation/hwmon/peci-cputemp.rst
@@ -51,8 +51,9 @@ temp1_crit Provides shutdown temperature of the CPU package which is also known as the maximum processor junction temperature, Tjmax or Tprochot. -temp1_crit_hyst Provides the hysteresis value from Tcontrol to Tjmax of - the CPU package. +temp1_crit_hyst Provides the hysteresis temperature of the CPU + package. Returns Tcontrol, the temperature at which + the critical condition clears. temp2_label "DTS" temp2_input Provides current temperature of the CPU package scaled @@ -62,8 +63,9 @@ temp2_crit Provides shutdown temperature of the CPU package which is also known as the maximum processor junction temperature, Tjmax or Tprochot. -temp2_crit_hyst Provides the hysteresis value from Tcontrol to Tjmax of - the CPU package. +temp2_crit_hyst Provides the hysteresis temperature of the CPU + package. Returns Tcontrol, the temperature at which + the critical condition clears. temp3_label "Tcontrol" temp3_input Provides current Tcontrol temperature of the CPU
diff --git a/Documentation/hwmon/sa67.rst b/Documentation/hwmon/sa67.rst deleted file mode 100644 index 029c7c1..0000000 --- a/Documentation/hwmon/sa67.rst +++ /dev/null
@@ -1,41 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0-only - -Kernel driver sa67mcu -===================== - -Supported chips: - - * Kontron sa67mcu - - Prefix: 'sa67mcu' - - Datasheet: not available - -Authors: Michael Walle <mwalle@kernel.org> - -Description ------------ - -The sa67mcu is a board management controller which also exposes a hardware -monitoring controller. - -The controller has two voltage and one temperature sensor. The values are -hold in two 8 bit registers to form one 16 bit value. Reading the lower byte -will also capture the high byte to make the access atomic. The unit of the -volatge sensors are 1mV and the unit of the temperature sensor is 0.1degC. - -Sysfs entries -------------- - -The following attributes are supported. - -======================= ======================================================== -in0_label "VDDIN" -in0_input Measured VDDIN voltage. - -in1_label "VDD_RTC" -in1_input Measured VDD_RTC voltage. - -temp1_input MCU temperature. Roughly the board temperature. -======================= ======================================================== -
diff --git a/Documentation/netlink/specs/net_shaper.yaml b/Documentation/netlink/specs/net_shaper.yaml index 0b1b54b..3f2ad77 100644 --- a/Documentation/netlink/specs/net_shaper.yaml +++ b/Documentation/netlink/specs/net_shaper.yaml
@@ -247,8 +247,8 @@ flags: [admin-perm] do: - pre: net-shaper-nl-pre-doit - post: net-shaper-nl-post-doit + pre: net-shaper-nl-pre-doit-write + post: net-shaper-nl-post-doit-write request: attributes: - ifindex @@ -278,8 +278,8 @@ flags: [admin-perm] do: - pre: net-shaper-nl-pre-doit - post: net-shaper-nl-post-doit + pre: net-shaper-nl-pre-doit-write + post: net-shaper-nl-post-doit-write request: attributes: *ns-binding @@ -309,8 +309,8 @@ flags: [admin-perm] do: - pre: net-shaper-nl-pre-doit - post: net-shaper-nl-post-doit + pre: net-shaper-nl-pre-doit-write + post: net-shaper-nl-post-doit-write request: attributes: - ifindex
diff --git a/Documentation/netlink/specs/nfsd.yaml b/Documentation/netlink/specs/nfsd.yaml index badb2fe..f87b5a0 100644 --- a/Documentation/netlink/specs/nfsd.yaml +++ b/Documentation/netlink/specs/nfsd.yaml
@@ -152,7 +152,7 @@ - compound-ops - name: threads-set - doc: set the number of running threads + doc: set the maximum number of running threads attribute-set: server flags: [admin-perm] do: @@ -165,7 +165,7 @@ - min-threads - name: threads-get - doc: get the number of running threads + doc: get the maximum number of running threads attribute-set: server do: reply:
diff --git a/Documentation/process/security-bugs.rst b/Documentation/process/security-bugs.rst index c0cf93e..27b028e 100644 --- a/Documentation/process/security-bugs.rst +++ b/Documentation/process/security-bugs.rst
@@ -5,8 +5,138 @@ Linux kernel developers take security very seriously. As such, we'd like to know when a security bug is found so that it can be fixed and -disclosed as quickly as possible. Please report security bugs to the -Linux kernel security team. +disclosed as quickly as possible. + +Preparing your report +--------------------- + +As with any bug report, a security bug report requires a lot of analysis work +from the developers, so the more information you can share about the issue, the +better. Please review the procedure outlined in +Documentation/admin-guide/reporting-issues.rst if you are unclear about what +information is helpful. The following information is absolutely necessary in +**any** security bug report: + + * **affected kernel version range**: with no version indication, your report + will not be processed. A significant proportion of reports are for bugs that + have already been fixed, so it is extremely important that vulnerabilities + are verified on recent versions (development tree or latest stable + version), at least by verifying that the code has not changed since the + version where it was detected. + + * **description of the problem**: a detailed description of the problem, with + traces showing its manifestation, and why you consider the observed + behavior a problem in the kernel, is necessary. + + * **reproducer**: developers will need to be able to reproduce the problem to + consider a fix as effective. This includes both a way to trigger the issue + and a way to confirm it happens. A reproducer with low complexity + dependencies will be needed (source code, shell script, sequence of + instructions, file-system image etc). Binary-only executables are not + accepted. Working exploits are extremely helpful and will not be released + without consent from the reporter, unless they are already public. By + definition if an issue cannot be reproduced, it is not exploitable, thus it + is not a security bug. + + * **conditions**: if the bug depends on certain configuration options, + sysctls, permissions, timing, code modifications etc, these should be + indicated. + +In addition, the following information is highly desirable: + + * **suspected location of the bug**: the file names and functions where the + bug is suspected to be present are very important, at least to help forward + the report to the appropriate maintainers. When not possible (for example, + "system freezes each time I run this command"), the security team will help + identify the source of the bug. + + * **a proposed fix**: bug reporters who have analyzed the cause of a bug in + the source code almost always have an accurate idea of how to fix it, + because they spent a long time studying it and its implications. Proposing + a tested fix will save maintainers a lot of time, even if the fix ends up + not being the right one, because it helps understand the bug. When + proposing a tested fix, please always format it in a way that can be + immediately merged (see Documentation/process/submitting-patches.rst). + This will save some back-and-forth exchanges if it is accepted, and you + will be credited for finding and fixing this issue. Note that in this case + only a ``Signed-off-by:`` tag is needed, without ``Reported-by:`` when the + reporter and author are the same. + + * **mitigations**: very often during a bug analysis, some ways of mitigating + the issue appear.
It is useful to share them, as they can be helpful to + keep end users protected during the time it takes them to apply the fix. + +Identifying contacts +-------------------- + +The most effective way to report a security bug is to send it directly to the +affected subsystem's maintainers and Cc: the Linux kernel security team. Do +not send it to a public list at this stage, unless you have good reasons to +consider the issue as being public or trivial to discover (e.g. result of a +widely available automated vulnerability scanning tool that can be repeated by +anyone). + +If you're sending a report for issues affecting multiple parts in the kernel, +even if they're fairly similar issues, please send individual messages (keep in +mind that maintainers will not all work on the issues at the same time). The only +exception is when an issue concerns closely related parts maintained by the +exact same subset of maintainers, and these parts are expected to be fixed all +at once by the same commit; in that case it may be acceptable to report them together. + +One difficulty for most first-time reporters is to figure out the right list of +recipients to send a report to. In the Linux kernel, all official maintainers +are trusted, so the consequences of accidentally including the wrong maintainer +are essentially a bit more noise for that person, i.e. nothing dramatic. As +such, a suitable method to figure out the list of maintainers (which kernel +security officers use) is to rely on the get_maintainer.pl script, tuned to +only report maintainers. This script, when passed a file name, will look for +its path in the MAINTAINERS file to derive a hierarchical list of relevant +maintainers. Calling it first with the finest level of filtering will +most of the time return a short list of this specific file's maintainers:: + + $ ./scripts/get_maintainer.pl --no-l --no-r --pattern-depth 1 \ + drivers/example.c + Developer One <dev1@example.com> (maintainer:example driver) + Developer Two <dev2@example.org> (maintainer:example driver) + +These two maintainers should then receive the message. If the command does not +return anything, it means the affected file is part of a wider subsystem, so we +should be less specific:: + + $ ./scripts/get_maintainer.pl --no-l --no-r drivers/example.c + Developer One <dev1@example.com> (maintainer:example subsystem) + Developer Two <dev2@example.org> (maintainer:example subsystem) + Developer Three <dev3@example.com> (maintainer:example subsystem [GENERAL]) + Developer Four <dev4@example.org> (maintainer:example subsystem [GENERAL]) + +Here, picking the first, most specific ones is sufficient. When the list is +long, it is possible to produce a comma-delimited e-mail address list on a +single line suitable for use in the To: field of a mailer like this:: + + $ ./scripts/get_maintainer.pl --no-tree --no-l --no-r --no-n --m \ + --no-git-fallback --no-substatus --no-rolestats --no-multiline \ + --pattern-depth 1 drivers/example.c + dev1@example.com, dev2@example.org + +or this for the wider list:: + + $ ./scripts/get_maintainer.pl --no-tree --no-l --no-r --no-n --m \ + --no-git-fallback --no-substatus --no-rolestats --no-multiline \ + drivers/example.c + dev1@example.com, dev2@example.org, dev3@example.com, dev4@example.org + +If at this point you're still facing difficulties spotting the right +maintainers, **and only in this case**, it's possible to send your report to +the Linux kernel security team only.
Your message will be triaged, and you +will receive instructions about whom to contact, if needed. Your message may +also be forwarded as-is to the relevant maintainers. + +Sending the report +------------------ + +Reports are to be sent over e-mail exclusively. Please use a working e-mail +address, preferably the same one that you want to appear in ``Reported-by`` tags +if any. If unsure, send your report to yourself first. The security team and maintainers almost always require additional information beyond what was initially provided in a report and rely on @@ -18,20 +148,12 @@ or cannot effectively discuss their findings may be abandoned if the communication does not quickly improve. -As it is with any bug, the more information provided the easier it -will be to diagnose and fix. Please review the procedure outlined in -'Documentation/admin-guide/reporting-issues.rst' if you are unclear about what -information is helpful. Any exploit code is very helpful and will not -be released without consent from the reporter unless it has already been -made public. - +The report must be sent to maintainers, with the security team in ``Cc:``. The Linux kernel security team can be contacted by email at <security@kernel.org>. This is a private list of security officers -who will help verify the bug report and develop and release a fix. -If you already have a fix, please include it with your report, as -that can speed up the process considerably. It is possible that the -security team will bring in extra help from area maintainers to -understand and fix the security vulnerability. +who will help verify the bug report and assist developers working on a fix. +It is possible that the security team will bring in extra help from area +maintainers to understand and fix the security vulnerability. Please send **plain text** emails without attachments where possible. It is much harder to have a context-quoted discussion about a complex @@ -42,7 +164,9 @@ Markdown, HTML and RST formatted reports are particularly frowned upon since they're quite hard to read for humans and encourage to use dedicated viewers, sometimes online, which by definition is not acceptable for a confidential -security report. +security report. Note that some mailers tend to mangle formatting of plain +text by default; please consult Documentation/process/email-clients.rst for +more info. Disclosure and embargoed information ------------------------------------
diff --git a/Documentation/scheduler/sched-ext.rst b/Documentation/scheduler/sched-ext.rst index 9e2882d..d74c2c2 100644 --- a/Documentation/scheduler/sched-ext.rst +++ b/Documentation/scheduler/sched-ext.rst
@@ -43,7 +43,6 @@ CONFIG_DEBUG_INFO_BTF=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_JIT_DEFAULT_ON=y - CONFIG_PAHOLE_HAS_BTF_TAG=y sched_ext is used only when the BPF scheduler is loaded and running. @@ -58,7 +57,8 @@ However, when the BPF scheduler is loaded and ``SCX_OPS_SWITCH_PARTIAL`` is set in ``ops->flags``, only tasks with the ``SCHED_EXT`` policy are scheduled by sched_ext, while tasks with ``SCHED_NORMAL``, ``SCHED_BATCH`` and -``SCHED_IDLE`` policies are scheduled by the fair-class scheduler. +``SCHED_IDLE`` policies are scheduled by the fair-class scheduler which has +higher sched_class precedence than ``SCHED_EXT``. Terminating the sched_ext scheduler program, triggering `SysRq-S`, or detection of any internal error including stalled runnable tasks aborts the @@ -345,6 +345,8 @@ The functions prefixed with ``scx_bpf_`` can be called from the BPF scheduler. +* ``kernel/sched/ext_idle.c`` contains the built-in idle CPU selection policy. + * ``tools/sched_ext/`` hosts example BPF scheduler implementations. * ``scx_simple[.bpf].c``: Minimal global FIFO scheduler example using a @@ -353,13 +355,35 @@ * ``scx_qmap[.bpf].c``: A multi-level FIFO scheduler supporting five levels of priority implemented with ``BPF_MAP_TYPE_QUEUE``. + * ``scx_central[.bpf].c``: A central FIFO scheduler where all scheduling + decisions are made on one CPU, demonstrating ``LOCAL_ON`` dispatching, + tickless operation, and kthread preemption. + + * ``scx_cpu0[.bpf].c``: A scheduler that queues all tasks to a shared DSQ + and only dispatches them on CPU0 in FIFO order. Useful for testing bypass + behavior. + + * ``scx_flatcg[.bpf].c``: A flattened cgroup hierarchy scheduler + implementing hierarchical weight-based cgroup CPU control by compounding + each cgroup's share at every level into a single flat scheduling layer. + + * ``scx_pair[.bpf].c``: A core-scheduling example that always makes + sibling CPU pairs execute tasks from the same CPU cgroup. + + * ``scx_sdt[.bpf].c``: A variation of ``scx_simple`` demonstrating BPF + arena memory management for per-task data. + + * ``scx_userland[.bpf].c``: A minimal scheduler demonstrating user space + scheduling. Tasks with CPU affinity are direct-dispatched in FIFO order; + all others are scheduled in user space by a simple vruntime scheduler. + ABI Instability =============== The APIs provided by sched_ext to BPF schedulers programs have no stability guarantees. This includes the ops table callbacks and constants defined in ``include/linux/sched/ext.h``, as well as the ``scx_bpf_`` kfuncs defined in -``kernel/sched/ext.c``. +``kernel/sched/ext.c`` and ``kernel/sched/ext_idle.c``. While we will attempt to provide a relatively stable API surface when possible, they are subject to change without warning between kernel
diff --git a/Documentation/sound/alsa-configuration.rst b/Documentation/sound/alsa-configuration.rst index 0a4eaa7..55b845d 100644 --- a/Documentation/sound/alsa-configuration.rst +++ b/Documentation/sound/alsa-configuration.rst
@@ -2372,6 +2372,10 @@ audible volume * bit 25: ``mixer_capture_min_mute`` Similar to bit 24 but for capture streams + * bit 26: ``skip_iface_setup`` + Skip the probe-time interface setup (usb_set_interface, + init_pitch, init_sample_rate); redundant with + snd_usb_endpoint_prepare() at stream-open time This module supports multiple devices, autoprobe and hotplugging.
diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst index 13134bc..7f86d7a 100644 --- a/Documentation/userspace-api/landlock.rst +++ b/Documentation/userspace-api/landlock.rst
@@ -8,7 +8,7 @@ ===================================== :Author: Mickaël Salaün -:Date: January 2026 +:Date: March 2026 The goal of Landlock is to enable restriction of ambient rights (e.g. global filesystem or network access) for a set of processes. Because Landlock @@ -197,12 +197,27 @@ .. code-block:: c - __u32 restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; - if (abi < 7) { - /* Clear logging flags unsupported before ABI 7. */ + __u32 restrict_flags = + LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON | + LANDLOCK_RESTRICT_SELF_TSYNC; + switch (abi) { + case 1 ... 6: + /* Removes logging flags for ABI < 7 */ restrict_flags &= ~(LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF | LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON | LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF); + __attribute__((fallthrough)); + case 7: + /* + * Removes multithreaded enforcement flag for ABI < 8 + * + * WARNING: Without this flag, calling landlock_restrict_self(2) is + * only equivalent if the calling process is single-threaded. Below + * ABI v8 (and as of ABI v8, when not using this flag), a Landlock + * policy would only be enforced for the calling thread and its + * children (and not for all threads, including parents and siblings). + */ + restrict_flags &= ~LANDLOCK_RESTRICT_SELF_TSYNC; } The next step is to restrict the current thread from gaining more privileges
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index fc57368..0325167 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst
@@ -1396,7 +1396,10 @@ Memory for the region is taken starting at the address denoted by the field userspace_addr, which must point at user addressable memory for the entire memory slot size. Any object may back this memory, including -anonymous memory, ordinary files, and hugetlbfs. +anonymous memory, ordinary files, and hugetlbfs. Changes in the backing +of the memory region are automatically reflected into the guest. +For example, an mmap() that affects the region will be made visible +immediately. Another example is madvise(MADV_DONTNEED). On architectures that support a form of address tagging, userspace_addr must be an untagged address. @@ -1412,11 +1415,6 @@ to make a new slot read-only. In this case, writes to this memory will be posted to userspace as KVM_EXIT_MMIO exits. -When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of -the memory region are automatically reflected into the guest. For example, an -mmap() that affects the region will be made visible immediately. Another -example is madvise(MADV_DROP). - For TDX guest, deleting/moving memory region loses guest memory contents. Read only region isn't supported. Only as-id 0 is supported. @@ -8437,115 +8435,123 @@ The valid bits in cap.args[0] are: -=================================== ============================================ - KVM_X86_QUIRK_LINT0_REENABLED By default, the reset value for the LVT - LINT0 register is 0x700 (APIC_MODE_EXTINT). - When this quirk is disabled, the reset value - is 0x10000 (APIC_LVT_MASKED). +======================================== ================================================ +KVM_X86_QUIRK_LINT0_REENABLED By default, the reset value for the LVT + LINT0 register is 0x700 (APIC_MODE_EXTINT). + When this quirk is disabled, the reset value + is 0x10000 (APIC_LVT_MASKED). - KVM_X86_QUIRK_CD_NW_CLEARED By default, KVM clears CR0.CD and CR0.NW on - AMD CPUs to workaround buggy guest firmware - that runs in perpetuity with CR0.CD, i.e. - with caches in "no fill" mode. +KVM_X86_QUIRK_CD_NW_CLEARED By default, KVM clears CR0.CD and CR0.NW on + AMD CPUs to work around buggy guest firmware + that runs in perpetuity with CR0.CD, i.e. + with caches in "no fill" mode. - When this quirk is disabled, KVM does not - change the value of CR0.CD and CR0.NW. + When this quirk is disabled, KVM does not + change the value of CR0.CD and CR0.NW. - KVM_X86_QUIRK_LAPIC_MMIO_HOLE By default, the MMIO LAPIC interface is - available even when configured for x2APIC - mode. When this quirk is disabled, KVM - disables the MMIO LAPIC interface if the - LAPIC is in x2APIC mode. +KVM_X86_QUIRK_LAPIC_MMIO_HOLE By default, the MMIO LAPIC interface is + available even when configured for x2APIC + mode. When this quirk is disabled, KVM + disables the MMIO LAPIC interface if the + LAPIC is in x2APIC mode. - KVM_X86_QUIRK_OUT_7E_INC_RIP By default, KVM pre-increments %rip before - exiting to userspace for an OUT instruction - to port 0x7e. When this quirk is disabled, - KVM does not pre-increment %rip before - exiting to userspace. +KVM_X86_QUIRK_OUT_7E_INC_RIP By default, KVM pre-increments %rip before + exiting to userspace for an OUT instruction + to port 0x7e. When this quirk is disabled, + KVM does not pre-increment %rip before + exiting to userspace. - KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT When this quirk is disabled, KVM sets - CPUID.01H:ECX[bit 3] (MONITOR/MWAIT) if - IA32_MISC_ENABLE[bit 18] (MWAIT) is set. 
- Additionally, when this quirk is disabled, - KVM clears CPUID.01H:ECX[bit 3] if - IA32_MISC_ENABLE[bit 18] is cleared. +KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT When this quirk is disabled, KVM sets + CPUID.01H:ECX[bit 3] (MONITOR/MWAIT) if + IA32_MISC_ENABLE[bit 18] (MWAIT) is set. + Additionally, when this quirk is disabled, + KVM clears CPUID.01H:ECX[bit 3] if + IA32_MISC_ENABLE[bit 18] is cleared. - KVM_X86_QUIRK_FIX_HYPERCALL_INSN By default, KVM rewrites guest - VMMCALL/VMCALL instructions to match the - vendor's hypercall instruction for the - system. When this quirk is disabled, KVM - will no longer rewrite invalid guest - hypercall instructions. Executing the - incorrect hypercall instruction will - generate a #UD within the guest. +KVM_X86_QUIRK_FIX_HYPERCALL_INSN By default, KVM rewrites guest + VMMCALL/VMCALL instructions to match the + vendor's hypercall instruction for the + system. When this quirk is disabled, KVM + will no longer rewrite invalid guest + hypercall instructions. Executing the + incorrect hypercall instruction will + generate a #UD within the guest. -KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS By default, KVM emulates MONITOR/MWAIT (if - they are intercepted) as NOPs regardless of - whether or not MONITOR/MWAIT are supported - according to guest CPUID. When this quirk - is disabled and KVM_X86_DISABLE_EXITS_MWAIT - is not set (MONITOR/MWAIT are intercepted), - KVM will inject a #UD on MONITOR/MWAIT if - they're unsupported per guest CPUID. Note, - KVM will modify MONITOR/MWAIT support in - guest CPUID on writes to MISC_ENABLE if - KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT is - disabled. +KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS By default, KVM emulates MONITOR/MWAIT (if + they are intercepted) as NOPs regardless of + whether or not MONITOR/MWAIT are supported + according to guest CPUID. When this quirk + is disabled and KVM_X86_DISABLE_EXITS_MWAIT + is not set (MONITOR/MWAIT are intercepted), + KVM will inject a #UD on MONITOR/MWAIT if + they're unsupported per guest CPUID. Note, + KVM will modify MONITOR/MWAIT support in + guest CPUID on writes to MISC_ENABLE if + KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT is + disabled. -KVM_X86_QUIRK_SLOT_ZAP_ALL By default, for KVM_X86_DEFAULT_VM VMs, KVM - invalidates all SPTEs in all memslots and - address spaces when a memslot is deleted or - moved. When this quirk is disabled (or the - VM type isn't KVM_X86_DEFAULT_VM), KVM only - ensures the backing memory of the deleted - or moved memslot isn't reachable, i.e KVM - _may_ invalidate only SPTEs related to the - memslot. +KVM_X86_QUIRK_SLOT_ZAP_ALL By default, for KVM_X86_DEFAULT_VM VMs, KVM + invalidates all SPTEs in all memslots and + address spaces when a memslot is deleted or + moved. When this quirk is disabled (or the + VM type isn't KVM_X86_DEFAULT_VM), KVM only + ensures the backing memory of the deleted + or moved memslot isn't reachable, i.e KVM + _may_ invalidate only SPTEs related to the + memslot. -KVM_X86_QUIRK_STUFF_FEATURE_MSRS By default, at vCPU creation, KVM sets the - vCPU's MSR_IA32_PERF_CAPABILITIES (0x345), - MSR_IA32_ARCH_CAPABILITIES (0x10a), - MSR_PLATFORM_INFO (0xce), and all VMX MSRs - (0x480..0x492) to the maximal capabilities - supported by KVM. KVM also sets - MSR_IA32_UCODE_REV (0x8b) to an arbitrary - value (which is different for Intel vs. - AMD). Lastly, when guest CPUID is set (by - userspace), KVM modifies select VMX MSR - fields to force consistency between guest - CPUID and L2's effective ISA. 
When this - quirk is disabled, KVM zeroes the vCPU's MSR - values (with two exceptions, see below), - i.e. treats the feature MSRs like CPUID - leaves and gives userspace full control of - the vCPU model definition. This quirk does - not affect VMX MSRs CR0/CR4_FIXED1 (0x487 - and 0x489), as KVM does now allow them to - be set by userspace (KVM sets them based on - guest CPUID, for safety purposes). +KVM_X86_QUIRK_STUFF_FEATURE_MSRS By default, at vCPU creation, KVM sets the + vCPU's MSR_IA32_PERF_CAPABILITIES (0x345), + MSR_IA32_ARCH_CAPABILITIES (0x10a), + MSR_PLATFORM_INFO (0xce), and all VMX MSRs + (0x480..0x492) to the maximal capabilities + supported by KVM. KVM also sets + MSR_IA32_UCODE_REV (0x8b) to an arbitrary + value (which is different for Intel vs. + AMD). Lastly, when guest CPUID is set (by + userspace), KVM modifies select VMX MSR + fields to force consistency between guest + CPUID and L2's effective ISA. When this + quirk is disabled, KVM zeroes the vCPU's MSR + values (with two exceptions, see below), + i.e. treats the feature MSRs like CPUID + leaves and gives userspace full control of + the vCPU model definition. This quirk does + not affect VMX MSRs CR0/CR4_FIXED1 (0x487 + and 0x489), as KVM does not allow them to + be set by userspace (KVM sets them based on + guest CPUID, for safety purposes). -KVM_X86_QUIRK_IGNORE_GUEST_PAT By default, on Intel platforms, KVM ignores - guest PAT and forces the effective memory - type to WB in EPT. The quirk is not available - on Intel platforms which are incapable of - safely honoring guest PAT (i.e., without CPU - self-snoop, KVM always ignores guest PAT and - forces effective memory type to WB). It is - also ignored on AMD platforms or, on Intel, - when a VM has non-coherent DMA devices - assigned; KVM always honors guest PAT in - such case. The quirk is needed to avoid - slowdowns on certain Intel Xeon platforms - (e.g. ICX, SPR) where self-snoop feature is - supported but UC is slow enough to cause - issues with some older guests that use - UC instead of WC to map the video RAM. - Userspace can disable the quirk to honor - guest PAT if it knows that there is no such - guest software, for example if it does not - expose a bochs graphics device (which is - known to have had a buggy driver). -=================================== ============================================ +KVM_X86_QUIRK_IGNORE_GUEST_PAT By default, on Intel platforms, KVM ignores + guest PAT and forces the effective memory + type to WB in EPT. The quirk is not available + on Intel platforms which are incapable of + safely honoring guest PAT (i.e., without CPU + self-snoop, KVM always ignores guest PAT and + forces effective memory type to WB). It is + also ignored on AMD platforms or, on Intel, + when a VM has non-coherent DMA devices + assigned; KVM always honors guest PAT in + such case. The quirk is needed to avoid + slowdowns on certain Intel Xeon platforms + (e.g. ICX, SPR) where self-snoop feature is + supported but UC is slow enough to cause + issues with some older guests that use + UC instead of WC to map the video RAM. + Userspace can disable the quirk to honor + guest PAT if it knows that there is no such + guest software, for example if it does not + expose a bochs graphics device (which is + known to have had a buggy driver). + +KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM By default, KVM relaxes the consistency + check for GUEST_IA32_DEBUGCTL in vmcs12 + to allow FREEZE_IN_SMM to be set. 
When + this quirk is disabled, KVM requires this + bit to be cleared. Note that the vmcs02 + bit is still completely controlled by the + host, regardless of the quirk setting. +======================================== ================================================ 7.32 KVM_CAP_MAX_VCPU_ID ------------------------
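Two illustrative sketches for the api.rst changes above (editor examples, not from the patch). First, the memslot text: a minimal slot whose userspace backing can later be changed by mmap()/madvise(), assuming a VM fd already obtained via ``KVM_CREATE_VM``::

	#include <linux/kvm.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>

	/* Back 2 MiB of guest physical memory at GPA 0 with anonymous memory.
	 * Per the text above, later changes to this mapping (re-mmap,
	 * madvise, ...) become visible to the guest automatically.
	 */
	static int add_memslot(int vm_fd)
	{
		struct kvm_userspace_memory_region region;
		void *mem = mmap(NULL, 2 << 20, PROT_READ | PROT_WRITE,
				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		if (mem == MAP_FAILED)
			return -1;

		memset(&region, 0, sizeof(region));
		region.slot = 0;
		region.guest_phys_addr = 0;
		region.memory_size = 2 << 20;
		region.userspace_addr = (unsigned long)mem;
		return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
	}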
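Second, the quirks table: the listed bits are consumed through ``cap.args[0]``. A hedged sketch of disabling one quirk on the VM fd, assuming the ``KVM_CAP_DISABLE_QUIRKS2`` capability path that this table documents::

	#include <linux/kvm.h>
	#include <string.h>
	#include <sys/ioctl.h>

	/* Disable a single quirk, e.g. KVM_X86_QUIRK_IGNORE_GUEST_PAT, so
	 * that KVM honors guest PAT as described in the table above.
	 */
	static int disable_quirk(int vm_fd, __u64 quirk_bit)
	{
		struct kvm_enable_cap cap;

		memset(&cap, 0, sizeof(cap));
		cap.cap = KVM_CAP_DISABLE_QUIRKS2;
		cap.args[0] = quirk_bit;
		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}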
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst index ae8bce7..662231e 100644 --- a/Documentation/virt/kvm/locking.rst +++ b/Documentation/virt/kvm/locking.rst
@@ -17,6 +17,8 @@ - kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock +- vcpu->mutex is taken outside kvm->slots_lock and kvm->slots_arch_lock + - kvm->slots_lock is taken outside kvm->irq_lock, though acquiring them together is quite rare.
diff --git a/MAINTAINERS b/MAINTAINERS index 55af015..c3fe46d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS
@@ -993,10 +993,8 @@ F: drivers/thermal/thermal_mmio.c AMAZON ETHERNET DRIVERS -M: Shay Agroskin <shayagr@amazon.com> M: Arthur Kiyanovski <akiyano@amazon.com> -R: David Arinzon <darinzon@amazon.com> -R: Saeed Bishara <saeedb@amazon.com> +M: David Arinzon <darinzon@amazon.com> L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/device_drivers/ethernet/amazon/ena.rst @@ -1292,7 +1290,6 @@ F: include/uapi/drm/amdxdna_accel.h AMD XGBE DRIVER -M: "Shyam Sundar S K" <Shyam-sundar.S-k@amd.com> M: Raju Rangoju <Raju.Rangoju@amd.com> L: netdev@vger.kernel.org S: Maintained @@ -3989,7 +3986,7 @@ ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS M: Corentin Chary <corentin.chary@gmail.com> M: Luke D. Jones <luke@ljones.dev> -M: Denis Benato <benato.denis96@gmail.com> +M: Denis Benato <denis.benato@linux.dev> L: platform-driver-x86@vger.kernel.org S: Maintained W: https://asus-linux.org/ @@ -4025,7 +4022,7 @@ ASYMMETRIC KEYS M: David Howells <dhowells@redhat.com> M: Lukas Wunner <lukas@wunner.de> -M: Ignat Korchagin <ignat@cloudflare.com> +M: Ignat Korchagin <ignat@linux.win> L: keyrings@vger.kernel.org L: linux-crypto@vger.kernel.org S: Maintained @@ -4038,7 +4035,7 @@ ASYMMETRIC KEYS - ECDSA M: Lukas Wunner <lukas@wunner.de> -M: Ignat Korchagin <ignat@cloudflare.com> +M: Ignat Korchagin <ignat@linux.win> R: Stefan Berger <stefanb@linux.ibm.com> L: linux-crypto@vger.kernel.org S: Maintained @@ -4048,14 +4045,14 @@ ASYMMETRIC KEYS - GOST M: Lukas Wunner <lukas@wunner.de> -M: Ignat Korchagin <ignat@cloudflare.com> +M: Ignat Korchagin <ignat@linux.win> L: linux-crypto@vger.kernel.org S: Odd fixes F: crypto/ecrdsa* ASYMMETRIC KEYS - RSA M: Lukas Wunner <lukas@wunner.de> -M: Ignat Korchagin <ignat@cloudflare.com> +M: Ignat Korchagin <ignat@linux.win> L: linux-crypto@vger.kernel.org S: Maintained F: crypto/rsa* @@ -4618,7 +4615,6 @@ BLUETOOTH SUBSYSTEM M: Marcel Holtmann <marcel@holtmann.org> -M: Johan Hedberg <johan.hedberg@gmail.com> M: Luiz Augusto von Dentz <luiz.dentz@gmail.com> L: linux-bluetooth@vger.kernel.org S: Supported @@ -6213,20 +6209,20 @@ CISCO SCSI HBA DRIVER M: Karan Tilak Kumar <kartilak@cisco.com> +M: Narsimhulu Musini <nmusini@cisco.com> M: Sesidhar Baddela <sebaddel@cisco.com> L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/snic/ CISCO VIC ETHERNET NIC DRIVER -M: Christian Benvenuti <benve@cisco.com> M: Satish Kharat <satishkh@cisco.com> S: Maintained F: drivers/net/ethernet/cisco/enic/ CISCO VIC LOW LATENCY NIC DRIVER -M: Christian Benvenuti <benve@cisco.com> M: Nelson Escobar <neescoba@cisco.com> +M: Satish Kharat <satishkh@cisco.com> S: Supported F: drivers/infiniband/hw/usnic/ @@ -6280,7 +6276,7 @@ F: include/linux/clk.h CLOCKSOURCE, CLOCKEVENT DRIVERS -M: Daniel Lezcano <daniel.lezcano@linaro.org> +M: Daniel Lezcano <daniel.lezcano@kernel.org> M: Thomas Gleixner <tglx@kernel.org> L: linux-kernel@vger.kernel.org S: Supported @@ -6669,7 +6665,7 @@ CPU IDLE TIME MANAGEMENT FRAMEWORK M: "Rafael J. 
Wysocki" <rafael@kernel.org> -M: Daniel Lezcano <daniel.lezcano@linaro.org> +M: Daniel Lezcano <daniel.lezcano@kernel.org> R: Christian Loehle <christian.loehle@arm.com> L: linux-pm@vger.kernel.org S: Maintained @@ -6699,7 +6695,7 @@ CPUIDLE DRIVER - ARM BIG LITTLE M: Lorenzo Pieralisi <lpieralisi@kernel.org> -M: Daniel Lezcano <daniel.lezcano@linaro.org> +M: Daniel Lezcano <daniel.lezcano@kernel.org> L: linux-pm@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@ -6707,7 +6703,7 @@ F: drivers/cpuidle/cpuidle-big_little.c CPUIDLE DRIVER - ARM EXYNOS -M: Daniel Lezcano <daniel.lezcano@linaro.org> +M: Daniel Lezcano <daniel.lezcano@kernel.org> M: Kukjin Kim <kgene@kernel.org> R: Krzysztof Kozlowski <krzk@kernel.org> L: linux-pm@vger.kernel.org @@ -8002,7 +7998,9 @@ F: drivers/gpu/drm/tiny/hx8357d.c DRM DRIVER FOR HYPERV SYNTHETIC VIDEO DEVICE -M: Deepak Rawat <drawat.floss@gmail.com> +M: Dexuan Cui <decui@microsoft.com> +M: Long Li <longli@microsoft.com> +M: Saurabh Sengar <ssengar@linux.microsoft.com> L: linux-hyperv@vger.kernel.org L: dri-devel@lists.freedesktop.org S: Maintained @@ -8630,9 +8628,14 @@ F: include/uapi/drm/lima_drm.h DRM DRIVERS FOR LOONGSON -M: Sui Jingfeng <suijingfeng@loongson.cn> +M: Jianmin Lv <lvjianmin@loongson.cn> +M: Qianhai Wu <wuqianhai@loongson.cn> +R: Huacai Chen <chenhuacai@kernel.org> +R: Mingcong Bai <jeffbai@aosc.io> +R: Xi Ruoyao <xry111@xry111.site> +R: Icenowy Zheng <zhengxingda@iscas.ac.cn> L: dri-devel@lists.freedesktop.org -S: Supported +S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/loongson/ @@ -9616,7 +9619,12 @@ EXT4 FILE SYSTEM M: "Theodore Ts'o" <tytso@mit.edu> -M: Andreas Dilger <adilger.kernel@dilger.ca> +R: Andreas Dilger <adilger.kernel@dilger.ca> +R: Baokun Li <libaokun@linux.alibaba.com> +R: Jan Kara <jack@suse.cz> +R: Ojaswin Mujoo <ojaswin@linux.ibm.com> +R: Ritesh Harjani (IBM) <ritesh.list@gmail.com> +R: Zhang Yi <yi.zhang@huawei.com> L: linux-ext4@vger.kernel.org S: Maintained W: http://ext4.wiki.kernel.org @@ -10172,8 +10180,8 @@ FREESCALE IMX / MXC FEC DRIVER M: Wei Fang <wei.fang@nxp.com> +R: Frank Li <frank.li@nxp.com> R: Shenwei Wang <shenwei.wang@nxp.com> -R: Clark Wang <xiaoning.wang@nxp.com> L: imx@lists.linux.dev L: netdev@vger.kernel.org S: Maintained @@ -10485,7 +10493,7 @@ F: Documentation/trace/ftrace* F: arch/*/*/*/*ftrace* F: arch/*/*/*ftrace* -F: include/*/ftrace.h +F: include/*/*ftrace* F: kernel/trace/fgraph.c F: kernel/trace/ftrace* F: samples/ftrace @@ -12012,7 +12020,6 @@ M: Wolfram Sang <wsa+renesas@sang-engineering.com> L: linux-i2c@vger.kernel.org S: Maintained -W: https://i2c.wiki.kernel.org/ Q: https://patchwork.ozlabs.org/project/linux-i2c/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git F: Documentation/i2c/ @@ -12038,7 +12045,6 @@ M: Andi Shyti <andi.shyti@kernel.org> L: linux-i2c@vger.kernel.org S: Maintained -W: https://i2c.wiki.kernel.org/ Q: https://patchwork.ozlabs.org/project/linux-i2c/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/andi.shyti/linux.git F: Documentation/devicetree/bindings/i2c/ @@ -12217,7 +12223,6 @@ M: Haren Myneni <haren@linux.ibm.com> M: Rick Lindsley <ricklind@linux.ibm.com> R: Nick Child <nnac123@linux.ibm.com> -R: Thomas Falcon <tlfalcon@linux.ibm.com> L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/ibm/ibmvnic.* @@ -13943,7 +13948,7 @@ KERNEL UNIT TESTING FRAMEWORK (KUnit) M: Brendan Higgins <brendan.higgins@linux.dev> -M: David 
Gow <davidgow@google.com> +M: David Gow <david@davidgow.net> R: Rae Moar <raemoar63@gmail.com> L: linux-kselftest@vger.kernel.org L: kunit-dev@googlegroups.com @@ -14412,9 +14417,9 @@ M: Herve Codina <herve.codina@bootlin.com> S: Maintained F: Documentation/devicetree/bindings/net/lantiq,pef2256.yaml -F: drivers/net/wan/framer/pef2256/ +F: drivers/net/wan/framer/ F: drivers/pinctrl/pinctrl-pef2256.c -F: include/linux/framer/pef2256.h +F: include/linux/framer/ LASI 53c700 driver for PARISC M: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> @@ -14763,7 +14768,7 @@ F: drivers/platform/x86/hp/hp_accel.c LIST KUNIT TEST -M: David Gow <davidgow@google.com> +M: David Gow <david@davidgow.net> L: linux-kselftest@vger.kernel.org L: kunit-dev@googlegroups.com S: Maintained @@ -15376,10 +15381,8 @@ F: include/linux/soc/marvell/octeontx2/ MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2) -M: Mirko Lindner <mlindner@marvell.com> -M: Stephen Hemminger <stephen@networkplumber.org> L: netdev@vger.kernel.org -S: Odd fixes +S: Orphan F: drivers/net/ethernet/marvell/sk* MARVELL LIBERTAS WIRELESS DRIVER @@ -15476,7 +15479,6 @@ M: Sunil Goutham <sgoutham@marvell.com> M: Linu Cherian <lcherian@marvell.com> M: Geetha sowjanya <gakula@marvell.com> -M: Jerin Jacob <jerinj@marvell.com> M: hariprasad <hkelam@marvell.com> M: Subbaraya Sundeep <sbhatta@marvell.com> L: netdev@vger.kernel.org @@ -15491,7 +15493,7 @@ F: drivers/perf/marvell_pem_pmu.c MARVELL PRESTERA ETHERNET SWITCH DRIVER -M: Taras Chornyi <taras.chornyi@plvision.eu> +M: Elad Nachman <enachman@marvell.com> S: Supported W: https://github.com/Marvell-switching/switchdev-prestera F: drivers/net/ethernet/marvell/prestera/ @@ -16165,7 +16167,6 @@ MEDIATEK ETHERNET DRIVER M: Felix Fietkau <nbd@nbd.name> -M: Sean Wang <sean.wang@mediatek.com> M: Lorenzo Bianconi <lorenzo@kernel.org> L: netdev@vger.kernel.org S: Maintained @@ -16358,8 +16359,6 @@ MEDIATEK SWITCH DRIVER M: Chester A. Unal <chester.a.unal@arinc9.com> M: Daniel Golle <daniel@makrotopia.org> -M: DENG Qingfang <dqfext@gmail.com> -M: Sean Wang <sean.wang@mediatek.com> L: netdev@vger.kernel.org S: Maintained F: drivers/net/dsa/mt7530-mdio.c @@ -16369,7 +16368,6 @@ MEDIATEK T7XX 5G WWAN MODEM DRIVER M: Chandrashekar Devegowda <chandrashekar.devegowda@intel.com> -R: Chiranjeevi Rapolu <chiranjeevi.rapolu@linux.intel.com> R: Liu Haijun <haijun.liu@mediatek.com> R: Ricardo Martinez <ricardo.martinez@linux.intel.com> L: netdev@vger.kernel.org @@ -16654,9 +16652,9 @@ MEMORY MANAGEMENT - CORE M: Andrew Morton <akpm@linux-foundation.org> M: David Hildenbrand <david@kernel.org> -R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> +R: Lorenzo Stoakes <ljs@kernel.org> R: Liam R. Howlett <Liam.Howlett@oracle.com> -R: Vlastimil Babka <vbabka@suse.cz> +R: Vlastimil Babka <vbabka@kernel.org> R: Mike Rapoport <rppt@kernel.org> R: Suren Baghdasaryan <surenb@google.com> R: Michal Hocko <mhocko@suse.com> @@ -16784,9 +16782,9 @@ MEMORY MANAGEMENT - MISC M: Andrew Morton <akpm@linux-foundation.org> M: David Hildenbrand <david@kernel.org> -R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> +R: Lorenzo Stoakes <ljs@kernel.org> R: Liam R. 
Howlett <Liam.Howlett@oracle.com> -R: Vlastimil Babka <vbabka@suse.cz> +R: Vlastimil Babka <vbabka@kernel.org> R: Mike Rapoport <rppt@kernel.org> R: Suren Baghdasaryan <surenb@google.com> R: Michal Hocko <mhocko@suse.com> @@ -16841,7 +16839,7 @@ MEMORY MANAGEMENT - PAGE ALLOCATOR M: Andrew Morton <akpm@linux-foundation.org> -M: Vlastimil Babka <vbabka@suse.cz> +M: Vlastimil Babka <vbabka@kernel.org> R: Suren Baghdasaryan <surenb@google.com> R: Michal Hocko <mhocko@suse.com> R: Brendan Jackman <jackmanb@google.com> @@ -16875,7 +16873,7 @@ R: Michal Hocko <mhocko@kernel.org> R: Qi Zheng <zhengqi.arch@bytedance.com> R: Shakeel Butt <shakeel.butt@linux.dev> -R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> +R: Lorenzo Stoakes <ljs@kernel.org> L: linux-mm@kvack.org S: Maintained F: mm/vmscan.c @@ -16884,11 +16882,11 @@ MEMORY MANAGEMENT - RMAP (REVERSE MAPPING) M: Andrew Morton <akpm@linux-foundation.org> M: David Hildenbrand <david@kernel.org> -M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> +M: Lorenzo Stoakes <ljs@kernel.org> R: Rik van Riel <riel@surriel.com> R: Liam R. Howlett <Liam.Howlett@oracle.com> -R: Vlastimil Babka <vbabka@suse.cz> -R: Harry Yoo <harry.yoo@oracle.com> +R: Vlastimil Babka <vbabka@kernel.org> +R: Harry Yoo <harry@kernel.org> R: Jann Horn <jannh@google.com> L: linux-mm@kvack.org S: Maintained @@ -16929,7 +16927,7 @@ MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE) M: Andrew Morton <akpm@linux-foundation.org> M: David Hildenbrand <david@kernel.org> -M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> +M: Lorenzo Stoakes <ljs@kernel.org> R: Zi Yan <ziy@nvidia.com> R: Baolin Wang <baolin.wang@linux.alibaba.com> R: Liam R. Howlett <Liam.Howlett@oracle.com> @@ -16969,7 +16967,7 @@ MEMORY MANAGEMENT - RUST M: Alice Ryhl <aliceryhl@google.com> -R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> +R: Lorenzo Stoakes <ljs@kernel.org> R: Liam R. Howlett <Liam.Howlett@oracle.com> L: linux-mm@kvack.org L: rust-for-linux@vger.kernel.org @@ -16985,8 +16983,8 @@ MEMORY MAPPING M: Andrew Morton <akpm@linux-foundation.org> M: Liam R. Howlett <Liam.Howlett@oracle.com> -M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> -R: Vlastimil Babka <vbabka@suse.cz> +M: Lorenzo Stoakes <ljs@kernel.org> +R: Vlastimil Babka <vbabka@kernel.org> R: Jann Horn <jannh@google.com> R: Pedro Falcato <pfalcato@suse.de> L: linux-mm@kvack.org @@ -17015,8 +17013,8 @@ M: Andrew Morton <akpm@linux-foundation.org> M: Suren Baghdasaryan <surenb@google.com> M: Liam R. Howlett <Liam.Howlett@oracle.com> -M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> -R: Vlastimil Babka <vbabka@suse.cz> +M: Lorenzo Stoakes <ljs@kernel.org> +R: Vlastimil Babka <vbabka@kernel.org> R: Shakeel Butt <shakeel.butt@linux.dev> L: linux-mm@kvack.org S: Maintained @@ -17030,9 +17028,9 @@ MEMORY MAPPING - MADVISE (MEMORY ADVICE) M: Andrew Morton <akpm@linux-foundation.org> M: Liam R. 
Howlett <Liam.Howlett@oracle.com> -M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> +M: Lorenzo Stoakes <ljs@kernel.org> M: David Hildenbrand <david@kernel.org> -R: Vlastimil Babka <vbabka@suse.cz> +R: Vlastimil Babka <vbabka@kernel.org> R: Jann Horn <jannh@google.com> L: linux-mm@kvack.org S: Maintained @@ -19227,8 +19225,6 @@ OCELOT ETHERNET SWITCH DRIVER M: Vladimir Oltean <vladimir.oltean@nxp.com> -M: Claudiu Manoil <claudiu.manoil@nxp.com> -M: Alexandre Belloni <alexandre.belloni@bootlin.com> M: UNGLinuxDriver@microchip.com L: netdev@vger.kernel.org S: Supported @@ -19814,7 +19810,6 @@ F: include/dt-bindings/ OPENCOMPUTE PTP CLOCK DRIVER -M: Jonathan Lemon <jonathan.lemon@gmail.com> M: Vadim Fedorenko <vadim.fedorenko@linux.dev> L: netdev@vger.kernel.org S: Maintained @@ -20122,9 +20117,8 @@ F: drivers/pci/controller/pci-aardvark.c PCI DRIVER FOR ALTERA PCIE IP -M: Joyce Ooi <joyce.ooi@intel.com> L: linux-pci@vger.kernel.org -S: Supported +S: Orphan F: Documentation/devicetree/bindings/pci/altr,pcie-root-port.yaml F: drivers/pci/controller/pcie-altera.c @@ -20369,9 +20363,8 @@ F: Documentation/PCI/pci-error-recovery.rst PCI MSI DRIVER FOR ALTERA MSI IP -M: Joyce Ooi <joyce.ooi@intel.com> L: linux-pci@vger.kernel.org -S: Supported +S: Orphan F: Documentation/devicetree/bindings/interrupt-controller/altr,msi-controller.yaml F: drivers/pci/controller/pcie-altera-msi.c @@ -20509,7 +20502,7 @@ F: drivers/pci/controller/dwc/pcie-kirin.c PCIE DRIVER FOR HISILICON STB -M: Shawn Guo <shawn.guo@linaro.org> +M: Shawn Guo <shawnguo@kernel.org> L: linux-pci@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pci/hisilicon-histb-pcie.txt @@ -21458,9 +21451,8 @@ F: drivers/scsi/qedi/ QLOGIC QL4xxx ETHERNET DRIVER -M: Manish Chopra <manishc@marvell.com> L: netdev@vger.kernel.org -S: Maintained +S: Orphan F: drivers/net/ethernet/qlogic/qed/ F: drivers/net/ethernet/qlogic/qede/ F: include/linux/qed/ @@ -21695,7 +21687,7 @@ F: drivers/net/ethernet/qualcomm/emac/ QUALCOMM ETHQOS ETHERNET DRIVER -M: Vinod Koul <vkoul@kernel.org> +M: Mohd Ayaan Anwar <mohd.anwar@oss.qualcomm.com> L: netdev@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Maintained @@ -21955,7 +21947,7 @@ RADOS BLOCK DEVICE (RBD) M: Ilya Dryomov <idryomov@gmail.com> -R: Dongsheng Yang <dongsheng.yang@easystack.cn> +R: Dongsheng Yang <dongsheng.yang@linux.dev> L: ceph-devel@vger.kernel.org S: Supported W: http://ceph.com/ @@ -22284,6 +22276,16 @@ S: Orphan F: drivers/net/wireless/rsi/ +RELAY +M: Andrew Morton <akpm@linux-foundation.org> +M: Jens Axboe <axboe@kernel.dk> +M: Jason Xing <kernelxing@tencent.com> +L: linux-kernel@vger.kernel.org +S: Maintained +F: Documentation/filesystems/relay.rst +F: include/linux/relay.h +F: kernel/relay.c + REGISTER MAP ABSTRACTION M: Mark Brown <broonie@kernel.org> L: linux-kernel@vger.kernel.org @@ -23173,8 +23175,8 @@ RUST [ALLOC] M: Danilo Krummrich <dakr@kernel.org> -R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> -R: Vlastimil Babka <vbabka@suse.cz> +R: Lorenzo Stoakes <ljs@kernel.org> +R: Vlastimil Babka <vbabka@kernel.org> R: Liam R. 
Howlett <Liam.Howlett@oracle.com> R: Uladzislau Rezki <urezki@gmail.com> L: rust-for-linux@vger.kernel.org @@ -24337,7 +24339,6 @@ F: Documentation/devicetree/bindings/pwm/kontron,sl28cpld-pwm.yaml F: Documentation/devicetree/bindings/watchdog/kontron,sl28cpld-wdt.yaml F: drivers/gpio/gpio-sl28cpld.c -F: drivers/hwmon/sa67mcu-hwmon.c F: drivers/hwmon/sl28cpld-hwmon.c F: drivers/irqchip/irq-sl28cpld.c F: drivers/pwm/pwm-sl28cpld.c @@ -24350,12 +24351,13 @@ F: drivers/nvmem/layouts/sl28vpd.c SLAB ALLOCATOR -M: Vlastimil Babka <vbabka@suse.cz> +M: Vlastimil Babka <vbabka@kernel.org> +M: Harry Yoo <harry@kernel.org> M: Andrew Morton <akpm@linux-foundation.org> +R: Hao Li <hao.li@linux.dev> R: Christoph Lameter <cl@gentwo.org> R: David Rientjes <rientjes@google.com> R: Roman Gushchin <roman.gushchin@linux.dev> -R: Harry Yoo <harry.yoo@oracle.com> L: linux-mm@kvack.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git @@ -24909,9 +24911,9 @@ F: drivers/pinctrl/spear/ SPI NOR SUBSYSTEM -M: Tudor Ambarus <tudor.ambarus@linaro.org> M: Pratyush Yadav <pratyush@kernel.org> M: Michael Walle <mwalle@kernel.org> +R: Takahiro Kuwano <takahiro.kuwano@infineon.com> L: linux-mtd@lists.infradead.org S: Maintained W: http://www.linux-mtd.infradead.org/ @@ -25766,6 +25768,7 @@ F: include/net/pkt_sched.h F: include/net/sch_priv.h F: include/net/tc_act/ +F: include/net/tc_wrapper.h F: include/uapi/linux/pkt_cls.h F: include/uapi/linux/pkt_sched.h F: include/uapi/linux/tc_act/ @@ -26217,7 +26220,7 @@ THERMAL M: Rafael J. Wysocki <rafael@kernel.org> -M: Daniel Lezcano <daniel.lezcano@linaro.org> +M: Daniel Lezcano <daniel.lezcano@kernel.org> R: Zhang Rui <rui.zhang@intel.com> R: Lukasz Luba <lukasz.luba@arm.com> L: linux-pm@vger.kernel.org @@ -26247,7 +26250,7 @@ THERMAL/CPU_COOLING M: Amit Daniel Kachhap <amit.kachhap@gmail.com> -M: Daniel Lezcano <daniel.lezcano@linaro.org> +M: Daniel Lezcano <daniel.lezcano@kernel.org> M: Viresh Kumar <viresh.kumar@linaro.org> R: Lukasz Luba <lukasz.luba@arm.com> L: linux-pm@vger.kernel.org @@ -29186,7 +29189,7 @@ ZSWAP COMPRESSED SWAP CACHING M: Johannes Weiner <hannes@cmpxchg.org> -M: Yosry Ahmed <yosry.ahmed@linux.dev> +M: Yosry Ahmed <yosry@kernel.org> M: Nhat Pham <nphamcs@gmail.com> R: Chengming Zhou <chengming.zhou@linux.dev> L: linux-mm@kvack.org
diff --git a/Makefile b/Makefile index e944c6e..4f54c56 100644 --- a/Makefile +++ b/Makefile
@@ -2,7 +2,7 @@ VERSION = 7 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc7 NAME = Baby Opossum Posse # *DOCUMENTATION* @@ -476,6 +476,7 @@ export rust_common_flags := --edition=2021 \ -Zbinary_dep_depinfo=y \ -Astable_features \ + -Aunused_features \ -Dnon_ascii_idents \ -Dunsafe_op_in_unsafe_fn \ -Wmissing_docs \ @@ -1113,6 +1114,9 @@ # change __FILE__ to the relative path to the source directory ifdef building_out_of_srctree KBUILD_CPPFLAGS += -fmacro-prefix-map=$(srcroot)/= +ifeq ($(call rustc-option-yn, --remap-path-scope=macro),y) +KBUILD_RUSTFLAGS += --remap-path-prefix=$(srcroot)/= --remap-path-scope=macro +endif endif # include additional Makefiles when needed @@ -1497,13 +1501,13 @@ $(Q)$(MAKE) -sC $(srctree)/tools/bpf/resolve_btfids O=$(resolve_btfids_O) clean endif -PHONY += objtool_clean +PHONY += objtool_clean objtool_mrproper objtool_O = $(abspath $(objtree))/tools/objtool -objtool_clean: +objtool_clean objtool_mrproper: ifneq ($(wildcard $(objtool_O)),) - $(Q)$(MAKE) -sC $(abs_srctree)/tools/objtool O=$(objtool_O) srctree=$(abs_srctree) clean + $(Q)$(MAKE) -sC $(abs_srctree)/tools/objtool O=$(objtool_O) srctree=$(abs_srctree) $(patsubst objtool_%,%,$@) endif tools/: FORCE @@ -1650,7 +1654,7 @@ modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \ compile_commands.json rust/test \ rust-project.json .vmlinux.objs .vmlinux.export.c \ - .builtin-dtbs-list .builtin-dtb.S + .builtin-dtbs-list .builtin-dtbs.S # Directories & files removed with 'make mrproper' MRPROPER_FILES += include/config include/generated \ @@ -1686,7 +1690,7 @@ $(mrproper-dirs): $(Q)$(MAKE) $(clean)=$(patsubst _mrproper_%,%,$@) -mrproper: clean $(mrproper-dirs) +mrproper: clean objtool_mrproper $(mrproper-dirs) $(call cmd,rmfiles) @find . $(RCS_FIND_IGNORE) \ \( -name '*.rmeta' \) \
diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 2efa7df..2d136c6 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S
@@ -71,6 +71,7 @@ STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS DISCARDS
diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index 61a1b2b..6af6308 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S
@@ -123,6 +123,7 @@ _end = . ; STABS_DEBUG + MODINFO ELF_DETAILS DISCARDS
diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index d411abd..2d91664 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S
@@ -21,6 +21,7 @@ COMMON_DISCARDS *(.ARM.exidx*) *(.ARM.extab*) + *(.modinfo) *(.note.*) *(.rel.*) *(.printk_index)
diff --git a/arch/arm/boot/dts/microchip/sam9x7.dtsi b/arch/arm/boot/dts/microchip/sam9x7.dtsi index 46dacbb..d242d7a 100644 --- a/arch/arm/boot/dts/microchip/sam9x7.dtsi +++ b/arch/arm/boot/dts/microchip/sam9x7.dtsi
@@ -1226,7 +1226,7 @@ pioB: gpio@fffff600 { interrupt-controller; #gpio-cells = <2>; gpio-controller; - #gpio-lines = <26>; + #gpio-lines = <27>; clocks = <&pmc PMC_TYPE_PERIPHERAL 3>; };
diff --git a/arch/arm/boot/dts/nxp/imx/imx6-logicpd-som.dtsi b/arch/arm/boot/dts/nxp/imx/imx6-logicpd-som.dtsi index f452764..547fb14 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6-logicpd-som.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6-logicpd-som.dtsi
@@ -36,12 +36,8 @@ &clks { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c3 {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-icore.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-icore.dtsi index 58ecdb8..9975b6e 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-icore.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-icore.dtsi
@@ -172,12 +172,8 @@ eth_phy: ethernet-phy@0 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c1 {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi index 6f3becd..aa9a442 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi
@@ -102,12 +102,8 @@ ethphy: ethernet-phy@0 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c1 {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi index f2140dd..85e278e 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi
@@ -73,12 +73,8 @@ ethphy: ethernet-phy@3 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "disabled"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c3 {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-skov-cpu.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-skov-cpu.dtsi index 131a342..c93dbc5 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-skov-cpu.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-skov-cpu.dtsi
@@ -260,14 +260,10 @@ fixed-link { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; #address-cells = <1>; #size-cells = <0>; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c3 {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-tx6.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-tx6.dtsi index d29adfe..57297d6 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-tx6.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-tx6.dtsi
@@ -252,13 +252,9 @@ etnphy: ethernet-phy@0 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; fsl,no-blockmark-swap; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c1 {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts index 40d530c..2a6bb5f 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts
@@ -133,12 +133,8 @@ ethphy1: ethernet-phy@1 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c1 {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-isiot.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-isiot.dtsi index 776f6f7..e34c8cb 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-isiot.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-isiot.dtsi
@@ -101,12 +101,8 @@ ethphy0: ethernet-phy@0 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "disabled"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c1 {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-phytec-phycore-som.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-phytec-phycore-som.dtsi index 27e4d2a..a3ea1b2 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-phytec-phycore-som.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-phytec-phycore-som.dtsi
@@ -63,12 +63,8 @@ ethphy1: ethernet-phy@1 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "disabled"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c1 {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-tx6ul.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-tx6ul.dtsi index dc53f92..1992dfb 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-tx6ul.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-tx6ul.dtsi
@@ -296,13 +296,9 @@ &fec2 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; fsl,no-blockmark-swap; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c2 {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-colibri.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-colibri.dtsi index eaed2cb..ec3c1e7 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ull-colibri.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ull-colibri.dtsi
@@ -160,15 +160,11 @@ &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; fsl,use-minimum-ecc; + nand-on-flash-bbt; + nand-ecc-mode = "hw"; + nand-ecc-strength = <8>; + nand-ecc-step-size = <512>; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - nand-ecc-mode = "hw"; - nand-ecc-strength = <8>; - nand-ecc-step-size = <512>; - }; }; /* I2C3_SDA/SCL on SODIMM 194/196 (e.g. RTC on carrier board) */
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-engicam-microgea.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-engicam-microgea.dtsi index 3dfd43b3..43518bf 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ull-engicam-microgea.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ull-engicam-microgea.dtsi
@@ -43,15 +43,11 @@ ethphy0: ethernet-phy@0 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-ecc-mode = "hw"; + nand-ecc-strength = <0>; + nand-ecc-step-size = <0>; + nand-on-flash-bbt; status = "okay"; - - nand@0 { - reg = <0>; - nand-ecc-mode = "hw"; - nand-ecc-strength = <0>; - nand-ecc-step-size = <0>; - nand-on-flash-bbt; - }; }; &iomuxc {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-myir-mys-6ulx.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-myir-mys-6ulx.dtsi index fc298f5..83b9de1 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ull-myir-mys-6ulx.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ull-myir-mys-6ulx.dtsi
@@ -60,12 +60,8 @@ ethphy0: ethernet-phy@0 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "disabled"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &uart1 {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ulz-bsh-smm-m2.dts b/arch/arm/boot/dts/nxp/imx/imx6ulz-bsh-smm-m2.dts index 8ec18ea..2d9f495 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ulz-bsh-smm-m2.dts +++ b/arch/arm/boot/dts/nxp/imx/imx6ulz-bsh-smm-m2.dts
@@ -25,12 +25,8 @@ usdhc2_pwrseq: usdhc2-pwrseq { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &snvs_poweroff {
diff --git a/arch/arm/boot/dts/nxp/imx/imx7-colibri.dtsi b/arch/arm/boot/dts/nxp/imx/imx7-colibri.dtsi index a41dc4e..8666dcd 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7-colibri.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx7-colibri.dtsi
@@ -375,14 +375,10 @@ &gpio7 { /* NAND on such SKUs */ &gpmi { fsl,use-minimum-ecc; + nand-ecc-mode = "hw"; + nand-on-flash-bbt; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; - - nand@0 { - reg = <0>; - nand-ecc-mode = "hw"; - nand-on-flash-bbt; - }; }; /* On-module Power I2C */
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index f75d75c..70d05f7 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig
@@ -279,7 +279,6 @@ CONFIG_TI_CPTS=y CONFIG_TI_KEYSTONE_NETCP=y CONFIG_TI_KEYSTONE_NETCP_ETHSS=y -CONFIG_TI_PRUSS=m CONFIG_TI_PRUETH=m CONFIG_XILINX_EMACLITE=y CONFIG_SFP=m
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index f2e8d4f..5afb725 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -154,6 +154,7 @@ STABS_DEBUG DWARF_DEBUG + MODINFO ARM_DETAILS ARM_ASSERTS
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index d592a20..c07843c 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S
@@ -153,6 +153,7 @@ STABS_DEBUG DWARF_DEBUG + MODINFO ARM_DETAILS ARM_ASSERTS
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 38dba5f..9ea19b7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig
@@ -252,6 +252,7 @@ select HAVE_RSEQ select HAVE_RUST if RUSTC_SUPPORTS_ARM64 select HAVE_STACKPROTECTOR + select HAVE_STATIC_CALL if CFI select HAVE_SYSCALL_TRACEPOINTS select HAVE_KPROBES select HAVE_KRETPROBES
diff --git a/arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi b/arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi index 9335977..a423020 100644 --- a/arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi
@@ -901,7 +901,7 @@ r_spi0: spi@7092000 { interrupts = <GIC_SPI 172 IRQ_TYPE_LEVEL_HIGH>; clocks = <&r_ccu CLK_BUS_R_SPI>, <&r_ccu CLK_R_SPI>; clock-names = "ahb", "mod"; - dmas = <&dma 53>, <&dma 53>; + dmas = <&mcu_dma 13>, <&mcu_dma 13>; dma-names = "rx", "tx"; resets = <&r_ccu RST_BUS_R_SPI>; status = "disabled";
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-librem5-r3.dts b/arch/arm64/boot/dts/freescale/imx8mq-librem5-r3.dts index 077c5cd..4533a84 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-librem5-r3.dts +++ b/arch/arm64/boot/dts/freescale/imx8mq-librem5-r3.dts
@@ -7,7 +7,7 @@ &a53_opp_table { opp-1000000000 { - opp-microvolt = <950000>; + opp-microvolt = <1000000>; }; };
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi index eee390c..f5d529c5 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi
@@ -880,9 +880,9 @@ buck1_reg: BUCK1 { regulator-max-microvolt = <1300000>; regulator-boot-on; regulator-ramp-delay = <1250>; - rohm,dvs-run-voltage = <880000>; - rohm,dvs-idle-voltage = <820000>; - rohm,dvs-suspend-voltage = <810000>; + rohm,dvs-run-voltage = <900000>; + rohm,dvs-idle-voltage = <850000>; + rohm,dvs-suspend-voltage = <850000>; regulator-always-on; }; @@ -892,8 +892,8 @@ buck2_reg: BUCK2 { regulator-max-microvolt = <1300000>; regulator-boot-on; regulator-ramp-delay = <1250>; - rohm,dvs-run-voltage = <950000>; - rohm,dvs-idle-voltage = <850000>; + rohm,dvs-run-voltage = <1000000>; + rohm,dvs-idle-voltage = <900000>; regulator-always-on; }; @@ -902,14 +902,14 @@ buck3_reg: BUCK3 { regulator-min-microvolt = <700000>; regulator-max-microvolt = <1300000>; regulator-boot-on; - rohm,dvs-run-voltage = <850000>; + rohm,dvs-run-voltage = <900000>; }; buck4_reg: BUCK4 { regulator-name = "buck4"; regulator-min-microvolt = <700000>; regulator-max-microvolt = <1300000>; - rohm,dvs-run-voltage = <930000>; + rohm,dvs-run-voltage = <1000000>; }; buck5_reg: BUCK5 { @@ -1448,13 +1448,3 @@ &wdog1 { fsl,ext-reset-output; status = "okay"; }; - -&a53_opp_table { - opp-1000000000 { - opp-microvolt = <850000>; - }; - - opp-1500000000 { - opp-microvolt = <950000>; - }; -};
diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 607962f..6a25e219 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
@@ -1632,7 +1632,7 @@ gpu: gpu@38000000 { <&clk IMX8MQ_GPU_PLL_OUT>, <&clk IMX8MQ_GPU_PLL>; assigned-clock-rates = <800000000>, <800000000>, - <800000000>, <800000000>, <0>; + <800000000>, <400000000>, <0>; power-domains = <&pgc_gpu>; };
diff --git a/arch/arm64/boot/dts/freescale/imx91-tqma9131.dtsi b/arch/arm64/boot/dts/freescale/imx91-tqma9131.dtsi index 5792952..c99d7bc 100644 --- a/arch/arm64/boot/dts/freescale/imx91-tqma9131.dtsi +++ b/arch/arm64/boot/dts/freescale/imx91-tqma9131.dtsi
@@ -272,20 +272,20 @@ pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp { /* enable SION for data and cmd pad due to ERR052021 */ pinctrl_usdhc1: usdhc1grp { fsl,pins = /* PD | FSEL 3 | DSE X5 */ - <MX91_PAD_SD1_CLK__USDHC1_CLK 0x5be>, + <MX91_PAD_SD1_CLK__USDHC1_CLK 0x59e>, /* HYS | FSEL 0 | no drive */ <MX91_PAD_SD1_STROBE__USDHC1_STROBE 0x1000>, /* HYS | FSEL 3 | X5 */ - <MX91_PAD_SD1_CMD__USDHC1_CMD 0x400011be>, + <MX91_PAD_SD1_CMD__USDHC1_CMD 0x4000139e>, /* HYS | FSEL 3 | X4 */ - <MX91_PAD_SD1_DATA0__USDHC1_DATA0 0x4000119e>, - <MX91_PAD_SD1_DATA1__USDHC1_DATA1 0x4000119e>, - <MX91_PAD_SD1_DATA2__USDHC1_DATA2 0x4000119e>, - <MX91_PAD_SD1_DATA3__USDHC1_DATA3 0x4000119e>, - <MX91_PAD_SD1_DATA4__USDHC1_DATA4 0x4000119e>, - <MX91_PAD_SD1_DATA5__USDHC1_DATA5 0x4000119e>, - <MX91_PAD_SD1_DATA6__USDHC1_DATA6 0x4000119e>, - <MX91_PAD_SD1_DATA7__USDHC1_DATA7 0x4000119e>; + <MX91_PAD_SD1_DATA0__USDHC1_DATA0 0x4000139e>, + <MX91_PAD_SD1_DATA1__USDHC1_DATA1 0x4000139e>, + <MX91_PAD_SD1_DATA2__USDHC1_DATA2 0x4000139e>, + <MX91_PAD_SD1_DATA3__USDHC1_DATA3 0x4000139e>, + <MX91_PAD_SD1_DATA4__USDHC1_DATA4 0x4000139e>, + <MX91_PAD_SD1_DATA5__USDHC1_DATA5 0x4000139e>, + <MX91_PAD_SD1_DATA6__USDHC1_DATA6 0x4000139e>, + <MX91_PAD_SD1_DATA7__USDHC1_DATA7 0x4000139e>; }; pinctrl_wdog: wdoggrp {
diff --git a/arch/arm64/boot/dts/freescale/imx93-9x9-qsb.dts b/arch/arm64/boot/dts/freescale/imx93-9x9-qsb.dts index 0852067..197c8f8 100644 --- a/arch/arm64/boot/dts/freescale/imx93-9x9-qsb.dts +++ b/arch/arm64/boot/dts/freescale/imx93-9x9-qsb.dts
@@ -507,6 +507,7 @@ &usdhc1 { pinctrl-2 = <&pinctrl_usdhc1_200mhz>; bus-width = <8>; non-removable; + fsl,tuning-step = <1>; status = "okay"; }; @@ -519,6 +520,7 @@ &usdhc2 { vmmc-supply = <®_usdhc2_vmmc>; bus-width = <4>; no-mmc; + fsl,tuning-step = <1>; status = "okay"; };
diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi b/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi index 3a23e2e..ce34a29 100644 --- a/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi
@@ -271,21 +271,21 @@ MX93_PAD_SD2_RESET_B__GPIO3_IO07 0x106 /* enable SION for data and cmd pad due to ERR052021 */ pinctrl_usdhc1: usdhc1grp { fsl,pins = < - /* PD | FSEL 3 | DSE X5 */ - MX93_PAD_SD1_CLK__USDHC1_CLK 0x5be + /* PD | FSEL 3 | DSE X4 */ + MX93_PAD_SD1_CLK__USDHC1_CLK 0x59e /* HYS | FSEL 0 | no drive */ MX93_PAD_SD1_STROBE__USDHC1_STROBE 0x1000 - /* HYS | FSEL 3 | X5 */ - MX93_PAD_SD1_CMD__USDHC1_CMD 0x400011be - /* HYS | FSEL 3 | X4 */ - MX93_PAD_SD1_DATA0__USDHC1_DATA0 0x4000119e - MX93_PAD_SD1_DATA1__USDHC1_DATA1 0x4000119e - MX93_PAD_SD1_DATA2__USDHC1_DATA2 0x4000119e - MX93_PAD_SD1_DATA3__USDHC1_DATA3 0x4000119e - MX93_PAD_SD1_DATA4__USDHC1_DATA4 0x4000119e - MX93_PAD_SD1_DATA5__USDHC1_DATA5 0x4000119e - MX93_PAD_SD1_DATA6__USDHC1_DATA6 0x4000119e - MX93_PAD_SD1_DATA7__USDHC1_DATA7 0x4000119e + /* HYS | PU | FSEL 3 | DSE X4 */ + MX93_PAD_SD1_CMD__USDHC1_CMD 0x4000139e + /* HYS | PU | FSEL 3 | DSE X4 */ + MX93_PAD_SD1_DATA0__USDHC1_DATA0 0x4000139e + MX93_PAD_SD1_DATA1__USDHC1_DATA1 0x4000139e + MX93_PAD_SD1_DATA2__USDHC1_DATA2 0x4000139e + MX93_PAD_SD1_DATA3__USDHC1_DATA3 0x4000139e + MX93_PAD_SD1_DATA4__USDHC1_DATA4 0x4000139e + MX93_PAD_SD1_DATA5__USDHC1_DATA5 0x4000139e + MX93_PAD_SD1_DATA6__USDHC1_DATA6 0x4000139e + MX93_PAD_SD1_DATA7__USDHC1_DATA7 0x4000139e >; };
diff --git a/arch/arm64/boot/dts/hisilicon/hi3798cv200-poplar.dts b/arch/arm64/boot/dts/hisilicon/hi3798cv200-poplar.dts index 7d370da..579d55d 100644 --- a/arch/arm64/boot/dts/hisilicon/hi3798cv200-poplar.dts +++ b/arch/arm64/boot/dts/hisilicon/hi3798cv200-poplar.dts
@@ -179,7 +179,7 @@ &ohci { }; &pcie { - reset-gpios = <&gpio4 4 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio4 4 GPIO_ACTIVE_LOW>; vpcie-supply = <®_pcie>; status = "okay"; };
diff --git a/arch/arm64/boot/dts/hisilicon/hi3798cv200.dtsi b/arch/arm64/boot/dts/hisilicon/hi3798cv200.dtsi index f6bc001..2f4ad5d 100644 --- a/arch/arm64/boot/dts/hisilicon/hi3798cv200.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi3798cv200.dtsi
@@ -122,6 +122,7 @@ soc: soc@f0000000 { #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x0 0xf0000000 0x10000000>; + dma-ranges = <0x0 0x0 0x0 0x40000000>; crg: clock-reset-controller@8a22000 { compatible = "hisilicon,hi3798cv200-crg", "syscon", "simple-mfd";
diff --git a/arch/arm64/boot/dts/qcom/agatti.dtsi b/arch/arm64/boot/dts/qcom/agatti.dtsi index 76b93b7..893cb06 100644 --- a/arch/arm64/boot/dts/qcom/agatti.dtsi +++ b/arch/arm64/boot/dts/qcom/agatti.dtsi
@@ -1669,8 +1669,7 @@ gpu: gpu@5900000 { &bimc SLAVE_EBI1 RPM_ALWAYS_TAG>; interconnect-names = "gfx-mem"; - iommus = <&adreno_smmu 0 1>, - <&adreno_smmu 2 0>; + iommus = <&adreno_smmu 0 1>; operating-points-v2 = <&gpu_opp_table>; power-domains = <&rpmpd QCM2290_VDDCX>; qcom,gmu = <&gmu_wrapper>; @@ -1951,8 +1950,7 @@ mdss: display-subsystem@5e00000 { power-domains = <&dispcc MDSS_GDSC>; - iommus = <&apps_smmu 0x420 0x2>, - <&apps_smmu 0x421 0x0>; + iommus = <&apps_smmu 0x420 0x2>; interconnects = <&mmrt_virt MASTER_MDP0 RPM_ALWAYS_TAG &bimc SLAVE_EBI1 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG @@ -2436,10 +2434,7 @@ venus: video-codec@5a00000 { memory-region = <&pil_video_mem>; iommus = <&apps_smmu 0x860 0x0>, - <&apps_smmu 0x880 0x0>, - <&apps_smmu 0x861 0x04>, - <&apps_smmu 0x863 0x0>, - <&apps_smmu 0x804 0xe0>; + <&apps_smmu 0x880 0x0>; interconnects = <&mmnrt_virt MASTER_VIDEO_P0 RPM_ALWAYS_TAG &bimc SLAVE_EBI1 RPM_ALWAYS_TAG>,
diff --git a/arch/arm64/boot/dts/qcom/hamoa.dtsi b/arch/arm64/boot/dts/qcom/hamoa.dtsi index db65c39..4b0784a 100644 --- a/arch/arm64/boot/dts/qcom/hamoa.dtsi +++ b/arch/arm64/boot/dts/qcom/hamoa.dtsi
@@ -269,7 +269,7 @@ cluster_c4: cpu-sleep-0 { idle-state-name = "ret"; arm,psci-suspend-param = <0x00000004>; entry-latency-us = <180>; - exit-latency-us = <500>; + exit-latency-us = <320>; min-residency-us = <600>; }; };
diff --git a/arch/arm64/boot/dts/qcom/monaco.dtsi b/arch/arm64/boot/dts/qcom/monaco.dtsi index 5d2df43..0cb9fd1 100644 --- a/arch/arm64/boot/dts/qcom/monaco.dtsi +++ b/arch/arm64/boot/dts/qcom/monaco.dtsi
@@ -765,6 +765,11 @@ smem_mem: smem@90900000 { hwlocks = <&tcsr_mutex 3>; }; + gunyah_md_mem: gunyah-md-region@91a80000 { + reg = <0x0 0x91a80000 0x0 0x80000>; + no-map; + }; + lpass_machine_learning_mem: lpass-machine-learning-region@93b00000 { reg = <0x0 0x93b00000 0x0 0xf00000>; no-map; @@ -6414,12 +6419,12 @@ qup_uart10_cts: qup-uart10-cts-state { }; qup_uart10_rts: qup-uart10-rts-state { - pins = "gpio84"; + pins = "gpio85"; function = "qup1_se2"; }; qup_uart10_tx: qup-uart10-tx-state { - pins = "gpio85"; + pins = "gpio86"; function = "qup1_se2"; };
diff --git a/arch/arm64/boot/dts/qcom/qcm6490-idp.dts b/arch/arm64/boot/dts/qcom/qcm6490-idp.dts index 089a027..b2f00e1 100644 --- a/arch/arm64/boot/dts/qcom/qcm6490-idp.dts +++ b/arch/arm64/boot/dts/qcom/qcm6490-idp.dts
@@ -177,7 +177,7 @@ wcd9370: audio-codec-0 { pinctrl-0 = <&wcd_default>; pinctrl-names = "default"; - reset-gpios = <&tlmm 83 GPIO_ACTIVE_HIGH>; + reset-gpios = <&tlmm 83 GPIO_ACTIVE_LOW>; vdd-buck-supply = <&vreg_l17b_1p7>; vdd-rxtx-supply = <&vreg_l18b_1p8>;
diff --git a/arch/arm64/boot/dts/qcom/x1-asus-zenbook-a14.dtsi b/arch/arm64/boot/dts/qcom/x1-asus-zenbook-a14.dtsi index 8e5c557..0a382cc 100644 --- a/arch/arm64/boot/dts/qcom/x1-asus-zenbook-a14.dtsi +++ b/arch/arm64/boot/dts/qcom/x1-asus-zenbook-a14.dtsi
@@ -1032,9 +1032,6 @@ &mdss_dp3_phy { }; &pcie4 { - perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; - pinctrl-0 = <&pcie4_default>; pinctrl-names = "default"; @@ -1048,10 +1045,12 @@ &pcie4_phy { status = "okay"; }; -&pcie6a { - perst-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +&pcie4_port0 { + reset-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; +}; +&pcie6a { vddpe-3v3-supply = <&vreg_nvme>; pinctrl-0 = <&pcie6a_default>; @@ -1067,6 +1066,11 @@ &pcie6a_phy { status = "okay"; }; +&pcie6a_port0 { + reset-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +}; + &pm8550_gpios { rtmr0_default: rtmr0-reset-n-active-state { pins = "gpio10";
diff --git a/arch/arm64/boot/dts/qcom/x1-crd.dtsi b/arch/arm64/boot/dts/qcom/x1-crd.dtsi index ded96fb..2fbf9ec 100644 --- a/arch/arm64/boot/dts/qcom/x1-crd.dtsi +++ b/arch/arm64/boot/dts/qcom/x1-crd.dtsi
@@ -1216,15 +1216,17 @@ &mdss_dp3_phy { }; &pcie4 { - perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; - pinctrl-0 = <&pcie4_default>; pinctrl-names = "default"; status = "okay"; }; +&pcie4_port0 { + reset-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; +}; + &pcie4_phy { vdda-phy-supply = <&vreg_l3i_0p8>; vdda-pll-supply = <&vreg_l3e_1p2>; @@ -1233,9 +1235,6 @@ &pcie4_phy { }; &pcie5 { - perst-gpios = <&tlmm 149 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 151 GPIO_ACTIVE_LOW>; - vddpe-3v3-supply = <&vreg_wwan>; pinctrl-0 = <&pcie5_default>; @@ -1251,10 +1250,12 @@ &pcie5_phy { status = "okay"; }; -&pcie6a { - perst-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +&pcie5_port0 { + reset-gpios = <&tlmm 149 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 151 GPIO_ACTIVE_LOW>; +}; +&pcie6a { vddpe-3v3-supply = <&vreg_nvme>; pinctrl-names = "default"; @@ -1270,6 +1271,11 @@ &pcie6a_phy { status = "okay"; }; +&pcie6a_port0 { + reset-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +}; + &pm8550_gpios { kypd_vol_up_n: kypd-vol-up-n-state { pins = "gpio6";
diff --git a/arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi b/arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi index bf04a12..217ca8c 100644 --- a/arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi +++ b/arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi
@@ -1081,9 +1081,6 @@ &mdss_dp3_phy { }; &pcie4 { - perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; - pinctrl-0 = <&pcie4_default>; pinctrl-names = "default"; @@ -1098,6 +1095,9 @@ &pcie4_phy { }; &pcie4_port0 { + reset-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; + wifi@0 { compatible = "pci17cb,1107"; reg = <0x10000 0x0 0x0 0x0 0x0>; @@ -1115,9 +1115,6 @@ wifi@0 { }; &pcie6a { - perst-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; - vddpe-3v3-supply = <&vreg_nvme>; pinctrl-0 = <&pcie6a_default>; @@ -1126,6 +1123,11 @@ &pcie6a { status = "okay"; }; +&pcie6a_port0 { + reset-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +}; + &pcie6a_phy { vdda-phy-supply = <&vreg_l1d_0p8>; vdda-pll-supply = <&vreg_l2j_1p2>;
diff --git a/arch/arm64/boot/dts/qcom/x1-hp-omnibook-x14.dtsi b/arch/arm64/boot/dts/qcom/x1-hp-omnibook-x14.dtsi index a407543..4106394 100644 --- a/arch/arm64/boot/dts/qcom/x1-hp-omnibook-x14.dtsi +++ b/arch/arm64/boot/dts/qcom/x1-hp-omnibook-x14.dtsi
@@ -1065,9 +1065,6 @@ &mdss_dp3_phy { }; &pcie4 { - perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; - pinctrl-0 = <&pcie4_default>; pinctrl-names = "default"; @@ -1082,6 +1079,9 @@ &pcie4_phy { }; &pcie4_port0 { + reset-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; + wifi@0 { compatible = "pci17cb,1107"; reg = <0x10000 0x0 0x0 0x0 0x0>; @@ -1099,9 +1099,6 @@ wifi@0 { }; &pcie6a { - perst-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; - vddpe-3v3-supply = <&vreg_nvme>; pinctrl-0 = <&pcie6a_default>; @@ -1110,6 +1107,11 @@ &pcie6a { status = "okay"; }; +&pcie6a_port0 { + reset-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +}; + &pcie6a_phy { vdda-phy-supply = <&vreg_l1d_0p8>; vdda-pll-supply = <&vreg_l2j_1p2>;
diff --git a/arch/arm64/boot/dts/qcom/x1-microsoft-denali.dtsi b/arch/arm64/boot/dts/qcom/x1-microsoft-denali.dtsi index d77be02..ba6b7b5 100644 --- a/arch/arm64/boot/dts/qcom/x1-microsoft-denali.dtsi +++ b/arch/arm64/boot/dts/qcom/x1-microsoft-denali.dtsi
@@ -964,9 +964,6 @@ wifi@0 { }; &pcie6a { - perst-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; - vddpe-3v3-supply = <&vreg_nvme>; pinctrl-0 = <&pcie6a_default>; @@ -982,6 +979,11 @@ &pcie6a_phy { status = "okay"; }; +&pcie6a_port0 { + reset-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +}; + &pm8550_gpios { rtmr0_default: rtmr0-reset-n-active-state { pins = "gpio10";
diff --git a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts index d6472e5..d7938d3 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts
@@ -1126,9 +1126,6 @@ &mdss_dp3_phy { }; &pcie4 { - perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; - pinctrl-0 = <&pcie4_default>; pinctrl-names = "default"; @@ -1143,6 +1140,9 @@ &pcie4_phy { }; &pcie4_port0 { + reset-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; + wifi@0 { compatible = "pci17cb,1107"; reg = <0x10000 0x0 0x0 0x0 0x0>;
diff --git a/arch/arm64/boot/dts/qcom/x1e80100-medion-sprchrgd-14-s1.dts b/arch/arm64/boot/dts/qcom/x1e80100-medion-sprchrgd-14-s1.dts index 20a33e6..eec5f2f 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-medion-sprchrgd-14-s1.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-medion-sprchrgd-14-s1.dts
@@ -1033,9 +1033,6 @@ &mdss_dp3_phy { }; &pcie4 { - perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; - pinctrl-0 = <&pcie4_default>; pinctrl-names = "default"; @@ -1050,6 +1047,9 @@ &pcie4_phy { }; &pcie4_port0 { + reset-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; + wifi@0 { compatible = "pci17cb,1107"; reg = <0x10000 0x0 0x0 0x0 0x0>; @@ -1067,10 +1067,6 @@ wifi@0 { }; &pcie6a { - perst-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; - - wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; - vddpe-3v3-supply = <&vreg_nvme>; pinctrl-0 = <&pcie6a_default>; @@ -1086,6 +1082,11 @@ &pcie6a_phy { status = "okay"; }; +&pcie6a_port0 { + reset-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +}; + &pm8550_gpios { rtmr0_default: rtmr0-reset-n-active-state { pins = "gpio10";
diff --git a/arch/arm64/boot/dts/qcom/x1p42100-lenovo-thinkbook-16.dts b/arch/arm64/boot/dts/qcom/x1p42100-lenovo-thinkbook-16.dts index 1e5eb8c..06747b5 100644 --- a/arch/arm64/boot/dts/qcom/x1p42100-lenovo-thinkbook-16.dts +++ b/arch/arm64/boot/dts/qcom/x1p42100-lenovo-thinkbook-16.dts
@@ -1131,9 +1131,6 @@ &mdss_dp3_phy { }; &pcie4 { - perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; - pinctrl-0 = <&pcie4_default>; pinctrl-names = "default"; @@ -1148,6 +1145,9 @@ &pcie4_phy { }; &pcie4_port0 { + reset-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; + wifi@0 { compatible = "pci17cb,1107"; reg = <0x10000 0x0 0x0 0x0 0x0>; @@ -1165,9 +1165,6 @@ wifi@0 { }; &pcie6a { - perst-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; - vddpe-3v3-supply = <&vreg_nvme>; pinctrl-0 = <&pcie6a_default>; @@ -1183,6 +1180,11 @@ &pcie6a_phy { status = "okay"; }; +&pcie6a_port0 { + reset-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +}; + &pm8550_pwm { status = "okay"; };
diff --git a/arch/arm64/boot/dts/renesas/r8a779g3-sparrow-hawk.dts b/arch/arm64/boot/dts/renesas/r8a779g3-sparrow-hawk.dts index ff07d98..812b133 100644 --- a/arch/arm64/boot/dts/renesas/r8a779g3-sparrow-hawk.dts +++ b/arch/arm64/boot/dts/renesas/r8a779g3-sparrow-hawk.dts
@@ -118,6 +118,17 @@ memory@600000000 { reg = <0x6 0x00000000 0x1 0x00000000>; }; + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + tfa@40000000 { + reg = <0x0 0x40000000 0x0 0x8000000>; + no-map; + }; + }; + /* Page 27 / DSI to Display */ dp-con { compatible = "dp-connector";
diff --git a/arch/arm64/boot/dts/renesas/r8a78000.dtsi b/arch/arm64/boot/dts/renesas/r8a78000.dtsi index 4c97298..3e1c989 100644 --- a/arch/arm64/boot/dts/renesas/r8a78000.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a78000.dtsi
@@ -698,7 +698,7 @@ scif0: serial@c0700000 { compatible = "renesas,scif-r8a78000", "renesas,rcar-gen5-scif", "renesas,scif"; reg = <0 0xc0700000 0 0x40>; - interrupts = <GIC_SPI 4074 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_ESPI 10 IRQ_TYPE_LEVEL_HIGH>; clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; @@ -708,7 +708,7 @@ scif1: serial@c0704000 { compatible = "renesas,scif-r8a78000", "renesas,rcar-gen5-scif", "renesas,scif"; reg = <0 0xc0704000 0 0x40>; - interrupts = <GIC_SPI 4075 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_ESPI 11 IRQ_TYPE_LEVEL_HIGH>; clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; @@ -718,7 +718,7 @@ scif3: serial@c0708000 { compatible = "renesas,scif-r8a78000", "renesas,rcar-gen5-scif", "renesas,scif"; reg = <0 0xc0708000 0 0x40>; - interrupts = <GIC_SPI 4076 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_ESPI 12 IRQ_TYPE_LEVEL_HIGH>; clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; @@ -728,7 +728,7 @@ scif4: serial@c070c000 { compatible = "renesas,scif-r8a78000", "renesas,rcar-gen5-scif", "renesas,scif"; reg = <0 0xc070c000 0 0x40>; - interrupts = <GIC_SPI 4077 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_ESPI 13 IRQ_TYPE_LEVEL_HIGH>; clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; @@ -738,7 +738,7 @@ hscif0: serial@c0710000 { compatible = "renesas,hscif-r8a78000", "renesas,rcar-gen5-hscif", "renesas,hscif"; reg = <0 0xc0710000 0 0x60>; - interrupts = <GIC_SPI 4078 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_ESPI 14 IRQ_TYPE_LEVEL_HIGH>; clocks = <&dummy_clk_sgasyncd4>, <&dummy_clk_sgasyncd4>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; @@ -748,7 +748,7 @@ hscif1: serial@c0714000 { compatible = "renesas,hscif-r8a78000", "renesas,rcar-gen5-hscif", "renesas,hscif"; reg = <0 0xc0714000 0 0x60>; - interrupts = <GIC_SPI 4079 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_ESPI 15 IRQ_TYPE_LEVEL_HIGH>; clocks = <&dummy_clk_sgasyncd4>, <&dummy_clk_sgasyncd4>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; @@ -758,7 +758,7 @@ hscif2: serial@c0718000 { compatible = "renesas,hscif-r8a78000", "renesas,rcar-gen5-hscif", "renesas,hscif"; reg = <0 0xc0718000 0 0x60>; - interrupts = <GIC_SPI 4080 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_ESPI 16 IRQ_TYPE_LEVEL_HIGH>; clocks = <&dummy_clk_sgasyncd4>, <&dummy_clk_sgasyncd4>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; @@ -768,7 +768,7 @@ hscif3: serial@c071c000 { compatible = "renesas,hscif-r8a78000", "renesas,rcar-gen5-hscif", "renesas,hscif"; reg = <0 0xc071c000 0 0x60>; - interrupts = <GIC_SPI 4081 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_ESPI 17 IRQ_TYPE_LEVEL_HIGH>; clocks = <&dummy_clk_sgasyncd4>, <&dummy_clk_sgasyncd4>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled";
diff --git a/arch/arm64/boot/dts/renesas/r9a09g057.dtsi b/arch/arm64/boot/dts/renesas/r9a09g057.dtsi index 80cba9f..504c283 100644 --- a/arch/arm64/boot/dts/renesas/r9a09g057.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a09g057.dtsi
@@ -581,16 +581,6 @@ ostm7: timer@12c03000 { status = "disabled"; }; - wdt0: watchdog@11c00400 { - compatible = "renesas,r9a09g057-wdt"; - reg = <0 0x11c00400 0 0x400>; - clocks = <&cpg CPG_MOD 0x4b>, <&cpg CPG_MOD 0x4c>; - clock-names = "pclk", "oscclk"; - resets = <&cpg 0x75>; - power-domains = <&cpg>; - status = "disabled"; - }; - wdt1: watchdog@14400000 { compatible = "renesas,r9a09g057-wdt"; reg = <0 0x14400000 0 0x400>; @@ -601,26 +591,6 @@ wdt1: watchdog@14400000 { status = "disabled"; }; - wdt2: watchdog@13000000 { - compatible = "renesas,r9a09g057-wdt"; - reg = <0 0x13000000 0 0x400>; - clocks = <&cpg CPG_MOD 0x4f>, <&cpg CPG_MOD 0x50>; - clock-names = "pclk", "oscclk"; - resets = <&cpg 0x77>; - power-domains = <&cpg>; - status = "disabled"; - }; - - wdt3: watchdog@13000400 { - compatible = "renesas,r9a09g057-wdt"; - reg = <0 0x13000400 0 0x400>; - clocks = <&cpg CPG_MOD 0x51>, <&cpg CPG_MOD 0x52>; - clock-names = "pclk", "oscclk"; - resets = <&cpg 0x78>; - power-domains = <&cpg>; - status = "disabled"; - }; - rtc: rtc@11c00800 { compatible = "renesas,r9a09g057-rtca3", "renesas,rz-rtca3"; reg = <0 0x11c00800 0 0x400>;
diff --git a/arch/arm64/boot/dts/renesas/r9a09g077.dtsi b/arch/arm64/boot/dts/renesas/r9a09g077.dtsi index 14d7fb6..9d0b4d8 100644 --- a/arch/arm64/boot/dts/renesas/r9a09g077.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a09g077.dtsi
@@ -974,8 +974,8 @@ mii_conv3: mii-conv@3 { cpg: clock-controller@80280000 { compatible = "renesas,r9a09g077-cpg-mssr"; - reg = <0 0x80280000 0 0x1000>, - <0 0x81280000 0 0x9000>; + reg = <0 0x80280000 0 0x10000>, + <0 0x81280000 0 0x10000>; clocks = <&extal_clk>; clock-names = "extal"; #clock-cells = <2>;
diff --git a/arch/arm64/boot/dts/renesas/r9a09g087.dtsi b/arch/arm64/boot/dts/renesas/r9a09g087.dtsi index 4a13395..d407c48 100644 --- a/arch/arm64/boot/dts/renesas/r9a09g087.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a09g087.dtsi
@@ -977,8 +977,8 @@ mii_conv3: mii-conv@3 { cpg: clock-controller@80280000 { compatible = "renesas,r9a09g087-cpg-mssr"; - reg = <0 0x80280000 0 0x1000>, - <0 0x81280000 0 0x9000>; + reg = <0 0x80280000 0 0x10000>, + <0 0x81280000 0 0x10000>; clocks = <&extal_clk>; clock-names = "extal"; #clock-cells = <2>;
diff --git a/arch/arm64/boot/dts/renesas/rzg3s-smarc-som.dtsi b/arch/arm64/boot/dts/renesas/rzg3s-smarc-som.dtsi index 982f17a..b45acfe 100644 --- a/arch/arm64/boot/dts/renesas/rzg3s-smarc-som.dtsi +++ b/arch/arm64/boot/dts/renesas/rzg3s-smarc-som.dtsi
@@ -162,7 +162,7 @@ versa3: clock-generator@68 { <100000000>; renesas,settings = [ 80 00 11 19 4c 42 dc 2f 06 7d 20 1a 5f 1e f2 27 - 00 40 00 00 00 00 00 00 06 0c 19 02 3f f0 90 86 + 00 40 00 00 00 00 00 00 06 0c 19 02 3b f0 90 86 a0 80 30 30 9c ]; };
diff --git a/arch/arm64/boot/dts/renesas/rzt2h-n2h-evk-common.dtsi b/arch/arm64/boot/dts/renesas/rzt2h-n2h-evk-common.dtsi index 510399f..f87c249 100644 --- a/arch/arm64/boot/dts/renesas/rzt2h-n2h-evk-common.dtsi +++ b/arch/arm64/boot/dts/renesas/rzt2h-n2h-evk-common.dtsi
@@ -53,6 +53,7 @@ vqmmc_sdhi0: regulator-vqmmc-sdhi0 { regulator-max-microvolt = <3300000>; gpios-states = <0>; states = <3300000 0>, <1800000 1>; + regulator-ramp-delay = <60>; }; #endif
diff --git a/arch/arm64/boot/dts/renesas/rzv2-evk-cn15-sd.dtso b/arch/arm64/boot/dts/renesas/rzv2-evk-cn15-sd.dtso index 0af1e0a..fc53c1a 100644 --- a/arch/arm64/boot/dts/renesas/rzv2-evk-cn15-sd.dtso +++ b/arch/arm64/boot/dts/renesas/rzv2-evk-cn15-sd.dtso
@@ -25,6 +25,7 @@ vqmmc_sdhi0: regulator-vqmmc-sdhi0 { regulator-max-microvolt = <3300000>; gpios-states = <0>; states = <3300000 0>, <1800000 1>; + regulator-ramp-delay = <60>; }; };
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts index 753d513..ae937a3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts
@@ -879,12 +879,6 @@ vcc5v0_host_en_pin: vcc5v0-host-en-pin { }; }; - wifi { - wifi_host_wake_l: wifi-host-wake-l { - rockchip,pins = <0 RK_PA3 RK_FUNC_GPIO &pcfg_pull_none>; - }; - }; - wireless-bluetooth { bt_wake_pin: bt-wake-pin { rockchip,pins = <2 RK_PD3 RK_FUNC_GPIO &pcfg_pull_none>; @@ -942,19 +936,7 @@ &sdio0 { pinctrl-names = "default"; pinctrl-0 = <&sdio0_bus4 &sdio0_cmd &sdio0_clk>; sd-uhs-sdr104; - #address-cells = <1>; - #size-cells = <0>; status = "okay"; - - brcmf: wifi@1 { - compatible = "brcm,bcm4329-fmac"; - reg = <1>; - interrupt-parent = <&gpio0>; - interrupts = <RK_PA3 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "host-wake"; - pinctrl-names = "default"; - pinctrl-0 = <&wifi_host_wake_l>; - }; }; &sdhci {
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index cb87c8f..00530b2 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -76,19 +76,24 @@ static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_aes_ctx rk; + struct crypto_aes_ctx *rk; int err; - err = aes_expandkey(&rk, in_key, key_len); + rk = kmalloc(sizeof(*rk), GFP_KERNEL); + if (!rk) + return -ENOMEM; + + err = aes_expandkey(rk, in_key, key_len); if (err) - return err; + goto out; ctx->rounds = 6 + key_len / 4; scoped_ksimd() - aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds); - - return 0; + aesbs_convert_key(ctx->rk, rk->key_enc, ctx->rounds); +out: + kfree_sensitive(rk); + return err; } static int __ecb_crypt(struct skcipher_request *req, @@ -133,22 +138,26 @@ static int aesbs_cbc_ctr_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_aes_ctx rk; + struct crypto_aes_ctx *rk; int err; - err = aes_expandkey(&rk, in_key, key_len); + rk = kmalloc(sizeof(*rk), GFP_KERNEL); + if (!rk) + return -ENOMEM; + + err = aes_expandkey(rk, in_key, key_len); if (err) - return err; + goto out; ctx->key.rounds = 6 + key_len / 4; - memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc)); + memcpy(ctx->enc, rk->key_enc, sizeof(ctx->enc)); scoped_ksimd() - aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); - memzero_explicit(&rk, sizeof(rk)); - - return 0; + aesbs_convert_key(ctx->key.rk, rk->key_enc, ctx->key.rounds); +out: + kfree_sensitive(rk); + return err; } static int cbc_encrypt(struct skcipher_request *req)
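The two setkey conversions above follow the same pattern: move the expanded-key scratch state off the kernel stack onto the heap, and scrub it with kfree_sensitive() on every exit path so key material does not linger in stack or freed memory. A minimal sketch of the pattern, with a hypothetical key_scratch structure standing in for struct crypto_aes_ctx:

#include <linux/slab.h>

struct key_scratch {
	u8 round_keys[240];	/* hypothetical key-schedule buffer */
};

static int expand_key_safely(const u8 *in_key, unsigned int key_len)
{
	struct key_scratch *s;
	int err = 0;

	/* heap allocation keeps the key schedule out of the stack frame */
	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	/* ... expand in_key into s->round_keys, setting err on failure ... */

	/* kfree_sensitive() zeroes the buffer before freeing it */
	kfree_sensitive(s);
	return err;
}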
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index d7a5407..6cf3cd6 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h
@@ -91,8 +91,9 @@ __XCHG_GEN(_mb) #define __xchg_wrapper(sfx, ptr, x) \ ({ \ __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __arch_xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \ + __ret = (__force __typeof__(*(ptr))) \ + __arch_xchg##sfx((__force unsigned long)(x), (ptr), \ + sizeof(*(ptr))); \ __ret; \ }) @@ -175,9 +176,10 @@ __CMPXCHG_GEN(_mb) #define __cmpxchg_wrapper(sfx, ptr, o, n) \ ({ \ __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __cmpxchg##sfx((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr))); \ + __ret = (__force __typeof__(*(ptr))) \ + __cmpxchg##sfx((ptr), (__force unsigned long)(o), \ + (__force unsigned long)(n), \ + sizeof(*(ptr))); \ __ret; \ })
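The __force annotations above are for sparse: they mark a cast across bitwise or address-space-restricted types as intentional, so the checker stays quiet when the xchg/cmpxchg wrappers are used on such types. A minimal sketch of the same idea, assuming a deliberate raw reinterpretation of a little-endian value:

#include <linux/types.h>

static u32 le32_raw_bits(__le32 v)
{
	/* deliberately reinterpret the bits without byte-swapping */
	return (__force u32)v;
}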
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 83e03ab..8cbd1e9 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h
@@ -264,19 +264,33 @@ __iowrite64_copy(void __iomem *to, const void *from, size_t count) typedef int (*ioremap_prot_hook_t)(phys_addr_t phys_addr, size_t size, pgprot_t *prot); int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook); +void __iomem *__ioremap_prot(phys_addr_t phys, size_t size, pgprot_t prot); +static inline void __iomem *ioremap_prot(phys_addr_t phys, size_t size, + pgprot_t user_prot) +{ + pgprot_t prot; + ptdesc_t user_prot_val = pgprot_val(user_prot); + + if (WARN_ON_ONCE(!(user_prot_val & PTE_USER))) + return NULL; + + prot = __pgprot_modify(PAGE_KERNEL, PTE_ATTRINDX_MASK, + user_prot_val & PTE_ATTRINDX_MASK); + return __ioremap_prot(phys, size, prot); +} #define ioremap_prot ioremap_prot -#define _PAGE_IOREMAP PROT_DEVICE_nGnRE - +#define ioremap(addr, size) \ + __ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_wc(addr, size) \ - ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC)) + __ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC)) #define ioremap_np(addr, size) \ - ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE)) + __ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE)) #define ioremap_encrypted(addr, size) \ - ioremap_prot((addr), (size), PAGE_KERNEL) + __ioremap_prot((addr), (size), PAGE_KERNEL) /* * io{read,write}{16,32,64}be() macros @@ -297,7 +311,7 @@ static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size) if (pfn_is_map_memory(__phys_to_pfn(addr))) return (void __iomem *)__phys_to_virt(addr); - return ioremap_prot(addr, size, __pgprot(PROT_NORMAL)); + return __ioremap_prot(addr, size, __pgprot(PROT_NORMAL)); } /*
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5d5a3bb..70cb9cf 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h
@@ -784,6 +784,9 @@ struct kvm_host_data { /* Number of debug breakpoints/watchpoints for this CPU (minus 1) */ unsigned int debug_brps; unsigned int debug_wrps; + + /* Last vgic_irq part of the AP list recorded in an LR */ + struct vgic_irq *last_lr_irq; }; struct kvm_host_psci_config { @@ -1616,7 +1619,8 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1PIE, IMP)) #define kvm_has_s1poe(k) \ - (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP)) + (system_supports_poe() && \ + kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP)) #define kvm_has_ras(k) \ (kvm_has_feat((k), ID_AA64PFR0_EL1, RAS, IMP))
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 905c658..091544e 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h
@@ -397,6 +397,8 @@ int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu); int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu); void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val); +u16 get_asid_by_regime(struct kvm_vcpu *vcpu, enum trans_regime regime); + #define vncr_fixmap(c) \ ({ \ u32 __c = (c); \
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index d27e887..f560e64 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -50,11 +50,11 @@ #define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) -#define _PAGE_KERNEL (PROT_NORMAL) -#define _PAGE_KERNEL_RO ((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) -#define _PAGE_KERNEL_ROX ((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) -#define _PAGE_KERNEL_EXEC (PROT_NORMAL & ~PTE_PXN) -#define _PAGE_KERNEL_EXEC_CONT ((PROT_NORMAL & ~PTE_PXN) | PTE_CONT) +#define _PAGE_KERNEL (PROT_NORMAL | PTE_DIRTY) +#define _PAGE_KERNEL_RO ((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY | PTE_DIRTY) +#define _PAGE_KERNEL_ROX ((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY | PTE_DIRTY) +#define _PAGE_KERNEL_EXEC ((PROT_NORMAL & ~PTE_PXN) | PTE_DIRTY) +#define _PAGE_KERNEL_EXEC_CONT ((PROT_NORMAL & ~PTE_PXN) | PTE_CONT | PTE_DIRTY) #define _PAGE_SHARED (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define _PAGE_SHARED_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) @@ -164,9 +164,6 @@ static inline bool __pure lpa2_is_enabled(void) #define _PAGE_GCS (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_WRITE | PTE_USER) #define _PAGE_GCS_RO (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_USER) -#define PAGE_GCS __pgprot(_PAGE_GCS) -#define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO) - #define PIE_E0 ( \ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_GCS) | \ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \
diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h index be59156..c3dbd3a 100644 --- a/arch/arm64/include/asm/runtime-const.h +++ b/arch/arm64/include/asm/runtime-const.h
@@ -2,6 +2,10 @@ #ifndef _ASM_RUNTIME_CONST_H #define _ASM_RUNTIME_CONST_H +#ifdef MODULE + #error "Cannot use runtime-const infrastructure from modules" +#endif + #include <asm/cacheflush.h> /* Sigh. You can still run arm64 in BE mode */
diff --git a/arch/arm64/include/asm/static_call.h b/arch/arm64/include/asm/static_call.h new file mode 100644 index 0000000..30f6a04 --- /dev/null +++ b/arch/arm64/include/asm/static_call.h
@@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_STATIC_CALL_H +#define _ASM_STATIC_CALL_H + +#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, target) \ + asm(" .pushsection .static_call.text, \"ax\" \n" \ + " .align 4 \n" \ + " .globl " name " \n" \ + name ": \n" \ + " hint 34 /* BTI C */ \n" \ + " adrp x16, 1f \n" \ + " ldr x16, [x16, :lo12:1f] \n" \ + " br x16 \n" \ + " .type " name ", %function \n" \ + " .size " name ", . - " name " \n" \ + " .popsection \n" \ + " .pushsection .rodata, \"a\" \n" \ + " .align 3 \n" \ + "1: .quad " target " \n" \ + " .popsection \n") + +#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(STATIC_CALL_TRAMP_STR(name), #func) + +#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0) + +#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \ + ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0) + +#endif /* _ASM_STATIC_CALL_H */
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index a2d65d7..1416e65 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h
@@ -31,19 +31,11 @@ */ #define __TLBI_0(op, arg) asm (ARM64_ASM_PREAMBLE \ "tlbi " #op "\n" \ - ALTERNATIVE("nop\n nop", \ - "dsb ish\n tlbi " #op, \ - ARM64_WORKAROUND_REPEAT_TLBI, \ - CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ : : ) #define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE \ - "tlbi " #op ", %0\n" \ - ALTERNATIVE("nop\n nop", \ - "dsb ish\n tlbi " #op ", %0", \ - ARM64_WORKAROUND_REPEAT_TLBI, \ - CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ - : : "r" (arg)) + "tlbi " #op ", %x0\n" \ + : : "rZ" (arg)) #define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg) @@ -181,6 +173,34 @@ static inline unsigned long get_trans_granule(void) (__pages >> (5 * (scale) + 1)) - 1; \ }) +#define __repeat_tlbi_sync(op, arg...) \ +do { \ + if (!alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI)) \ + break; \ + __tlbi(op, ##arg); \ + dsb(ish); \ +} while (0) + +/* + * Complete broadcast TLB maintenance issued by the host which invalidates + * stage 1 information in the host's own translation regime. + */ +static inline void __tlbi_sync_s1ish(void) +{ + dsb(ish); + __repeat_tlbi_sync(vale1is, 0); +} + +/* + * Complete broadcast TLB maintenance issued by hyp code which invalidates + * stage 1 translation information in any translation regime. + */ +static inline void __tlbi_sync_s1ish_hyp(void) +{ + dsb(ish); + __repeat_tlbi_sync(vale2is, 0); +} + /* * TLB Invalidation * ================ @@ -279,7 +299,7 @@ static inline void flush_tlb_all(void) { dsb(ishst); __tlbi(vmalle1is); - dsb(ish); + __tlbi_sync_s1ish(); isb(); } @@ -291,7 +311,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm) asid = __TLBI_VADDR(0, ASID(mm)); __tlbi(aside1is, asid); __tlbi_user(aside1is, asid); - dsb(ish); + __tlbi_sync_s1ish(); mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); } @@ -345,20 +365,11 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { flush_tlb_page_nosync(vma, uaddr); - dsb(ish); + __tlbi_sync_s1ish(); } static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm) { - /* - * TLB flush deferral is not required on systems which are affected by - * ARM64_WORKAROUND_REPEAT_TLBI, as __tlbi()/__tlbi_user() implementation - * will have two consecutive TLBI instructions with a dsb(ish) in between - * defeating the purpose (i.e save overall 'dsb ish' cost). - */ - if (alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI)) - return false; - return true; } @@ -374,7 +385,7 @@ static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm) */ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) { - dsb(ish); + __tlbi_sync_s1ish(); } /* @@ -509,7 +520,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, { __flush_tlb_range_nosync(vma->vm_mm, start, end, stride, last_level, tlb_level); - dsb(ish); + __tlbi_sync_s1ish(); } static inline void local_flush_tlb_contpte(struct vm_area_struct *vma, @@ -557,7 +568,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end dsb(ishst); __flush_tlb_range_op(vaale1is, start, pages, stride, 0, TLBI_TTL_UNKNOWN, false, lpa2_is_enabled()); - dsb(ish); + __tlbi_sync_s1ish(); isb(); } @@ -571,7 +582,7 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) dsb(ishst); __tlbi(vaae1is, addr); - dsb(ish); + __tlbi_sync_s1ish(); isb(); }
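The net effect of the tlbflush.h rework above: on parts affected by ARM64_WORKAROUND_REPEAT_TLBI, the old code emitted an extra dsb ish plus a duplicate TLBI after every single invalidation via an inline alternative, while the new __tlbi_sync_s1ish()/__tlbi_sync_s1ish_hyp() helpers issue one trailing vale1is/vale2is and dsb at synchronization time. Unaffected systems see only the original dsb(ish), and the workaround no longer has to disable TLB-flush deferral in arch_tlbbatch_should_defer().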
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 76f32e4..fe62710 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o +obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index af90128..a9d884f 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c
@@ -377,7 +377,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) prot = __acpi_get_writethrough_mem_attribute(); } } - return ioremap_prot(phys, size, prot); + return __ioremap_prot(phys, size, prot); } /*
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c31f8e1..32c2dbc 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c
@@ -2345,6 +2345,15 @@ static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry, !is_midr_in_range_list(has_vgic_v3)) return false; + /* + * pKVM prevents late onlining of CPUs. This means that whatever + * state the capability is in after deprivilege cannot be affected + * by a new CPU booting -- this is guaranteed to be a CPU we have + * already seen, and the cap is therefore unchanged. + */ + if (system_capabilities_finalized() && is_protected_kvm_enabled()) + return cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR); + if (is_kernel_in_hyp_mode()) res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2); else

diff --git a/arch/arm64/kernel/pi/patch-scs.c b/arch/arm64/kernel/pi/patch-scs.c index bbe7d30..dac568e 100644 --- a/arch/arm64/kernel/pi/patch-scs.c +++ b/arch/arm64/kernel/pi/patch-scs.c
@@ -192,6 +192,14 @@ static int scs_handle_fde_frame(const struct eh_frame *frame, size -= 2; break; + case DW_CFA_advance_loc4: + loc += *opcode++ * code_alignment_factor; + loc += (*opcode++ << 8) * code_alignment_factor; + loc += (*opcode++ << 16) * code_alignment_factor; + loc += (*opcode++ << 24) * code_alignment_factor; + size -= 4; + break; + case DW_CFA_def_cfa: case DW_CFA_offset_extended: size = skip_xleb128(&opcode, size);
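For reference, the four operand bytes of DW_CFA_advance_loc4 form a little-endian 32-bit code delta; since each byte-shifted term is multiplied by code_alignment_factor before being accumulated, the sum is equivalent to loc += get_unaligned_le32(opcode) * code_alignment_factor, assuming the intermediate products do not overflow.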
diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c index c64a06f..9e846ce 100644 --- a/arch/arm64/kernel/rsi.c +++ b/arch/arm64/kernel/rsi.c
@@ -12,6 +12,7 @@ #include <asm/io.h> #include <asm/mem_encrypt.h> +#include <asm/pgtable.h> #include <asm/rsi.h> static struct realm_config config; @@ -146,7 +147,7 @@ void __init arm64_rsi_init(void) return; if (WARN_ON(rsi_get_realm_config(&config))) return; - prot_ns_shared = BIT(config.ipa_bits - 1); + prot_ns_shared = __phys_to_pte_val(BIT(config.ipa_bits - 1)); if (arm64_ioremap_prot_hook_register(realm_ioremap_hook)) return;
diff --git a/arch/arm64/kernel/static_call.c b/arch/arm64/kernel/static_call.c new file mode 100644 index 0000000..8b3a19e --- /dev/null +++ b/arch/arm64/kernel/static_call.c
@@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/static_call.h> +#include <linux/memory.h> +#include <asm/text-patching.h> + +void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) +{ + u64 literal; + int ret; + + if (!func) + func = __static_call_return0; + + /* decode the instructions to discover the literal address */ + literal = ALIGN_DOWN((u64)tramp + 4, SZ_4K) + + aarch64_insn_adrp_get_offset(le32_to_cpup(tramp + 4)) + + 8 * aarch64_insn_decode_immediate(AARCH64_INSN_IMM_12, + le32_to_cpup(tramp + 8)); + + ret = aarch64_insn_write_literal_u64((void *)literal, (u64)func); + WARN_ON_ONCE(ret); +} +EXPORT_SYMBOL_GPL(arch_static_call_transform);
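The literal computation above mirrors the trampoline emitted by ARCH_DEFINE_STATIC_CALL_TRAMP: the adrp at tramp + 4 produces the 4KiB page of the .rodata literal (hence ALIGN_DOWN(tramp + 4, SZ_4K) plus the decoded adrp offset), and the 64-bit ldr at tramp + 8 scales its unsigned 12-bit immediate by 8, giving the offset within that page. As a worked example under assumed values: a trampoline at 0xffff800010001000 with an adrp offset of 0x2000 and an imm12 of 0x10 yields a literal address of 0xffff800010003080 (0x10 * 8 = 0x80 bytes into the target page).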
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 4a609e9..b9d4998 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c
@@ -37,7 +37,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end) * We pick the reserved-ASID to minimise the impact. */ __tlbi(aside1is, __TLBI_VADDR(0, 0)); - dsb(ish); + __tlbi_sync_s1ish(); } ret = caches_clean_inval_user_pou(start, start + chunk);
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 3fe1faa..b32f133 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c
@@ -400,16 +400,25 @@ static inline int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val) { /* - * Abort call on counterless CPU or when interrupts are - * disabled - can lead to deadlock in smp sync call. + * Abort call on counterless CPU. */ if (!cpu_has_amu_feat(cpu)) return -EOPNOTSUPP; - if (WARN_ON_ONCE(irqs_disabled())) - return -EPERM; - - smp_call_function_single(cpu, func, val, 1); + if (irqs_disabled()) { + /* + * When IRQs are disabled (tick path: sched_tick -> + * topology_scale_freq_tick or cppc_scale_freq_tick), only local + * CPU counter reads are allowed. Remote CPU counter read would + * require smp_call_function_single() which is unsafe with IRQs + * disabled. + */ + if (WARN_ON_ONCE(cpu != smp_processor_id())) + return -EPERM; + func(val); + } else { + smp_call_function_single(cpu, func, val, 1); + } return 0; }
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index ad6133b..2d1e752 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -191,6 +191,7 @@ LOCK_TEXT KPROBES_TEXT HYPERVISOR_TEXT + STATIC_CALL_TEXT *(.gnu.warning) } @@ -349,6 +350,7 @@ STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS HEAD_SYMBOLS
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 4f803fd..7d1f22f 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig
@@ -21,7 +21,6 @@ bool "Kernel-based Virtual Machine (KVM) support" select KVM_COMMON select KVM_GENERIC_HARDWARE_ENABLING - select KVM_GENERIC_MMU_NOTIFIER select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_GENERIC_DIRTYLOG_READ_PROTECT
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 29f0326..410ffd4 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c
@@ -358,7 +358,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_IOEVENTFD: case KVM_CAP_USER_MEMORY: - case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_ONE_REG: case KVM_CAP_ARM_PSCI:
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 885bd5b..a024d9a 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c
@@ -540,31 +540,8 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0); wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG); - if (wr->nG) { - u64 asid_ttbr, tcr; - - switch (wi->regime) { - case TR_EL10: - tcr = vcpu_read_sys_reg(vcpu, TCR_EL1); - asid_ttbr = ((tcr & TCR_A1) ? - vcpu_read_sys_reg(vcpu, TTBR1_EL1) : - vcpu_read_sys_reg(vcpu, TTBR0_EL1)); - break; - case TR_EL20: - tcr = vcpu_read_sys_reg(vcpu, TCR_EL2); - asid_ttbr = ((tcr & TCR_A1) ? - vcpu_read_sys_reg(vcpu, TTBR1_EL2) : - vcpu_read_sys_reg(vcpu, TTBR0_EL2)); - break; - default: - BUG(); - } - - wr->asid = FIELD_GET(TTBR_ASID_MASK, asid_ttbr); - if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) || - !(tcr & TCR_ASID16)) - wr->asid &= GENMASK(7, 0); - } + if (wr->nG) + wr->asid = get_asid_by_regime(vcpu, wi->regime); return 0; @@ -1527,8 +1504,6 @@ int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) fail = true; } - isb(); - if (!fail) par = read_sysreg_par(); @@ -1778,7 +1753,7 @@ int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new) if (!writable) return -EPERM; - ptep = (u64 __user *)hva + offset; + ptep = (void __user *)hva + offset; if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS)) r = __lse_swap_desc(ptep, old, new); else
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 1c87699..332c453 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c
@@ -29,7 +29,7 @@ #include "trace.h" -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS() }; @@ -42,7 +42,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, hvc_exit_stat), STATS_DESC_COUNTER(VCPU, wfe_exit_stat),
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 38f66a5..d815265 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -518,7 +518,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) granule = kvm_granule_size(level); cur.start = ALIGN_DOWN(addr, granule); cur.end = cur.start + granule; - if (!range_included(&cur, range)) + if (!range_included(&cur, range) && level < KVM_PGTABLE_LAST_LEVEL) continue; *range = cur; return 0;
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index ae8391b..2189762 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -271,7 +271,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot) */ dsb(ishst); __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level); - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); }
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 8e29d77..2f029bf 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -342,6 +342,7 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc /* No restrictions for non-protected VMs. */ if (!kvm_vm_is_protected(kvm)) { hyp_vm->kvm.arch.flags = host_arch_flags; + hyp_vm->kvm.arch.flags &= ~BIT_ULL(KVM_ARCH_FLAG_ID_REGS_INITIALIZED); bitmap_copy(kvm->arch.vcpu_features, host_kvm->arch.vcpu_features, @@ -391,7 +392,7 @@ static void unpin_host_sve_state(struct pkvm_hyp_vcpu *hyp_vcpu) if (!vcpu_has_feature(&hyp_vcpu->vcpu, KVM_ARM_VCPU_SVE)) return; - sve_state = kern_hyp_va(hyp_vcpu->vcpu.arch.sve_state); + sve_state = hyp_vcpu->vcpu.arch.sve_state; hyp_unpin_shared_mem(sve_state, sve_state + vcpu_sve_state_size(&hyp_vcpu->vcpu)); } @@ -471,6 +472,35 @@ static int pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *h return ret; } +static int vm_copy_id_regs(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu); + const struct kvm *host_kvm = hyp_vm->host_kvm; + struct kvm *kvm = &hyp_vm->kvm; + + if (!test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &host_kvm->arch.flags)) + return -EINVAL; + + if (test_and_set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags)) + return 0; + + memcpy(kvm->arch.id_regs, host_kvm->arch.id_regs, sizeof(kvm->arch.id_regs)); + + return 0; +} + +static int pkvm_vcpu_init_sysregs(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + int ret = 0; + + if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) + kvm_init_pvm_id_regs(&hyp_vcpu->vcpu); + else + ret = vm_copy_id_regs(hyp_vcpu); + + return ret; +} + static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, struct pkvm_hyp_vm *hyp_vm, struct kvm_vcpu *host_vcpu) @@ -490,8 +520,9 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags); hyp_vcpu->vcpu.arch.mp_state.mp_state = KVM_MP_STATE_STOPPED; - if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) - kvm_init_pvm_id_regs(&hyp_vcpu->vcpu); + ret = pkvm_vcpu_init_sysregs(hyp_vcpu); + if (ret) + goto done; ret = pkvm_vcpu_init_traps(hyp_vcpu); if (ret)
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 48da9ca..3dc1ce0 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -169,7 +169,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, */ dsb(ish); __tlbi(vmalle1is); - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); exit_vmid_context(&cxt); @@ -226,7 +226,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, dsb(ish); __tlbi(vmalle1is); - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); exit_vmid_context(&cxt); @@ -240,7 +240,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) enter_vmid_context(mmu, &cxt, false); __tlbi(vmalls12e1is); - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); exit_vmid_context(&cxt); @@ -266,5 +266,5 @@ void __kvm_flush_vm_context(void) /* Same remark as in enter_vmid_context() */ dsb(ish); __tlbi(alle1is); - dsb(ish); + __tlbi_sync_s1ish_hyp(); }
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 0e4ddd2..9b480f9 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -501,7 +501,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, *unmapped += granule; } - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); mm_ops->put_page(ctx->ptep);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index ec25698..35855da 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -115,7 +115,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, */ dsb(ish); __tlbi(vmalle1is); - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); exit_vmid_context(&cxt); @@ -176,7 +176,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, dsb(ish); __tlbi(vmalle1is); - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); exit_vmid_context(&cxt); @@ -192,7 +192,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) enter_vmid_context(mmu, &cxt); __tlbi(vmalls12e1is); - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); exit_vmid_context(&cxt); @@ -217,7 +217,7 @@ void __kvm_flush_vm_context(void) { dsb(ishst); __tlbi(alle1is); - dsb(ish); + __tlbi_sync_s1ish_hyp(); } /* @@ -358,7 +358,7 @@ int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding) default: ret = -EINVAL; } - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); if (mmu)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 070a01e..17d64a1 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c
@@ -1751,17 +1751,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, force_pte = (max_map_size == PAGE_SIZE); vma_pagesize = min_t(long, vma_pagesize, max_map_size); + vma_shift = __ffs(vma_pagesize); } /* - * Both the canonical IPA and fault IPA must be hugepage-aligned to - * ensure we find the right PFN and lay down the mapping in the right - * place. + * Both the canonical IPA and fault IPA must be aligned to the + * mapping size to ensure we find the right PFN and lay down the + * mapping in the right place. */ - if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) { - fault_ipa &= ~(vma_pagesize - 1); - ipa &= ~(vma_pagesize - 1); - } + fault_ipa = ALIGN_DOWN(fault_ipa, vma_pagesize); + ipa = ALIGN_DOWN(ipa, vma_pagesize); gfn = ipa >> PAGE_SHIFT; mte_allowed = kvm_vma_mte_allowed(vma); @@ -1839,10 +1838,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (exec_fault && s2_force_noncacheable) ret = -ENOEXEC; - if (ret) { - kvm_release_page_unused(page); - return ret; - } + if (ret) + goto out_put_page; /* * Guest performs atomic/exclusive operations on memory with unsupported @@ -1852,7 +1849,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ if (esr_fsc_is_excl_atomic_fault(kvm_vcpu_get_esr(vcpu))) { kvm_inject_dabt_excl_atomic(vcpu, kvm_vcpu_get_hfar(vcpu)); - return 1; + ret = 1; + goto out_put_page; } if (nested) @@ -1938,6 +1936,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, mark_page_dirty_in_slot(kvm, memslot, gfn); return ret != -EAGAIN ? ret : 0; + +out_put_page: + kvm_release_page_unused(page); + return ret; } /* Resolve the access fault by making the page young again. */
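For the alignment change above: ALIGN_DOWN(x, a) with a power-of-two a is x & ~(a - 1), e.g. ALIGN_DOWN(0x12345678, SZ_2M) == 0x12200000, which is exactly what the removed open-coded masking computed for PMD_SIZE and PUD_SIZE. Expressing it through ALIGN_DOWN lets one statement cover any vma_pagesize, including the force_pte PAGE_SIZE case where the masking is a no-op on page-aligned addresses.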
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 620126d..2c43097 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c
@@ -152,31 +152,31 @@ static int get_ia_size(struct s2_walk_info *wi) return 64 - wi->t0sz; } -static int check_base_s2_limits(struct s2_walk_info *wi, +static int check_base_s2_limits(struct kvm_vcpu *vcpu, struct s2_walk_info *wi, int level, int input_size, int stride) { - int start_size, ia_size; + int start_size, pa_max; - ia_size = get_ia_size(wi); + pa_max = kvm_get_pa_bits(vcpu->kvm); /* Check translation limits */ switch (BIT(wi->pgshift)) { case SZ_64K: - if (level == 0 || (level == 1 && ia_size <= 42)) + if (level == 0 || (level == 1 && pa_max <= 42)) return -EFAULT; break; case SZ_16K: - if (level == 0 || (level == 1 && ia_size <= 40)) + if (level == 0 || (level == 1 && pa_max <= 40)) return -EFAULT; break; case SZ_4K: - if (level < 0 || (level == 0 && ia_size <= 42)) + if (level < 0 || (level == 0 && pa_max <= 42)) return -EFAULT; break; } /* Check input size limits */ - if (input_size > ia_size) + if (input_size > pa_max) return -EFAULT; /* Check number of entries in starting level table */ @@ -269,16 +269,19 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa, if (input_size > 48 || input_size < 25) return -EFAULT; - ret = check_base_s2_limits(wi, level, input_size, stride); - if (WARN_ON(ret)) + ret = check_base_s2_limits(vcpu, wi, level, input_size, stride); + if (WARN_ON(ret)) { + out->esr = compute_fsc(0, ESR_ELx_FSC_FAULT); return ret; + } base_lower_bound = 3 + input_size - ((3 - level) * stride + wi->pgshift); base_addr = wi->baddr & GENMASK_ULL(47, base_lower_bound); if (check_output_size(wi, base_addr)) { - out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ); + /* R_BFHQH */ + out->esr = compute_fsc(0, ESR_ELx_FSC_ADDRSZ); return 1; } @@ -293,8 +296,10 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa, paddr = base_addr | index; ret = read_guest_s2_desc(vcpu, paddr, &desc, wi); - if (ret < 0) + if (ret < 0) { + out->esr = ESR_ELx_FSC_SEA_TTW(level); return ret; + } new_desc = desc; @@ -854,6 +859,33 @@ int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2) return kvm_inject_nested_sync(vcpu, esr_el2); } +u16 get_asid_by_regime(struct kvm_vcpu *vcpu, enum trans_regime regime) +{ + enum vcpu_sysreg ttbr_elx; + u64 tcr; + u16 asid; + + switch (regime) { + case TR_EL10: + tcr = vcpu_read_sys_reg(vcpu, TCR_EL1); + ttbr_elx = (tcr & TCR_A1) ? TTBR1_EL1 : TTBR0_EL1; + break; + case TR_EL20: + tcr = vcpu_read_sys_reg(vcpu, TCR_EL2); + ttbr_elx = (tcr & TCR_A1) ? TTBR1_EL2 : TTBR0_EL2; + break; + default: + BUG(); + } + + asid = FIELD_GET(TTBRx_EL1_ASID, vcpu_read_sys_reg(vcpu, ttbr_elx)); + if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) || + !(tcr & TCR_ASID16)) + asid &= GENMASK(7, 0); + + return asid; +} + static void invalidate_vncr(struct vncr_tlb *vt) { vt->valid = false; @@ -1154,9 +1186,6 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) { int i; - if (!kvm->arch.nested_mmus_size) - return; - for (i = 0; i < kvm->arch.nested_mmus_size; i++) { struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; @@ -1336,20 +1365,8 @@ static bool kvm_vncr_tlb_lookup(struct kvm_vcpu *vcpu) if (read_vncr_el2(vcpu) != vt->gva) return false; - if (vt->wr.nG) { - u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2); - u64 ttbr = ((tcr & TCR_A1) ? 
- vcpu_read_sys_reg(vcpu, TTBR1_EL2) : - vcpu_read_sys_reg(vcpu, TTBR0_EL2)); - u16 asid; - - asid = FIELD_GET(TTBR_ASID_MASK, ttbr); - if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) || - !(tcr & TCR_ASID16)) - asid &= GENMASK(7, 0); - - return asid == vt->wr.asid; - } + if (vt->wr.nG) + return get_asid_by_regime(vcpu, TR_EL20) == vt->wr.asid; return true; } @@ -1452,21 +1469,8 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) if (read_vncr_el2(vcpu) != vt->gva) return; - if (vt->wr.nG) { - u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2); - u64 ttbr = ((tcr & TCR_A1) ? - vcpu_read_sys_reg(vcpu, TTBR1_EL2) : - vcpu_read_sys_reg(vcpu, TTBR0_EL2)); - u16 asid; - - asid = FIELD_GET(TTBR_ASID_MASK, ttbr); - if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) || - !(tcr & TCR_ASID16)) - asid &= GENMASK(7, 0); - - if (asid != vt->wr.asid) - return; - } + if (vt->wr.nG && get_asid_by_regime(vcpu, TR_EL20) != vt->wr.asid) + return; vt->cpu = smp_processor_id();
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 9595324..b963fd9 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c
@@ -247,6 +247,20 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu) kvm_vcpu_set_be(vcpu); *vcpu_pc(vcpu) = target_pc; + + /* + * We may come from a state where either a PC update was + * pending (an SMC call resulting in the PC being incremented to + * skip the SMC) or an exception was pending. Make sure we get + * rid of all that, as this cannot be valid out of reset. + * + * Note that clearing the exception mask also clears PC + * updates, but that's an implementation detail, and we + * really want to make it explicit. + */ + vcpu_clear_flag(vcpu, PENDING_EXCEPTION); + vcpu_clear_flag(vcpu, EXCEPT_MASK); + vcpu_clear_flag(vcpu, INCREMENT_PC); vcpu_set_reg(vcpu, 0, reset_state.r0); }
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a7cd0ba..1b4cacb 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c
@@ -1816,6 +1816,9 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, ID_AA64MMFR3_EL1_SCTLRX | ID_AA64MMFR3_EL1_S1POE | ID_AA64MMFR3_EL1_S1PIE; + + if (!system_supports_poe()) + val &= ~ID_AA64MMFR3_EL1_S1POE; break; case SYS_ID_MMFR4_EL1: val &= ~ID_MMFR4_EL1_CCIDX;
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 9b3091a..e9b8b5f 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -143,23 +143,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) kvm->arch.vgic.in_kernel = true; kvm->arch.vgic.vgic_model = type; kvm->arch.vgic.implementation_rev = KVM_VGIC_IMP_REV_LATEST; - - kvm_for_each_vcpu(i, vcpu, kvm) { - ret = vgic_allocate_private_irqs_locked(vcpu, type); - if (ret) - break; - } - - if (ret) { - kvm_for_each_vcpu(i, vcpu, kvm) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - kfree(vgic_cpu->private_irqs); - vgic_cpu->private_irqs = NULL; - } - - goto out_unlock; - } - kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; @@ -176,6 +159,23 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0); kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1); + kvm_for_each_vcpu(i, vcpu, kvm) { + ret = vgic_allocate_private_irqs_locked(vcpu, type); + if (ret) + break; + } + + if (ret) { + kvm_for_each_vcpu(i, vcpu, kvm) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + kfree(vgic_cpu->private_irqs); + vgic_cpu->private_irqs = NULL; + } + + kvm->arch.vgic.vgic_model = 0; + goto out_unlock; + } + if (type == KVM_DEV_TYPE_ARM_VGIC_V3) kvm->arch.vgic.nassgicap = system_supports_direct_sgis();
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 585491fb..cafa3cb 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -115,7 +115,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; u32 eoicount = FIELD_GET(GICH_HCR_EOICOUNT, cpuif->vgic_hcr); - struct vgic_irq *irq; + struct vgic_irq *irq = *host_data_ptr(last_lr_irq); DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); @@ -123,7 +123,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) vgic_v2_fold_lr(vcpu, cpuif->vgic_lr[lr]); /* See the GICv3 equivalent for the EOIcount handling rationale */ - list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + list_for_each_entry_continue(irq, &vgic_cpu->ap_list_head, ap_list) { u32 lr; if (!eoicount) {
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 386ddf6..6a355ec 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -148,7 +148,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; u32 eoicount = FIELD_GET(ICH_HCR_EL2_EOIcount, cpuif->vgic_hcr); - struct vgic_irq *irq; + struct vgic_irq *irq = *host_data_ptr(last_lr_irq); DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); @@ -158,12 +158,12 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) /* * EOIMode=0: use EOIcount to emulate deactivation. We are * guaranteed to deactivate in reverse order of the activation, so - * just pick one active interrupt after the other in the ap_list, - * and replay the deactivation as if the CPU was doing it. We also - * rely on priority drop to have taken place, and the list to be - * sorted by priority. + * just pick one active interrupt after the other in the tail part + * of the ap_list, past the LRs, and replay the deactivation as if + * the CPU was doing it. We also rely on priority drop to have taken + * place, and the list to be sorted by priority. */ - list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + list_for_each_entry_continue(irq, &vgic_cpu->ap_list_head, ap_list) { u64 lr; /*
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 430aa98..e22b79c 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c
@@ -814,6 +814,9 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu) static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) { + if (!*host_data_ptr(last_lr_irq)) + return; + if (kvm_vgic_global_state.type == VGIC_V2) vgic_v2_fold_lr_state(vcpu); else @@ -960,10 +963,13 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) if (irqs_outside_lrs(&als)) vgic_sort_ap_list(vcpu); + *host_data_ptr(last_lr_irq) = NULL; + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { scoped_guard(raw_spinlock, &irq->irq_lock) { if (likely(vgic_target_oracle(irq) == vcpu)) { vgic_populate_lr(vcpu, irq, count++); + *host_data_ptr(last_lr_irq) = irq; } }
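The vgic changes above cooperate as a cursor scheme: vgic_flush_lr_state() records in the per-CPU last_lr_irq the last vgic_irq it placed in a list register, and the v2/v3 fold paths resume iteration from that cursor with list_for_each_entry_continue(), so EOIcount emulation only walks the tail of the ap_list that never made it into an LR. When no interrupt was placed in an LR, last_lr_irq stays NULL and vgic_fold_lr_state() returns early.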
diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c index d023413..e278e06 100644 --- a/arch/arm64/lib/delay.c +++ b/arch/arm64/lib/delay.c
@@ -32,7 +32,11 @@ static inline unsigned long xloops_to_cycles(unsigned long xloops) * Note that userspace cannot change the offset behind our back either, * as the vcpu mutex is held as long as KVM_RUN is in progress. */ -#define __delay_cycles() __arch_counter_get_cntvct_stable() +static cycles_t notrace __delay_cycles(void) +{ + guard(preempt_notrace)(); + return __arch_counter_get_cntvct_stable(); +} void __delay(unsigned long cycles) {
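guard(preempt_notrace)() in the new __delay_cycles() comes from the scope-based cleanup infrastructure in <linux/cleanup.h>: preemption is disabled via the notrace variant (keeping the helper safe in tracing-sensitive paths) and re-enabled automatically on scope exit. A minimal sketch of the construct, with a stand-in body:

#include <linux/cleanup.h>
#include <linux/preempt.h>
#include <linux/smp.h>

static unsigned int sample_cpu_stable(void)
{
	/*
	 * preempt_disable_notrace() runs here; the matching
	 * preempt_enable_notrace() runs on every scope exit,
	 * including early returns.
	 */
	guard(preempt_notrace)();
	return raw_smp_processor_id();	/* stand-in for a per-CPU read */
}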
diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c index b929a45..1519d09 100644 --- a/arch/arm64/mm/contpte.c +++ b/arch/arm64/mm/contpte.c
@@ -599,6 +599,27 @@ void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(contpte_clear_young_dirty_ptes); +static bool contpte_all_subptes_match_access_flags(pte_t *ptep, pte_t entry) +{ + pte_t *cont_ptep = contpte_align_down(ptep); + /* + * PFNs differ per sub-PTE. Match only bits consumed by + * __ptep_set_access_flags(): AF, DIRTY and write permission. + */ + const pteval_t cmp_mask = PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY; + pteval_t entry_cmp = pte_val(entry) & cmp_mask; + int i; + + for (i = 0; i < CONT_PTES; i++) { + pteval_t pte_cmp = pte_val(__ptep_get(cont_ptep + i)) & cmp_mask; + + if (pte_cmp != entry_cmp) + return false; + } + + return true; +} + int contpte_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t entry, int dirty) @@ -608,14 +629,38 @@ int contpte_ptep_set_access_flags(struct vm_area_struct *vma, int i; /* - * Gather the access/dirty bits for the contiguous range. If nothing has - * changed, its a noop. + * Check whether all sub-PTEs in the CONT block already match the + * requested access flags/write permission, using raw per-PTE values + * rather than the gathered ptep_get() view. + * + * __ptep_set_access_flags() can update AF, dirty and write + * permission, but only to make the mapping more permissive. + * + * ptep_get() gathers AF/dirty state across the whole CONT block, + * which is correct for a CPU with FEAT_HAFDBS. But page-table + * walkers that evaluate each descriptor individually (e.g. a CPU + * without DBM support, or an SMMU without HTTU, or with HA/HD + * disabled in CD.TCR) can keep faulting on the target sub-PTE if + * only a sibling has been updated. Gathering can therefore cause + * false no-ops: + * - write faults: target still has PTE_RDONLY (needs PTE_RDONLY cleared) + * - read faults: target still lacks PTE_AF + * + * Per Arm ARM (DDI 0487) D8.7.1, any sub-PTE in a CONT range may + * become the effective cached translation, so all entries must have + * consistent attributes. Check the full CONT block before treating + * the update as a no-op, and when any sub-PTE mismatches, proceed to + * update the whole range. */ - orig_pte = pte_mknoncont(ptep_get(ptep)); - if (pte_val(orig_pte) == pte_val(entry)) + if (contpte_all_subptes_match_access_flags(ptep, entry)) return 0; /* + * Use raw target pte (not gathered) for write-bit unfold decision. + */ + orig_pte = pte_mknoncont(__ptep_get(ptep)); + + /* * We can fix up access/dirty bits without having to unfold the contig * range. But if the write bit is changing, we must unfold. */
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index b12cbed..6eef48e 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c
@@ -14,8 +14,8 @@ int arm64_ioremap_prot_hook_register(ioremap_prot_hook_t hook) return 0; } -void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - pgprot_t pgprot) +void __iomem *__ioremap_prot(phys_addr_t phys_addr, size_t size, + pgprot_t pgprot) { unsigned long last_addr = phys_addr + size - 1; @@ -39,7 +39,7 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, return generic_ioremap_prot(phys_addr, size, pgprot); } -EXPORT_SYMBOL(ioremap_prot); +EXPORT_SYMBOL(__ioremap_prot); /* * Must be called after early_fixmap_init
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 08ee177..92b2f50 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c
@@ -34,6 +34,8 @@ static pgprot_t protection_map[16] __ro_after_init = { [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_EXEC }; +static ptdesc_t gcs_page_prot __ro_after_init = _PAGE_GCS_RO; + /* * You really shouldn't be using read() or write() on /dev/mem. This might go * away in the future. @@ -73,9 +75,11 @@ static int __init adjust_protection_map(void) protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY; } - if (lpa2_is_enabled()) + if (lpa2_is_enabled()) { for (int i = 0; i < ARRAY_SIZE(protection_map); i++) pgprot_val(protection_map[i]) &= ~PTE_SHARED; + gcs_page_prot &= ~PTE_SHARED; + } return 0; } @@ -87,7 +91,11 @@ pgprot_t vm_get_page_prot(vm_flags_t vm_flags) /* Short circuit GCS to avoid bloating the table. */ if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) { - prot = _PAGE_GCS_RO; + /* Honour mprotect(PROT_NONE) on shadow stack mappings */ + if (vm_flags & VM_ACCESS_FLAGS) + prot = gcs_page_prot; + else + prot = pgprot_val(protection_map[VM_NONE]); } else { prot = pgprot_val(protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 356d33c..adf8496 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2119,7 +2119,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align); image_size = extable_offset + extable_size; ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, - sizeof(u32), &header, &image_ptr, + sizeof(u64), &header, &image_ptr, jit_fill_hole); if (!ro_header) { prog = orig_prog;
diff --git a/arch/csky/kernel/vmlinux.lds.S b/arch/csky/kernel/vmlinux.lds.S index d718961..8194398 100644 --- a/arch/csky/kernel/vmlinux.lds.S +++ b/arch/csky/kernel/vmlinux.lds.S
@@ -109,6 +109,7 @@ STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS DISCARDS
diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S index 1150b77..aae2228 100644 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ b/arch/hexagon/kernel/vmlinux.lds.S
@@ -62,6 +62,7 @@ STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS .hexagon.attributes 0 : { *(.hexagon.attributes) }
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index d211c65..92068ff 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig
@@ -304,6 +304,9 @@ config AS_HAS_LVZ_EXTENSION def_bool $(as-instr,hvcl 0) +config AS_HAS_SCQ_EXTENSION + def_bool $(as-instr,sc.q \$t0$(comma)\$t1$(comma)\$t2) + config CC_HAS_ANNOTATE_TABLEJUMP def_bool $(cc-option,-mannotate-tablejump)
diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h index 58cabab..909f927 100644 --- a/arch/loongarch/include/asm/cmpxchg.h +++ b/arch/loongarch/include/asm/cmpxchg.h
@@ -238,6 +238,8 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int arch_cmpxchg((ptr), (o), (n)); \ }) +#ifdef CONFIG_AS_HAS_SCQ_EXTENSION + union __u128_halves { u128 full; struct { @@ -290,6 +292,9 @@ union __u128_halves { BUILD_BUG_ON(sizeof(*(ptr)) != 16); \ __arch_cmpxchg128(ptr, o, n, ""); \ }) + +#endif /* CONFIG_AS_HAS_SCQ_EXTENSION */ + #else #include <asm-generic/cmpxchg-local.h> #define arch_cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
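The newly guarded code relies on viewing a 128-bit quantity as two 64-bit halves so a sc.q-style pairwise store can be built. The aliasing itself is plain C; this sketch (GCC/Clang on a 64-bit little-endian target assumed, unsigned __int128 is a compiler extension) shows only the layout, not the atomicity:

    #include <stdint.h>
    #include <stdio.h>

    union u128_halves {
        unsigned __int128 full;
        struct {
            uint64_t low;    /* little-endian: low word first */
            uint64_t high;
        };
    };

    int main(void)
    {
        union u128_halves v;

        v.full = ((unsigned __int128)0x1122334455667788ull << 64)
                 | 0x99aabbccddeeff00ull;
        printf("high=%#llx low=%#llx\n",
               (unsigned long long)v.high, (unsigned long long)v.low);
        return 0;
    }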
diff --git a/arch/loongarch/include/asm/linkage.h b/arch/loongarch/include/asm/linkage.h index e2eca1a..a1bd6a3 100644 --- a/arch/loongarch/include/asm/linkage.h +++ b/arch/loongarch/include/asm/linkage.h
@@ -41,4 +41,40 @@
	.cfi_endproc;	\
	SYM_END(name, SYM_T_NONE)
+/*
+ * This is for the signal handler trampoline, which is used as the return
+ * address of signal handlers in userspace instead of being called normally.
+ * The long-standing libgcc bug https://gcc.gnu.org/PR124050 requires a
+ * nop between .cfi_startproc and the actual address of the trampoline, so
+ * we cannot simply use SYM_FUNC_START.
+ *
+ * This wrapper also contains all the .cfi_* directives for recovering
+ * the content of the GPRs and the "return address" (where the rt_sigreturn
+ * syscall will jump to), assuming there is a struct rt_sigframe (holding
+ * a struct sigcontext with the information we need to recover) at $sp.
+ * The "DWARF for the LoongArch(TM) Architecture" manual states that
+ * column 0 is for $zero, but it does not make much sense to save/restore
+ * the hardware zero register. Repurpose this column here for the return
+ * address (since it is not the content of $ra, we cannot use the default
+ * column 3).
+ */
+#define SYM_SIGFUNC_START(name)				\
+	.cfi_startproc;					\
+	.cfi_signal_frame;				\
+	.cfi_def_cfa 3, RT_SIGFRAME_SC;			\
+	.cfi_return_column 0;				\
+	.cfi_offset 0, SC_PC;				\
+							\
+	.irp num, 1, 2, 3, 4, 5, 6, 7, 8,		\
+		  9, 10, 11, 12, 13, 14, 15, 16,	\
+		  17, 18, 19, 20, 21, 22, 23, 24,	\
+		  25, 26, 27, 28, 29, 30, 31;		\
+	.cfi_offset \num, SC_REGS + \num * SZREG;	\
+	.endr;						\
+							\
+	nop;						\
+	SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+
+#define SYM_SIGFUNC_END(name)	SYM_FUNC_END(name)
+
 #endif
diff --git a/arch/loongarch/include/asm/sigframe.h b/arch/loongarch/include/asm/sigframe.h new file mode 100644 index 0000000..109298b8 --- /dev/null +++ b/arch/loongarch/include/asm/sigframe.h
@@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#include <asm/siginfo.h> +#include <asm/ucontext.h> + +struct rt_sigframe { + struct siginfo rs_info; + struct ucontext rs_uctx; +};
diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h index 4e259d4..4382693 100644 --- a/arch/loongarch/include/asm/uaccess.h +++ b/arch/loongarch/include/asm/uaccess.h
@@ -253,8 +253,13 @@ do { \ \ __get_kernel_common(*((type *)(dst)), sizeof(type), \ (__force type *)(src)); \ - if (unlikely(__gu_err)) \ + if (unlikely(__gu_err)) { \ + pr_info("%s: memory access failed, ecode 0x%x\n", \ + __func__, read_csr_excode()); \ + pr_info("%s: the caller is %pS\n", \ + __func__, __builtin_return_address(0)); \ goto err_label; \ + } \ } while (0) #define __put_kernel_nofault(dst, src, type, err_label) \ @@ -264,8 +269,13 @@ do { \ \ __pu_val = *(__force type *)(src); \ __put_kernel_common(((type *)(dst)), sizeof(type)); \ - if (unlikely(__pu_err)) \ + if (unlikely(__pu_err)) { \ + pr_info("%s: memory access failed, ecode 0x%x\n", \ + __func__, read_csr_excode()); \ + pr_info("%s: the caller is %pS\n", \ + __func__, __builtin_return_address(0)); \ goto err_label; \ + } \ } while (0) extern unsigned long __copy_user(void *to, const void *from, __kernel_size_t n);
diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index 3017c71..2cc953f 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c
@@ -16,6 +16,7 @@ #include <asm/ptrace.h> #include <asm/processor.h> #include <asm/ftrace.h> +#include <asm/sigframe.h> #include <vdso/datapage.h> static void __used output_ptreg_defines(void) @@ -220,6 +221,7 @@ static void __used output_sc_defines(void) COMMENT("Linux sigcontext offsets."); OFFSET(SC_REGS, sigcontext, sc_regs); OFFSET(SC_PC, sigcontext, sc_pc); + OFFSET(RT_SIGFRAME_SC, rt_sigframe, rs_uctx.uc_mcontext); BLANK(); }
diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c index 841206f..6524567 100644 --- a/arch/loongarch/kernel/env.c +++ b/arch/loongarch/kernel/env.c
@@ -42,16 +42,15 @@ static int __init init_cpu_fullname(void) int cpu, ret; char *cpuname; const char *model; - struct device_node *root; /* Parsing cpuname from DTS model property */ - root = of_find_node_by_path("/"); - ret = of_property_read_string(root, "model", &model); + ret = of_property_read_string(of_root, "model", &model); if (ret == 0) { cpuname = kstrdup(model, GFP_KERNEL); + if (!cpuname) + return -ENOMEM; loongson_sysconf.cpuname = strsep(&cpuname, " "); } - of_node_put(root); if (loongson_sysconf.cpuname && !strncmp(loongson_sysconf.cpuname, "Loongson", 8)) { for (cpu = 0; cpu < NR_CPUS; cpu++)
diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index bf037f0..1a72808 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c
@@ -246,32 +246,51 @@ static int text_copy_cb(void *data)
	if (smp_processor_id() == copy->cpu) {
		ret = copy_to_kernel_nofault(copy->dst, copy->src, copy->len);
-		if (ret)
+		if (ret) {
			pr_err("%s: operation failed\n", __func__);
+			return ret;
+		}
	}
	flush_icache_range((unsigned long)copy->dst, (unsigned long)copy->dst + copy->len);
-	return ret;
+	return 0;
 }
 
 int larch_insn_text_copy(void *dst, void *src, size_t len)
 {
	int ret = 0;
+	int err = 0;
	size_t start, end;
	struct insn_copy copy = {
		.dst = dst,
		.src = src,
		.len = len,
-		.cpu = smp_processor_id(),
+		.cpu = raw_smp_processor_id(),
	};
 
+	/*
+	 * Ensure copy.cpu won't be hot-removed before stop_machine().
+	 * If it is removed, nobody will actually update the text.
+	 */
+	lockdep_assert_cpus_held();
+
	start = round_down((size_t)dst, PAGE_SIZE);
	end = round_up((size_t)dst + len, PAGE_SIZE);
 
-	set_memory_rw(start, (end - start) / PAGE_SIZE);
-	ret = stop_machine(text_copy_cb, &copy, cpu_online_mask);
-	set_memory_rox(start, (end - start) / PAGE_SIZE);
+	err = set_memory_rw(start, (end - start) / PAGE_SIZE);
+	if (err) {
+		pr_info("%s: set_memory_rw() failed\n", __func__);
+		return err;
+	}
+
+	ret = stop_machine_cpuslocked(text_copy_cb, &copy, cpu_online_mask);
+
+	err = set_memory_rox(start, (end - start) / PAGE_SIZE);
+	if (err) {
+		pr_info("%s: set_memory_rox() failed\n", __func__);
+		return err;
+	}
 
	return ret;
 }
diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index c9f7ca7..d4151d2 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c
@@ -35,6 +35,7 @@ #include <asm/cpu-features.h> #include <asm/fpu.h> #include <asm/lbt.h> +#include <asm/sigframe.h> #include <asm/ucontext.h> #include <asm/vdso.h> @@ -51,11 +52,6 @@ #define lock_lbt_owner() ({ preempt_disable(); pagefault_disable(); }) #define unlock_lbt_owner() ({ pagefault_enable(); preempt_enable(); }) -struct rt_sigframe { - struct siginfo rs_info; - struct ucontext rs_uctx; -}; - struct _ctx_layout { struct sctx_info *addr; unsigned int size;
diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 08ea921..d0e1377 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S
@@ -147,6 +147,7 @@ STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS #ifdef CONFIG_EFI_STUB
diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig index ed4f724..8e52136 100644 --- a/arch/loongarch/kvm/Kconfig +++ b/arch/loongarch/kvm/Kconfig
@@ -28,7 +28,6 @@ select KVM_COMMON select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_GENERIC_HARDWARE_ENABLING - select KVM_GENERIC_MMU_NOTIFIER select KVM_MMIO select VIRT_XFER_TO_GUEST_WORK select SCHED_INFO
diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index d2acb4d..003bd773 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c
@@ -83,7 +83,7 @@ static inline void eiointc_update_sw_coremap(struct loongarch_eiointc *s, if (!(s->status & BIT(EIOINTC_ENABLE_CPU_ENCODE))) { cpuid = ffs(cpuid) - 1; - cpuid = (cpuid >= 4) ? 0 : cpuid; + cpuid = ((cpuid < 0) || (cpuid >= 4)) ? 0 : cpuid; } vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid); @@ -472,34 +472,34 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, switch (addr) { case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: offset = (addr - EIOINTC_NODETYPE_START) / 4; - p = s->nodetype + offset * 4; + p = (void *)s->nodetype + offset * 4; break; case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END: offset = (addr - EIOINTC_IPMAP_START) / 4; - p = &s->ipmap + offset * 4; + p = (void *)&s->ipmap + offset * 4; break; case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: offset = (addr - EIOINTC_ENABLE_START) / 4; - p = s->enable + offset * 4; + p = (void *)s->enable + offset * 4; break; case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: offset = (addr - EIOINTC_BOUNCE_START) / 4; - p = s->bounce + offset * 4; + p = (void *)s->bounce + offset * 4; break; case EIOINTC_ISR_START ... EIOINTC_ISR_END: offset = (addr - EIOINTC_ISR_START) / 4; - p = s->isr + offset * 4; + p = (void *)s->isr + offset * 4; break; case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: if (cpu >= s->num_cpu) return -EINVAL; offset = (addr - EIOINTC_COREISR_START) / 4; - p = s->coreisr[cpu] + offset * 4; + p = (void *)s->coreisr[cpu] + offset * 4; break; case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: offset = (addr - EIOINTC_COREMAP_START) / 4; - p = s->coremap + offset * 4; + p = (void *)s->coremap + offset * 4; break; default: kvm_err("%s: unknown eiointc register, addr = %d\n", __func__, addr);
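The (void *) casts added above are not cosmetic: C pointer arithmetic scales by the pointee size, so a byte offset applied to a pointer to a wider type lands in the wrong place. A small demonstration (void-pointer arithmetic is a GNU C extension, as used in the kernel):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t regs[4] = { 0x1111, 0x2222, 0x3333, 0x4444 };
        int offset = 1;                               /* offset in 32-bit words */

        uint64_t *scaled = regs + offset * 4;         /* advances 4*8 = 32 bytes */
        uint32_t *bytes  = (void *)regs + offset * 4; /* advances 4 bytes */

        printf("scaled by element: %p\n", (void *)scaled);
        printf("scaled by byte:    %p\n", (void *)bytes);
        return 0;
    }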
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 09e137f..831f381 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c
@@ -14,7 +14,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, int_exits), STATS_DESC_COUNTER(VCPU, idle_exits), @@ -588,6 +588,9 @@ struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid) { struct kvm_phyid_map *map; + if (cpuid < 0) + return NULL; + if (cpuid >= KVM_MAX_PHYID) return NULL;
diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 63fd405..8cc5ee1 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c
@@ -10,7 +10,7 @@ #include <asm/kvm_eiointc.h> #include <asm/kvm_pch_pic.h> -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_ICOUNTER(VM, pages), STATS_DESC_ICOUNTER(VM, hugepages), @@ -49,8 +49,8 @@ static void kvm_vm_init_features(struct kvm *kvm) kvm->arch.kvm_features |= BIT(KVM_LOONGARCH_VM_FEAT_PMU); /* Enable all PV features by default */ - kvm->arch.pv_features = BIT(KVM_FEATURE_IPI); - kvm->arch.kvm_features = BIT(KVM_LOONGARCH_VM_FEAT_PV_IPI); + kvm->arch.pv_features |= BIT(KVM_FEATURE_IPI); + kvm->arch.kvm_features |= BIT(KVM_LOONGARCH_VM_FEAT_PV_IPI); if (kvm_pvtime_supported()) { kvm->arch.pv_features |= BIT(KVM_FEATURE_PREEMPT); kvm->arch.pv_features |= BIT(KVM_FEATURE_STEAL_TIME); @@ -118,7 +118,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: case KVM_CAP_READONLY_MEM: - case KVM_CAP_SYNC_MMU: case KVM_CAP_IMMEDIATE_EXIT: case KVM_CAP_IOEVENTFD: case KVM_CAP_MP_STATE:
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 3bd89f5..9cb796e 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c
@@ -1379,9 +1379,11 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len) { int ret; + cpus_read_lock(); mutex_lock(&text_mutex); ret = larch_insn_text_copy(dst, src, len); mutex_unlock(&text_mutex); + cpus_read_unlock(); return ret ? ERR_PTR(-EINVAL) : dst; } @@ -1429,10 +1431,12 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t, if (ret) return ret; + cpus_read_lock(); mutex_lock(&text_mutex); if (memcmp(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES)) ret = larch_insn_text_copy(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES); mutex_unlock(&text_mutex); + cpus_read_unlock(); return ret; } @@ -1450,10 +1454,12 @@ int bpf_arch_text_invalidate(void *dst, size_t len) for (i = 0; i < (len / sizeof(u32)); i++) inst[i] = INSN_BREAK; + cpus_read_lock(); mutex_lock(&text_mutex); if (larch_insn_text_copy(dst, inst, len)) ret = -EINVAL; mutex_unlock(&text_mutex); + cpus_read_unlock(); kvfree(inst); @@ -1568,6 +1574,11 @@ void arch_free_bpf_trampoline(void *image, unsigned int size) bpf_prog_pack_free(image, size); } +int arch_protect_bpf_trampoline(void *image, unsigned int size) +{ + return 0; +} + /* * Sign-extend the register if necessary */
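The locking added above is an ordering rule: the CPU hotplug read lock is taken outside text_mutex so that larch_insn_text_copy() can run stop_machine_cpuslocked() under lockdep_assert_cpus_held(). A user-space analogue of the same discipline, sketched with pthreads (the names mirror the kernel ones but nothing here is kernel API):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_rwlock_t cpus_lock = PTHREAD_RWLOCK_INITIALIZER; /* ~cpus_read_lock() */
    static pthread_mutex_t text_mutex = PTHREAD_MUTEX_INITIALIZER;  /* ~text_mutex */

    static void patch_text(const char *what)
    {
        /* By convention the caller holds cpus_lock, so "hotplug" is excluded. */
        printf("patching %s\n", what);
    }

    int main(void)
    {
        pthread_rwlock_rdlock(&cpus_lock);  /* outer lock, always first */
        pthread_mutex_lock(&text_mutex);    /* inner lock, always second */
        patch_text("bpf trampoline");
        pthread_mutex_unlock(&text_mutex);
        pthread_rwlock_unlock(&cpus_lock);
        return 0;
    }

Taking the two locks in the same order at every call site is what prevents deadlock against a hotplug writer.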
diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c index d923295..d233ea2 100644 --- a/arch/loongarch/pci/pci.c +++ b/arch/loongarch/pci/pci.c
@@ -5,9 +5,11 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/acpi.h> +#include <linux/delay.h> #include <linux/types.h> #include <linux/pci.h> #include <linux/vgaarb.h> +#include <linux/io-64-nonatomic-lo-hi.h> #include <asm/cacheflush.h> #include <asm/loongson.h> @@ -15,6 +17,9 @@ #define PCI_DEVICE_ID_LOONGSON_DC1 0x7a06 #define PCI_DEVICE_ID_LOONGSON_DC2 0x7a36 #define PCI_DEVICE_ID_LOONGSON_DC3 0x7a46 +#define PCI_DEVICE_ID_LOONGSON_GPU1 0x7a15 +#define PCI_DEVICE_ID_LOONGSON_GPU2 0x7a25 +#define PCI_DEVICE_ID_LOONGSON_GPU3 0x7a35 int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val) @@ -99,3 +104,78 @@ static void pci_fixup_vgadev(struct pci_dev *pdev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC1, pci_fixup_vgadev); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC2, pci_fixup_vgadev); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC3, pci_fixup_vgadev); + +#define CRTC_NUM_MAX 2 +#define CRTC_OUTPUT_ENABLE 0x100 + +static void loongson_gpu_fixup_dma_hang(struct pci_dev *pdev, bool on) +{ + u32 i, val, count, crtc_offset, device; + void __iomem *crtc_reg, *base, *regbase; + static u32 crtc_status[CRTC_NUM_MAX] = { 0 }; + + base = pdev->bus->ops->map_bus(pdev->bus, pdev->devfn + 1, 0); + device = readw(base + PCI_DEVICE_ID); + + regbase = ioremap(readq(base + PCI_BASE_ADDRESS_0) & ~0xffull, SZ_64K); + if (!regbase) { + pci_err(pdev, "Failed to ioremap()\n"); + return; + } + + switch (device) { + case PCI_DEVICE_ID_LOONGSON_DC2: + crtc_reg = regbase + 0x1240; + crtc_offset = 0x10; + break; + case PCI_DEVICE_ID_LOONGSON_DC3: + crtc_reg = regbase; + crtc_offset = 0x400; + break; + } + + for (i = 0; i < CRTC_NUM_MAX; i++, crtc_reg += crtc_offset) { + val = readl(crtc_reg); + + if (!on) + crtc_status[i] = val; + + /* No need to fixup if the status is off at startup. */ + if (!(crtc_status[i] & CRTC_OUTPUT_ENABLE)) + continue; + + if (on) + val |= CRTC_OUTPUT_ENABLE; + else + val &= ~CRTC_OUTPUT_ENABLE; + + mb(); + writel(val, crtc_reg); + + for (count = 0; count < 40; count++) { + val = readl(crtc_reg) & CRTC_OUTPUT_ENABLE; + if ((on && val) || (!on && !val)) + break; + udelay(1000); + } + + pci_info(pdev, "DMA hang fixup at reg[0x%lx]: 0x%x\n", + (unsigned long)crtc_reg & 0xffff, readl(crtc_reg)); + } + + iounmap(regbase); +} + +static void pci_fixup_dma_hang_early(struct pci_dev *pdev) +{ + loongson_gpu_fixup_dma_hang(pdev, false); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU2, pci_fixup_dma_hang_early); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU3, pci_fixup_dma_hang_early); + +static void pci_fixup_dma_hang_final(struct pci_dev *pdev) +{ + loongson_gpu_fixup_dma_hang(pdev, true); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU2, pci_fixup_dma_hang_final); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU3, pci_fixup_dma_hang_final);
diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 520f151..294c16b 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile
@@ -26,7 +26,7 @@ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ -std=gnu11 -fms-extensions -O2 -g -fno-strict-aliasing -fno-common -fno-builtin \ -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ - $(call cc-option, -fno-asynchronous-unwind-tables) \ + $(call cc-option, -fasynchronous-unwind-tables) \ $(call cc-option, -fno-stack-protector) aflags-vdso := $(ccflags-vdso) \ -D__ASSEMBLY__ -Wa,-gdwarf-2 @@ -41,7 +41,7 @@ # VDSO linker flags. ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ - $(filter -E%,$(KBUILD_CFLAGS)) -shared --build-id -T + $(filter -E%,$(KBUILD_CFLAGS)) -shared --build-id --eh-frame-hdr -T # # Shared build commands.
diff --git a/arch/loongarch/vdso/sigreturn.S b/arch/loongarch/vdso/sigreturn.S index 9cb3c58..59f940d 100644 --- a/arch/loongarch/vdso/sigreturn.S +++ b/arch/loongarch/vdso/sigreturn.S
@@ -12,13 +12,13 @@ #include <asm/regdef.h> #include <asm/asm.h> +#include <asm/asm-offsets.h> .section .text - .cfi_sections .debug_frame -SYM_FUNC_START(__vdso_rt_sigreturn) +SYM_SIGFUNC_START(__vdso_rt_sigreturn) li.w a7, __NR_rt_sigreturn syscall 0 -SYM_FUNC_END(__vdso_rt_sigreturn) +SYM_SIGFUNC_END(__vdso_rt_sigreturn)
diff --git a/arch/m68k/kernel/vmlinux-nommu.lds b/arch/m68k/kernel/vmlinux-nommu.lds index 2624fc1..45d7f4b 100644 --- a/arch/m68k/kernel/vmlinux-nommu.lds +++ b/arch/m68k/kernel/vmlinux-nommu.lds
@@ -85,6 +85,7 @@ _end = .; STABS_DEBUG + MODINFO ELF_DETAILS /* Sections to be discarded */
diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index 1ccdd04a..7326586 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds
@@ -58,6 +58,7 @@ _end = . ; STABS_DEBUG + MODINFO ELF_DETAILS /* Sections to be discarded */
diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index f13ddcc..1b19fef 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds
@@ -51,6 +51,7 @@ _end = . ; STABS_DEBUG + MODINFO ELF_DETAILS /* Sections to be discarded */
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 404390b..3f11e52 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h
@@ -484,7 +484,6 @@ # endif # ifndef cpu_vmbits # define cpu_vmbits cpu_data[0].vmbits -# define __NEED_VMBITS_PROBE # endif #endif
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h index fd60837..211b578 100644 --- a/arch/mips/include/asm/cpu-info.h +++ b/arch/mips/include/asm/cpu-info.h
@@ -80,9 +80,7 @@ struct cpuinfo_mips { int srsets; /* Shadow register sets */ int package;/* physical package number */ unsigned int globalnumber; -#ifdef CONFIG_64BIT int vmbits; /* Virtual memory size in bits */ -#endif void *data; /* Additional data */ unsigned int watch_reg_count; /* Number that exist */ unsigned int watch_reg_use_cnt; /* Usable by ptrace */
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index f799c0d..12a095d 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h
@@ -1871,6 +1871,8 @@ do { \ #define read_c0_entryhi() __read_ulong_c0_register($10, 0) #define write_c0_entryhi(val) __write_ulong_c0_register($10, 0, val) +#define read_c0_entryhi_64() __read_64bit_c0_register($10, 0) +#define write_c0_entryhi_64(val) __write_64bit_c0_register($10, 0, val) #define read_c0_guestctl1() __read_32bit_c0_register($10, 4) #define write_c0_guestctl1(val) __write_32bit_c0_register($10, 4, val)
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 1e49e05..489612e 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c
@@ -210,11 +210,14 @@ static inline void set_elf_base_platform(const char *plat) static inline void cpu_probe_vmbits(struct cpuinfo_mips *c) { -#ifdef __NEED_VMBITS_PROBE - write_c0_entryhi(0x3fffffffffffe000ULL); - back_to_back_c0_hazard(); - c->vmbits = fls64(read_c0_entryhi() & 0x3fffffffffffe000ULL); -#endif + int vmbits = 31; + + if (cpu_has_64bits) { + write_c0_entryhi_64(0x3fffffffffffe000ULL); + back_to_back_c0_hazard(); + vmbits = fls64(read_c0_entryhi_64() & 0x3fffffffffffe000ULL); + } + c->vmbits = vmbits; } static void set_isa(struct cpuinfo_mips *c, unsigned int isa)
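The probe works by writing an all-ones VPN pattern to EntryHi and counting how many bits stick; fls64() of the read-back value is the virtual-address width. A user-space model of the arithmetic, where a fixed mask stands in for the hardware read-back:

    #include <stdint.h>
    #include <stdio.h>

    static int fls64(uint64_t x)   /* last set bit, 1-based; 0 for x == 0 */
    {
        return x ? 64 - __builtin_clzll(x) : 0;
    }

    int main(void)
    {
        uint64_t wrote = 0x3fffffffffffe000ull;          /* candidate VPN bits */
        uint64_t stuck = wrote & 0x000000ffffffe000ull;  /* pretend HW keeps bits 13..39 */

        printf("vmbits = %d\n", fls64(stuck));           /* prints 40 */
        return 0;
    }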
diff --git a/arch/mips/kernel/cpu-r3k-probe.c b/arch/mips/kernel/cpu-r3k-probe.c index 0c826f7..edcf04d 100644 --- a/arch/mips/kernel/cpu-r3k-probe.c +++ b/arch/mips/kernel/cpu-r3k-probe.c
@@ -137,6 +137,8 @@ void cpu_probe(void) else cpu_set_nofpu_opts(c); + c->vmbits = 31; + reserve_exception_space(0, 0x400); }
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 2b708fa..579b2cc 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S
@@ -217,6 +217,7 @@ STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS /* These must appear regardless of . */
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index cc13cc3..b1b9a1d6 100644 --- a/arch/mips/kvm/Kconfig +++ b/arch/mips/kvm/Kconfig
@@ -23,7 +23,6 @@ select KVM_COMMON select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_MMIO - select KVM_GENERIC_MMU_NOTIFIER select KVM_GENERIC_HARDWARE_ENABLING select HAVE_KVM_READONLY_MEM help
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index b0fb92f..a53abbb 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c
@@ -38,7 +38,7 @@ #define VECTORSPACING 0x100 /* for EI/VI mode */ #endif -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS() }; @@ -51,7 +51,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, wait_exits), STATS_DESC_COUNTER(VCPU, cache_exits), @@ -1035,7 +1035,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: case KVM_CAP_READONLY_MEM: - case KVM_CAP_SYNC_MMU: case KVM_CAP_IMMEDIATE_EXIT: r = 1; break;
diff --git a/arch/mips/lib/multi3.c b/arch/mips/lib/multi3.c index 4c2483f..92b3778 100644 --- a/arch/mips/lib/multi3.c +++ b/arch/mips/lib/multi3.c
@@ -4,12 +4,12 @@ #include "libgcc.h" /* - * GCC 7 & older can suboptimally generate __multi3 calls for mips64r6, so for + * GCC 9 & older can suboptimally generate __multi3 calls for mips64r6, so for * that specific case only we implement that intrinsic here. * * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82981 */ -#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ < 8) +#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ < 10) /* multiply 64-bit values, low 64-bits returned */ static inline long long notrace dmulu(long long a, long long b) @@ -51,4 +51,4 @@ ti_type notrace __multi3(ti_type a, ti_type b) } EXPORT_SYMBOL(__multi3); -#endif /* 64BIT && CPU_MIPSR6 && GCC7 */ +#endif /* 64BIT && CPU_MIPSR6 && GCC9 */
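__multi3 composes a 128-bit product from 64-bit pieces: the low word is al*bl modulo 2^64, and the high word adds the cross terms plus the high half of al*bl (dmuhu on MIPSr6). A portable sketch; unsigned __int128 is a GCC/Clang extension and stands in here for the dmuhu instruction:

    #include <stdint.h>
    #include <stdio.h>

    static unsigned __int128 multi3(uint64_t ah, uint64_t al,
                                    uint64_t bh, uint64_t bl)
    {
        uint64_t lo = al * bl;                                        /* ~dmulu */
        uint64_t hi = (uint64_t)(((unsigned __int128)al * bl) >> 64); /* ~dmuhu */

        hi += ah * bl + al * bh;   /* cross terms; ah*bh overflows out of range */
        return ((unsigned __int128)hi << 64) | lo;
    }

    int main(void)
    {
        unsigned __int128 r = multi3(0, ~0ull, 0, ~0ull);  /* (2^64-1)^2 */

        printf("hi=%#llx lo=%#llx\n",
               (unsigned long long)(r >> 64), (unsigned long long)r);
        /* prints hi=0xfffffffffffffffe lo=0x1 */
        return 0;
    }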
diff --git a/arch/mips/loongson64/env.c b/arch/mips/loongson64/env.c index 11ddf02..7abcca7 100644 --- a/arch/mips/loongson64/env.c +++ b/arch/mips/loongson64/env.c
@@ -17,7 +17,9 @@ #include <linux/dma-map-ops.h> #include <linux/export.h> #include <linux/libfdt.h> +#include <linux/minmax.h> #include <linux/pci_ids.h> +#include <linux/serial_core.h> #include <linux/string_choices.h> #include <asm/bootinfo.h> #include <loongson.h> @@ -106,9 +108,23 @@ static void __init lefi_fixup_fdt(struct system_loongson *system) is_loongson64g = (read_c0_prid() & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64G; - for (i = 0; i < system->nr_uarts; i++) { + for (i = 0; i < min(system->nr_uarts, MAX_UARTS); i++) { uartdev = &system->uarts[i]; + /* + * Some firmware does not set nr_uarts properly and passes empty + * items. Ignore them silently. + */ + if (uartdev->uart_base == 0) + continue; + + /* Our DT only works with UPIO_MEM. */ + if (uartdev->iotype != UPIO_MEM) { + pr_warn("Ignore UART 0x%llx with iotype %u passed by firmware\n", + uartdev->uart_base, uartdev->iotype); + continue; + } + ret = lefi_fixup_fdt_serial(fdt_buf, uartdev->uart_base, uartdev->uartclk); /*
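The fixup above is defensive parsing of firmware data: clamp the count, skip empty slots, and reject I/O types the DT stub cannot describe. Condensed into stand-alone C (MAX_UARTS and UPIO_MEM are stand-in values here, not the kernel definitions):

    #include <stdio.h>

    #define MAX_UARTS 4
    #define UPIO_MEM  2   /* stand-in for the serial core constant */

    struct uart { unsigned long long base; unsigned int iotype; };

    int main(void)
    {
        struct uart uarts[MAX_UARTS] = {
            { 0x1fe001e0, UPIO_MEM },   /* good */
            { 0,          UPIO_MEM },   /* empty item: skip silently */
            { 0x1fe002e0, 0        },   /* wrong iotype: warn and skip */
        };
        unsigned int nr_uarts = 64;     /* bogus count from firmware */
        unsigned int n = nr_uarts < MAX_UARTS ? nr_uarts : MAX_UARTS;

        for (unsigned int i = 0; i < n; i++) {
            if (!uarts[i].base)
                continue;
            if (uarts[i].iotype != UPIO_MEM) {
                printf("Ignore UART 0x%llx with iotype %u\n",
                       uarts[i].base, uarts[i].iotype);
                continue;
            }
            printf("fix up UART at 0x%llx\n", uarts[i].base);
        }
        return 0;
    }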
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index e3b4224..ad9b043 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c
@@ -207,7 +207,8 @@ void cpu_cache_init(void) { if (IS_ENABLED(CONFIG_CPU_R3000) && cpu_has_3k_cache) r3k_cache_init(); - if (IS_ENABLED(CONFIG_CPU_R4K_CACHE_TLB) && cpu_has_4k_cache) + if ((IS_ENABLED(CONFIG_CPU_R4K_CACHE_TLB) || + IS_ENABLED(CONFIG_CPU_SB1)) && cpu_has_4k_cache) r4k_cache_init(); if (IS_ENABLED(CONFIG_CPU_CAVIUM_OCTEON) && cpu_has_octeon_cache)
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 44a6625..24fe85f 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c
@@ -13,6 +13,7 @@
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/memblock.h>
+#include <linux/minmax.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/export.h>
@@ -24,6 +25,7 @@
 #include <asm/hazards.h>
 #include <asm/mmu_context.h>
 #include <asm/tlb.h>
+#include <asm/tlbdebug.h>
 #include <asm/tlbex.h>
 #include <asm/tlbmisc.h>
 #include <asm/setup.h>
@@ -511,12 +513,229 @@ static int __init set_ntlb(char *str)
 __setup("ntlb=", set_ntlb);
 
-/* Comparison function for EntryHi VPN fields. */
-static int r4k_vpn_cmp(const void *a, const void *b)
+/* The start bit position of VPN2 and Mask in EntryHi/PageMask registers. */
+#define VPN2_SHIFT 13
+
+/* Read full EntryHi even with CONFIG_32BIT. */
+static inline unsigned long long read_c0_entryhi_native(void)
 {
-	long v = *(unsigned long *)a - *(unsigned long *)b;
-	int s = sizeof(long) > sizeof(int) ? sizeof(long) * 8 - 1: 0;
-	return s ? (v != 0) | v >> s : v;
+	return cpu_has_64bits ? read_c0_entryhi_64() : read_c0_entryhi();
+}
+
+/* Write full EntryHi even with CONFIG_32BIT. */
+static inline void write_c0_entryhi_native(unsigned long long v)
+{
+	if (cpu_has_64bits)
+		write_c0_entryhi_64(v);
+	else
+		write_c0_entryhi(v);
+}
+
+/* TLB entry state for uniquification. */
+struct tlbent {
+	unsigned long long wired:1;
+	unsigned long long global:1;
+	unsigned long long asid:10;
+	unsigned long long vpn:51;
+	unsigned long long pagesz:5;
+	unsigned long long index:14;
+};
+
+/*
+ * Comparison function for TLB entry sorting. Place wired entries first,
+ * then global entries, then order by increasing VPN/ASID and decreasing
+ * page size. This lets us avoid clashes with wired entries easily and
+ * get entries for larger pages out of the way first.
+ *
+ * We could group bits so as to reduce the number of comparisons, but this
+ * is seldom executed and not performance-critical, so prefer legibility.
+ */
+static int r4k_entry_cmp(const void *a, const void *b)
+{
+	struct tlbent ea = *(struct tlbent *)a, eb = *(struct tlbent *)b;
+
+	if (ea.wired > eb.wired)
+		return -1;
+	else if (ea.wired < eb.wired)
+		return 1;
+	else if (ea.global > eb.global)
+		return -1;
+	else if (ea.global < eb.global)
+		return 1;
+	else if (ea.vpn < eb.vpn)
+		return -1;
+	else if (ea.vpn > eb.vpn)
+		return 1;
+	else if (ea.asid < eb.asid)
+		return -1;
+	else if (ea.asid > eb.asid)
+		return 1;
+	else if (ea.pagesz > eb.pagesz)
+		return -1;
+	else if (ea.pagesz < eb.pagesz)
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * Fetch all the TLB entries, masking individual VPN values retrieved
+ * with the corresponding page mask and ignoring any 1KiB extension,
+ * as we'll be using 4KiB pages for uniquification.
+ */
+static void __ref r4k_tlb_uniquify_read(struct tlbent *tlb_vpns, int tlbsize)
+{
+	int start = num_wired_entries();
+	unsigned long long vpn_mask;
+	bool global;
+	int i;
+
+	vpn_mask = GENMASK(current_cpu_data.vmbits - 1, VPN2_SHIFT);
+	vpn_mask |= cpu_has_64bits ? 3ULL << 62 : 1 << 31;
+
+	for (i = 0; i < tlbsize; i++) {
+		unsigned long long entryhi, vpn, mask, asid;
+		unsigned int pagesz;
+
+		write_c0_index(i);
+		mtc0_tlbr_hazard();
+		tlb_read();
+		tlb_read_hazard();
+
+		global = !!(read_c0_entrylo0() & ENTRYLO_G);
+		entryhi = read_c0_entryhi_native();
+		mask = read_c0_pagemask();
+
+		asid = entryhi & cpu_asid_mask(&current_cpu_data);
+		vpn = (entryhi & vpn_mask & ~mask) >> VPN2_SHIFT;
+		pagesz = ilog2((mask >> VPN2_SHIFT) + 1);
+
+		tlb_vpns[i].global = global;
+		tlb_vpns[i].asid = global ? 0 : asid;
+		tlb_vpns[i].vpn = vpn;
+		tlb_vpns[i].pagesz = pagesz;
+		tlb_vpns[i].wired = i < start;
+		tlb_vpns[i].index = i;
+	}
+}
+
+/*
+ * Write unique values to all but the wired TLB entries, using the
+ * 4KiB page size. This size might not be supported with R6, but
+ * EHINV is mandatory for R6, so we won't ever be called in that case.
+ *
+ * A sorted table is supplied with any wired entries at the beginning,
+ * followed by any global entries, and then finally regular entries.
+ * We start at the VPN and ASID values of zero and only assign user
+ * addresses, therefore guaranteeing no clash with addresses produced
+ * by UNIQUE_ENTRYHI. We avoid any VPN values used by wired or global
+ * entries by increasing the VPN value beyond the span of such an entry.
+ *
+ * When a VPN/ASID clash is found with a regular entry, we increment the
+ * ASID instead until no VPN/ASID clash has been found or the ASID space
+ * has been exhausted, in which case we increase the VPN value beyond
+ * the span of the largest clashing entry.
+ *
+ * We do not need to be concerned about FTLB or MMID configurations, as
+ * those are required to implement the EHINV feature.
+ */
+static void __ref r4k_tlb_uniquify_write(struct tlbent *tlb_vpns, int tlbsize)
+{
+	unsigned long long asid, vpn, vpn_size, pagesz;
+	int widx, gidx, idx, sidx, lidx, i;
+
+	vpn_size = 1ULL << (current_cpu_data.vmbits - VPN2_SHIFT);
+	pagesz = ilog2((PM_4K >> VPN2_SHIFT) + 1);
+
+	write_c0_pagemask(PM_4K);
+	write_c0_entrylo0(0);
+	write_c0_entrylo1(0);
+
+	asid = 0;
+	vpn = 0;
+	widx = 0;
+	gidx = 0;
+	for (sidx = 0; sidx < tlbsize && tlb_vpns[sidx].wired; sidx++)
+		;
+	for (lidx = sidx; lidx < tlbsize && tlb_vpns[lidx].global; lidx++)
+		;
+	idx = gidx = sidx + 1;
+	for (i = sidx; i < tlbsize; i++) {
+		unsigned long long entryhi, vpn_pagesz = 0;
+
+		while (1) {
+			if (WARN_ON(vpn >= vpn_size)) {
+				dump_tlb_all();
+				/* Pray local_flush_tlb_all() will cope. */
+				return;
+			}
+
+			/* VPN must be below the next wired entry. */
+			if (widx < sidx && vpn >= tlb_vpns[widx].vpn) {
+				vpn = max(vpn,
+					  (tlb_vpns[widx].vpn +
+					   (1ULL << tlb_vpns[widx].pagesz)));
+				asid = 0;
+				widx++;
+				continue;
+			}
+			/* VPN must be below the next global entry. */
+			if (gidx < lidx && vpn >= tlb_vpns[gidx].vpn) {
+				vpn = max(vpn,
+					  (tlb_vpns[gidx].vpn +
+					   (1ULL << tlb_vpns[gidx].pagesz)));
+				asid = 0;
+				gidx++;
+				continue;
+			}
+			/* Try to find a free ASID so as to conserve VPNs. */
+			if (idx < tlbsize && vpn == tlb_vpns[idx].vpn &&
+			    asid == tlb_vpns[idx].asid) {
+				unsigned long long idx_pagesz;
+
+				idx_pagesz = tlb_vpns[idx].pagesz;
+				vpn_pagesz = max(vpn_pagesz, idx_pagesz);
+				do
+					idx++;
+				while (idx < tlbsize &&
+				       vpn == tlb_vpns[idx].vpn &&
+				       asid == tlb_vpns[idx].asid);
+				asid++;
+				if (asid > cpu_asid_mask(&current_cpu_data)) {
+					vpn += vpn_pagesz;
+					asid = 0;
+					vpn_pagesz = 0;
+				}
+				continue;
+			}
+			/* VPN mustn't be above the next regular entry. */
+			if (idx < tlbsize && vpn > tlb_vpns[idx].vpn) {
+				vpn = max(vpn,
+					  (tlb_vpns[idx].vpn +
+					   (1ULL << tlb_vpns[idx].pagesz)));
+				asid = 0;
+				idx++;
+				continue;
+			}
+			break;
+		}
+
+		entryhi = (vpn << VPN2_SHIFT) | asid;
+		write_c0_entryhi_native(entryhi);
+		write_c0_index(tlb_vpns[i].index);
+		mtc0_tlbw_hazard();
+		tlb_write_indexed();
+
+		tlb_vpns[i].asid = asid;
+		tlb_vpns[i].vpn = vpn;
+		tlb_vpns[i].pagesz = pagesz;
+
+		asid++;
+		if (asid > cpu_asid_mask(&current_cpu_data)) {
+			vpn += 1ULL << pagesz;
+			asid = 0;
+		}
+	}
 }
 
 /*
@@ -527,70 +746,25 @@ static void __ref r4k_tlb_uniquify(void)
 {
 	int tlbsize = current_cpu_data.tlbsize;
 	bool use_slab = slab_is_available();
-	int start = num_wired_entries();
 	phys_addr_t tlb_vpn_size;
-	unsigned long *tlb_vpns;
-	unsigned long vpn_mask;
-	int cnt, ent, idx, i;
-
-	vpn_mask = GENMASK(cpu_vmbits - 1, 13);
-	vpn_mask |= IS_ENABLED(CONFIG_64BIT) ? 3ULL << 62 : 1 << 31;
+	struct tlbent *tlb_vpns;
 
 	tlb_vpn_size = tlbsize * sizeof(*tlb_vpns);
 	tlb_vpns = (use_slab ?
-		    kmalloc(tlb_vpn_size, GFP_KERNEL) :
+		    kmalloc(tlb_vpn_size, GFP_ATOMIC) :
		    memblock_alloc_raw(tlb_vpn_size, sizeof(*tlb_vpns)));
 	if (WARN_ON(!tlb_vpns))
		return;	/* Pray local_flush_tlb_all() is good enough. */
 
 	htw_stop();
 
-	for (i = start, cnt = 0; i < tlbsize; i++, cnt++) {
-		unsigned long vpn;
+	r4k_tlb_uniquify_read(tlb_vpns, tlbsize);
 
-		write_c0_index(i);
-		mtc0_tlbr_hazard();
-		tlb_read();
-		tlb_read_hazard();
-		vpn = read_c0_entryhi();
-		vpn &= vpn_mask & PAGE_MASK;
-		tlb_vpns[cnt] = vpn;
+	sort(tlb_vpns, tlbsize, sizeof(*tlb_vpns), r4k_entry_cmp, NULL);
 
-		/* Prevent any large pages from overlapping regular ones. */
-		write_c0_pagemask(read_c0_pagemask() & PM_DEFAULT_MASK);
-		mtc0_tlbw_hazard();
-		tlb_write_indexed();
-		tlbw_use_hazard();
-	}
-
-	sort(tlb_vpns, cnt, sizeof(tlb_vpns[0]), r4k_vpn_cmp, NULL);
+	r4k_tlb_uniquify_write(tlb_vpns, tlbsize);
 
 	write_c0_pagemask(PM_DEFAULT_MASK);
-	write_c0_entrylo0(0);
-	write_c0_entrylo1(0);
-
-	idx = 0;
-	ent = tlbsize;
-	for (i = start; i < tlbsize; i++)
-		while (1) {
-			unsigned long entryhi, vpn;
-
-			entryhi = UNIQUE_ENTRYHI(ent);
-			vpn = entryhi & vpn_mask & PAGE_MASK;
-
-			if (idx >= cnt || vpn < tlb_vpns[idx]) {
-				write_c0_entryhi(entryhi);
-				write_c0_index(i);
-				mtc0_tlbw_hazard();
-				tlb_write_indexed();
-				ent++;
-				break;
-			} else if (vpn == tlb_vpns[idx]) {
-				ent++;
-			} else {
-				idx++;
-			}
-		}
 	tlbw_use_hazard();
 	htw_start();
@@ -640,7 +814,8 @@ static void r4k_tlb_configure(void)
 	temp_tlb_entry = current_cpu_data.tlbsize - 1;
 
 	/* From this point on the ARC firmware is dead.	*/
-	r4k_tlb_uniquify();
+	if (!cpu_has_tlbinv)
+		r4k_tlb_uniquify();
 	local_flush_tlb_all();	/* Did I tell you that ARC SUCKS? */
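The comparator's ordering can be exercised on its own with qsort(): wired entries sort first, then global ones, then ascending VPN/ASID with larger pages ahead of smaller at the same address. A stand-alone sketch using the same bit-field layout (bit-fields wider than int are a GCC/Clang extension):

    #include <stdio.h>
    #include <stdlib.h>

    struct tlbent {
        unsigned long long wired:1, global:1, asid:10, vpn:51, pagesz:5;
    };

    static int entry_cmp(const void *a, const void *b)
    {
        struct tlbent ea = *(const struct tlbent *)a;
        struct tlbent eb = *(const struct tlbent *)b;

        if (ea.wired  != eb.wired)  return ea.wired  ? -1 : 1;
        if (ea.global != eb.global) return ea.global ? -1 : 1;
        if (ea.vpn    != eb.vpn)    return ea.vpn  < eb.vpn  ? -1 : 1;
        if (ea.asid   != eb.asid)   return ea.asid < eb.asid ? -1 : 1;
        if (ea.pagesz != eb.pagesz) return ea.pagesz > eb.pagesz ? -1 : 1;
        return 0;
    }

    int main(void)
    {
        struct tlbent e[] = {
            { .vpn = 8, .pagesz = 1 },
            { .wired = 1, .vpn = 2 },
            { .global = 1, .vpn = 4 },
            { .vpn = 8, .pagesz = 4 },   /* larger page sorts before sz=1 */
        };

        qsort(e, 4, sizeof(e[0]), entry_cmp);
        for (int i = 0; i < 4; i++)
            printf("w=%u g=%u vpn=%llu sz=%u\n",
                   (unsigned)e[i].wired, (unsigned)e[i].global,
                   (unsigned long long)e[i].vpn, (unsigned)e[i].pagesz);
        return 0;
    }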
diff --git a/arch/mips/ralink/clk.c b/arch/mips/ralink/clk.c index 9db73fc..5c1eb46 100644 --- a/arch/mips/ralink/clk.c +++ b/arch/mips/ralink/clk.c
@@ -21,16 +21,16 @@ static const char *clk_cpu(int *idx) { switch (ralink_soc) { case RT2880_SOC: - *idx = 0; + *idx = 1; return "ralink,rt2880-sysc"; case RT3883_SOC: - *idx = 0; + *idx = 1; return "ralink,rt3883-sysc"; case RT305X_SOC_RT3050: - *idx = 0; + *idx = 1; return "ralink,rt3050-sysc"; case RT305X_SOC_RT3052: - *idx = 0; + *idx = 1; return "ralink,rt3052-sysc"; case RT305X_SOC_RT3350: *idx = 1;
diff --git a/arch/nios2/kernel/vmlinux.lds.S b/arch/nios2/kernel/vmlinux.lds.S index 37b9580..206f924 100644 --- a/arch/nios2/kernel/vmlinux.lds.S +++ b/arch/nios2/kernel/vmlinux.lds.S
@@ -57,6 +57,7 @@ STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS DISCARDS
diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S index 049bff4..9b29c32 100644 --- a/arch/openrisc/kernel/vmlinux.lds.S +++ b/arch/openrisc/kernel/vmlinux.lds.S
@@ -101,6 +101,7 @@ /* Throw in the debugging sections */ STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS /* Sections to be discarded -- must be last */
diff --git a/arch/parisc/boot/compressed/vmlinux.lds.S b/arch/parisc/boot/compressed/vmlinux.lds.S index ab7b439..87d24cc 100644 --- a/arch/parisc/boot/compressed/vmlinux.lds.S +++ b/arch/parisc/boot/compressed/vmlinux.lds.S
@@ -90,6 +90,7 @@ /* Sections to be discarded */ DISCARDS /DISCARD/ : { + *(.modinfo) #ifdef CONFIG_64BIT /* temporary hack until binutils is fixed to not emit these * for static binaries
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 2c139a4d..17afe7a 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h
@@ -85,7 +85,7 @@ extern void __update_cache(pte_t pte); printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e)) /* This is the size of the initially mapped kernel memory */ -#if defined(CONFIG_64BIT) +#if defined(CONFIG_64BIT) || defined(CONFIG_KALLSYMS) #define KERNEL_INITIAL_ORDER 26 /* 1<<26 = 64MB */ #else #define KERNEL_INITIAL_ORDER 25 /* 1<<25 = 32MB */
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 4c5240d..b189265 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c
@@ -953,7 +953,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes, #else "1: cmpb,<<,n %0,%2,1b\n" #endif - " fic,m %3(%4,%0)\n" + " fdc,m %3(%4,%0)\n" "2: sync\n" ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1") : "+r" (start), "+r" (error) @@ -968,7 +968,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes, #else "1: cmpb,<<,n %0,%2,1b\n" #endif - " fdc,m %3(%4,%0)\n" + " fic,m %3(%4,%0)\n" "2: sync\n" ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1") : "+r" (start), "+r" (error)
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index 96e0264..9188c8d 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S
@@ -56,6 +56,7 @@ .import __bss_start,data .import __bss_stop,data + .import __end,data load32 PA(__bss_start),%r3 load32 PA(__bss_stop),%r4 @@ -149,7 +150,11 @@ * everything ... it will get remapped correctly later */ ldo 0+_PAGE_KERNEL_RWX(%r0),%r3 /* Hardwired 0 phys addr start */ load32 (1<<(KERNEL_INITIAL_ORDER-PAGE_SHIFT)),%r11 /* PFN count */ - load32 PA(pg0),%r1 + load32 PA(_end),%r1 + SHRREG %r1,PAGE_SHIFT,%r1 /* %r1 is PFN count for _end symbol */ + cmpb,<<,n %r11,%r1,1f + copy %r1,%r11 /* %r1 PFN count smaller than %r11 */ +1: load32 PA(pg0),%r1 $pgt_fill_loop: STREGM %r3,ASM_PTE_ENTRY_SIZE(%r1)
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index ace483b..d3e17a7 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c
@@ -120,14 +120,6 @@ void __init setup_arch(char **cmdline_p) #endif printk(KERN_CONT ".\n"); - /* - * Check if initial kernel page mappings are sufficient. - * panic early if not, else we may access kernel functions - * and variables which can't be reached. - */ - if (__pa((unsigned long) &_end) >= KERNEL_INITIAL_SIZE) - panic("KERNEL_INITIAL_ORDER too small!"); - #ifdef CONFIG_64BIT if(parisc_narrow_firmware) { printk(KERN_INFO "Kernel is using PDC in 32-bit mode.\n"); @@ -279,6 +271,18 @@ void __init start_parisc(void) int ret, cpunum; struct pdc_coproc_cfg coproc_cfg; + /* + * Check if initial kernel page mapping is sufficient. + * Print warning if not, because we may access kernel functions and + * variables which can't be reached yet through the initial mappings. + * Note that the panic() and printk() functions are not functional + * yet, so we need to use direct iodc() firmware calls instead. + */ + const char warn1[] = "CRITICAL: Kernel may crash because " + "KERNEL_INITIAL_ORDER is too small.\n"; + if (__pa((unsigned long) &_end) >= KERNEL_INITIAL_SIZE) + pdc_iodc_print(warn1, sizeof(warn1) - 1); + /* check QEMU/SeaBIOS marker in PAGE0 */ running_on_qemu = (memcmp(&PAGE0->pad0, "SeaBIOS", 8) == 0);
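The relocated check is a single comparison of the image's physical end against the size covered by the boot-time page tables. A toy version with made-up addresses (KERNEL_INITIAL_SIZE derives from KERNEL_INITIAL_ORDER, as in the pgtable.h hunk above):

    #include <stdio.h>

    #define KERNEL_INITIAL_ORDER 26
    #define KERNEL_INITIAL_SIZE  (1ul << KERNEL_INITIAL_ORDER)  /* 64 MB */

    int main(void)
    {
        unsigned long pa_end = 70ul << 20;  /* pretend _end is at 70 MB physical */

        if (pa_end >= KERNEL_INITIAL_SIZE)  /* image outruns the initial mapping */
            fputs("CRITICAL: KERNEL_INITIAL_ORDER too small\n", stderr);
        return 0;
    }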
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index b445e47..0ca93d6 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -165,6 +165,7 @@ _end = . ; STABS_DEBUG + MODINFO ELF_DETAILS .note 0 : { *(.note) }
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ad7a2fe..10240cb 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig
@@ -573,8 +573,8 @@ depends on FUNCTION_TRACER && (PPC32 || PPC64_ELF_ABI_V2) depends on $(cc-option,-fpatchable-function-entry=2) def_bool y if PPC32 - def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mlittle-endian) if PPC64 && CPU_LITTLE_ENDIAN - def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mbig-endian) if PPC64 && CPU_BIG_ENDIAN + def_bool $(success,$(srctree)/arch/powerpc/tools/check-fpatchable-function-entry.sh $(CC) $(CLANG_FLAGS) -mlittle-endian) if PPC64 && CPU_LITTLE_ENDIAN + def_bool $(success,$(srctree)/arch/powerpc/tools/check-fpatchable-function-entry.sh $(CC) -mbig-endian) if PPC64 && CPU_BIG_ENDIAN config PPC_FTRACE_OUT_OF_LINE def_bool PPC64 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY
diff --git a/arch/powerpc/boot/dts/asp834x-redboot.dts b/arch/powerpc/boot/dts/asp834x-redboot.dts index 33ddb17..c541bd3 100644 --- a/arch/powerpc/boot/dts/asp834x-redboot.dts +++ b/arch/powerpc/boot/dts/asp834x-redboot.dts
@@ -37,7 +37,7 @@ PowerPC,8347@0 { }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x8000000>; // 128MB at 0 };
diff --git a/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi b/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi deleted file mode 100644 index 9cffccf..0000000 --- a/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi +++ /dev/null
@@ -1,156 +0,0 @@ -/* T4240 Interlaken LAC Portal device tree stub with 24 portals. - * - * Copyright 2012 Freescale Semiconductor Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Freescale Semiconductor nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * - * ALTERNATIVELY, this software may be distributed under the terms of the - * GNU General Public License ("GPL") as published by the Free Software - * Foundation, either version 2 of that License or (at your option) any - * later version. - * - * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#address-cells = <0x1>; -#size-cells = <0x1>; -compatible = "fsl,interlaken-lac-portals"; - -lportal0: lac-portal@0 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x0 0x1000>; -}; - -lportal1: lac-portal@1000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x1000 0x1000>; -}; - -lportal2: lac-portal@2000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x2000 0x1000>; -}; - -lportal3: lac-portal@3000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x3000 0x1000>; -}; - -lportal4: lac-portal@4000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x4000 0x1000>; -}; - -lportal5: lac-portal@5000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x5000 0x1000>; -}; - -lportal6: lac-portal@6000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x6000 0x1000>; -}; - -lportal7: lac-portal@7000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x7000 0x1000>; -}; - -lportal8: lac-portal@8000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x8000 0x1000>; -}; - -lportal9: lac-portal@9000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x9000 0x1000>; -}; - -lportal10: lac-portal@A000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0xA000 0x1000>; -}; - -lportal11: lac-portal@B000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0xB000 0x1000>; -}; - -lportal12: lac-portal@C000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0xC000 0x1000>; -}; - -lportal13: lac-portal@D000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0xD000 0x1000>; -}; - -lportal14: lac-portal@E000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0xE000 0x1000>; -}; - -lportal15: lac-portal@F000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0xF000 0x1000>; -}; - -lportal16: lac-portal@10000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x10000 0x1000>; -}; - -lportal17: lac-portal@11000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x11000 0x1000>; -}; - -lportal18: lac-portal@1200 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x12000 0x1000>; -}; - -lportal19: lac-portal@13000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x13000 0x1000>; -}; - -lportal20: lac-portal@14000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x14000 0x1000>; -}; - -lportal21: lac-portal@15000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x15000 0x1000>; -}; - -lportal22: lac-portal@16000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x16000 0x1000>; -}; - -lportal23: lac-portal@17000 { - compatible = "fsl,interlaken-lac-portal-v1.0"; - reg = <0x17000 0x1000>; -};
diff --git a/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi b/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi deleted file mode 100644 index e820872..0000000 --- a/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi +++ /dev/null
@@ -1,45 +0,0 @@ -/* - * T4 Interlaken Look-aside Controller (LAC) device tree stub - * - * Copyright 2012 Freescale Semiconductor Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Freescale Semiconductor nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * - * ALTERNATIVELY, this software may be distributed under the terms of the - * GNU General Public License ("GPL") as published by the Free Software - * Foundation, either version 2 of that License or (at your option) any - * later version. - * - * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -lac: lac@229000 { - compatible = "fsl,interlaken-lac"; - reg = <0x229000 0x1000>; - interrupts = <16 2 1 18>; -}; - -lac-hv@228000 { - compatible = "fsl,interlaken-lac-hv"; - reg = <0x228000 0x1000>; - fsl,non-hv-node = <&lac>; -};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-mpic-message-B.dtsi b/arch/powerpc/boot/dts/fsl/pq3-mpic-message-B.dtsi deleted file mode 100644 index 1cf0b77..0000000 --- a/arch/powerpc/boot/dts/fsl/pq3-mpic-message-B.dtsi +++ /dev/null
@@ -1,43 +0,0 @@ -/* - * PQ3 MPIC Message (Group B) device tree stub [ controller @ offset 0x42400 ] - * - * Copyright 2012 Freescale Semiconductor Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Freescale Semiconductor nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * - * ALTERNATIVELY, this software may be distributed under the terms of the - * GNU General Public License ("GPL") as published by the Free Software - * Foundation, either version 2 of that License or (at your option) any - * later version. - * - * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -message@42400 { - compatible = "fsl,mpic-v3.1-msgr"; - reg = <0x42400 0x200>; - interrupts = < - 0xb4 2 0 0 - 0xb5 2 0 0 - 0xb6 2 0 0 - 0xb7 2 0 0>; -};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi deleted file mode 100644 index 71eb75e..0000000 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi +++ /dev/null
@@ -1,80 +0,0 @@ -/* - * QorIQ FMan v3 1g port #1 device tree stub [ controller @ offset 0x400000 ] - * - * Copyright 2012 - 2015 Freescale Semiconductor Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Freescale Semiconductor nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * - * ALTERNATIVELY, this software may be distributed under the terms of the - * GNU General Public License ("GPL") as published by the Free Software - * Foundation, either version 2 of that License or (at your option) any - * later version. - * - * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -fman@400000 { - fman0_rx_0x09: port@89000 { - cell-index = <0x9>; - compatible = "fsl,fman-v3-port-rx"; - reg = <0x89000 0x1000>; - fsl,fman-10g-port; - fsl,fman-best-effort-port; - }; - - fman0_tx_0x29: port@a9000 { - cell-index = <0x29>; - compatible = "fsl,fman-v3-port-tx"; - reg = <0xa9000 0x1000>; - fsl,fman-10g-port; - fsl,fman-best-effort-port; - }; - - ethernet@e2000 { - cell-index = <1>; - compatible = "fsl,fman-memac"; - reg = <0xe2000 0x1000>; - fsl,fman-ports = <&fman0_rx_0x09 &fman0_tx_0x29>; - ptp-timer = <&ptp_timer0>; - pcsphy-handle = <&pcsphy1>, <&qsgmiia_pcs1>; - pcs-handle-names = "sgmii", "qsgmii"; - }; - - mdio@e1000 { - qsgmiia_pcs1: ethernet-pcs@1 { - compatible = "fsl,lynx-pcs"; - reg = <1>; - }; - }; - - mdio@e3000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; - reg = <0xe3000 0x1000>; - fsl,erratum-a011043; /* must ignore read errors */ - - pcsphy1: ethernet-phy@0 { - reg = <0x0>; - }; - }; -};
diff --git a/arch/powerpc/boot/dts/mpc8308_p1m.dts b/arch/powerpc/boot/dts/mpc8308_p1m.dts index 2638555..41f917f 100644 --- a/arch/powerpc/boot/dts/mpc8308_p1m.dts +++ b/arch/powerpc/boot/dts/mpc8308_p1m.dts
@@ -37,7 +37,7 @@ PowerPC,8308@0 { }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>; // 128MB at 0 };
diff --git a/arch/powerpc/boot/dts/mpc8308rdb.dts b/arch/powerpc/boot/dts/mpc8308rdb.dts index af2ed83..39ed26f 100644 --- a/arch/powerpc/boot/dts/mpc8308rdb.dts +++ b/arch/powerpc/boot/dts/mpc8308rdb.dts
@@ -38,7 +38,7 @@ PowerPC,8308@0 { }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>; // 128MB at 0 };
diff --git a/arch/powerpc/boot/dts/mpc8313erdb.dts b/arch/powerpc/boot/dts/mpc8313erdb.dts index 09508b4..c9fe4da 100644 --- a/arch/powerpc/boot/dts/mpc8313erdb.dts +++ b/arch/powerpc/boot/dts/mpc8313erdb.dts
@@ -6,6 +6,7 @@ */ /dts-v1/; +#include <dt-bindings/interrupt-controller/irq.h> / { model = "MPC8313ERDB"; @@ -38,7 +39,7 @@ PowerPC,8313@0 { }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>; // 128MB at 0 }; @@ -48,7 +49,7 @@ localbus@e0005000 { #size-cells = <1>; compatible = "fsl,mpc8313-elbc", "fsl,elbc", "simple-bus"; reg = <0xe0005000 0x1000>; - interrupts = <77 0x8>; + interrupts = <77 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; // CS0 and CS1 are swapped when @@ -118,7 +119,7 @@ i2c@3000 { cell-index = <0>; compatible = "fsl-i2c"; reg = <0x3000 0x100>; - interrupts = <14 0x8>; + interrupts = <14 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; dfsrr; rtc@68 { @@ -131,7 +132,7 @@ crypto@30000 { compatible = "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; reg = <0x30000 0x10000>; - interrupts = <11 0x8>; + interrupts = <11 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; fsl,num-channels = <1>; fsl,channel-fifo-len = <24>; @@ -146,7 +147,7 @@ i2c@3100 { cell-index = <1>; compatible = "fsl-i2c"; reg = <0x3100 0x100>; - interrupts = <15 0x8>; + interrupts = <15 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; dfsrr; }; @@ -155,7 +156,7 @@ spi@7000 { cell-index = <0>; compatible = "fsl,spi"; reg = <0x7000 0x1000>; - interrupts = <16 0x8>; + interrupts = <16 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; mode = "cpu"; }; @@ -167,7 +168,7 @@ usb@23000 { #address-cells = <1>; #size-cells = <0>; interrupt-parent = <&ipic>; - interrupts = <38 0x8>; + interrupts = <38 IRQ_TYPE_LEVEL_LOW>; phy_type = "utmi_wide"; sleep = <&pmc 0x00300000>; }; @@ -175,7 +176,8 @@ usb@23000 { ptp_clock@24E00 { compatible = "fsl,etsec-ptp"; reg = <0x24E00 0xB0>; - interrupts = <12 0x8 13 0x8>; + interrupts = <12 IRQ_TYPE_LEVEL_LOW>, + <13 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = < &ipic >; fsl,tclk-period = <10>; fsl,tmr-prsc = <100>; @@ -197,7 +199,9 @@ enet0: ethernet@24000 { compatible = "gianfar"; reg = <0x24000 0x1000>; local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <37 0x8 36 0x8 35 0x8>; + interrupts = <37 IRQ_TYPE_LEVEL_LOW>, + <36 IRQ_TYPE_LEVEL_LOW>, + <35 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; tbi-handle = < &tbi0 >; /* Vitesse 7385 isn't on the MDIO bus */ @@ -211,7 +215,7 @@ mdio@520 { reg = <0x520 0x20>; phy4: ethernet-phy@4 { interrupt-parent = <&ipic>; - interrupts = <20 0x8>; + interrupts = <20 IRQ_TYPE_LEVEL_LOW>; reg = <0x4>; }; tbi0: tbi-phy@11 { @@ -231,7 +235,9 @@ enet1: ethernet@25000 { reg = <0x25000 0x1000>; ranges = <0x0 0x25000 0x1000>; local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <34 0x8 33 0x8 32 0x8>; + interrupts = <34 IRQ_TYPE_LEVEL_LOW>, + <33 IRQ_TYPE_LEVEL_LOW>, + <32 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; tbi-handle = < &tbi1 >; phy-handle = < &phy4 >; @@ -259,7 +265,7 @@ serial0: serial@4500 { compatible = "fsl,ns16550", "ns16550"; reg = <0x4500 0x100>; clock-frequency = <0>; - interrupts = <9 0x8>; + interrupts = <9 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; }; @@ -269,15 +275,12 @@ serial1: serial@4600 { compatible = "fsl,ns16550", "ns16550"; reg = <0x4600 0x100>; clock-frequency = <0>; - interrupts = <10 0x8>; + interrupts = <10 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; }; /* IPIC - * interrupts cell = <intr #, sense> - * sense values match linux IORESOURCE_IRQ_* defines: - * sense == 8: Level, low assertion - * sense == 2: Edge, high-to-low change + * interrupts cell = <intr #, type> */ ipic: pic@700 { interrupt-controller; @@ -290,7 +293,7 @@ ipic: pic@700 { pmc: power@b00 { 
compatible = "fsl,mpc8313-pmc", "fsl,mpc8349-pmc"; reg = <0xb00 0x100 0xa00 0x100>; - interrupts = <80 8>; + interrupts = <80 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; fsl,mpc8313-wakeup-timer = <&gtm1>; @@ -306,14 +309,20 @@ pmc: power@b00 { gtm1: timer@500 { compatible = "fsl,mpc8313-gtm", "fsl,gtm"; reg = <0x500 0x100>; - interrupts = <90 8 78 8 84 8 72 8>; + interrupts = <90 IRQ_TYPE_LEVEL_LOW>, + <78 IRQ_TYPE_LEVEL_LOW>, + <84 IRQ_TYPE_LEVEL_LOW>, + <72 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; }; timer@600 { compatible = "fsl,mpc8313-gtm", "fsl,gtm"; reg = <0x600 0x100>; - interrupts = <91 8 79 8 85 8 73 8>; + interrupts = <91 IRQ_TYPE_LEVEL_LOW>, + <79 IRQ_TYPE_LEVEL_LOW>, + <85 IRQ_TYPE_LEVEL_LOW>, + <73 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; }; }; @@ -341,7 +350,7 @@ pci0: pci@e0008500 { 0x7800 0x0 0x0 0x3 &ipic 17 0x8 0x7800 0x0 0x0 0x4 &ipic 18 0x8>; interrupt-parent = <&ipic>; - interrupts = <66 0x8>; + interrupts = <66 IRQ_TYPE_LEVEL_LOW>; bus-range = <0x0 0x0>; ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000 0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000 @@ -363,14 +372,14 @@ dma@82a8 { reg = <0xe00082a8 4>; ranges = <0 0xe0008100 0x1a8>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; dma-channel@0 { compatible = "fsl,mpc8313-dma-channel", "fsl,elo-dma-channel"; reg = <0 0x28>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; cell-index = <0>; }; @@ -379,7 +388,7 @@ dma-channel@80 { "fsl,elo-dma-channel"; reg = <0x80 0x28>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; cell-index = <1>; }; @@ -388,7 +397,7 @@ dma-channel@100 { "fsl,elo-dma-channel"; reg = <0x100 0x28>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; cell-index = <2>; }; @@ -397,7 +406,7 @@ dma-channel@180 { "fsl,elo-dma-channel"; reg = <0x180 0x28>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; cell-index = <3>; }; };
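For context on the substitution running through these .dts hunks: per include/dt-bindings/interrupt-controller/irq.h, IRQ_TYPE_LEVEL_LOW expands to 8, so replacing the bare 0x8 sense cell is cosmetic at the dtb level. A minimal sketch of the convention, using a node from this file:

    #include <dt-bindings/interrupt-controller/irq.h>

    serial0: serial@4500 {
            compatible = "fsl,ns16550", "ns16550";
            reg = <0x4500 0x100>;
            /* was: interrupts = <9 0x8>;  8 == IRQ_TYPE_LEVEL_LOW */
            interrupts = <9 IRQ_TYPE_LEVEL_LOW>;
            interrupt-parent = <&ipic>;
    };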
diff --git a/arch/powerpc/boot/dts/mpc8315erdb.dts b/arch/powerpc/boot/dts/mpc8315erdb.dts index a8f68d6..7ba1159 100644 --- a/arch/powerpc/boot/dts/mpc8315erdb.dts +++ b/arch/powerpc/boot/dts/mpc8315erdb.dts
@@ -40,7 +40,7 @@ PowerPC,8315@0 { }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>; // 128MB at 0 }; @@ -50,7 +50,7 @@ localbus@e0005000 { #size-cells = <1>; compatible = "fsl,mpc8315-elbc", "fsl,elbc", "simple-bus"; reg = <0xe0005000 0x1000>; - interrupts = <77 0x8>; + interrupts = <77 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; // CS0 and CS1 are swapped when @@ -112,7 +112,7 @@ i2c@3000 { cell-index = <0>; compatible = "fsl-i2c"; reg = <0x3000 0x100>; - interrupts = <14 0x8>; + interrupts = <14 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; dfsrr; rtc@68 { @@ -133,8 +133,10 @@ spi@7000 { cell-index = <0>; compatible = "fsl,spi"; reg = <0x7000 0x1000>; - interrupts = <16 0x8>; + interrupts = <16 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; + #address-cells = <1>; + #size-cells = <0>; mode = "cpu"; }; @@ -145,35 +147,35 @@ dma@82a8 { reg = <0x82a8 4>; ranges = <0 0x8100 0x1a8>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; cell-index = <0>; dma-channel@0 { compatible = "fsl,mpc8315-dma-channel", "fsl,elo-dma-channel"; reg = <0 0x80>; cell-index = <0>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; }; dma-channel@80 { compatible = "fsl,mpc8315-dma-channel", "fsl,elo-dma-channel"; reg = <0x80 0x80>; cell-index = <1>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; }; dma-channel@100 { compatible = "fsl,mpc8315-dma-channel", "fsl,elo-dma-channel"; reg = <0x100 0x80>; cell-index = <2>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; }; dma-channel@180 { compatible = "fsl,mpc8315-dma-channel", "fsl,elo-dma-channel"; reg = <0x180 0x28>; cell-index = <3>; interrupt-parent = <&ipic>; - interrupts = <71 8>; + interrupts = <71 IRQ_TYPE_LEVEL_LOW>; }; }; @@ -183,7 +185,7 @@ usb@23000 { #address-cells = <1>; #size-cells = <0>; interrupt-parent = <&ipic>; - interrupts = <38 0x8>; + interrupts = <38 IRQ_TYPE_LEVEL_LOW>; phy_type = "utmi"; }; @@ -197,7 +199,9 @@ enet0: ethernet@24000 { reg = <0x24000 0x1000>; ranges = <0x0 0x24000 0x1000>; local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <32 0x8 33 0x8 34 0x8>; + interrupts = <32 IRQ_TYPE_LEVEL_LOW>, + <33 IRQ_TYPE_LEVEL_LOW>, + <34 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; tbi-handle = <&tbi0>; phy-handle = < &phy0 >; @@ -238,7 +242,9 @@ enet1: ethernet@25000 { reg = <0x25000 0x1000>; ranges = <0x0 0x25000 0x1000>; local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <35 0x8 36 0x8 37 0x8>; + interrupts = <35 IRQ_TYPE_LEVEL_LOW>, + <36 IRQ_TYPE_LEVEL_LOW>, + <37 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; tbi-handle = <&tbi1>; phy-handle = < &phy1 >; @@ -263,7 +269,7 @@ serial0: serial@4500 { compatible = "fsl,ns16550", "ns16550"; reg = <0x4500 0x100>; clock-frequency = <133333333>; - interrupts = <9 0x8>; + interrupts = <9 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; }; @@ -273,7 +279,7 @@ serial1: serial@4600 { compatible = "fsl,ns16550", "ns16550"; reg = <0x4600 0x100>; clock-frequency = <133333333>; - interrupts = <10 0x8>; + interrupts = <10 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; }; @@ -282,7 +288,7 @@ crypto@30000 { "fsl,sec2.4", "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0"; reg = <0x30000 0x10000>; - interrupts = <11 0x8>; + interrupts = <11 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; fsl,num-channels = <4>; fsl,channel-fifo-len = <24>; @@ -294,7 +300,7 @@ sata@18000 { compatible = 
"fsl,mpc8315-sata", "fsl,pq-sata"; reg = <0x18000 0x1000>; cell-index = <1>; - interrupts = <44 0x8>; + interrupts = <44 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; }; @@ -302,14 +308,17 @@ sata@19000 { compatible = "fsl,mpc8315-sata", "fsl,pq-sata"; reg = <0x19000 0x1000>; cell-index = <2>; - interrupts = <45 0x8>; + interrupts = <45 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; }; gtm1: timer@500 { compatible = "fsl,mpc8315-gtm", "fsl,gtm"; reg = <0x500 0x100>; - interrupts = <90 8 78 8 84 8 72 8>; + interrupts = <90 IRQ_TYPE_LEVEL_LOW>, + <78 IRQ_TYPE_LEVEL_LOW>, + <84 IRQ_TYPE_LEVEL_LOW>, + <72 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; clock-frequency = <133333333>; }; @@ -317,16 +326,16 @@ gtm1: timer@500 { timer@600 { compatible = "fsl,mpc8315-gtm", "fsl,gtm"; reg = <0x600 0x100>; - interrupts = <91 8 79 8 85 8 73 8>; + interrupts = <91 IRQ_TYPE_LEVEL_LOW>, + <79 IRQ_TYPE_LEVEL_LOW>, + <85 IRQ_TYPE_LEVEL_LOW>, + <73 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; clock-frequency = <133333333>; }; /* IPIC - * interrupts cell = <intr #, sense> - * sense values match linux IORESOURCE_IRQ_* defines: - * sense == 8: Level, low assertion - * sense == 2: Edge, high-to-low change + * interrupts cell = <intr #, type> */ ipic: interrupt-controller@700 { interrupt-controller; @@ -340,14 +349,14 @@ ipic-msi@7c0 { compatible = "fsl,ipic-msi"; reg = <0x7c0 0x40>; msi-available-ranges = <0 0x100>; - interrupts = <0x43 0x8 - 0x4 0x8 - 0x51 0x8 - 0x52 0x8 - 0x56 0x8 - 0x57 0x8 - 0x58 0x8 - 0x59 0x8>; + interrupts = <0x43 IRQ_TYPE_LEVEL_LOW + 0x4 IRQ_TYPE_LEVEL_LOW + 0x51 IRQ_TYPE_LEVEL_LOW + 0x52 IRQ_TYPE_LEVEL_LOW + 0x56 IRQ_TYPE_LEVEL_LOW + 0x57 IRQ_TYPE_LEVEL_LOW + 0x58 IRQ_TYPE_LEVEL_LOW + 0x59 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = < &ipic >; }; @@ -355,7 +364,7 @@ pmc: power@b00 { compatible = "fsl,mpc8315-pmc", "fsl,mpc8313-pmc", "fsl,mpc8349-pmc"; reg = <0xb00 0x100 0xa00 0x100>; - interrupts = <80 8>; + interrupts = <80 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; fsl,mpc8313-wakeup-timer = <>m1>; }; @@ -374,24 +383,24 @@ pci0: pci@e0008500 { interrupt-map-mask = <0xf800 0x0 0x0 0x7>; interrupt-map = < /* IDSEL 0x0E -mini PCI */ - 0x7000 0x0 0x0 0x1 &ipic 18 0x8 - 0x7000 0x0 0x0 0x2 &ipic 18 0x8 - 0x7000 0x0 0x0 0x3 &ipic 18 0x8 - 0x7000 0x0 0x0 0x4 &ipic 18 0x8 + 0x7000 0x0 0x0 0x1 &ipic 18 IRQ_TYPE_LEVEL_LOW + 0x7000 0x0 0x0 0x2 &ipic 18 IRQ_TYPE_LEVEL_LOW + 0x7000 0x0 0x0 0x3 &ipic 18 IRQ_TYPE_LEVEL_LOW + 0x7000 0x0 0x0 0x4 &ipic 18 IRQ_TYPE_LEVEL_LOW /* IDSEL 0x0F -mini PCI */ - 0x7800 0x0 0x0 0x1 &ipic 17 0x8 - 0x7800 0x0 0x0 0x2 &ipic 17 0x8 - 0x7800 0x0 0x0 0x3 &ipic 17 0x8 - 0x7800 0x0 0x0 0x4 &ipic 17 0x8 + 0x7800 0x0 0x0 0x1 &ipic 17 IRQ_TYPE_LEVEL_LOW + 0x7800 0x0 0x0 0x2 &ipic 17 IRQ_TYPE_LEVEL_LOW + 0x7800 0x0 0x0 0x3 &ipic 17 IRQ_TYPE_LEVEL_LOW + 0x7800 0x0 0x0 0x4 &ipic 17 IRQ_TYPE_LEVEL_LOW /* IDSEL 0x10 - PCI slot */ - 0x8000 0x0 0x0 0x1 &ipic 48 0x8 - 0x8000 0x0 0x0 0x2 &ipic 17 0x8 - 0x8000 0x0 0x0 0x3 &ipic 48 0x8 - 0x8000 0x0 0x0 0x4 &ipic 17 0x8>; + 0x8000 0x0 0x0 0x1 &ipic 48 IRQ_TYPE_LEVEL_LOW + 0x8000 0x0 0x0 0x2 &ipic 17 IRQ_TYPE_LEVEL_LOW + 0x8000 0x0 0x0 0x3 &ipic 48 IRQ_TYPE_LEVEL_LOW + 0x8000 0x0 0x0 0x4 &ipic 17 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&ipic>; - interrupts = <66 0x8>; + interrupts = <66 IRQ_TYPE_LEVEL_LOW>; bus-range = <0x0 0x0>; ranges = <0x02000000 0 0x90000000 0x90000000 0 0x10000000 0x42000000 0 0x80000000 0x80000000 0 0x10000000 @@ -417,10 +426,10 @@ pci1: pcie@e0009000 { 0x01000000 0 0x00000000 0xb1000000 0 
0x00800000>; bus-range = <0 255>; interrupt-map-mask = <0xf800 0 0 7>; - interrupt-map = <0 0 0 1 &ipic 1 8 - 0 0 0 2 &ipic 1 8 - 0 0 0 3 &ipic 1 8 - 0 0 0 4 &ipic 1 8>; + interrupt-map = <0 0 0 1 &ipic 1 IRQ_TYPE_LEVEL_LOW + 0 0 0 2 &ipic 1 IRQ_TYPE_LEVEL_LOW + 0 0 0 3 &ipic 1 IRQ_TYPE_LEVEL_LOW + 0 0 0 4 &ipic 1 IRQ_TYPE_LEVEL_LOW>; clock-frequency = <0>; pcie@0 { @@ -448,10 +457,10 @@ pci2: pcie@e000a000 { 0x01000000 0 0x00000000 0xd1000000 0 0x00800000>; bus-range = <0 255>; interrupt-map-mask = <0xf800 0 0 7>; - interrupt-map = <0 0 0 1 &ipic 2 8 - 0 0 0 2 &ipic 2 8 - 0 0 0 3 &ipic 2 8 - 0 0 0 4 &ipic 2 8>; + interrupt-map = <0 0 0 1 &ipic 2 IRQ_TYPE_LEVEL_LOW + 0 0 0 2 &ipic 2 IRQ_TYPE_LEVEL_LOW + 0 0 0 3 &ipic 2 IRQ_TYPE_LEVEL_LOW + 0 0 0 4 &ipic 2 IRQ_TYPE_LEVEL_LOW>; clock-frequency = <0>; pcie@0 { @@ -471,12 +480,12 @@ pcie@0 { leds { compatible = "gpio-leds"; - pwr { + led-pwr { gpios = <&mcu_pio 0 0>; default-state = "on"; }; - hdd { + led-hdd { gpios = <&mcu_pio 1 0>; linux,default-trigger = "disk-activity"; };
diff --git a/arch/powerpc/boot/dts/mpc832x_rdb.dts b/arch/powerpc/boot/dts/mpc832x_rdb.dts index ba7caaf..06f1344 100644 --- a/arch/powerpc/boot/dts/mpc832x_rdb.dts +++ b/arch/powerpc/boot/dts/mpc832x_rdb.dts
@@ -38,7 +38,7 @@ PowerPC,8323@0 { }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; };
diff --git a/arch/powerpc/boot/dts/mpc8349emitx.dts b/arch/powerpc/boot/dts/mpc8349emitx.dts index 13f1723..12d33cb 100644 --- a/arch/powerpc/boot/dts/mpc8349emitx.dts +++ b/arch/powerpc/boot/dts/mpc8349emitx.dts
@@ -39,7 +39,7 @@ PowerPC,8349@0 { }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x10000000>; };
diff --git a/arch/powerpc/boot/dts/mpc8349emitxgp.dts b/arch/powerpc/boot/dts/mpc8349emitxgp.dts index eae0afd..2998a23 100644 --- a/arch/powerpc/boot/dts/mpc8349emitxgp.dts +++ b/arch/powerpc/boot/dts/mpc8349emitxgp.dts
@@ -37,7 +37,7 @@ PowerPC,8349@0 { }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x10000000>; };
diff --git a/arch/powerpc/boot/dts/mpc8377_rdb.dts b/arch/powerpc/boot/dts/mpc8377_rdb.dts index f137ccb..fb311a7 100644 --- a/arch/powerpc/boot/dts/mpc8377_rdb.dts +++ b/arch/powerpc/boot/dts/mpc8377_rdb.dts
@@ -39,7 +39,7 @@ PowerPC,8377@0 { }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x10000000>; // 256MB at 0 };
diff --git a/arch/powerpc/boot/dts/mpc8377_wlan.dts b/arch/powerpc/boot/dts/mpc8377_wlan.dts index ce254dd..f736a15 100644 --- a/arch/powerpc/boot/dts/mpc8377_wlan.dts +++ b/arch/powerpc/boot/dts/mpc8377_wlan.dts
@@ -40,7 +40,7 @@ PowerPC,8377@0 { }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x20000000>; // 512MB at 0 };
diff --git a/arch/powerpc/boot/dts/mpc8378_rdb.dts b/arch/powerpc/boot/dts/mpc8378_rdb.dts index 19e5473..32c4962 100644 --- a/arch/powerpc/boot/dts/mpc8378_rdb.dts +++ b/arch/powerpc/boot/dts/mpc8378_rdb.dts
@@ -39,7 +39,7 @@ PowerPC,8378@0 { }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x10000000>; // 256MB at 0 };
diff --git a/arch/powerpc/boot/dts/mpc8379_rdb.dts b/arch/powerpc/boot/dts/mpc8379_rdb.dts index 61519ac..07deb89 100644 --- a/arch/powerpc/boot/dts/mpc8379_rdb.dts +++ b/arch/powerpc/boot/dts/mpc8379_rdb.dts
@@ -37,7 +37,7 @@ PowerPC,8379@0 { }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x10000000>; // 256MB at 0 };
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 2d71e4b..496ecc6 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -120,10 +120,8 @@ #if defined(CONFIG_44x) #include <asm/nohash/32/pte-44x.h> -#elif defined(CONFIG_PPC_85xx) && defined(CONFIG_PTE_64BIT) -#include <asm/nohash/pte-e500.h> #elif defined(CONFIG_PPC_85xx) -#include <asm/nohash/32/pte-85xx.h> +#include <asm/nohash/pte-e500.h> #elif defined(CONFIG_PPC_8xx) #include <asm/nohash/32/pte-8xx.h> #endif
diff --git a/arch/powerpc/include/asm/nohash/32/pte-85xx.h b/arch/powerpc/include/asm/nohash/32/pte-85xx.h deleted file mode 100644 index 14d64b4..0000000 --- a/arch/powerpc/include/asm/nohash/32/pte-85xx.h +++ /dev/null
@@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_32_PTE_85xx_H -#define _ASM_POWERPC_NOHASH_32_PTE_85xx_H -#ifdef __KERNEL__ - -/* PTE bit definitions for Freescale BookE SW loaded TLB MMU based - * processors - * - MMU Assist Register 3: - - 32 33 34 35 36 ... 50 51 52 53 54 55 56 57 58 59 60 61 62 63 - RPN...................... 0 0 U0 U1 U2 U3 UX SX UW SW UR SR - - - PRESENT *must* be in the bottom two bits because swap PTEs use - the top 30 bits. - -*/ - -/* Definitions for FSL Book-E Cores */ -#define _PAGE_READ 0x00001 /* H: Read permission (SR) */ -#define _PAGE_PRESENT 0x00002 /* S: PTE contains a translation */ -#define _PAGE_WRITE 0x00004 /* S: Write permission (SW) */ -#define _PAGE_DIRTY 0x00008 /* S: Page dirty */ -#define _PAGE_EXEC 0x00010 /* H: SX permission */ -#define _PAGE_ACCESSED 0x00020 /* S: Page referenced */ - -#define _PAGE_ENDIAN 0x00040 /* H: E bit */ -#define _PAGE_GUARDED 0x00080 /* H: G bit */ -#define _PAGE_COHERENT 0x00100 /* H: M bit */ -#define _PAGE_NO_CACHE 0x00200 /* H: I bit */ -#define _PAGE_WRITETHRU 0x00400 /* H: W bit */ -#define _PAGE_SPECIAL 0x00800 /* S: Special page */ - -#define _PMD_PRESENT 0 -#define _PMD_PRESENT_MASK (PAGE_MASK) -#define _PMD_BAD (~PAGE_MASK) -#define _PMD_USER 0 - -#define _PTE_NONE_MASK 0 - -#define PTE_WIMGE_SHIFT (6) - -/* - * We define 2 sets of base prot bits, one for basic pages (ie, - * cacheable kernel and user pages) and one for non cacheable - * pages. We always set _PAGE_COHERENT when SMP is enabled or - * the processor might need it for DMA coherency. - */ -#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED) -#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC) -#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT) -#else -#define _PAGE_BASE (_PAGE_BASE_NC) -#endif - -#include <asm/pgtable-masks.h> - -#endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_NOHASH_32_PTE_FSL_85xx_H */
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index f3086e3..e8dec88 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -49,7 +49,7 @@ static inline unsigned long pud_val(pud_t x) #endif /* CONFIG_PPC64 */ /* PGD level */ -#if defined(CONFIG_PPC_85xx) && defined(CONFIG_PTE_64BIT) +#if defined(CONFIG_PPC_85xx) typedef struct { unsigned long long pgd; } pgd_t; static inline unsigned long long pgd_val(pgd_t x)
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index ba1d878c..17e6324 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h
@@ -15,6 +15,9 @@ #define TASK_SIZE_MAX TASK_SIZE_USER64 #endif +/* Threshold above which VMX copy path is used */ +#define VMX_COPY_THRESHOLD 3328 + #include <asm-generic/access_ok.h> /* @@ -255,7 +258,7 @@ __gus_failed: \ ".section .fixup,\"ax\"\n" \ "4: li %0,%3\n" \ " li %1,0\n" \ - " li %1+1,0\n" \ + " li %L1,0\n" \ " b 3b\n" \ ".previous\n" \ EX_TABLE(1b, 4b) \ @@ -326,40 +329,62 @@ do { \ extern unsigned long __copy_tofrom_user(void __user *to, const void __user *from, unsigned long size); -#ifdef __powerpc64__ +unsigned long __copy_tofrom_user_base(void __user *to, + const void __user *from, unsigned long size); + +unsigned long __copy_tofrom_user_power7_vmx(void __user *to, + const void __user *from, unsigned long size); + +static __always_inline bool will_use_vmx(unsigned long n) +{ + return IS_ENABLED(CONFIG_ALTIVEC) && cpu_has_feature(CPU_FTR_VMX_COPY) && + n > VMX_COPY_THRESHOLD; +} + +static __always_inline unsigned long +raw_copy_tofrom_user(void __user *to, const void __user *from, + unsigned long n, unsigned long dir) +{ + unsigned long ret; + + if (will_use_vmx(n) && enter_vmx_usercopy()) { + allow_user_access(to, dir); + ret = __copy_tofrom_user_power7_vmx(to, from, n); + prevent_user_access(dir); + exit_vmx_usercopy(); + + if (unlikely(ret)) { + allow_user_access(to, dir); + ret = __copy_tofrom_user_base(to, from, n); + prevent_user_access(dir); + } + return ret; + } + + allow_user_access(to, dir); + ret = __copy_tofrom_user(to, from, n); + prevent_user_access(dir); + return ret; +} + +#ifdef CONFIG_PPC64 static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { - unsigned long ret; - barrier_nospec(); - allow_user_access(to, KUAP_READ_WRITE); - ret = __copy_tofrom_user(to, from, n); - prevent_user_access(KUAP_READ_WRITE); - return ret; + return raw_copy_tofrom_user(to, from, n, KUAP_READ_WRITE); } -#endif /* __powerpc64__ */ +#endif /* CONFIG_PPC64 */ -static inline unsigned long raw_copy_from_user(void *to, - const void __user *from, unsigned long n) +static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - unsigned long ret; - - allow_user_access(NULL, KUAP_READ); - ret = __copy_tofrom_user((__force void __user *)to, from, n); - prevent_user_access(KUAP_READ); - return ret; + return raw_copy_tofrom_user((__force void __user *)to, from, n, KUAP_READ); } static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { - unsigned long ret; - - allow_user_access(to, KUAP_WRITE); - ret = __copy_tofrom_user(to, (__force const void __user *)from, n); - prevent_user_access(KUAP_WRITE); - return ret; + return raw_copy_tofrom_user(to, (__force const void __user *)from, n, KUAP_WRITE); } unsigned long __arch_clear_user(void __user *addr, unsigned long size);
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 73e10bd..8b4de50 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c
@@ -67,7 +67,7 @@ bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, } bool arch_dma_alloc_direct(struct device *dev) { - if (dev->dma_ops_bypass) + if (dev->dma_ops_bypass && dev->bus_dma_limit) return true; return false; @@ -75,7 +75,7 @@ bool arch_dma_alloc_direct(struct device *dev) bool arch_dma_free_direct(struct device *dev, dma_addr_t dma_handle) { - if (!dev->dma_ops_bypass) + if (!dev->dma_ops_bypass || !dev->bus_dma_limit) return false; return is_direct_handle(dev, dma_handle);
diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index f9a73fa..8867596 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S
@@ -305,7 +305,6 @@ * r12 is pointer to the pte * r10 is the pshift from the PGD, if we're a hugepage */ -#ifdef CONFIG_PTE_64BIT #ifdef CONFIG_HUGETLB_PAGE #define FIND_PTE \ rlwinm r12, r13, 14, 18, 28; /* Compute pgdir/pmd offset */ \ @@ -329,15 +328,6 @@ rlwimi r12, r13, 23, 20, 28; /* Compute pte address */ \ lwz r11, 4(r12); /* Get pte entry */ #endif /* HUGEPAGE */ -#else /* !PTE_64BIT */ -#define FIND_PTE \ - rlwimi r11, r13, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ - lwz r11, 0(r11); /* Get L1 entry */ \ - rlwinm. r12, r11, 0, 0, 19; /* Extract L2 (pte) base address */ \ - beq 2f; /* Bail if no table */ \ - rlwimi r12, r13, 22, 20, 29; /* Compute PTE address */ \ - lwz r11, 0(r12); /* Get Linux PTE */ -#endif /* * Interrupt vector entry code @@ -473,22 +463,16 @@ 4: FIND_PTE -#ifdef CONFIG_PTE_64BIT li r13,_PAGE_PRESENT|_PAGE_BAP_SR oris r13,r13,_PAGE_ACCESSED@h -#else - li r13,_PAGE_PRESENT|_PAGE_READ|_PAGE_ACCESSED -#endif andc. r13,r13,r11 /* Check permission */ -#ifdef CONFIG_PTE_64BIT #ifdef CONFIG_SMP subf r13,r11,r12 /* create false data dep */ lwzx r13,r11,r13 /* Get upper pte bits */ #else lwz r13,0(r12) /* Get upper pte bits */ #endif -#endif bne 2f /* Bail if permission/valid mismatch */ @@ -552,12 +536,8 @@ FIND_PTE /* Make up the required permissions for kernel code */ -#ifdef CONFIG_PTE_64BIT li r13,_PAGE_PRESENT | _PAGE_BAP_SX oris r13,r13,_PAGE_ACCESSED@h -#else - li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#endif b 4f /* Get the PGD for the current thread */ @@ -573,24 +553,18 @@ FIND_PTE /* Make up the required permissions for user code */ -#ifdef CONFIG_PTE_64BIT li r13,_PAGE_PRESENT | _PAGE_BAP_UX oris r13,r13,_PAGE_ACCESSED@h -#else - li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#endif 4: andc. r13,r13,r11 /* Check permission */ -#ifdef CONFIG_PTE_64BIT #ifdef CONFIG_SMP subf r13,r11,r12 /* create false data dep */ lwzx r13,r11,r13 /* Get upper pte bits */ #else lwz r13,0(r12) /* Get upper pte bits */ #endif -#endif bne 2f /* Bail if permission mismatch */ @@ -683,7 +657,7 @@ * r10 - tsize encoding (if HUGETLB_PAGE) or available to use * r11 - TLB (info from Linux PTE) * r12 - available to use - * r13 - upper bits of PTE (if PTE_64BIT) or available to use + * r13 - upper bits of PTE * CR5 - results of addr >= PAGE_OFFSET * MAS0, MAS1 - loaded with proper value when we get here * MAS2, MAS3 - will need additional info from Linux PTE @@ -751,7 +725,6 @@ * here we (properly should) assume have the appropriate value. */ finish_tlb_load_cont: -#ifdef CONFIG_PTE_64BIT rlwinm r12, r11, 32-2, 26, 31 /* Move in perm bits */ andi. r10, r11, _PAGE_DIRTY bne 1f @@ -764,26 +737,9 @@ srwi r10, r13, 12 /* grab RPN[12:31] */ mtspr SPRN_MAS7, r10 END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) -#else - li r10, (_PAGE_EXEC | _PAGE_READ) - mr r13, r11 - rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */ - and r12, r11, r10 - mcrf cr0, cr5 /* Test for user page */ - slwi r10, r12, 1 - or r10, r10, r12 - rlwinm r10, r10, 0, ~_PAGE_EXEC /* Clear SX on user pages */ - isellt r12, r10, r12 - rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */ - mtspr SPRN_MAS3, r13 -#endif mfspr r12, SPRN_MAS2 -#ifdef CONFIG_PTE_64BIT rlwimi r12, r11, 32-19, 27, 31 /* extract WIMGE from pte */ -#else - rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */ -#endif #ifdef CONFIG_HUGETLB_PAGE beq 6, 3f /* don't mask if page isn't huge */ li r13, 1
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0ce7131..d122e84 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c
@@ -1159,7 +1159,7 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, struct device *dev, struct iommu_domain *old) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iommu_domain *domain = iommu_driver_get_domain_for_dev(dev); struct iommu_table_group *table_group; struct iommu_group *grp;
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 756043d..fb9fbf0 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -212,6 +212,13 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->error_state = pci_channel_io_normal; dev->dma_mask = 0xffffffff; + /* + * Assume 64-bit addresses for MSI initially. Will be changed to 32-bit + * if MSI (rather than MSI-X) capability does not have + * PCI_MSI_FLAGS_64BIT. Can also be overridden by driver. + */ + dev->msi_addr_mask = DMA_BIT_MASK(64); + /* Early fixups, before probing the BARs */ pci_fixup_device(pci_fixup_early, dev);
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 827c958..f26e80c 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c
@@ -2893,7 +2893,8 @@ static void __init fixup_device_tree_pmac(void) for (node = 0; prom_next_node(&node); ) { type[0] = '\0'; prom_getprop(node, "device_type", type, sizeof(type)); - if (prom_strcmp(type, "escc") && prom_strcmp(type, "i2s")) + if (prom_strcmp(type, "escc") && prom_strcmp(type, "i2s") && + prom_strcmp(type, "media-bay")) continue; if (prom_getproplen(node, "#size-cells") != PROM_ERROR)
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index cb5b73a..b176190 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c
@@ -35,7 +35,6 @@ #include <linux/of_irq.h> #include <linux/hugetlb.h> #include <linux/pgtable.h> -#include <asm/kexec.h> #include <asm/io.h> #include <asm/paca.h> #include <asm/processor.h> @@ -995,15 +994,6 @@ void __init setup_arch(char **cmdline_p) initmem_init(); - /* - * Reserve large chunks of memory for use by CMA for kdump, fadump, KVM and - * hugetlb. These must be called after initmem_init(), so that - * pageblock_order is initialised. - */ - fadump_cma_init(); - kdump_cma_reserve(); - kvm_cma_reserve(); - early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); if (ppc_md.setup_arch)
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 841d077..1b2f293 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -37,11 +37,29 @@ unsigned long ftrace_call_adjust(unsigned long addr) if (addr >= (unsigned long)__exittext_begin && addr < (unsigned long)__exittext_end) return 0; - if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY) && - !IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { - addr += MCOUNT_INSN_SIZE; - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) { + if (!IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { addr += MCOUNT_INSN_SIZE; + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + addr += MCOUNT_INSN_SIZE; + } else if (IS_ENABLED(CONFIG_CC_IS_CLANG) && IS_ENABLED(CONFIG_PPC64)) { + /* + * addr points to global entry point though the NOP was emitted at local + * entry point due to https://github.com/llvm/llvm-project/issues/163706 + * Handle that here with ppc_function_entry() for kernel symbols while + * adjusting module addresses in the else case, by looking for the below + * module global entry point sequence: + * ld r2, -8(r12) + * add r2, r2, r12 + */ + if (is_kernel_text(addr) || is_kernel_inittext(addr)) + addr = ppc_function_entry((void *)addr); + else if ((ppc_inst_val(ppc_inst_read((u32 *)addr)) == + PPC_RAW_LD(_R2, _R12, -8)) && + (ppc_inst_val(ppc_inst_read((u32 *)(addr+4))) == + PPC_RAW_ADD(_R2, _R2, _R12))) + addr += 8; + } } return addr;
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 1585029..8fc11d6 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -397,6 +397,7 @@ _end = . ; DWARF_DEBUG + MODINFO ELF_DETAILS DISCARDS
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 104c055..dc44f11 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c
@@ -23,6 +23,7 @@ #include <asm/firmware.h> #define cpu_to_be_ulong __PASTE(cpu_to_be, BITS_PER_LONG) +#define __be_word __PASTE(__be, BITS_PER_LONG) #ifdef CONFIG_CRASH_DUMP void machine_crash_shutdown(struct pt_regs *regs) @@ -146,25 +147,25 @@ int __init overlaps_crashkernel(unsigned long start, unsigned long size) } /* Values we need to export to the second kernel via the device tree. */ -static phys_addr_t crashk_base; -static phys_addr_t crashk_size; -static unsigned long long mem_limit; +static __be_word crashk_base; +static __be_word crashk_size; +static __be_word mem_limit; static struct property crashk_base_prop = { .name = "linux,crashkernel-base", - .length = sizeof(phys_addr_t), + .length = sizeof(__be_word), .value = &crashk_base }; static struct property crashk_size_prop = { .name = "linux,crashkernel-size", - .length = sizeof(phys_addr_t), + .length = sizeof(__be_word), .value = &crashk_size, }; static struct property memory_limit_prop = { .name = "linux,memory-limit", - .length = sizeof(unsigned long long), + .length = sizeof(__be_word), .value = &mem_limit, }; @@ -193,11 +194,11 @@ static void __init export_crashk_values(struct device_node *node) } #endif /* CONFIG_CRASH_RESERVE */ -static phys_addr_t kernel_end; +static __be_word kernel_end; static struct property kernel_end_prop = { .name = "linux,kernel-end", - .length = sizeof(phys_addr_t), + .length = sizeof(__be_word), .value = &kernel_end, };
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index e7ef8b2..5f6d50e 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c
@@ -450,6 +450,11 @@ static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf) kbuf->buffer = headers; kbuf->mem = KEXEC_BUF_MEM_UNKNOWN; kbuf->bufsz = headers_sz; + + /* + * Account for extra space required to accommodate additional memory + * ranges in elfcorehdr due to memory hotplug events. + */ kbuf->memsz = headers_sz + kdump_extra_elfcorehdr_size(cmem); kbuf->top_down = false; @@ -460,7 +465,14 @@ static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf) } image->elf_load_addr = kbuf->mem; - image->elf_headers_sz = headers_sz; + + /* + * If CONFIG_CRASH_HOTPLUG is enabled, the elfcorehdr kexec segment + * memsz can be larger than bufsz. Always initialize elf_headers_sz + * with memsz. This ensures the correct size is reserved for elfcorehdr + * memory in the FDT prepared for kdump. + */ + image->elf_headers_sz = kbuf->memsz; image->elf_headers = headers; out: kfree(cmem);
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index c9a2d50..9a0d1c1 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig
@@ -38,7 +38,6 @@ config KVM_BOOK3S_PR_POSSIBLE bool select KVM_MMIO - select KVM_GENERIC_MMU_NOTIFIER config KVM_BOOK3S_HV_POSSIBLE bool @@ -81,7 +80,6 @@ tristate "KVM for POWER7 and later using hypervisor mode in host" depends on KVM_BOOK3S_64 && PPC_POWERNV select KVM_BOOK3S_HV_POSSIBLE - select KVM_GENERIC_MMU_NOTIFIER select KVM_BOOK3S_HV_PMU select CMA help @@ -203,7 +201,6 @@ depends on !CONTEXT_TRACKING_USER select KVM select KVM_MMIO - select KVM_GENERIC_MMU_NOTIFIER help Support running unmodified E500 guest kernels in virtual machines on E500v2 host processors. @@ -220,7 +217,6 @@ select KVM select KVM_MMIO select KVM_BOOKE_HV - select KVM_GENERIC_MMU_NOTIFIER help Support running unmodified E500MC/E5500/E6500 guest kernels in virtual machines on E500MC/E5500/E6500 host processors.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index d79c5d1..2efbe05 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c
@@ -38,7 +38,7 @@ /* #define EXIT_DEBUG */ -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_ICOUNTER(VM, num_2M_pages), STATS_DESC_ICOUNTER(VM, num_1G_pages) @@ -53,7 +53,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, sum_exits), STATS_DESC_COUNTER(VCPU, mmio_exits),
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3401b96b..f3ddb24 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c
@@ -36,7 +36,7 @@ unsigned long kvmppc_booke_handlers; -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_ICOUNTER(VM, num_2M_pages), STATS_DESC_ICOUNTER(VM, num_1G_pages) @@ -51,7 +51,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, sum_exits), STATS_DESC_COUNTER(VCPU, mmio_exits),
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index f9acf86..e4469ad 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h
@@ -39,15 +39,11 @@ enum vcpu_ftr { /* bits [6-5] MAS2_X1 and MAS2_X0 and [4-0] bits for WIMGE */ #define E500_TLB_MAS2_ATTR (0x7f) -struct tlbe_ref { +struct tlbe_priv { kvm_pfn_t pfn; /* valid only for TLB0, except briefly */ unsigned int flags; /* E500_TLB_* */ }; -struct tlbe_priv { - struct tlbe_ref ref; -}; - #ifdef CONFIG_KVM_E500V2 struct vcpu_id_table; #endif
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 48580c8..75ed149 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c
@@ -920,12 +920,12 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) vcpu_e500->gtlb_offset[0] = 0; vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE; - vcpu_e500->gtlb_priv[0] = kzalloc_objs(struct tlbe_ref, + vcpu_e500->gtlb_priv[0] = kzalloc_objs(struct tlbe_priv, vcpu_e500->gtlb_params[0].entries); if (!vcpu_e500->gtlb_priv[0]) goto free_vcpu; - vcpu_e500->gtlb_priv[1] = kzalloc_objs(struct tlbe_ref, + vcpu_e500->gtlb_priv[1] = kzalloc_objs(struct tlbe_priv, vcpu_e500->gtlb_params[1].entries); if (!vcpu_e500->gtlb_priv[1]) goto free_vcpu;
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 06caf8b..37e0d3d 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -189,16 +189,16 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, { struct kvm_book3e_206_tlb_entry *gtlbe = get_entry(vcpu_e500, tlbsel, esel); - struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref; + struct tlbe_priv *tlbe = &vcpu_e500->gtlb_priv[tlbsel][esel]; /* Don't bother with unmapped entries */ - if (!(ref->flags & E500_TLB_VALID)) { - WARN(ref->flags & (E500_TLB_BITMAP | E500_TLB_TLB0), - "%s: flags %x\n", __func__, ref->flags); + if (!(tlbe->flags & E500_TLB_VALID)) { + WARN(tlbe->flags & (E500_TLB_BITMAP | E500_TLB_TLB0), + "%s: flags %x\n", __func__, tlbe->flags); WARN_ON(tlbsel == 1 && vcpu_e500->g2h_tlb1_map[esel]); } - if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) { + if (tlbsel == 1 && tlbe->flags & E500_TLB_BITMAP) { u64 tmp = vcpu_e500->g2h_tlb1_map[esel]; int hw_tlb_indx; unsigned long flags; @@ -216,28 +216,28 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, } mb(); vcpu_e500->g2h_tlb1_map[esel] = 0; - ref->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID); + tlbe->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID); local_irq_restore(flags); } - if (tlbsel == 1 && ref->flags & E500_TLB_TLB0) { + if (tlbsel == 1 && tlbe->flags & E500_TLB_TLB0) { /* * TLB1 entry is backed by 4k pages. This should happen * rarely and is not worth optimizing. Invalidate everything. */ kvmppc_e500_tlbil_all(vcpu_e500); - ref->flags &= ~(E500_TLB_TLB0 | E500_TLB_VALID); + tlbe->flags &= ~(E500_TLB_TLB0 | E500_TLB_VALID); } /* * If TLB entry is still valid then it's a TLB0 entry, and thus * backed by at most one host tlbe per shadow pid */ - if (ref->flags & E500_TLB_VALID) + if (tlbe->flags & E500_TLB_VALID) kvmppc_e500_tlbil_one(vcpu_e500, gtlbe); /* Mark the TLB as not backed by the host anymore */ - ref->flags = 0; + tlbe->flags = 0; } static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) @@ -245,26 +245,26 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) return tlbe->mas7_3 & (MAS3_SW|MAS3_UW); } -static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, - struct kvm_book3e_206_tlb_entry *gtlbe, - kvm_pfn_t pfn, unsigned int wimg, - bool writable) +static inline void kvmppc_e500_tlbe_setup(struct tlbe_priv *tlbe, + struct kvm_book3e_206_tlb_entry *gtlbe, + kvm_pfn_t pfn, unsigned int wimg, + bool writable) { - ref->pfn = pfn; - ref->flags = E500_TLB_VALID; + tlbe->pfn = pfn; + tlbe->flags = E500_TLB_VALID; if (writable) - ref->flags |= E500_TLB_WRITABLE; + tlbe->flags |= E500_TLB_WRITABLE; /* Use guest supplied MAS2_G and MAS2_E */ - ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg; + tlbe->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg; } -static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) +static inline void kvmppc_e500_tlbe_release(struct tlbe_priv *tlbe) { - if (ref->flags & E500_TLB_VALID) { + if (tlbe->flags & E500_TLB_VALID) { /* FIXME: don't log bogus pfn for TLB1 */ - trace_kvm_booke206_ref_release(ref->pfn, ref->flags); - ref->flags = 0; + trace_kvm_booke206_ref_release(tlbe->pfn, tlbe->flags); + tlbe->flags = 0; } } @@ -284,11 +284,8 @@ static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) int i; for (tlbsel = 0; tlbsel <= 1; tlbsel++) { - for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { - struct tlbe_ref *ref = - &vcpu_e500->gtlb_priv[tlbsel][i].ref; - kvmppc_e500_ref_release(ref); - } + for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) + kvmppc_e500_tlbe_release(&vcpu_e500->gtlb_priv[tlbsel][i]); } } @@ 
-304,18 +301,18 @@ void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu) static void kvmppc_e500_setup_stlbe( struct kvm_vcpu *vcpu, struct kvm_book3e_206_tlb_entry *gtlbe, - int tsize, struct tlbe_ref *ref, u64 gvaddr, + int tsize, struct tlbe_priv *tlbe, u64 gvaddr, struct kvm_book3e_206_tlb_entry *stlbe) { - kvm_pfn_t pfn = ref->pfn; + kvm_pfn_t pfn = tlbe->pfn; u32 pr = vcpu->arch.shared->msr & MSR_PR; - bool writable = !!(ref->flags & E500_TLB_WRITABLE); + bool writable = !!(tlbe->flags & E500_TLB_WRITABLE); - BUG_ON(!(ref->flags & E500_TLB_VALID)); + BUG_ON(!(tlbe->flags & E500_TLB_VALID)); /* Force IPROT=0 for all guest mappings. */ stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID; - stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR); + stlbe->mas2 = (gvaddr & MAS2_EPN) | (tlbe->flags & E500_TLB_MAS2_ATTR); stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | e500_shadow_mas3_attrib(gtlbe->mas7_3, writable, pr); } @@ -323,7 +320,7 @@ static void kvmppc_e500_setup_stlbe( static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe, - struct tlbe_ref *ref) + struct tlbe_priv *tlbe) { struct kvm_memory_slot *slot; unsigned int psize; @@ -455,9 +452,9 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, } } - kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg, writable); + kvmppc_e500_tlbe_setup(tlbe, gtlbe, pfn, wimg, writable); kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, - ref, gvaddr, stlbe); + tlbe, gvaddr, stlbe); writable = tlbe_is_writable(stlbe); /* Clear i-cache for new pages */ @@ -474,17 +471,17 @@ static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel, struct kvm_book3e_206_tlb_entry *stlbe) { struct kvm_book3e_206_tlb_entry *gtlbe; - struct tlbe_ref *ref; + struct tlbe_priv *tlbe; int stlbsel = 0; int sesel = 0; int r; gtlbe = get_entry(vcpu_e500, 0, esel); - ref = &vcpu_e500->gtlb_priv[0][esel].ref; + tlbe = &vcpu_e500->gtlb_priv[0][esel]; r = kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), get_tlb_raddr(gtlbe) >> PAGE_SHIFT, - gtlbe, 0, stlbe, ref); + gtlbe, 0, stlbe, tlbe); if (r) return r; @@ -494,7 +491,7 @@ static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel, } static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, - struct tlbe_ref *ref, + struct tlbe_priv *tlbe, int esel) { unsigned int sesel = vcpu_e500->host_tlb1_nv++; @@ -507,10 +504,10 @@ static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); } - vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; + vcpu_e500->gtlb_priv[1][esel].flags |= E500_TLB_BITMAP; vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; vcpu_e500->h2g_tlb1_rmap[sesel] = esel + 1; - WARN_ON(!(ref->flags & E500_TLB_VALID)); + WARN_ON(!(tlbe->flags & E500_TLB_VALID)); return sesel; } @@ -522,24 +519,24 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, struct kvm_book3e_206_tlb_entry *stlbe, int esel) { - struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[1][esel].ref; + struct tlbe_priv *tlbe = &vcpu_e500->gtlb_priv[1][esel]; int sesel; int r; r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, - ref); + tlbe); if (r) return r; /* Use TLB0 when we can only map a page with 4k */ if (get_tlb_tsize(stlbe) == BOOK3E_PAGESZ_4K) { - 
vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_TLB0; + vcpu_e500->gtlb_priv[1][esel].flags |= E500_TLB_TLB0; write_stlbe(vcpu_e500, gtlbe, stlbe, 0, 0); return 0; } /* Otherwise map into TLB1 */ - sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, ref, esel); + sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, tlbe, esel); write_stlbe(vcpu_e500, gtlbe, stlbe, 1, sesel); return 0; @@ -561,11 +558,11 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; /* Triggers after clear_tlb_privs or on initial mapping */ - if (!(priv->ref.flags & E500_TLB_VALID)) { + if (!(priv->flags & E500_TLB_VALID)) { kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); } else { kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, - &priv->ref, eaddr, &stlbe); + priv, eaddr, &stlbe); write_stlbe(vcpu_e500, gtlbe, &stlbe, 0, 0); } break;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9a89a6d..0030239 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c
@@ -623,12 +623,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = !!(hv_enabled && kvmppc_hv_ops->enable_nested && !kvmppc_hv_ops->enable_nested(NULL)); break; -#endif - case KVM_CAP_SYNC_MMU: - BUILD_BUG_ON(!IS_ENABLED(CONFIG_KVM_GENERIC_MMU_NOTIFIER)); - r = 1; - break; -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_HTAB_FD: r = hv_enabled; break;
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 9af969d..25a9910 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S
@@ -562,3 +562,4 @@ li r5,4096 b .Ldst_aligned EXPORT_SYMBOL(__copy_tofrom_user) +EXPORT_SYMBOL(__copy_tofrom_user_base)
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 8474c68..17dbcfb 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S
@@ -5,13 +5,9 @@ * * Author: Anton Blanchard <anton@au.ibm.com> */ +#include <linux/export.h> #include <asm/ppc_asm.h> -#ifndef SELFTEST_CASE -/* 0 == don't use VMX, 1 == use VMX */ -#define SELFTEST_CASE 0 -#endif - #ifdef __BIG_ENDIAN__ #define LVS(VRT,RA,RB) lvsl VRT,RA,RB #define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC @@ -47,10 +43,14 @@ ld r15,STK_REG(R15)(r1) ld r14,STK_REG(R14)(r1) .Ldo_err3: - bl CFUNC(exit_vmx_usercopy) + ld r6,STK_REG(R31)(r1) /* original destination pointer */ + ld r5,STK_REG(R29)(r1) /* original number of bytes */ + subf r7,r6,r3 /* #bytes copied */ + subf r3,r7,r5 /* #bytes not copied in r3 */ ld r0,STACKFRAMESIZE+16(r1) mtlr r0 - b .Lexit + addi r1,r1,STACKFRAMESIZE + blr #endif /* CONFIG_ALTIVEC */ .Ldo_err2: @@ -74,7 +74,6 @@ _GLOBAL(__copy_tofrom_user_power7) cmpldi r5,16 - cmpldi cr1,r5,3328 std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) @@ -82,12 +81,6 @@ blt .Lshort_copy -#ifdef CONFIG_ALTIVEC -test_feature = SELFTEST_CASE -BEGIN_FTR_SECTION - bgt cr1,.Lvmx_copy -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif .Lnonvmx_copy: /* Get the source 8B aligned */ @@ -263,23 +256,14 @@ 15: li r3,0 blr -.Lunwind_stack_nonvmx_copy: - addi r1,r1,STACKFRAMESIZE - b .Lnonvmx_copy - -.Lvmx_copy: #ifdef CONFIG_ALTIVEC +_GLOBAL(__copy_tofrom_user_power7_vmx) mflr r0 std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl CFUNC(enter_vmx_usercopy) - cmpwi cr1,r3,0 - ld r0,STACKFRAMESIZE+16(r1) - ld r3,STK_REG(R31)(r1) - ld r4,STK_REG(R30)(r1) - ld r5,STK_REG(R29)(r1) - mtlr r0 + std r3,STK_REG(R31)(r1) + std r5,STK_REG(R29)(r1) /* * We prefetch both the source and destination using enhanced touch * instructions. We use a stream ID of 0 for the load side and @@ -300,8 +284,6 @@ DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8) - beq cr1,.Lunwind_stack_nonvmx_copy - /* * If source and destination are not relatively aligned we use a * slower permute loop. @@ -478,7 +460,8 @@ err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - b CFUNC(exit_vmx_usercopy) /* tail call optimise */ + li r3,0 + blr .Lvmx_unaligned_copy: /* Get the destination 16B aligned */ @@ -681,5 +664,7 @@ err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - b CFUNC(exit_vmx_usercopy) /* tail call optimise */ + li r3,0 + blr +EXPORT_SYMBOL(__copy_tofrom_user_power7_vmx) #endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index 5434091..554b248 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c
@@ -27,6 +27,7 @@ int enter_vmx_usercopy(void) return 1; } +EXPORT_SYMBOL(enter_vmx_usercopy); /* * This function must return 0 because we tail call optimise when calling @@ -49,6 +50,7 @@ int exit_vmx_usercopy(void) set_dec(1); return 0; } +EXPORT_SYMBOL(exit_vmx_usercopy); int enter_vmx_ops(void) {
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index a985fc9..b7982d0 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c
@@ -30,6 +30,10 @@ #include <asm/setup.h> #include <asm/fixmap.h> +#include <asm/fadump.h> +#include <asm/kexec.h> +#include <asm/kvm_ppc.h> + #include <mm/mmu_decl.h> unsigned long long memory_limit __initdata; @@ -268,6 +272,16 @@ void __init paging_init(void) void __init arch_mm_preinit(void) { + + /* + * Reserve large chunks of memory for use by CMA for kdump, fadump, KVM + * and hugetlb. These must be called after pageblock_order is + * initialised. + */ + fadump_cma_init(); + kdump_cma_reserve(); + kvm_cma_reserve(); + /* * book3s is limited to 16 page sizes due to encoding this in * a 4-bit field for slices.
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 82bbf63..7354e1d 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h
@@ -81,9 +81,6 @@ #ifdef CONFIG_PPC64 -/* for gpr non volatile registers BPG_REG_6 to 10 */ -#define BPF_PPC_STACK_SAVE (6 * 8) - /* If dummy pass (!image), account for maximum possible instructions */ #define PPC_LI64(d, i) do { \ if (!image) \ @@ -219,8 +216,6 @@ int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass, struct codegen_context *ctx, int insn_idx, int jmp_off, int dst_reg, u32 code); - -int bpf_jit_stack_tailcallinfo_offset(struct codegen_context *ctx); #endif #endif
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 52162e4..a62a9a9 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -450,7 +450,7 @@ bool bpf_jit_supports_subprog_tailcalls(void) bool bpf_jit_supports_kfunc_call(void) { - return true; + return IS_ENABLED(CONFIG_PPC64); } bool bpf_jit_supports_arena(void) @@ -638,19 +638,12 @@ static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context * for the traced function (BPF subprog/callee) to fetch it. */ static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx, - int func_frame_offset, - int bpf_dummy_frame_size, int r4_off) + int bpf_frame_size, int r4_off) { if (IS_ENABLED(CONFIG_PPC64)) { - /* See Generated stack layout */ - int tailcallinfo_offset = BPF_PPC_TAILCALL; - - /* - * func_frame_offset = ...(1) - * bpf_dummy_frame_size + trampoline_frame_size - */ - EMIT(PPC_RAW_LD(_R4, _R1, func_frame_offset)); - EMIT(PPC_RAW_LD(_R3, _R4, -tailcallinfo_offset)); + EMIT(PPC_RAW_LD(_R4, _R1, bpf_frame_size)); + /* Refer to trampoline's Generated stack layout */ + EMIT(PPC_RAW_LD(_R3, _R4, -BPF_PPC_TAILCALL)); /* * Setting the tail_call_info in trampoline's frame @@ -658,22 +651,14 @@ static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_conte */ EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT)); PPC_BCC_CONST_SHORT(COND_GT, 8); - EMIT(PPC_RAW_ADDI(_R3, _R4, bpf_jit_stack_tailcallinfo_offset(ctx))); + EMIT(PPC_RAW_ADDI(_R3, _R4, -BPF_PPC_TAILCALL)); + /* - * From ...(1) above: - * trampoline_frame_bottom = ...(2) - * func_frame_offset - bpf_dummy_frame_size - * - * Using ...(2) derived above: - * trampoline_tail_call_info_offset = ...(3) - * trampoline_frame_bottom - tailcallinfo_offset - * - * From ...(3): - * Use trampoline_tail_call_info_offset to write reference of main's - * tail_call_info in trampoline frame. + * Trampoline's tail_call_info is at the same offset, as that of + * any bpf program, with reference to previous frame. Update the + * address of main's tail_call_info in trampoline frame. */ - EMIT(PPC_RAW_STL(_R3, _R1, (func_frame_offset - bpf_dummy_frame_size) - - tailcallinfo_offset)); + EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size - BPF_PPC_TAILCALL)); } else { /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */ EMIT(PPC_RAW_LL(_R4, _R1, r4_off)); @@ -681,7 +666,7 @@ static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_conte } static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx, - int func_frame_offset, int r4_off) + int bpf_frame_size, int r4_off) { if (IS_ENABLED(CONFIG_PPC32)) { /* @@ -692,12 +677,12 @@ static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_cont } } -static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx, int func_frame_offset, - int nr_regs, int regs_off) +static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx, + int bpf_frame_size, int nr_regs, int regs_off) { int param_save_area_offset; - param_save_area_offset = func_frame_offset; /* the two frames we alloted */ + param_save_area_offset = bpf_frame_size; param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */ for (int i = 0; i < nr_regs; i++) { @@ -720,11 +705,11 @@ static void bpf_trampoline_restore_args_regs(u32 *image, struct codegen_context /* Used when we call into the traced function. 
Replicate parameter save area */ static void bpf_trampoline_restore_args_stack(u32 *image, struct codegen_context *ctx, - int func_frame_offset, int nr_regs, int regs_off) + int bpf_frame_size, int nr_regs, int regs_off) { int param_save_area_offset; - param_save_area_offset = func_frame_offset; /* the two frames we alloted */ + param_save_area_offset = bpf_frame_size; param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */ for (int i = 8; i < nr_regs; i++) { @@ -741,10 +726,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im void *func_addr) { int regs_off, nregs_off, ip_off, run_ctx_off, retval_off, nvr_off, alt_lr_off, r4_off = 0; - int i, ret, nr_regs, bpf_frame_size = 0, bpf_dummy_frame_size = 0, func_frame_offset; struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; + int i, ret, nr_regs, retaddr_off, bpf_frame_size = 0; struct codegen_context codegen_ctx, *ctx; u32 *image = (u32 *)rw_image; ppc_inst_t branch_insn; @@ -770,24 +755,19 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im * Generated stack layout: * * func prev back chain [ back chain ] - * [ ] - * bpf prog redzone/tailcallcnt [ ... ] 64 bytes (64-bit powerpc) - * [ ] -- - * LR save area [ r0 save (64-bit) ] | header - * [ r0 save (32-bit) ] | - * dummy frame for unwind [ back chain 1 ] -- * [ tail_call_info ] optional - 64-bit powerpc * [ padding ] align stack frame * r4_off [ r4 (tailcallcnt) ] optional - 32-bit powerpc * alt_lr_off [ real lr (ool stub)] optional - actual lr + * retaddr_off [ return address ] * [ r26 ] * nvr_off [ r25 ] nvr save area * retval_off [ return value ] * [ reg argN ] * [ ... ] - * regs_off [ reg_arg1 ] prog ctx context - * nregs_off [ args count ] - * ip_off [ traced function ] + * regs_off [ reg_arg1 ] prog_ctx + * nregs_off [ args count ] ((u64 *)prog_ctx)[-1] + * ip_off [ traced function ] ((u64 *)prog_ctx)[-2] * [ ... 
] * run_ctx_off [ bpf_tramp_run_ctx ] * [ reg argN ] @@ -843,6 +823,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im nvr_off = bpf_frame_size; bpf_frame_size += 2 * SZL; + /* Save area for return address */ + retaddr_off = bpf_frame_size; + bpf_frame_size += SZL; + /* Optional save area for actual LR in case of ool ftrace */ if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { alt_lr_off = bpf_frame_size; @@ -869,16 +853,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im /* Padding to align stack frame, if any */ bpf_frame_size = round_up(bpf_frame_size, SZL * 2); - /* Dummy frame size for proper unwind - includes 64-bytes red zone for 64-bit powerpc */ - bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64; - - /* Offset to the traced function's stack frame */ - func_frame_offset = bpf_dummy_frame_size + bpf_frame_size; - - /* Create dummy frame for unwind, store original return value */ + /* Store original return value */ EMIT(PPC_RAW_STL(_R0, _R1, PPC_LR_STKOFF)); - /* Protect red zone where tail call count goes */ - EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_dummy_frame_size)); /* Create our stack frame */ EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_frame_size)); @@ -893,34 +869,44 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im if (IS_ENABLED(CONFIG_PPC32) && nr_regs < 2) EMIT(PPC_RAW_STL(_R4, _R1, r4_off)); - bpf_trampoline_save_args(image, ctx, func_frame_offset, nr_regs, regs_off); + bpf_trampoline_save_args(image, ctx, bpf_frame_size, nr_regs, regs_off); - /* Save our return address */ + /* Save our LR/return address */ EMIT(PPC_RAW_MFLR(_R3)); if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) EMIT(PPC_RAW_STL(_R3, _R1, alt_lr_off)); else - EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); + EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off)); /* - * Save ip address of the traced function. - * We could recover this from LR, but we will need to address for OOL trampoline, - * and optional GEP area. + * Derive IP address of the traced function. + * In case of CONFIG_PPC_FTRACE_OUT_OF_LINE or BPF program, LR points to the instruction + * after the 'bl' instruction in the OOL stub. Refer to ftrace_init_ool_stub() and + * bpf_arch_text_poke() for OOL stub of kernel functions and bpf programs respectively. + * Relevant stub sequence: + * + * bl <tramp> + * LR (R3) => mtlr r0 + * b <func_addr+4> + * + * Recover kernel function/bpf program address from the unconditional + * branch instruction at the end of OOL stub. 
*/ if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) || flags & BPF_TRAMP_F_IP_ARG) { EMIT(PPC_RAW_LWZ(_R4, _R3, 4)); EMIT(PPC_RAW_SLWI(_R4, _R4, 6)); EMIT(PPC_RAW_SRAWI(_R4, _R4, 6)); EMIT(PPC_RAW_ADD(_R3, _R3, _R4)); - EMIT(PPC_RAW_ADDI(_R3, _R3, 4)); } if (flags & BPF_TRAMP_F_IP_ARG) EMIT(PPC_RAW_STL(_R3, _R1, ip_off)); - if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) - /* Fake our LR for unwind */ - EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + /* Fake our LR for BPF_TRAMP_F_CALL_ORIG case */ + EMIT(PPC_RAW_ADDI(_R3, _R3, 4)); + EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off)); + } /* Save function arg count -- see bpf_get_func_arg_cnt() */ EMIT(PPC_RAW_LI(_R3, nr_regs)); @@ -958,20 +944,19 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im /* Call the traced function */ if (flags & BPF_TRAMP_F_CALL_ORIG) { /* - * The address in LR save area points to the correct point in the original function + * retaddr on trampoline stack points to the correct point in the original function * with both PPC_FTRACE_OUT_OF_LINE as well as with traditional ftrace instruction * sequence */ - EMIT(PPC_RAW_LL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); + EMIT(PPC_RAW_LL(_R3, _R1, retaddr_off)); EMIT(PPC_RAW_MTCTR(_R3)); /* Replicate tail_call_cnt before calling the original BPF prog */ if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) - bpf_trampoline_setup_tail_call_info(image, ctx, func_frame_offset, - bpf_dummy_frame_size, r4_off); + bpf_trampoline_setup_tail_call_info(image, ctx, bpf_frame_size, r4_off); /* Restore args */ - bpf_trampoline_restore_args_stack(image, ctx, func_frame_offset, nr_regs, regs_off); + bpf_trampoline_restore_args_stack(image, ctx, bpf_frame_size, nr_regs, regs_off); /* Restore TOC for 64-bit */ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) @@ -985,7 +970,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im /* Restore updated tail_call_cnt */ if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) - bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off); + bpf_trampoline_restore_tail_call_cnt(image, ctx, bpf_frame_size, r4_off); /* Reserve space to patch branch instruction to skip fexit progs */ if (ro_image) /* image is NULL for dummy pass */ @@ -1037,7 +1022,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im EMIT(PPC_RAW_LD(_R2, _R1, 24)); if (flags & BPF_TRAMP_F_SKIP_FRAME) { /* Skip the traced function and return to parent */ - EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); + EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size)); EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); EMIT(PPC_RAW_MTLR(_R0)); EMIT(PPC_RAW_BLR()); @@ -1045,13 +1030,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { EMIT(PPC_RAW_LL(_R0, _R1, alt_lr_off)); EMIT(PPC_RAW_MTLR(_R0)); - EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); + EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size)); EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); EMIT(PPC_RAW_BLR()); } else { - EMIT(PPC_RAW_LL(_R0, _R1, bpf_frame_size + PPC_LR_STKOFF)); + EMIT(PPC_RAW_LL(_R0, _R1, retaddr_off)); EMIT(PPC_RAW_MTCTR(_R0)); - EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); + EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size)); EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); EMIT(PPC_RAW_MTLR(_R0)); EMIT(PPC_RAW_BCTR());
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index b1a3945..c5e26d2 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -32,23 +32,27 @@ * * [ prev sp ] <------------- * [ tail_call_info ] 8 | - * [ nv gpr save area ] 6*8 + (12*8) | + * [ nv gpr save area ] (6 * 8) | + * [ addl. nv gpr save area] (12 * 8) | <--- exception boundary/callback program * [ local_tmp_var ] 24 | * fp (r31) --> [ ebpf stack space ] upto 512 | * [ frame header ] 32/112 | * sp (r1) ---> [ stack pointer ] -------------- * - * Additional (12*8) in 'nv gpr save area' only in case of - * exception boundary. + * Additional (12 * 8) in 'nv gpr save area' only in case of + * exception boundary/callback. */ +/* BPF non-volatile registers save area size */ +#define BPF_PPC_STACK_SAVE (6 * 8) + /* for bpf JIT code internal usage */ #define BPF_PPC_STACK_LOCALS 24 /* * for additional non volatile registers(r14-r25) to be saved * at exception boundary */ -#define BPF_PPC_EXC_STACK_SAVE (12*8) +#define BPF_PPC_EXC_STACK_SAVE (12 * 8) /* stack frame excluding BPF stack, ensure this is quadword aligned */ #define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ @@ -125,12 +129,13 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * [ ... ] | * sp (r1) ---> [ stack pointer ] -------------- * [ tail_call_info ] 8 - * [ nv gpr save area ] 6*8 + (12*8) + * [ nv gpr save area ] (6 * 8) + * [ addl. nv gpr save area] (12 * 8) <--- exception boundary/callback program * [ local_tmp_var ] 24 * [ unused red zone ] 224 * - * Additional (12*8) in 'nv gpr save area' only in case of - * exception boundary. + * Additional (12 * 8) in 'nv gpr save area' only in case of + * exception boundary/callback. */ static int bpf_jit_stack_local(struct codegen_context *ctx) { @@ -148,7 +153,7 @@ static int bpf_jit_stack_local(struct codegen_context *ctx) } } -int bpf_jit_stack_tailcallinfo_offset(struct codegen_context *ctx) +static int bpf_jit_stack_tailcallinfo_offset(struct codegen_context *ctx) { return bpf_jit_stack_local(ctx) + BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE; } @@ -237,10 +242,6 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) if (bpf_has_stack_frame(ctx) && !ctx->exception_cb) { /* - * exception_cb uses boundary frame after stack walk. - * It can simply use redzone, this optimization reduces - * stack walk loop by one level. - * * We need a stack frame, but we don't necessarily need to * save/restore LR unless we call other functions */ @@ -284,6 +285,22 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * program(main prog) as third arg */ EMIT(PPC_RAW_MR(_R1, _R5)); + /* + * Exception callback reuses the stack frame of exception boundary. + * But BPF stack depth of exception callback and exception boundary + * don't have to be same. If BPF stack depth is different, adjust the + * stack frame size considering BPF stack depth of exception callback. + * The non-volatile register save area remains unchanged. These non- + * volatile registers are restored in exception callback's epilogue. 
+ */ + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R5, 0)); + EMIT(PPC_RAW_SUB(bpf_to_ppc(TMP_REG_2), bpf_to_ppc(TMP_REG_1), _R1)); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_2), bpf_to_ppc(TMP_REG_2), + -BPF_PPC_EXC_STACKFRAME)); + EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_2), ctx->stack_size)); + PPC_BCC_CONST_SHORT(COND_EQ, 12); + EMIT(PPC_RAW_MR(_R1, bpf_to_ppc(TMP_REG_1))); + EMIT(PPC_RAW_STDU(_R1, _R1, -(BPF_PPC_EXC_STACKFRAME + ctx->stack_size))); } /* @@ -482,6 +499,83 @@ int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context * return 0; } +static int zero_extend(u32 *image, struct codegen_context *ctx, u32 src_reg, u32 dst_reg, u32 size) +{ + switch (size) { + case 1: + /* zero-extend 8 bits into 64 bits */ + EMIT(PPC_RAW_RLDICL(dst_reg, src_reg, 0, 56)); + return 0; + case 2: + /* zero-extend 16 bits into 64 bits */ + EMIT(PPC_RAW_RLDICL(dst_reg, src_reg, 0, 48)); + return 0; + case 4: + /* zero-extend 32 bits into 64 bits */ + EMIT(PPC_RAW_RLDICL(dst_reg, src_reg, 0, 32)); + fallthrough; + case 8: + /* Nothing to do */ + return 0; + default: + return -1; + } +} + +static int sign_extend(u32 *image, struct codegen_context *ctx, u32 src_reg, u32 dst_reg, u32 size) +{ + switch (size) { + case 1: + /* sign-extend 8 bits into 64 bits */ + EMIT(PPC_RAW_EXTSB(dst_reg, src_reg)); + return 0; + case 2: + /* sign-extend 16 bits into 64 bits */ + EMIT(PPC_RAW_EXTSH(dst_reg, src_reg)); + return 0; + case 4: + /* sign-extend 32 bits into 64 bits */ + EMIT(PPC_RAW_EXTSW(dst_reg, src_reg)); + fallthrough; + case 8: + /* Nothing to do */ + return 0; + default: + return -1; + } +} + +/* + * Handle powerpc ABI expectations from caller: + * - Unsigned arguments are zero-extended. + * - Signed arguments are sign-extended. + */ +static int prepare_for_kfunc_call(const struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, + const struct bpf_insn *insn) +{ + const struct btf_func_model *m = bpf_jit_find_kfunc_model(fp, insn); + int i; + + if (!m) + return -1; + + for (i = 0; i < m->nr_args; i++) { + /* Note that BPF ABI only allows up to 5 args for kfuncs */ + u32 reg = bpf_to_ppc(BPF_REG_1 + i), size = m->arg_size[i]; + + if (!(m->arg_flags[i] & BTF_FMODEL_SIGNED_ARG)) { + if (zero_extend(image, ctx, reg, reg, size)) + return -1; + } else { + if (sign_extend(image, ctx, reg, reg, size)) + return -1; + } + } + + return 0; +} + static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) { /* @@ -522,9 +616,30 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o /* * tail_call_info++; <- Actual value of tcc here + * Writeback this updated value only if tailcall succeeds. 
*/ EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 1)); + /* prog = array->ptrs[index]; */ + EMIT(PPC_RAW_MULI(bpf_to_ppc(TMP_REG_2), b2p_index, 8)); + EMIT(PPC_RAW_ADD(bpf_to_ppc(TMP_REG_2), bpf_to_ppc(TMP_REG_2), b2p_bpf_array)); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_2), bpf_to_ppc(TMP_REG_2), + offsetof(struct bpf_array, ptrs))); + + /* + * if (prog == NULL) + * goto out; + */ + EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_2), 0)); + PPC_BCC_SHORT(COND_EQ, out); + + /* goto *(prog->bpf_func + prologue_size); */ + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_2), bpf_to_ppc(TMP_REG_2), + offsetof(struct bpf_prog, bpf_func))); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_2), bpf_to_ppc(TMP_REG_2), + FUNCTION_DESCR_SIZE + bpf_tailcall_prologue_size)); + EMIT(PPC_RAW_MTCTR(bpf_to_ppc(TMP_REG_2))); + /* * Before writing updated tail_call_info, distinguish if current frame * is storing a reference to tail_call_info or actual tcc value in @@ -539,24 +654,6 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o /* Writeback updated value to tail_call_info */ EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), 0)); - /* prog = array->ptrs[index]; */ - EMIT(PPC_RAW_MULI(bpf_to_ppc(TMP_REG_1), b2p_index, 8)); - EMIT(PPC_RAW_ADD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), b2p_bpf_array)); - EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_array, ptrs))); - - /* - * if (prog == NULL) - * goto out; - */ - EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_1), 0)); - PPC_BCC_SHORT(COND_EQ, out); - - /* goto *(prog->bpf_func + prologue_size); */ - EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_prog, bpf_func))); - EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), - FUNCTION_DESCR_SIZE + bpf_tailcall_prologue_size)); - EMIT(PPC_RAW_MTCTR(bpf_to_ppc(TMP_REG_1))); - /* tear down stack, restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); @@ -1123,14 +1220,16 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code /* special mov32 for zext */ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31)); break; - } else if (off == 8) { - EMIT(PPC_RAW_EXTSB(dst_reg, src_reg)); - } else if (off == 16) { - EMIT(PPC_RAW_EXTSH(dst_reg, src_reg)); - } else if (off == 32) { - EMIT(PPC_RAW_EXTSW(dst_reg, src_reg)); - } else if (dst_reg != src_reg) - EMIT(PPC_RAW_MR(dst_reg, src_reg)); + } + if (off == 0) { + /* MOV */ + if (dst_reg != src_reg) + EMIT(PPC_RAW_MR(dst_reg, src_reg)); + } else { + /* MOVSX: dst = (s8,s16,s32)src (off = 8,16,32) */ + if (sign_extend(image, ctx, src_reg, dst_reg, off / 8)) + return -1; + } goto bpf_alu32_trunc; case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */ case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */ @@ -1598,6 +1697,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code if (ret < 0) return ret; + /* Take care of powerpc ABI requirements before kfunc call */ + if (insn[i].src_reg == BPF_PSEUDO_KFUNC_CALL) { + if (prepare_for_kfunc_call(fp, image, ctx, &insn[i])) + return -1; + } + ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr); if (ret) return ret;
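The zero_extend()/sign_extend() helpers introduced above encode a calling-convention detail: the powerpc ABI expects sub-word integer arguments to arrive widened to 64 bits, while BPF leaves the upper bits of narrow values unspecified. A userspace sketch of the same widening rules (function name invented for illustration):

#include <stdint.h>

/* Widen a raw 64-bit register image holding a sub-word argument, the way
 * prepare_for_kfunc_call() arranges before branching to the kfunc. */
static uint64_t widen_arg(uint64_t raw, unsigned int size, int is_signed)
{
	switch (size) {
	case 1:
		return is_signed ? (uint64_t)(int64_t)(int8_t)raw : (uint8_t)raw;
	case 2:
		return is_signed ? (uint64_t)(int64_t)(int16_t)raw : (uint16_t)raw;
	case 4:
		return is_signed ? (uint64_t)(int64_t)(int32_t)raw : (uint32_t)raw;
	default:
		return raw;	/* full 64-bit arguments pass through unchanged */
	}
}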
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 26aa264..992cc5c 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c
@@ -103,6 +103,11 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + perf_callchain_store(entry, perf_arch_instruction_pointer(regs)); + + if (!current->mm) + return; + if (!is_32bit_task()) perf_callchain_user_64(entry, regs); else
diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index ddcc2d8..0de21c5 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c
@@ -142,7 +142,6 @@ void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, next_ip = perf_arch_instruction_pointer(regs); lr = regs->link; sp = regs->gpr[1]; - perf_callchain_store(entry, next_ip); while (entry->nr < entry->max_stack) { fp = (unsigned int __user *) (unsigned long) sp;
diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index 115d1c1..30fb61c 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c
@@ -77,7 +77,6 @@ void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, next_ip = perf_arch_instruction_pointer(regs); lr = regs->link; sp = regs->gpr[1]; - perf_callchain_store(entry, next_ip); while (entry->nr < entry->max_stack) { fp = (unsigned long __user *) sp;
diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c index 2b5d187..9ef8fb3 100644 --- a/arch/powerpc/platforms/83xx/km83xx.c +++ b/arch/powerpc/platforms/83xx/km83xx.c
@@ -155,8 +155,8 @@ machine_device_initcall(mpc83xx_km, mpc83xx_declare_of_platform_devices); /* list of the supported boards */ static char *board[] __initdata = { - "Keymile,KMETER1", - "Keymile,kmpbec8321", + "keymile,KMETER1", + "keymile,kmpbec8321", NULL };
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index f399917..bac02c8 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -276,7 +276,7 @@ config PPC_E500 select FSL_EMB_PERFMON bool - select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64 + select ARCH_SUPPORTS_HUGETLBFS select PPC_SMP_MUXED_IPI select PPC_DOORBELL select PPC_KUEP @@ -337,7 +337,7 @@ config PTE_64BIT bool depends on 44x || PPC_E500 || PPC_86xx - default y if PHYS_64BIT + default y if PPC_E500 || PHYS_64BIT config PHYS_64BIT bool 'Large physical address support' if PPC_E500 || PPC_86xx
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 64ffc64..8285b9a 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c
@@ -605,7 +605,7 @@ static int pseries_irq_domain_alloc(struct irq_domain *domain, unsigned int virq &pseries_msi_irq_chip, pseries_dev); } - pseries_dev->msi_used++; + pseries_dev->msi_used += nr_irqs; return 0; out:
diff --git a/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh b/arch/powerpc/tools/check-fpatchable-function-entry.sh similarity index 100% rename from arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh rename to arch/powerpc/tools/check-fpatchable-function-entry.sh
diff --git a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh index bac186b..9218d43 100755 --- a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh +++ b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh
@@ -15,9 +15,9 @@ RELOCATION=R_PPC_ADDR32 fi -num_ool_stubs_total=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | +num_ool_stubs_total=$($objdump -r -j __patchable_function_entries -d "$vmlinux_o" | grep -c "$RELOCATION") -num_ool_stubs_inittext=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | +num_ool_stubs_inittext=$($objdump -r -j __patchable_function_entries -d "$vmlinux_o" | grep -e ".init.text" -e ".text.startup" | grep -c "$RELOCATION") num_ool_stubs_text=$((num_ool_stubs_total - num_ool_stubs_inittext))
diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index 5c2963e..a0ffedc 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi
@@ -428,6 +428,7 @@ can0: can@2010c000 { clocks = <&clkcfg CLK_CAN0>, <&clkcfg CLK_MSSPLL3>; interrupt-parent = <&plic>; interrupts = <56>; + resets = <&mss_top_sysreg CLK_CAN0>; status = "disabled"; }; @@ -437,6 +438,7 @@ can1: can@2010d000 { clocks = <&clkcfg CLK_CAN1>, <&clkcfg CLK_MSSPLL3>; interrupt-parent = <&plic>; interrupts = <57>; + resets = <&mss_top_sysreg CLK_CAN1>; status = "disabled"; };
diff --git a/arch/riscv/include/asm/runtime-const.h b/arch/riscv/include/asm/runtime-const.h index d766e2b..900db0a 100644 --- a/arch/riscv/include/asm/runtime-const.h +++ b/arch/riscv/include/asm/runtime-const.h
@@ -2,6 +2,10 @@ #ifndef _ASM_RISCV_RUNTIME_CONST_H #define _ASM_RISCV_RUNTIME_CONST_H +#ifdef MODULE + #error "Cannot use runtime-const infrastructure from modules" +#endif + #include <asm/asm.h> #include <asm/alternative.h> #include <asm/cacheflush.h>
diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h index 18988a5..70a74ad 100644 --- a/arch/riscv/include/uapi/asm/ptrace.h +++ b/arch/riscv/include/uapi/asm/ptrace.h
@@ -9,6 +9,7 @@ #ifndef __ASSEMBLER__ #include <linux/types.h> +#include <linux/const.h> #define PTRACE_GETFDPIC 33 @@ -138,12 +139,12 @@ struct __sc_riscv_cfi_state { #define PTRACE_CFI_SS_LOCK_BIT 4 #define PTRACE_CFI_SS_PTR_BIT 5 -#define PTRACE_CFI_LP_EN_STATE BIT(PTRACE_CFI_LP_EN_BIT) -#define PTRACE_CFI_LP_LOCK_STATE BIT(PTRACE_CFI_LP_LOCK_BIT) -#define PTRACE_CFI_ELP_STATE BIT(PTRACE_CFI_ELP_BIT) -#define PTRACE_CFI_SS_EN_STATE BIT(PTRACE_CFI_SS_EN_BIT) -#define PTRACE_CFI_SS_LOCK_STATE BIT(PTRACE_CFI_SS_LOCK_BIT) -#define PTRACE_CFI_SS_PTR_STATE BIT(PTRACE_CFI_SS_PTR_BIT) +#define PTRACE_CFI_LP_EN_STATE _BITUL(PTRACE_CFI_LP_EN_BIT) +#define PTRACE_CFI_LP_LOCK_STATE _BITUL(PTRACE_CFI_LP_LOCK_BIT) +#define PTRACE_CFI_ELP_STATE _BITUL(PTRACE_CFI_ELP_BIT) +#define PTRACE_CFI_SS_EN_STATE _BITUL(PTRACE_CFI_SS_EN_BIT) +#define PTRACE_CFI_SS_LOCK_STATE _BITUL(PTRACE_CFI_SS_LOCK_BIT) +#define PTRACE_CFI_SS_PTR_STATE _BITUL(PTRACE_CFI_SS_PTR_BIT) #define PRACE_CFI_STATE_INVALID_MASK ~(PTRACE_CFI_LP_EN_STATE | \ PTRACE_CFI_LP_LOCK_STATE | \
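The BIT() to _BITUL() substitution above matters because this is a UAPI header: BIT() lives in kernel-internal headers that userspace cannot include, whereas <linux/const.h> is exported. Paraphrased from memory (the exported header additionally drops the UL suffix for assembler builds), the relevant definitions look roughly like:

/* From <uapi/linux/const.h>, approximately: */
#define _AC(X, Y)	(X##Y)
#define _UL(x)		(_AC(x, UL))
#define _BITUL(x)	(_UL(1) << (x))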
diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index 15fec5d..0bf62920 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c
@@ -175,7 +175,7 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { {DBG_REG_T1, GDB_SIZEOF_REG, offsetof(struct pt_regs, t1)}, {DBG_REG_T2, GDB_SIZEOF_REG, offsetof(struct pt_regs, t2)}, {DBG_REG_FP, GDB_SIZEOF_REG, offsetof(struct pt_regs, s0)}, - {DBG_REG_S1, GDB_SIZEOF_REG, offsetof(struct pt_regs, a1)}, + {DBG_REG_S1, GDB_SIZEOF_REG, offsetof(struct pt_regs, s1)}, {DBG_REG_A0, GDB_SIZEOF_REG, offsetof(struct pt_regs, a0)}, {DBG_REG_A1, GDB_SIZEOF_REG, offsetof(struct pt_regs, a1)}, {DBG_REG_A2, GDB_SIZEOF_REG, offsetof(struct pt_regs, a2)}, @@ -244,8 +244,9 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) gdb_regs[DBG_REG_S6_OFF] = task->thread.s[6]; gdb_regs[DBG_REG_S7_OFF] = task->thread.s[7]; gdb_regs[DBG_REG_S8_OFF] = task->thread.s[8]; - gdb_regs[DBG_REG_S9_OFF] = task->thread.s[10]; - gdb_regs[DBG_REG_S10_OFF] = task->thread.s[11]; + gdb_regs[DBG_REG_S9_OFF] = task->thread.s[9]; + gdb_regs[DBG_REG_S10_OFF] = task->thread.s[10]; + gdb_regs[DBG_REG_S11_OFF] = task->thread.s[11]; gdb_regs[DBG_REG_EPC_OFF] = task->thread.ra; }
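The kgdb fixes above are classic table-maintenance slips: one entry pointed S1 at pt_regs.a1, and the saved-register copy skipped s[9] while dropping S11 entirely. Designated initializers make such tables self-checking to a reviewer's eye; a generic sketch with invented types and names:

#include <stddef.h>

struct regs_demo { unsigned long s0, s1, a0, a1; };
struct reg_def { const char *name; size_t offset; };

enum { REG_FP, REG_S1, REG_A0, REG_A1, REG_MAX };

/* Each slot names its own index, so a pasted-but-unedited offset such as
 * the S1 -> a1 mix-up fixed above stands out at the definition site. */
static const struct reg_def reg_map[REG_MAX] = {
	[REG_FP] = { "fp", offsetof(struct regs_demo, s0) },
	[REG_S1] = { "s1", offsetof(struct regs_demo, s1) },
	[REG_A0] = { "a0", offsetof(struct regs_demo, a0) },
	[REG_A1] = { "a1", offsetof(struct regs_demo, a1) },
};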
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index db13c9d..16b2433 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c
@@ -42,19 +42,20 @@ static inline bool is_kernel_exittext(uintptr_t addr) static __always_inline void *patch_map(void *addr, const unsigned int fixmap) { uintptr_t uintaddr = (uintptr_t) addr; - struct page *page; + phys_addr_t phys; - if (core_kernel_text(uintaddr) || is_kernel_exittext(uintaddr)) - page = phys_to_page(__pa_symbol(addr)); - else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) - page = vmalloc_to_page(addr); - else + if (core_kernel_text(uintaddr) || is_kernel_exittext(uintaddr)) { + phys = __pa_symbol(addr); + } else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) { + struct page *page = vmalloc_to_page(addr); + + BUG_ON(!page); + phys = page_to_phys(page) + offset_in_page(addr); + } else { return addr; + } - BUG_ON(!page); - - return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + - offset_in_page(addr)); + return (void *)set_fixmap_offset(fixmap, phys); } static void patch_unmap(int fixmap)
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index aacb239..5957eff 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c
@@ -347,8 +347,10 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) if (arg & PR_TAGGED_ADDR_ENABLE && (tagged_addr_disabled || !pmlen)) return -EINVAL; - if (!(arg & PR_TAGGED_ADDR_ENABLE)) + if (!(arg & PR_TAGGED_ADDR_ENABLE)) { pmlen = PMLEN_0; + pmm = ENVCFG_PMM_PMLEN_0; + } if (mmap_write_lock_killable(mm)) return -EINTR;
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 61bd5ba..997f9eb 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -170,6 +170,7 @@ STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS .riscv.attributes 0 : { *(.riscv.attributes) }
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index 77379f7..ec2cee0 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig
@@ -30,7 +30,6 @@ select KVM_GENERIC_HARDWARE_ENABLING select KVM_MMIO select VIRT_XFER_TO_GUEST_WORK - select KVM_GENERIC_MMU_NOTIFIER select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS help
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index cac3c2b..5ec5032 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c
@@ -13,6 +13,7 @@ #include <linux/irqchip/riscv-imsic.h> #include <linux/irqdomain.h> #include <linux/kvm_host.h> +#include <linux/nospec.h> #include <linux/percpu.h> #include <linux/spinlock.h> #include <asm/cpufeature.h> @@ -182,9 +183,14 @@ int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu, unsigned long *out_val) { struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; + unsigned long regs_max = sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long); - if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long)) + if (!riscv_isa_extension_available(vcpu->arch.isa, SSAIA)) return -ENOENT; + if (reg_num >= regs_max) + return -ENOENT; + + reg_num = array_index_nospec(reg_num, regs_max); *out_val = 0; if (kvm_riscv_aia_available()) @@ -198,9 +204,14 @@ int kvm_riscv_vcpu_aia_set_csr(struct kvm_vcpu *vcpu, unsigned long val) { struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; + unsigned long regs_max = sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long); - if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long)) + if (!riscv_isa_extension_available(vcpu->arch.isa, SSAIA)) return -ENOENT; + if (reg_num >= regs_max) + return -ENOENT; + + reg_num = array_index_nospec(reg_num, regs_max); if (kvm_riscv_aia_available()) { ((unsigned long *)csr)[reg_num] = val;
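The hunks above, and the aplic/fp/onereg/pmu changes that follow, all apply the same Spectre-v1 hardening recipe: a bounds check alone does not stop the CPU from speculatively indexing out of range, so the index is additionally clamped with array_index_nospec() after the check. The canonical shape, sketched with invented names:

#include <linux/nospec.h>

static int read_slot(const unsigned long *table, unsigned long nr_slots,
		     unsigned long idx, unsigned long *out)
{
	if (idx >= nr_slots)
		return -ENOENT;

	/* Clamp idx under speculation so the load below cannot act as an
	 * out-of-bounds gadget even on a mispredicted path. */
	idx = array_index_nospec(idx, nr_slots);
	*out = table[idx];
	return 0;
}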
diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c index d1e50bf..3464f33 100644 --- a/arch/riscv/kvm/aia_aplic.c +++ b/arch/riscv/kvm/aia_aplic.c
@@ -10,6 +10,7 @@ #include <linux/irqchip/riscv-aplic.h> #include <linux/kvm_host.h> #include <linux/math.h> +#include <linux/nospec.h> #include <linux/spinlock.h> #include <linux/swab.h> #include <kvm/iodev.h> @@ -45,7 +46,7 @@ static u32 aplic_read_sourcecfg(struct aplic *aplic, u32 irq) if (!irq || aplic->nr_irqs <= irq) return 0; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; raw_spin_lock_irqsave(&irqd->lock, flags); ret = irqd->sourcecfg; @@ -61,7 +62,7 @@ static void aplic_write_sourcecfg(struct aplic *aplic, u32 irq, u32 val) if (!irq || aplic->nr_irqs <= irq) return; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; if (val & APLIC_SOURCECFG_D) val = 0; @@ -81,7 +82,7 @@ static u32 aplic_read_target(struct aplic *aplic, u32 irq) if (!irq || aplic->nr_irqs <= irq) return 0; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; raw_spin_lock_irqsave(&irqd->lock, flags); ret = irqd->target; @@ -97,7 +98,7 @@ static void aplic_write_target(struct aplic *aplic, u32 irq, u32 val) if (!irq || aplic->nr_irqs <= irq) return; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; val &= APLIC_TARGET_EIID_MASK | (APLIC_TARGET_HART_IDX_MASK << APLIC_TARGET_HART_IDX_SHIFT) | @@ -116,7 +117,7 @@ static bool aplic_read_pending(struct aplic *aplic, u32 irq) if (!irq || aplic->nr_irqs <= irq) return false; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; raw_spin_lock_irqsave(&irqd->lock, flags); ret = (irqd->state & APLIC_IRQ_STATE_PENDING) ? true : false; @@ -132,7 +133,7 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending) if (!irq || aplic->nr_irqs <= irq) return; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; raw_spin_lock_irqsave(&irqd->lock, flags); @@ -170,7 +171,7 @@ static bool aplic_read_enabled(struct aplic *aplic, u32 irq) if (!irq || aplic->nr_irqs <= irq) return false; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; raw_spin_lock_irqsave(&irqd->lock, flags); ret = (irqd->state & APLIC_IRQ_STATE_ENABLED) ? true : false; @@ -186,7 +187,7 @@ static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled) if (!irq || aplic->nr_irqs <= irq) return; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; raw_spin_lock_irqsave(&irqd->lock, flags); if (enabled) @@ -205,7 +206,7 @@ static bool aplic_read_input(struct aplic *aplic, u32 irq) if (!irq || aplic->nr_irqs <= irq) return false; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; raw_spin_lock_irqsave(&irqd->lock, flags); @@ -254,7 +255,7 @@ static void aplic_update_irq_range(struct kvm *kvm, u32 first, u32 last) for (irq = first; irq <= last; irq++) { if (!irq || aplic->nr_irqs <= irq) continue; - irqd = &aplic->irqs[irq]; + irqd = &aplic->irqs[array_index_nospec(irq, aplic->nr_irqs)]; raw_spin_lock_irqsave(&irqd->lock, flags); @@ -283,7 +284,7 @@ int kvm_riscv_aia_aplic_inject(struct kvm *kvm, u32 source, bool level) if (!aplic || !source || (aplic->nr_irqs <= source)) return -ENODEV; - irqd = &aplic->irqs[source]; + irqd = &aplic->irqs[array_index_nospec(source, aplic->nr_irqs)]; ie = (aplic->domaincfg & APLIC_DOMAINCFG_IE) ? true : false; raw_spin_lock_irqsave(&irqd->lock, flags);
diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c index b195a93..49c71d3 100644 --- a/arch/riscv/kvm/aia_device.c +++ b/arch/riscv/kvm/aia_device.c
@@ -11,6 +11,7 @@ #include <linux/irqchip/riscv-imsic.h> #include <linux/kvm_host.h> #include <linux/uaccess.h> +#include <linux/cpufeature.h> static int aia_create(struct kvm_device *dev, u32 type) { @@ -22,6 +23,9 @@ static int aia_create(struct kvm_device *dev, u32 type) if (irqchip_in_kernel(kvm)) return -EEXIST; + if (!riscv_isa_extension_available(NULL, SSAIA)) + return -ENODEV; + ret = -EBUSY; if (kvm_trylock_all_vcpus(kvm)) return ret; @@ -437,7 +441,7 @@ static int aia_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) static int aia_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - int nr_vcpus; + int nr_vcpus, r = -ENXIO; switch (attr->group) { case KVM_DEV_RISCV_AIA_GRP_CONFIG: @@ -466,12 +470,18 @@ static int aia_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) } break; case KVM_DEV_RISCV_AIA_GRP_APLIC: - return kvm_riscv_aia_aplic_has_attr(dev->kvm, attr->attr); + mutex_lock(&dev->kvm->lock); + r = kvm_riscv_aia_aplic_has_attr(dev->kvm, attr->attr); + mutex_unlock(&dev->kvm->lock); + break; case KVM_DEV_RISCV_AIA_GRP_IMSIC: - return kvm_riscv_aia_imsic_has_attr(dev->kvm, attr->attr); + mutex_lock(&dev->kvm->lock); + r = kvm_riscv_aia_imsic_has_attr(dev->kvm, attr->attr); + mutex_unlock(&dev->kvm->lock); + break; } - return -ENXIO; + return r; } struct kvm_device_ops kvm_riscv_aia_device_ops = {
diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index 06752fa..8786f52 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c
@@ -908,6 +908,10 @@ int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, unsigned long isel, int r, rc = KVM_INSN_CONTINUE_NEXT_SEPC; struct imsic *imsic = vcpu->arch.aia_context.imsic_state; + /* If IMSIC vCPU state not initialized then forward to user space */ + if (!imsic) + return KVM_INSN_EXIT_TO_USER_SPACE; + if (isel == KVM_RISCV_AIA_IMSIC_TOPEI) { /* Read pending and enabled interrupt with highest priority */ topei = imsic_mrif_topei(imsic->swfile, imsic->nr_eix,
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 0b75eb2..088d33b 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c
@@ -245,6 +245,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) { struct kvm_gstage gstage; + bool mmu_locked; if (!kvm->arch.pgd) return false; @@ -253,9 +254,12 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) gstage.flags = 0; gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); gstage.pgd = kvm->arch.pgd; + mmu_locked = spin_trylock(&kvm->mmu_lock); kvm_riscv_gstage_unmap_range(&gstage, range->start << PAGE_SHIFT, (range->end - range->start) << PAGE_SHIFT, range->may_block); + if (mmu_locked) + spin_unlock(&kvm->mmu_lock); return false; } @@ -535,7 +539,7 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, goto out_unlock; /* Check if we are backed by a THP and thus use block mapping if possible */ - if (vma_pagesize == PAGE_SIZE) + if (!logging && (vma_pagesize == PAGE_SIZE)) vma_pagesize = transparent_hugepage_adjust(kvm, memslot, hva, &hfn, &gpa); if (writable) {
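A reading of the spin_trylock() dance above (my interpretation, not stated in the hunk): kvm_unmap_gfn_range() can apparently be reached both from paths that already hold kvm->mmu_lock and from paths that do not, so the function opportunistically takes the lock and drops it only if this invocation was the one that acquired it:

	bool locked = spin_trylock(&kvm->mmu_lock);

	/* ... unmap work, safe under either the caller's hold or ours ... */

	if (locked)
		spin_unlock(&kvm->mmu_lock);

Note this idiom is only sound if a failed trylock can only mean the current call chain already holds the lock; contention from an unrelated CPU would leave the walk running unlocked.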
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index a55a95d..fdd99ac 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c
@@ -24,7 +24,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, ecall_exit_stat), STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c index 030904d..bd5a9e7 100644 --- a/arch/riscv/kvm/vcpu_fp.c +++ b/arch/riscv/kvm/vcpu_fp.c
@@ -10,6 +10,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> +#include <linux/nospec.h> #include <linux/uaccess.h> #include <asm/cpufeature.h> @@ -93,9 +94,11 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu, if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr)) reg_val = &cntx->fp.f.fcsr; else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) && - reg_num <= KVM_REG_RISCV_FP_F_REG(f[31])) + reg_num <= KVM_REG_RISCV_FP_F_REG(f[31])) { + reg_num = array_index_nospec(reg_num, + ARRAY_SIZE(cntx->fp.f.f)); reg_val = &cntx->fp.f.f[reg_num]; - else + } else return -ENOENT; } else if ((rtype == KVM_REG_RISCV_FP_D) && riscv_isa_extension_available(vcpu->arch.isa, d)) { @@ -107,6 +110,8 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu, reg_num <= KVM_REG_RISCV_FP_D_REG(f[31])) { if (KVM_REG_SIZE(reg->id) != sizeof(u64)) return -EINVAL; + reg_num = array_index_nospec(reg_num, + ARRAY_SIZE(cntx->fp.d.f)); reg_val = &cntx->fp.d.f[reg_num]; } else return -ENOENT; @@ -138,9 +143,11 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu, if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr)) reg_val = &cntx->fp.f.fcsr; else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) && - reg_num <= KVM_REG_RISCV_FP_F_REG(f[31])) + reg_num <= KVM_REG_RISCV_FP_F_REG(f[31])) { + reg_num = array_index_nospec(reg_num, + ARRAY_SIZE(cntx->fp.f.f)); reg_val = &cntx->fp.f.f[reg_num]; - else + } else return -ENOENT; } else if ((rtype == KVM_REG_RISCV_FP_D) && riscv_isa_extension_available(vcpu->arch.isa, d)) { @@ -152,6 +159,8 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu, reg_num <= KVM_REG_RISCV_FP_D_REG(f[31])) { if (KVM_REG_SIZE(reg->id) != sizeof(u64)) return -EINVAL; + reg_num = array_index_nospec(reg_num, + ARRAY_SIZE(cntx->fp.d.f)); reg_val = &cntx->fp.d.f[reg_num]; } else return -ENOENT;
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index e7ab6cb..45ecc00 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -10,6 +10,7 @@ #include <linux/bitops.h> #include <linux/errno.h> #include <linux/err.h> +#include <linux/nospec.h> #include <linux/uaccess.h> #include <linux/kvm_host.h> #include <asm/cacheflush.h> @@ -127,6 +128,7 @@ static int kvm_riscv_vcpu_isa_check_host(unsigned long kvm_ext, unsigned long *g kvm_ext >= ARRAY_SIZE(kvm_isa_ext_arr)) return -ENOENT; + kvm_ext = array_index_nospec(kvm_ext, ARRAY_SIZE(kvm_isa_ext_arr)); *guest_ext = kvm_isa_ext_arr[kvm_ext]; switch (*guest_ext) { case RISCV_ISA_EXT_SMNPM: @@ -443,13 +445,16 @@ static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu, unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_RISCV_CORE); + unsigned long regs_max = sizeof(struct kvm_riscv_core) / sizeof(unsigned long); unsigned long reg_val; if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) return -EINVAL; - if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long)) + if (reg_num >= regs_max) return -ENOENT; + reg_num = array_index_nospec(reg_num, regs_max); + if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc)) reg_val = cntx->sepc; else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num && @@ -476,13 +481,16 @@ static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu, unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_RISCV_CORE); + unsigned long regs_max = sizeof(struct kvm_riscv_core) / sizeof(unsigned long); unsigned long reg_val; if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) return -EINVAL; - if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long)) + if (reg_num >= regs_max) return -ENOENT; + reg_num = array_index_nospec(reg_num, regs_max); + if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id))) return -EFAULT; @@ -507,10 +515,13 @@ static int kvm_riscv_vcpu_general_get_csr(struct kvm_vcpu *vcpu, unsigned long *out_val) { struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + unsigned long regs_max = sizeof(struct kvm_riscv_csr) / sizeof(unsigned long); - if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) + if (reg_num >= regs_max) return -ENOENT; + reg_num = array_index_nospec(reg_num, regs_max); + if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { kvm_riscv_vcpu_flush_interrupts(vcpu); *out_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK; @@ -526,10 +537,13 @@ static int kvm_riscv_vcpu_general_set_csr(struct kvm_vcpu *vcpu, unsigned long reg_val) { struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + unsigned long regs_max = sizeof(struct kvm_riscv_csr) / sizeof(unsigned long); - if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) + if (reg_num >= regs_max) return -ENOENT; + reg_num = array_index_nospec(reg_num, regs_max); + if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { reg_val &= VSIP_VALID_MASK; reg_val <<= VSIP_TO_HVIP_SHIFT; @@ -548,10 +562,15 @@ static inline int kvm_riscv_vcpu_smstateen_set_csr(struct kvm_vcpu *vcpu, unsigned long reg_val) { struct kvm_vcpu_smstateen_csr *csr = &vcpu->arch.smstateen_csr; + unsigned long regs_max = sizeof(struct kvm_riscv_smstateen_csr) / + sizeof(unsigned long); - if (reg_num >= sizeof(struct kvm_riscv_smstateen_csr) / - sizeof(unsigned long)) - return -EINVAL; + if (!riscv_isa_extension_available(vcpu->arch.isa, SMSTATEEN)) + return -ENOENT; + if (reg_num >= regs_max) + return -ENOENT; + + reg_num = array_index_nospec(reg_num, regs_max); ((unsigned long *)csr)[reg_num] = reg_val; return 0; @@ -562,10 +581,15 @@ static int kvm_riscv_vcpu_smstateen_get_csr(struct kvm_vcpu *vcpu, unsigned long 
*out_val) { struct kvm_vcpu_smstateen_csr *csr = &vcpu->arch.smstateen_csr; + unsigned long regs_max = sizeof(struct kvm_riscv_smstateen_csr) / + sizeof(unsigned long); - if (reg_num >= sizeof(struct kvm_riscv_smstateen_csr) / - sizeof(unsigned long)) - return -EINVAL; + if (!riscv_isa_extension_available(vcpu->arch.isa, SMSTATEEN)) + return -ENOENT; + if (reg_num >= regs_max) + return -ENOENT; + + reg_num = array_index_nospec(reg_num, regs_max); *out_val = ((unsigned long *)csr)[reg_num]; return 0; @@ -595,10 +619,7 @@ static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu, rc = kvm_riscv_vcpu_aia_get_csr(vcpu, reg_num, ®_val); break; case KVM_REG_RISCV_CSR_SMSTATEEN: - rc = -EINVAL; - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) - rc = kvm_riscv_vcpu_smstateen_get_csr(vcpu, reg_num, - ®_val); + rc = kvm_riscv_vcpu_smstateen_get_csr(vcpu, reg_num, ®_val); break; default: rc = -ENOENT; @@ -640,10 +661,7 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu, rc = kvm_riscv_vcpu_aia_set_csr(vcpu, reg_num, reg_val); break; case KVM_REG_RISCV_CSR_SMSTATEEN: - rc = -EINVAL; - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) - rc = kvm_riscv_vcpu_smstateen_set_csr(vcpu, reg_num, - reg_val); + rc = kvm_riscv_vcpu_smstateen_set_csr(vcpu, reg_num, reg_val); break; default: rc = -ENOENT;
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 4d8d5e9..e873430 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -10,6 +10,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> +#include <linux/nospec.h> #include <linux/perf/riscv_pmu.h> #include <asm/csr.h> #include <asm/kvm_vcpu_sbi.h> @@ -87,7 +88,8 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code) { - return hw_event_perf_map[sbi_event_code]; + return hw_event_perf_map[array_index_nospec(sbi_event_code, + SBI_PMU_HW_GENERAL_MAX)]; } static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code) @@ -218,6 +220,7 @@ static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, return -EINVAL; } + cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS); pmc = &kvpmu->pmc[cidx]; if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW) @@ -244,6 +247,7 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, return -EINVAL; } + cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS); pmc = &kvpmu->pmc[cidx]; if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { @@ -520,11 +524,12 @@ int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx, { struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); - if (cidx > RISCV_KVM_MAX_COUNTERS || cidx == 1) { + if (cidx >= RISCV_KVM_MAX_COUNTERS || cidx == 1) { retdata->err_val = SBI_ERR_INVALID_PARAM; return 0; } + cidx = array_index_nospec(cidx, RISCV_KVM_MAX_COUNTERS); retdata->out_val = kvpmu->pmc[cidx].cinfo.value; return 0; @@ -559,7 +564,8 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base, } /* Start the counters that have been configured and requested by the guest */ for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { - pmc_index = i + ctr_base; + pmc_index = array_index_nospec(i + ctr_base, + RISCV_KVM_MAX_COUNTERS); if (!test_bit(pmc_index, kvpmu->pmc_in_use)) continue; /* The guest started the counter again. Reset the overflow status */ @@ -630,7 +636,8 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, /* Stop the counters that have been configured and requested by the guest */ for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { - pmc_index = i + ctr_base; + pmc_index = array_index_nospec(i + ctr_base, + RISCV_KVM_MAX_COUNTERS); if (!test_bit(pmc_index, kvpmu->pmc_in_use)) continue; pmc = &kvpmu->pmc[pmc_index]; @@ -761,6 +768,7 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba } } + ctr_idx = array_index_nospec(ctr_idx, RISCV_KVM_MAX_COUNTERS); pmc = &kvpmu->pmc[ctr_idx]; pmc->idx = ctr_idx;
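Besides the nospec clamps, note the comparison fix in kvm_riscv_vcpu_pmu_ctr_info() above: with RISCV_KVM_MAX_COUNTERS entries, the valid indices run from 0 through RISCV_KVM_MAX_COUNTERS - 1, so the old `cidx > RISCV_KVM_MAX_COUNTERS` check admitted one index past the end of kvpmu->pmc[]. In isolation:

	/* For an array of n elements, reject idx == n too: */
	if (idx >= n)		/* the old code used `idx > n`, off by one */
		return -EINVAL;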
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index 7cbd234..13c63ae 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c
@@ -13,7 +13,7 @@ #include <linux/kvm_host.h> #include <asm/kvm_mmu.h> -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS() }; static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == @@ -181,7 +181,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_IOEVENTFD: case KVM_CAP_USER_MEMORY: - case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_ONE_REG: case KVM_CAP_READONLY_MEM:
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index f318407..dad02f5 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h
@@ -62,8 +62,8 @@ do { \ * @size: number of elements in array */ #define array_index_mask_nospec array_index_mask_nospec -static inline unsigned long array_index_mask_nospec(unsigned long index, - unsigned long size) +static __always_inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) { unsigned long mask;
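Marking array_index_mask_nospec() __always_inline guards against the compiler outlining it; an out-of-line call would still return the right mask, but inlining keeps the data dependency tight at each use site (and, plausibly, allows use from noinstr or always-inline contexts where out-of-line calls are not permitted). For reference, the generic fallback in <linux/nospec.h> builds the mask branchlessly along these lines (paraphrased; BITS_PER_LONG is the usual kernel macro):

static inline unsigned long index_mask(unsigned long index, unsigned long size)
{
	/* If 0 <= index < size, the OR below has a clear sign bit, so the
	 * negated arithmetic shift yields ~0UL; otherwise it yields 0UL. */
	return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
}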
diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h index 09f763b..32536ee 100644 --- a/arch/s390/include/asm/idle.h +++ b/arch/s390/include/asm/idle.h
@@ -19,9 +19,9 @@ struct s390_idle_data { unsigned long mt_cycles_enter[8]; }; +DECLARE_PER_CPU(struct s390_idle_data, s390_idle); + extern struct device_attribute dev_attr_idle_count; extern struct device_attribute dev_attr_idle_time_us; -void psw_idle(struct s390_idle_data *data, unsigned long psw_mask); - #endif /* _S390_IDLE_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 64a50f0..3039c88 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h
@@ -710,6 +710,9 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm); void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, unsigned long *aqm, unsigned long *adm); +#define SIE64_RETURN_NORMAL 0 +#define SIE64_RETURN_MCCK 1 + int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa, unsigned long gasce);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index cc187af..78195ee 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h
@@ -159,7 +159,7 @@ static __always_inline void __stackleak_poison(unsigned long erase_low, " j 4f\n" "3: mvc 8(1,%[addr]),0(%[addr])\n" "4:" - : [addr] "+&a" (erase_low), [count] "+&d" (count), [tmp] "=&a" (tmp) + : [addr] "+&a" (erase_low), [count] "+&a" (count), [tmp] "=&a" (tmp) : [poison] "d" (poison) : "memory", "cc" );
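On the constraint change above: in s390 inline assembly, "d" allows any general-purpose register including %r0, while "a" excludes %r0. That distinction matters whenever the operand can participate in address generation, where %r0 is architecturally read as zero rather than as its contents. A minimal illustration with invented code (not from the patch):

static inline unsigned long load_indexed(unsigned long base, unsigned long off)
{
	unsigned long val;

	/* off and base feed the D(X,B) address calculation; if either were
	 * allocated to %r0 it would silently contribute 0 to the address. */
	asm volatile("lg	%[val],0(%[off],%[base])"
		     : [val] "=d" (val)
		     : [off] "a" (off), [base] "a" (base)
		     : "memory");
	return val;
}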
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index c9ae680..ac3606c 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h
@@ -62,7 +62,7 @@ struct stack_frame { struct { unsigned long sie_control_block; unsigned long sie_savearea; - unsigned long sie_reason; + unsigned long sie_return; unsigned long sie_flags; unsigned long sie_control_block_phys; unsigned long sie_guest_asce;
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h index 9d25fb3..b1db75d 100644 --- a/arch/s390/include/asm/vtime.h +++ b/arch/s390/include/asm/vtime.h
@@ -2,6 +2,12 @@ #ifndef _S390_VTIME_H #define _S390_VTIME_H +#include <asm/lowcore.h> +#include <asm/cpu_mf.h> +#include <asm/idle.h> + +DECLARE_PER_CPU(u64, mt_cycles[8]); + static inline void update_timer_sys(void) { struct lowcore *lc = get_lowcore(); @@ -20,4 +26,32 @@ static inline void update_timer_mcck(void) lc->last_update_timer = lc->mcck_enter_timer; } +static inline void update_timer_idle(void) +{ + struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); + struct lowcore *lc = get_lowcore(); + u64 cycles_new[8]; + int i, mtid; + + mtid = smp_cpu_mtid; + if (mtid) { + stcctm(MT_DIAG, mtid, cycles_new); + for (i = 0; i < mtid; i++) + __this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); + } + /* + * This is a bit subtle: Forward last_update_clock so it excludes idle + * time. For correct steal time calculation in do_account_vtime() add + * passed wall time before idle_enter to steal_timer: + * During the passed wall time before idle_enter CPU time may have + * been accounted to system, hardirq, softirq, etc. lowcore fields. + * The accounted CPU times will be subtracted again from steal_timer + * when accumulated steal time is calculated in do_account_vtime(). + */ + lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock; + lc->last_update_clock = lc->int_clock; + lc->system_timer += lc->last_update_timer - idle->timer_idle_enter; + lc->last_update_timer = lc->sys_enter_timer; +} + #endif /* _S390_VTIME_H */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index e1a5b5b..fbd26f3 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c
@@ -63,7 +63,7 @@ int main(void) OFFSET(__SF_EMPTY, stack_frame, empty[0]); OFFSET(__SF_SIE_CONTROL, stack_frame, sie_control_block); OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea); - OFFSET(__SF_SIE_REASON, stack_frame, sie_reason); + OFFSET(__SF_SIE_RETURN, stack_frame, sie_return); OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags); OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys); OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 4873fe9..bb806d1 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S
@@ -200,7 +200,7 @@ stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce - xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 + xc __SF_SIE_RETURN(8,%r15),__SF_SIE_RETURN(%r15) # return code = 0 mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags lmg %r0,%r13,0(%r4) # load guest gprs 0-13 mvi __TI_sie(%r14),1 @@ -237,7 +237,7 @@ xgr %r4,%r4 xgr %r5,%r5 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers - lg %r2,__SF_SIE_REASON(%r15) # return exit reason code + lg %r2,__SF_SIE_RETURN(%r15) # return sie return code BR_EX %r14 SYM_FUNC_END(__sie64a) EXPORT_SYMBOL(__sie64a) @@ -271,6 +271,7 @@ xgr %r9,%r9 xgr %r10,%r10 xgr %r11,%r11 + xgr %r12,%r12 la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs mvc __PT_R8(64,%r2),__LC_SAVE_AREA(%r13) MBEAR %r2,%r13 @@ -407,6 +408,7 @@ xgr %r6,%r6 xgr %r7,%r7 xgr %r10,%r10 + xgr %r12,%r12 xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13) MBEAR %r11,%r13 @@ -496,6 +498,7 @@ xgr %r6,%r6 xgr %r7,%r7 xgr %r10,%r10 + xgr %r12,%r12 stmg %r8,%r9,__PT_PSW(%r11) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index dd55cc6..fb67b4a 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h
@@ -56,8 +56,6 @@ long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user *return_code, unsigned long flags); -DECLARE_PER_CPU(u64, mt_cycles[8]); - unsigned long stack_alloc(void); void stack_free(unsigned long stack);
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 39cb8d0..1f1b06b 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c
@@ -15,37 +15,22 @@ #include <trace/events/power.h> #include <asm/cpu_mf.h> #include <asm/cputime.h> +#include <asm/idle.h> #include <asm/nmi.h> #include <asm/smp.h> -#include "entry.h" -static DEFINE_PER_CPU(struct s390_idle_data, s390_idle); +DEFINE_PER_CPU(struct s390_idle_data, s390_idle); void account_idle_time_irq(void) { struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); - struct lowcore *lc = get_lowcore(); unsigned long idle_time; - u64 cycles_new[8]; - int i; - if (smp_cpu_mtid) { - stcctm(MT_DIAG, smp_cpu_mtid, cycles_new); - for (i = 0; i < smp_cpu_mtid; i++) - this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); - } - - idle_time = lc->int_clock - idle->clock_idle_enter; - - lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock; - lc->last_update_clock = lc->int_clock; - - lc->system_timer += lc->last_update_timer - idle->timer_idle_enter; - lc->last_update_timer = lc->sys_enter_timer; + idle_time = get_lowcore()->int_clock - idle->clock_idle_enter; /* Account time spent with enabled wait psw loaded as idle time. */ - WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time); - WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1); + __atomic64_add(idle_time, &idle->idle_time); + __atomic64_add_const(1, &idle->idle_count); account_idle_time(cputime_to_nsecs(idle_time)); }
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index dcdc7e2..049c557 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c
@@ -2377,7 +2377,7 @@ void __init setup_ipl(void) atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb); } -void s390_reset_system(void) +void __no_stack_protector s390_reset_system(void) { /* Disable prefixing */ set_prefix(0);
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index f81723b..d10a17e 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c
@@ -146,6 +146,10 @@ void noinstr do_io_irq(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); bool from_idle; + from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); + if (from_idle) + update_timer_idle(); + irq_enter_rcu(); if (user_mode(regs)) { @@ -154,7 +158,6 @@ void noinstr do_io_irq(struct pt_regs *regs) current->thread.last_break = regs->last_break; } - from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); if (from_idle) account_idle_time_irq(); @@ -182,6 +185,10 @@ void noinstr do_ext_irq(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); bool from_idle; + from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); + if (from_idle) + update_timer_idle(); + irq_enter_rcu(); if (user_mode(regs)) { @@ -194,7 +201,6 @@ void noinstr do_ext_irq(struct pt_regs *regs) regs->int_parm = get_lowcore()->ext_params; regs->int_parm_long = get_lowcore()->ext_params2; - from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); if (from_idle) account_idle_time_irq();
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index a55abbf..94fbfad 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c
@@ -487,8 +487,8 @@ void notrace s390_do_machine_check(struct pt_regs *regs) mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK); if (test_cpu_flag(CIF_MCCK_GUEST) && (mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) { - /* Set exit reason code for host's later handling */ - *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR; + /* Set sie return code for host's later handling */ + ((struct stack_frame *)regs->gprs[15])->sie_return = SIE64_RETURN_MCCK; } clear_cpu_flag(CIF_MCCK_GUEST);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index c92c29d..7bfeb52 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1168,6 +1168,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, static void hw_perf_event_update(struct perf_event *event, int flush_all) { unsigned long long event_overflow, sampl_overflow, num_sdb; + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); struct hw_perf_event *hwc = &event->hw; union hws_trailer_header prev, new; struct hws_trailer_entry *te; @@ -1247,8 +1248,11 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) * are dropped. * Slightly increase the interval to avoid hitting this limit. */ - if (event_overflow) + if (event_overflow) { SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10); + if (SAMPL_RATE(hwc) > cpuhw->qsi.max_sampl_rate) + SAMPL_RATE(hwc) = cpuhw->qsi.max_sampl_rate; + } } static inline unsigned long aux_sdb_index(struct aux_buffer *aux,
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index 795b6cc..d103c85 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c
@@ -13,6 +13,7 @@ */ #include <linux/cpufeature.h> +#include <linux/nospec.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/mm.h> @@ -131,8 +132,10 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap) if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET))) goto out; regs->gprs[2] = -ENOSYS; - if (likely(nr < NR_syscalls)) + if (likely(nr < NR_syscalls)) { + nr = array_index_nospec(nr, NR_syscalls); regs->gprs[2] = sys_call_table[nr](regs); + } out: syscall_exit_to_user_mode(regs); }
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 53bcbb9..2b62395 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S
@@ -221,6 +221,7 @@ /* Debugging sections. */ STABS_DEBUG DWARF_DEBUG + MODINFO ELF_DETAILS /*
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 234a0ba..bf48744 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c
@@ -48,8 +48,7 @@ static inline void set_vtimer(u64 expires) static inline int virt_timer_forward(u64 elapsed) { - BUG_ON(!irqs_disabled()); - + lockdep_assert_irqs_disabled(); if (list_empty(&virt_timer_list)) return 0; elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed); @@ -137,23 +136,16 @@ static int do_account_vtime(struct task_struct *tsk) lc->system_timer += timer; /* Update MT utilization calculation */ - if (smp_cpu_mtid && - time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies))) + if (smp_cpu_mtid && time_after64(jiffies_64, __this_cpu_read(mt_scaling_jiffies))) update_mt_scaling(); /* Calculate cputime delta */ - user = update_tsk_timer(&tsk->thread.user_timer, - READ_ONCE(lc->user_timer)); - guest = update_tsk_timer(&tsk->thread.guest_timer, - READ_ONCE(lc->guest_timer)); - system = update_tsk_timer(&tsk->thread.system_timer, - READ_ONCE(lc->system_timer)); - hardirq = update_tsk_timer(&tsk->thread.hardirq_timer, - READ_ONCE(lc->hardirq_timer)); - softirq = update_tsk_timer(&tsk->thread.softirq_timer, - READ_ONCE(lc->softirq_timer)); - lc->steal_timer += - clock - user - guest - system - hardirq - softirq; + user = update_tsk_timer(&tsk->thread.user_timer, lc->user_timer); + guest = update_tsk_timer(&tsk->thread.guest_timer, lc->guest_timer); + system = update_tsk_timer(&tsk->thread.system_timer, lc->system_timer); + hardirq = update_tsk_timer(&tsk->thread.hardirq_timer, lc->hardirq_timer); + softirq = update_tsk_timer(&tsk->thread.softirq_timer, lc->softirq_timer); + lc->steal_timer += clock - user - guest - system - hardirq - softirq; /* Push account value */ if (user) { @@ -225,10 +217,6 @@ static u64 vtime_delta(void) return timer - lc->last_update_timer; } -/* - * Update process times based on virtual cpu times stored by entry.S - * to the lowcore fields user_timer, system_timer & steal_clock. - */ void vtime_account_kernel(struct task_struct *tsk) { struct lowcore *lc = get_lowcore(); @@ -238,27 +226,17 @@ void vtime_account_kernel(struct task_struct *tsk) lc->guest_timer += delta; else lc->system_timer += delta; - - virt_timer_forward(delta); } EXPORT_SYMBOL_GPL(vtime_account_kernel); void vtime_account_softirq(struct task_struct *tsk) { - u64 delta = vtime_delta(); - - get_lowcore()->softirq_timer += delta; - - virt_timer_forward(delta); + get_lowcore()->softirq_timer += vtime_delta(); } void vtime_account_hardirq(struct task_struct *tsk) { - u64 delta = vtime_delta(); - - get_lowcore()->hardirq_timer += delta; - - virt_timer_forward(delta); + get_lowcore()->hardirq_timer += vtime_delta(); } /*
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 917ac74..5b835bc 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig
@@ -28,9 +28,7 @@ select HAVE_KVM_INVALID_WAKEUPS select HAVE_KVM_NO_POLL select KVM_VFIO - select MMU_NOTIFIER select VIRT_XFER_TO_GUEST_WORK - select KVM_GENERIC_MMU_NOTIFIER select KVM_MMU_LOCKLESS_AGING help Support hosting paravirtualized guest machines using the SIE
diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c index 670404d..7b8d70f 100644 --- a/arch/s390/kvm/dat.c +++ b/arch/s390/kvm/dat.c
@@ -135,32 +135,6 @@ int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newt } /** - * dat_crstep_xchg() - Exchange a gmap CRSTE with another. - * @crstep: Pointer to the CRST entry - * @new: Replacement entry. - * @gfn: The affected guest address. - * @asce: The ASCE of the address space. - * - * Context: This function is assumed to be called with kvm->mmu_lock held. - */ -void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce asce) -{ - if (crstep->h.i) { - WRITE_ONCE(*crstep, new); - return; - } else if (cpu_has_edat2()) { - crdte_crste(crstep, *crstep, new, gfn, asce); - return; - } - - if (machine_has_tlb_guest()) - idte_crste(crstep, gfn, IDTE_GUEST_ASCE, asce, IDTE_GLOBAL); - else - idte_crste(crstep, gfn, 0, NULL_ASCE, IDTE_GLOBAL); - WRITE_ONCE(*crstep, new); -} - -/** * dat_crstep_xchg_atomic() - Atomically exchange a gmap CRSTE with another. * @crstep: Pointer to the CRST entry. * @old: Expected old value. @@ -175,8 +149,8 @@ void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce * * Return: %true if the exchange was successful. */ -bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn, - union asce asce) +bool __must_check dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, + gfn_t gfn, union asce asce) { if (old.h.i) return arch_try_cmpxchg((long *)crstep, &old.val, new.val); @@ -292,6 +266,7 @@ static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t g pt->ptes[i].val = init.val | i * PAGE_SIZE; /* No need to take locks as the page table is not installed yet. */ pgste_init.prefix_notif = old.s.fc1.prefix_notif; + pgste_init.vsie_notif = old.s.fc1.vsie_notif; pgste_init.pcl = uses_skeys && init.h.i; dat_init_pgstes(pt, pgste_init.val); } else { @@ -893,7 +868,8 @@ static long _dat_slot_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct d /* This table entry needs to be updated. */ if (walk->start <= gfn && walk->end >= next) { - dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce); + if (!dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce)) + return -EINVAL; /* A lower level table was present, needs to be freed. 
*/ if (!crste.h.fc && !crste.h.i) { if (is_pmd(crste)) @@ -1021,67 +997,21 @@ bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end) return _dat_walk_gfn_range(start, end, asce, &test_age_ops, 0, NULL) > 0; } -int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level, - bool uses_skeys, struct guest_fault *f) -{ - union crste oldval, newval; - union pte newpte, oldpte; - union pgste pgste; - int rc = 0; - - rc = dat_entry_walk(mc, f->gfn, asce, DAT_WALK_ALLOC_CONTINUE, level, &f->crstep, &f->ptep); - if (rc == -EINVAL || rc == -ENOMEM) - return rc; - if (rc) - return -EAGAIN; - - if (WARN_ON_ONCE(unlikely(get_level(f->crstep, f->ptep) > level))) - return -EINVAL; - - if (f->ptep) { - pgste = pgste_get_lock(f->ptep); - oldpte = *f->ptep; - newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page); - newpte.s.sd = oldpte.s.sd; - oldpte.s.sd = 0; - if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) { - pgste = __dat_ptep_xchg(f->ptep, pgste, newpte, f->gfn, asce, uses_skeys); - if (f->callback) - f->callback(f); - } else { - rc = -EAGAIN; - } - pgste_set_unlock(f->ptep, pgste); - } else { - oldval = READ_ONCE(*f->crstep); - newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable, - f->write_attempt | oldval.s.fc1.d); - newval.s.fc1.sd = oldval.s.fc1.sd; - if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val && - crste_origin_large(oldval) != crste_origin_large(newval)) - return -EAGAIN; - if (!dat_crstep_xchg_atomic(f->crstep, oldval, newval, f->gfn, asce)) - return -EAGAIN; - if (f->callback) - f->callback(f); - } - - return rc; -} - static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk) { - union crste crste = READ_ONCE(*crstep); + union crste newcrste, oldcrste; int *n = walk->priv; - if (!crste.h.fc || crste.h.i || crste.h.p) - return 0; - + do { + oldcrste = READ_ONCE(*crstep); + if (!oldcrste.h.fc || oldcrste.h.i || oldcrste.h.p) + return 0; + if (oldcrste.s.fc1.prefix_notif) + break; + newcrste = oldcrste; + newcrste.s.fc1.prefix_notif = 1; + } while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, walk->asce)); *n = 2; - if (crste.s.fc1.prefix_notif) - return 0; - crste.s.fc1.prefix_notif = 1; - dat_crstep_xchg(crstep, crste, gfn, walk->asce); return 0; }
diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h index 123e11d..874cc96 100644 --- a/arch/s390/kvm/dat.h +++ b/arch/s390/kvm/dat.h
@@ -160,14 +160,14 @@ union pmd { unsigned long :44; /* HW */ unsigned long : 3; /* Unused */ unsigned long : 1; /* HW */ + unsigned long s : 1; /* Special */ unsigned long w : 1; /* Writable soft-bit */ unsigned long r : 1; /* Readable soft-bit */ unsigned long d : 1; /* Dirty */ unsigned long y : 1; /* Young */ - unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */ unsigned long : 3; /* HW */ + unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */ unsigned long vsie_notif : 1; /* Referenced in a shadow table */ - unsigned long : 1; /* Unused */ unsigned long : 4; /* HW */ unsigned long sd : 1; /* Soft-Dirty */ unsigned long pr : 1; /* Present */ @@ -183,14 +183,14 @@ union pud { unsigned long :33; /* HW */ unsigned long :14; /* Unused */ unsigned long : 1; /* HW */ + unsigned long s : 1; /* Special */ unsigned long w : 1; /* Writable soft-bit */ unsigned long r : 1; /* Readable soft-bit */ unsigned long d : 1; /* Dirty */ unsigned long y : 1; /* Young */ - unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */ unsigned long : 3; /* HW */ + unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */ unsigned long vsie_notif : 1; /* Referenced in a shadow table */ - unsigned long : 1; /* Unused */ unsigned long : 4; /* HW */ unsigned long sd : 1; /* Soft-Dirty */ unsigned long pr : 1; /* Present */ @@ -254,14 +254,14 @@ union crste { struct { unsigned long :47; unsigned long : 1; /* HW (should be 0) */ + unsigned long s : 1; /* Special */ unsigned long w : 1; /* Writable */ unsigned long r : 1; /* Readable */ unsigned long d : 1; /* Dirty */ unsigned long y : 1; /* Young */ - unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */ unsigned long : 3; /* HW */ + unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */ unsigned long vsie_notif : 1; /* Referenced in a shadow table */ - unsigned long : 1; unsigned long : 4; /* HW */ unsigned long sd : 1; /* Soft-Dirty */ unsigned long pr : 1; /* Present */ @@ -540,8 +540,6 @@ int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gf u16 type, u16 param); int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn); bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end); -int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level, - bool uses_skeys, struct guest_fault *f); int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty); long dat_reset_cmma(union asce asce, gfn_t start_gfn); @@ -938,11 +936,14 @@ static inline bool dat_pudp_xchg_atomic(union pud *pudp, union pud old, union pu return dat_crstep_xchg_atomic(_CRSTEP(pudp), _CRSTE(old), _CRSTE(new), gfn, asce); } -static inline void dat_crstep_clear(union crste *crstep, gfn_t gfn, union asce asce) +static inline union crste dat_crstep_clear_atomic(union crste *crstep, gfn_t gfn, union asce asce) { - union crste newcrste = _CRSTE_EMPTY(crstep->h.tt); + union crste oldcrste, empty = _CRSTE_EMPTY(crstep->h.tt); - dat_crstep_xchg(crstep, newcrste, gfn, asce); + do { + oldcrste = READ_ONCE(*crstep); + } while (!dat_crstep_xchg_atomic(crstep, oldcrste, empty, gfn, asce)); + return oldcrste; } static inline int get_level(union crste *crstep, union pte *ptep)
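The dat.h hunks reshuffle the soft bits so prefix_notif sits next to vsie_notif and add an s (Special) bit. As a rough illustration of the underlying technique, overlaying named soft bits on a raw 64-bit entry via a union, here is a standalone sketch with made-up field positions (GCC and Clang accept 64-bit bit-fields; the real crste layout is the one shown in the diff):

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: field names and positions are invented. */
union entry {
	uint64_t val;
	struct {
		uint64_t hw     : 48;	/* hardware-defined portion */
		uint64_t s      : 1;	/* Special */
		uint64_t w      : 1;	/* Writable */
		uint64_t r      : 1;	/* Readable */
		uint64_t d      : 1;	/* Dirty */
		uint64_t y      : 1;	/* Young */
		uint64_t hwpad  : 3;
		uint64_t pnotif : 1;	/* prefix notification */
		uint64_t vnotif : 1;	/* vsie notification */
		uint64_t rest   : 6;
	};
};

int main(void)
{
	union entry e = { .val = 0 };

	e.pnotif = 1;
	e.s = 1;
	printf("val = %#llx\n", (unsigned long long)e.val);
	return 0;
}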
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 4630b2a..53a8550 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c
@@ -1434,17 +1434,27 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union if (rc) return rc; - pgste = pgste_get_lock(ptep_h); - newpte = _pte(f->pfn, f->writable, !p, 0); - newpte.s.d |= ptep->s.d; - newpte.s.sd |= ptep->s.sd; - newpte.h.p &= ptep->h.p; - pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false); - pgste.vsie_notif = 1; + if (!pgste_get_trylock(ptep_h, &pgste)) + return -EAGAIN; + newpte = _pte(f->pfn, f->writable, !p, ptep_h->s.s); + newpte.s.d |= ptep_h->s.d; + newpte.s.sd |= ptep_h->s.sd; + newpte.h.p &= ptep_h->h.p; + if (!newpte.h.p && !f->writable) { + rc = -EOPNOTSUPP; + } else { + pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false); + pgste.vsie_notif = 1; + } pgste_set_unlock(ptep_h, pgste); + if (rc) + return rc; + if (!sg->parent) + return -EAGAIN; newpte = _pte(f->pfn, 0, !p, 0); - pgste = pgste_get_lock(ptep); + if (!pgste_get_trylock(ptep, &pgste)) + return -EAGAIN; pgste = __dat_ptep_xchg(ptep, pgste, newpte, gpa_to_gfn(raddr), sg->asce, uses_skeys(sg)); pgste_set_unlock(ptep, pgste); @@ -1454,7 +1464,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, union crste *table, struct guest_fault *f, bool p) { - union crste newcrste; + union crste newcrste, oldcrste; gfn_t gfn; int rc; @@ -1467,16 +1477,28 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni if (rc) return rc; - newcrste = _crste_fc1(f->pfn, host->h.tt, f->writable, !p); - newcrste.s.fc1.d |= host->s.fc1.d; - newcrste.s.fc1.sd |= host->s.fc1.sd; - newcrste.h.p &= host->h.p; - newcrste.s.fc1.vsie_notif = 1; - newcrste.s.fc1.prefix_notif = host->s.fc1.prefix_notif; - _gmap_crstep_xchg(sg->parent, host, newcrste, f->gfn, false); + do { + /* _gmap_crstep_xchg_atomic() could have unshadowed this shadow gmap */ + if (!sg->parent) + return -EAGAIN; + oldcrste = READ_ONCE(*host); + newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, f->writable, !p); + newcrste.s.fc1.d |= oldcrste.s.fc1.d; + newcrste.s.fc1.sd |= oldcrste.s.fc1.sd; + newcrste.h.p &= oldcrste.h.p; + newcrste.s.fc1.vsie_notif = 1; + newcrste.s.fc1.prefix_notif = oldcrste.s.fc1.prefix_notif; + newcrste.s.fc1.s = oldcrste.s.fc1.s; + if (!newcrste.h.p && !f->writable) + return -EOPNOTSUPP; + } while (!_gmap_crstep_xchg_atomic(sg->parent, host, oldcrste, newcrste, f->gfn, false)); + if (!sg->parent) + return -EAGAIN; - newcrste = _crste_fc1(f->pfn, host->h.tt, 0, !p); - dat_crstep_xchg(table, newcrste, gpa_to_gfn(raddr), sg->asce); + newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p); + gfn = gpa_to_gfn(raddr); + while (!dat_crstep_xchg_atomic(table, READ_ONCE(*table), newcrste, gfn, sg->asce)) + ; return 0; } @@ -1500,21 +1522,31 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg, if (rc) return rc; - /* A race occourred. The shadow mapping is already valid, nothing to do */ - if ((ptep && !ptep->h.i) || (!ptep && crste_leaf(*table))) + /* A race occurred. The shadow mapping is already valid, nothing to do */ + if ((ptep && !ptep->h.i && ptep->h.p == w->p) || + (!ptep && crste_leaf(*table) && !table->h.i && table->h.p == w->p)) return 0; gl = get_level(table, ptep); + /* In case of a real address space */ + if (w->level <= LEVEL_MEM) { + l = TABLE_TYPE_PAGE_TABLE; + hl = TABLE_TYPE_REGION1; + goto real_address_space; + } + /* * Skip levels that are already protected. 
For each level, protect * only the page containing the entry, not the whole table. */ for (i = gl ; i >= w->level; i--) { - rc = gmap_protect_rmap(mc, sg, entries[i - 1].gfn, gpa_to_gfn(saddr), - entries[i - 1].pfn, i, entries[i - 1].writable); + rc = gmap_protect_rmap(mc, sg, entries[i].gfn, gpa_to_gfn(saddr), + entries[i].pfn, i + 1, entries[i].writable); if (rc) return rc; + if (!sg->parent) + return -EAGAIN; } rc = dat_entry_walk(NULL, entries[LEVEL_MEM].gfn, sg->parent->asce, DAT_WALK_LEAF, @@ -1526,6 +1558,7 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg, /* Get the smallest granularity */ l = min3(gl, hl, w->level); +real_address_space: flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0); /* If necessary, create the shadow mapping */ if (l < gl) {
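_do_shadow_pte() above now uses pgste_get_trylock() and returns -EAGAIN on contention instead of sleeping on the entry lock, letting the caller unwind and retry from a safe point. The general trylock-or-retreat shape, sketched with pthreads (names illustrative):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t entry_lock = PTHREAD_MUTEX_INITIALIZER;

/* Bail out instead of blocking, analogous to pgste_get_trylock() failing. */
static int shadow_one_entry(void)
{
	if (pthread_mutex_trylock(&entry_lock) != 0)
		return -EAGAIN;		/* caller drops its locks and retries */

	/* ... update the shadow entry under the lock ... */

	pthread_mutex_unlock(&entry_lock);
	return 0;
}

int main(void)
{
	printf("rc = %d\n", shadow_one_entry());
	return 0;
}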
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c index ef0c6ebf..645c32c 100644 --- a/arch/s390/kvm/gmap.c +++ b/arch/s390/kvm/gmap.c
@@ -313,13 +313,16 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st struct clear_young_pte_priv *priv = walk->priv; union crste crste, new; - crste = READ_ONCE(*crstep); + do { + crste = READ_ONCE(*crstep); - if (!crste.h.fc) - return 0; - if (!crste.s.fc1.y && crste.h.i) - return 0; - if (!crste_prefix(crste) || gmap_mkold_prefix(priv->gmap, gfn, end)) { + if (!crste.h.fc) + return 0; + if (!crste.s.fc1.y && crste.h.i) + return 0; + if (crste_prefix(crste) && !gmap_mkold_prefix(priv->gmap, gfn, end)) + break; + new = crste; new.h.i = 1; new.s.fc1.y = 0; @@ -328,8 +331,8 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st folio_set_dirty(phys_to_folio(crste_origin_large(crste))); new.s.fc1.d = 0; new.h.p = 1; - dat_crstep_xchg(crstep, new, gfn, walk->asce); - } + } while (!dat_crstep_xchg_atomic(crstep, crste, new, gfn, walk->asce)); + priv->young = 1; return 0; } @@ -391,14 +394,18 @@ static long _gmap_unmap_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct { struct gmap_unmap_priv *priv = walk->priv; struct folio *folio = NULL; + union crste old = *crstep; - if (crstep->h.fc) { - if (crstep->s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags)) - folio = phys_to_folio(crste_origin_large(*crstep)); - gmap_crstep_xchg(priv->gmap, crstep, _CRSTE_EMPTY(crstep->h.tt), gfn); - if (folio) - uv_convert_from_secure_folio(folio); - } + if (!old.h.fc) + return 0; + + if (old.s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags)) + folio = phys_to_folio(crste_origin_large(old)); + /* No races should happen because kvm->mmu_lock is held in write mode */ + KVM_BUG_ON(!gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn), + priv->gmap->kvm); + if (folio) + uv_convert_from_secure_folio(folio); return 0; } @@ -474,23 +481,24 @@ static long _crste_test_and_clear_softdirty(union crste *table, gfn_t gfn, gfn_t if (fatal_signal_pending(current)) return 1; - crste = READ_ONCE(*table); - if (!crste.h.fc) - return 0; - if (crste.h.p && !crste.s.fc1.sd) - return 0; + do { + crste = READ_ONCE(*table); + if (!crste.h.fc) + return 0; + if (crste.h.p && !crste.s.fc1.sd) + return 0; - /* - * If this large page contains one or more prefixes of vCPUs that are - * currently running, do not reset the protection, leave it marked as - * dirty. - */ - if (!crste.s.fc1.prefix_notif || gmap_mkold_prefix(gmap, gfn, end)) { + /* + * If this large page contains one or more prefixes of vCPUs that are + * currently running, do not reset the protection, leave it marked as + * dirty. 
+ */ + if (crste.s.fc1.prefix_notif && !gmap_mkold_prefix(gmap, gfn, end)) + break; new = crste; new.h.p = 1; new.s.fc1.sd = 0; - gmap_crstep_xchg(gmap, table, new, gfn); - } + } while (!gmap_crstep_xchg_atomic(gmap, table, crste, new, gfn)); for ( ; gfn < end; gfn++) mark_page_dirty(gmap->kvm, gfn); @@ -511,7 +519,7 @@ void gmap_sync_dirty_log(struct gmap *gmap, gfn_t start, gfn_t end) _dat_walk_gfn_range(start, end, gmap->asce, &walk_ops, 0, gmap); } -static int gmap_handle_minor_crste_fault(union asce asce, struct guest_fault *f) +static int gmap_handle_minor_crste_fault(struct gmap *gmap, struct guest_fault *f) { union crste newcrste, oldcrste = READ_ONCE(*f->crstep); @@ -536,10 +544,8 @@ static int gmap_handle_minor_crste_fault(union asce asce, struct guest_fault *f) newcrste.s.fc1.d = 1; newcrste.s.fc1.sd = 1; } - if (!oldcrste.s.fc1.d && newcrste.s.fc1.d) - SetPageDirty(phys_to_page(crste_origin_large(newcrste))); /* In case of races, let the slow path deal with it. */ - return !dat_crstep_xchg_atomic(f->crstep, oldcrste, newcrste, f->gfn, asce); + return !gmap_crstep_xchg_atomic(gmap, f->crstep, oldcrste, newcrste, f->gfn); } /* Trying to write on a read-only page, let the slow path deal with it. */ return 1; @@ -568,8 +574,6 @@ static int _gmap_handle_minor_pte_fault(struct gmap *gmap, union pgste *pgste, newpte.s.d = 1; newpte.s.sd = 1; } - if (!oldpte.s.d && newpte.s.d) - SetPageDirty(pfn_to_page(newpte.h.pfra)); *pgste = gmap_ptep_xchg(gmap, f->ptep, newpte, *pgste, f->gfn); return 0; @@ -606,7 +610,7 @@ int gmap_try_fixup_minor(struct gmap *gmap, struct guest_fault *fault) fault->callback(fault); pgste_set_unlock(fault->ptep, pgste); } else { - rc = gmap_handle_minor_crste_fault(gmap->asce, fault); + rc = gmap_handle_minor_crste_fault(gmap, fault); if (!rc && fault->callback) fault->callback(fault); } @@ -623,10 +627,61 @@ static inline bool gmap_1m_allowed(struct gmap *gmap, gfn_t gfn) return test_bit(GMAP_FLAG_ALLOW_HPAGE_1M, &gmap->flags); } +static int _gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, int level, + struct guest_fault *f) +{ + union crste oldval, newval; + union pte newpte, oldpte; + union pgste pgste; + int rc = 0; + + rc = dat_entry_walk(mc, f->gfn, gmap->asce, DAT_WALK_ALLOC_CONTINUE, level, + &f->crstep, &f->ptep); + if (rc == -ENOMEM) + return rc; + if (KVM_BUG_ON(rc == -EINVAL, gmap->kvm)) + return rc; + if (rc) + return -EAGAIN; + if (KVM_BUG_ON(get_level(f->crstep, f->ptep) > level, gmap->kvm)) + return -EINVAL; + + if (f->ptep) { + pgste = pgste_get_lock(f->ptep); + oldpte = *f->ptep; + newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page); + newpte.s.sd = oldpte.s.sd; + oldpte.s.sd = 0; + if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) { + pgste = gmap_ptep_xchg(gmap, f->ptep, newpte, pgste, f->gfn); + if (f->callback) + f->callback(f); + } else { + rc = -EAGAIN; + } + pgste_set_unlock(f->ptep, pgste); + } else { + do { + oldval = READ_ONCE(*f->crstep); + newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable, + f->write_attempt | oldval.s.fc1.d); + newval.s.fc1.s = !f->page; + newval.s.fc1.sd = oldval.s.fc1.sd; + if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val && + crste_origin_large(oldval) != crste_origin_large(newval)) + return -EAGAIN; + } while (!gmap_crstep_xchg_atomic(gmap, f->crstep, oldval, newval, f->gfn)); + if (f->callback) + f->callback(f); + } + + return rc; +} + int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fault *f) { unsigned int order; - int rc, level; + 
int level; lockdep_assert_held(&gmap->kvm->mmu_lock); @@ -638,16 +693,14 @@ int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fau else if (order >= get_order(_SEGMENT_SIZE) && gmap_1m_allowed(gmap, f->gfn)) level = TABLE_TYPE_SEGMENT; } - rc = dat_link(mc, gmap->asce, level, uses_skeys(gmap), f); - KVM_BUG_ON(rc == -EINVAL, gmap->kvm); - return rc; + return _gmap_link(mc, gmap, level, f); } static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, gfn_t p_gfn, gfn_t c_gfn, bool force_alloc) { + union crste newcrste, oldcrste; struct page_table *pt; - union crste newcrste; union crste *crstep; union pte *ptep; int rc; @@ -673,7 +726,11 @@ static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, &crstep, &ptep); if (rc) return rc; - dat_crstep_xchg(crstep, newcrste, c_gfn, gmap->asce); + do { + oldcrste = READ_ONCE(*crstep); + if (oldcrste.val == newcrste.val) + break; + } while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, c_gfn, gmap->asce)); return 0; } @@ -777,8 +834,10 @@ static void gmap_ucas_unmap_one(struct gmap *gmap, gfn_t c_gfn) int rc; rc = dat_entry_walk(NULL, c_gfn, gmap->asce, 0, TABLE_TYPE_SEGMENT, &crstep, &ptep); - if (!rc) - dat_crstep_xchg(crstep, _PMD_EMPTY, c_gfn, gmap->asce); + if (rc) + return; + while (!dat_crstep_xchg_atomic(crstep, READ_ONCE(*crstep), _PMD_EMPTY, c_gfn, gmap->asce)) + ; } void gmap_ucas_unmap(struct gmap *gmap, gfn_t c_gfn, unsigned long count) @@ -1017,8 +1076,8 @@ static void gmap_unshadow_level(struct gmap *sg, gfn_t r_gfn, int level) dat_ptep_xchg(ptep, _PTE_EMPTY, r_gfn, sg->asce, uses_skeys(sg)); return; } - crste = READ_ONCE(*crstep); - dat_crstep_clear(crstep, r_gfn, sg->asce); + + crste = dat_crstep_clear_atomic(crstep, r_gfn, sg->asce); if (crste_leaf(crste) || crste.h.i) return; if (is_pmd(crste)) @@ -1101,6 +1160,7 @@ struct gmap_protect_asce_top_level { static inline int __gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, struct gmap *sg, struct gmap_protect_asce_top_level *context) { + struct gmap *parent; int rc, i; guard(write_lock)(&sg->kvm->mmu_lock); @@ -1108,7 +1168,12 @@ static inline int __gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, s if (kvm_s390_array_needs_retry_safe(sg->kvm, context->seq, context->f)) return -EAGAIN; - scoped_guard(spinlock, &sg->parent->children_lock) { + parent = READ_ONCE(sg->parent); + if (!parent) + return -EAGAIN; + scoped_guard(spinlock, &parent->children_lock) { + if (READ_ONCE(sg->parent) != parent) + return -EAGAIN; for (i = 0; i < CRST_TABLE_PAGES; i++) { if (!context->f[i].valid) continue; @@ -1191,6 +1256,9 @@ struct gmap *gmap_create_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *pare struct gmap *sg, *new; int rc; + if (WARN_ON(!parent)) + return ERR_PTR(-EINVAL); + scoped_guard(spinlock, &parent->children_lock) { sg = gmap_find_shadow(parent, asce, edat_level); if (sg) {
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h index ccb5cd7..579399e 100644 --- a/arch/s390/kvm/gmap.h +++ b/arch/s390/kvm/gmap.h
@@ -185,6 +185,8 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un else _gmap_handle_vsie_unshadow_event(gmap, gfn); } + if (!ptep->s.d && newpte.s.d && !newpte.s.s) + SetPageDirty(pfn_to_page(newpte.h.pfra)); return __dat_ptep_xchg(ptep, pgste, newpte, gfn, gmap->asce, uses_skeys(gmap)); } @@ -194,35 +196,42 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true); } -static inline void _gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne, - gfn_t gfn, bool needs_lock) +static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep, + union crste oldcrste, union crste newcrste, + gfn_t gfn, bool needs_lock) { - unsigned long align = 8 + (is_pmd(*crstep) ? 0 : 11); + unsigned long align = is_pmd(newcrste) ? _PAGE_ENTRIES : _PAGE_ENTRIES * _CRST_ENTRIES; + + if (KVM_BUG_ON(crstep->h.tt != oldcrste.h.tt || newcrste.h.tt != oldcrste.h.tt, gmap->kvm)) + return true; lockdep_assert_held(&gmap->kvm->mmu_lock); if (!needs_lock) lockdep_assert_held(&gmap->children_lock); gfn = ALIGN_DOWN(gfn, align); - if (crste_prefix(*crstep) && (ne.h.p || ne.h.i || !crste_prefix(ne))) { - ne.s.fc1.prefix_notif = 0; + if (crste_prefix(oldcrste) && (newcrste.h.p || newcrste.h.i || !crste_prefix(newcrste))) { + newcrste.s.fc1.prefix_notif = 0; gmap_unmap_prefix(gmap, gfn, gfn + align); } - if (crste_leaf(*crstep) && crstep->s.fc1.vsie_notif && - (ne.h.p || ne.h.i || !ne.s.fc1.vsie_notif)) { - ne.s.fc1.vsie_notif = 0; + if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif && + (newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) { + newcrste.s.fc1.vsie_notif = 0; if (needs_lock) gmap_handle_vsie_unshadow_event(gmap, gfn); else _gmap_handle_vsie_unshadow_event(gmap, gfn); } - dat_crstep_xchg(crstep, ne, gfn, gmap->asce); + if (!oldcrste.s.fc1.d && newcrste.s.fc1.d && !newcrste.s.fc1.s) + SetPageDirty(phys_to_page(crste_origin_large(newcrste))); + return dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, gmap->asce); } -static inline void gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne, - gfn_t gfn) +static inline bool __must_check gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep, + union crste oldcrste, union crste newcrste, + gfn_t gfn) { - return _gmap_crstep_xchg(gmap, crstep, ne, gfn, true); + return _gmap_crstep_xchg_atomic(gmap, crstep, oldcrste, newcrste, gfn, true); } /**
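The rewritten alignment in _gmap_crstep_xchg_atomic() derives the invalidation span from table geometry rather than a magic constant: assuming 4 KiB pages, a segment (pmd) entry covers _PAGE_ENTRIES = 256 pages (1 MiB), and a region-third (pud) entry covers 256 * 2048 = 524288 pages (2 GiB). A sketch of the arithmetic (macro values assumed from the s390 layout):

#include <stdint.h>
#include <stdio.h>

#define PAGE_ENTRIES	256UL		/* PTEs per page table */
#define CRST_ENTRIES	2048UL		/* entries per region/segment table */
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))

int main(void)
{
	uint64_t gfn = 0x123456;
	uint64_t pmd_align = PAGE_ENTRIES;		/* 1 MiB segment */
	uint64_t pud_align = PAGE_ENTRIES * CRST_ENTRIES; /* 2 GiB region */

	printf("pmd-aligned gfn: %#llx\n",
	       (unsigned long long)ALIGN_DOWN(gfn, pmd_align));
	printf("pud-aligned gfn: %#llx\n",
	       (unsigned long long)ALIGN_DOWN(gfn, pud_align));
	return 0;
}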
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 18932a6..7cb8ce8 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c
@@ -2724,6 +2724,9 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap) bit = bit_nr + (addr % PAGE_SIZE) * 8; + /* kvm_set_routing_entry() should never allow this to happen */ + WARN_ON_ONCE(bit > (PAGE_SIZE * BITS_PER_BYTE - 1)); + return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit; } @@ -2824,6 +2827,12 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, int rc; mci.val = mcck_info->mcic; + + /* log machine checks being reinjected on all debugs */ + VCPU_EVENT(vcpu, 2, "guest machine check %lx", mci.val); + KVM_EVENT(2, "guest machine check %lx", mci.val); + pr_info("guest machine check pid %d: %lx", current->pid, mci.val); + if (mci.sr) cr14 |= CR14_RECOVERY_SUBMASK; if (mci.dg) @@ -2852,6 +2861,7 @@ int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { + const struct kvm_irq_routing_s390_adapter *adapter; u64 uaddr_s, uaddr_i; int idx; @@ -2862,6 +2872,14 @@ int kvm_set_routing_entry(struct kvm *kvm, return -EINVAL; e->set = set_adapter_int; + adapter = &ue->u.adapter; + if (adapter->summary_addr + (adapter->summary_offset / 8) >= + (adapter->summary_addr & PAGE_MASK) + PAGE_SIZE) + return -EINVAL; + if (adapter->ind_addr + (adapter->ind_offset / 8) >= + (adapter->ind_addr & PAGE_MASK) + PAGE_SIZE) + return -EINVAL; + idx = srcu_read_lock(&kvm->srcu); uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr); uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr);
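The new kvm_set_routing_entry() checks reject adapter routes whose summary or indicator bit would land outside the page containing the base address; the offsets are bit offsets, hence the division by 8, and get_ind_bit() can then warn rather than fault. A standalone model of the predicate:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096ULL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

/* true iff the byte holding bit 'offset' of the indicator stays in
 * the same page as addr, mirroring the check added above. */
static bool ind_bit_in_page(uint64_t addr, uint64_t offset)
{
	return addr + offset / 8 < (addr & PAGE_MASK) + PAGE_SIZE;
}

int main(void)
{
	printf("%d\n", ind_bit_in_page(0x1000, 8 * (PAGE_SIZE - 1))); /* 1: last byte */
	printf("%d\n", ind_bit_in_page(0x1001, 8 * (PAGE_SIZE - 1))); /* 0: crosses page */
	return 0;
}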
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7a175d8..d783833 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c
@@ -65,7 +65,7 @@ #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ (KVM_MAX_VCPUS + LOCAL_IRQS)) -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_COUNTER(VM, inject_io), STATS_DESC_COUNTER(VM, inject_float_mchk), @@ -91,7 +91,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, exit_userspace), STATS_DESC_COUNTER(VCPU, exit_null), @@ -601,7 +601,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) switch (ext) { case KVM_CAP_S390_PSW: case KVM_CAP_S390_GMAP: - case KVM_CAP_SYNC_MMU: #ifdef CONFIG_KVM_S390_UCONTROL case KVM_CAP_S390_UCONTROL: #endif @@ -4618,7 +4617,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) return 0; } -static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) +static int vcpu_post_run(struct kvm_vcpu *vcpu, int sie_return) { struct mcck_volatile_info *mcck_info; struct sie_page *sie_page; @@ -4634,14 +4633,14 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; - if (exit_reason == -EINTR) { - VCPU_EVENT(vcpu, 3, "%s", "machine check"); + if (sie_return == SIE64_RETURN_MCCK) { sie_page = container_of(vcpu->arch.sie_block, struct sie_page, sie_block); mcck_info = &sie_page->mcck_info; kvm_s390_reinject_machine_check(vcpu, mcck_info); return 0; } + WARN_ON_ONCE(sie_return != SIE64_RETURN_NORMAL); if (vcpu->arch.sie_block->icptcode > 0) { rc = kvm_handle_sie_intercept(vcpu); @@ -4680,7 +4679,7 @@ int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb, #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) static int __vcpu_run(struct kvm_vcpu *vcpu) { - int rc, exit_reason; + int rc, sie_return; struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; /* @@ -4720,9 +4719,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) guest_timing_enter_irqoff(); __disable_cpu_timer_accounting(vcpu); - exit_reason = kvm_s390_enter_exit_sie(vcpu->arch.sie_block, - vcpu->run->s.regs.gprs, - vcpu->arch.gmap->asce.val); + sie_return = kvm_s390_enter_exit_sie(vcpu->arch.sie_block, + vcpu->run->s.regs.gprs, + vcpu->arch.gmap->asce.val); __enable_cpu_timer_accounting(vcpu); guest_timing_exit_irqoff(); @@ -4745,7 +4744,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) } kvm_vcpu_srcu_read_lock(vcpu); - rc = vcpu_post_run(vcpu, exit_reason); + rc = vcpu_post_run(vcpu, sie_return); if (rc || guestdbg_exit_pending(vcpu)) { kvm_vcpu_srcu_read_unlock(vcpu); break; @@ -5521,9 +5520,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } #endif case KVM_S390_VCPU_FAULT: { - idx = srcu_read_lock(&vcpu->kvm->srcu); - r = vcpu_dat_fault_handler(vcpu, arg, 0); - srcu_read_unlock(&vcpu->kvm->srcu, idx); + gpa_t gaddr = arg; + + scoped_guard(srcu, &vcpu->kvm->srcu) { + r = vcpu_ucontrol_translate(vcpu, &gaddr); + if (r) + break; + + r = kvm_s390_faultin_gfn_simple(vcpu, NULL, gpa_to_gfn(gaddr), false); + if (r == PGM_ADDRESSING) + r = -EFAULT; + if (r <= 0) + break; + r = -EIO; + KVM_BUG_ON(r, vcpu->kvm); + } break; } case KVM_ENABLE_CAP:
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index d249b10..72895dd 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c
@@ -1122,6 +1122,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struc { struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; + unsigned long sie_return = SIE64_RETURN_NORMAL; int guest_bp_isolation; int rc = 0; @@ -1163,7 +1164,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struc goto xfer_to_guest_mode_check; } guest_timing_enter_irqoff(); - rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, sg->asce.val); + sie_return = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, sg->asce.val); guest_timing_exit_irqoff(); local_irq_enable(); } @@ -1178,12 +1179,13 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struc kvm_vcpu_srcu_read_lock(vcpu); - if (rc == -EINTR) { - VCPU_EVENT(vcpu, 3, "%s", "machine check"); + if (sie_return == SIE64_RETURN_MCCK) { kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info); return 0; } + WARN_ON_ONCE(sie_return != SIE64_RETURN_NORMAL); + if (rc > 0) rc = 0; /* we could still have an icpt */ else if (current->thread.gmap_int_code) @@ -1326,7 +1328,7 @@ static void unregister_shadow_scb(struct kvm_vcpu *vcpu) static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; - struct gmap *sg; + struct gmap *sg = NULL; int rc = 0; while (1) { @@ -1366,6 +1368,8 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) sg = gmap_put(sg); cond_resched(); } + if (sg) + sg = gmap_put(sg); if (rc == -EFAULT) { /*
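The vsie_run() change also fixes a reference leak: the loop takes a gmap reference each iteration, but the break paths left the last one held. A toy refcount model of the added cleanup (struct and helpers are invented for illustration):

#include <stdio.h>

struct ref { int count; };

static struct ref *ref_get(struct ref *r) { r->count++; return r; }
static struct ref *ref_put(struct ref *r) { r->count--; return NULL; }

int main(void)
{
	struct ref obj = { .count = 0 };
	struct ref *sg = NULL;

	for (int i = 0; i < 3; i++) {
		sg = ref_get(&obj);
		if (i == 2)
			break;		/* early exit: reference still held */
		sg = ref_put(sg);
	}
	if (sg)				/* the cleanup added after the loop */
		sg = ref_put(sg);

	printf("count = %d\n", obj.count);	/* 0: balanced */
	return 0;
}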
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c index 1721b73..5363e4c 100644 --- a/arch/s390/lib/xor.c +++ b/arch/s390/lib/xor.c
@@ -28,8 +28,8 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, " j 3f\n" "2: xc 0(1,%1),0(%2)\n" "3:" - : : "d" (bytes), "a" (p1), "a" (p2) - : "0", "cc", "memory"); + : "+a" (bytes), "+a" (p1), "+a" (p2) + : : "0", "cc", "memory"); } static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, @@ -54,7 +54,7 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, "2: xc 0(1,%1),0(%2)\n" "3: xc 0(1,%1),0(%3)\n" "4:" - : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) + : "+a" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) : : "0", "cc", "memory"); } @@ -85,7 +85,7 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, "3: xc 0(1,%1),0(%3)\n" "4: xc 0(1,%1),0(%4)\n" "5:" - : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) + : "+a" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) : : "0", "cc", "memory"); } @@ -96,7 +96,6 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p5) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" " jm 6f\n" " srlg 0,%0,8\n" @@ -122,7 +121,7 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, "4: xc 0(1,%1),0(%4)\n" "5: xc 0(1,%1),0(%5)\n" "6:" - : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), + : "+a" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), "+a" (p5) : : "0", "cc", "memory"); }
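The xor.c constraint fix follows from a basic inline-asm rule: the XC loops modify the length and pointer operands, so they must be declared read-write ("+a"), not input-only, or the compiler may assume the registers still hold their old values. An illustration of the same rule (x86 asm is used here purely so the example is widely runnable; the kernel code above is s390):

#include <stdio.h>

/* The loop body clobbers its operand, so "+r" is mandatory. */
static unsigned long consume(unsigned long n)
{
	asm volatile(
		"1:\n\t"
		"dec %0\n\t"
		"jnz 1b"
		: "+r" (n)	/* read and modified by the asm */
		:
		: "cc");
	return n;
}

int main(void)
{
	printf("%lu\n", consume(5));	/* 0 */
	return 0;
}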
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index a52aa7a..191cc53 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c
@@ -441,10 +441,17 @@ void do_secure_storage_access(struct pt_regs *regs) folio = phys_to_folio(addr); if (unlikely(!folio_try_get(folio))) return; - rc = arch_make_folio_accessible(folio); + rc = uv_convert_from_secure(folio_to_phys(folio)); + if (!rc) + clear_bit(PG_arch_1, &folio->flags.f); folio_put(folio); + /* + * There are some valid fixup types for kernel + * accesses to donated secure memory. zeropad is one + * of them. + */ if (rc) - BUG(); + return handle_fault_error_nolock(regs, 0); } else { if (faulthandler_disabled()) return handle_fault_error_nolock(regs, 0);
diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c index 2f82944..6ecd6b0 100644 --- a/arch/s390/mm/pfault.c +++ b/arch/s390/mm/pfault.c
@@ -62,7 +62,7 @@ int __pfault_init(void) "0: nopr %%r7\n" EX_TABLE(0b, 0b) : [rc] "+d" (rc) - : [refbk] "a" (&pfault_init_refbk), "m" (pfault_init_refbk) + : [refbk] "a" (virt_to_phys(&pfault_init_refbk)), "m" (pfault_init_refbk) : "cc"); return rc; } @@ -84,7 +84,7 @@ void __pfault_fini(void) "0: nopr %%r7\n" EX_TABLE(0b, 0b) : - : [refbk] "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) + : [refbk] "a" (virt_to_phys(&pfault_fini_refbk)), "m" (pfault_fini_refbk) : "cc"); }
diff --git a/arch/sh/drivers/platform_early.c b/arch/sh/drivers/platform_early.c index 143747c4..48ddbc5 100644 --- a/arch/sh/drivers/platform_early.c +++ b/arch/sh/drivers/platform_early.c
@@ -26,10 +26,6 @@ static int platform_match(struct device *dev, struct device_driver *drv) struct platform_device *pdev = to_platform_device(dev); struct platform_driver *pdrv = to_platform_driver(drv); - /* When driver_override is set, only bind to the matching driver */ - if (pdev->driver_override) - return !strcmp(pdev->driver_override, drv->name); - /* Then try to match against the id table */ if (pdrv->id_table) return platform_match_id(pdrv->id_table, pdev) != NULL;
diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index 008c302..169c63fb 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S
@@ -89,6 +89,7 @@
 
 	STABS_DEBUG
 	DWARF_DEBUG
+	MODINFO
 	ELF_DETAILS
 
 	DISCARDS
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 46ef88b..7613ab0 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c
@@ -312,6 +312,8 @@ static dma_addr_t dma_4u_map_phys(struct device *dev, phys_addr_t phys,
 	if (direction != DMA_TO_DEVICE)
 		iopte_protection |= IOPTE_WRITE;
 
+	phys &= IO_PAGE_MASK;
+
 	for (i = 0; i < npages; i++, base++, phys += IO_PAGE_SIZE)
 		iopte_val(*base) = iopte_protection | phys;
 
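This path (and the pci_sun4v.c path below) now strips the sub-page offset before generating per-page IOPTEs: the offset belongs in the returned DMA address, while the entries must point at page frames. A worked example of the masking (IO_PAGE_SHIFT = 13 assumed, matching sparc64's 8 KiB IOMMU pages):

#include <stdint.h>
#include <stdio.h>

#define IO_PAGE_SHIFT	13
#define IO_PAGE_SIZE	(1UL << IO_PAGE_SHIFT)
#define IO_PAGE_MASK	(~(IO_PAGE_SIZE - 1))

int main(void)
{
	uint64_t phys = 0x12345678;	/* arbitrary, not page-aligned */
	uint64_t base = phys & IO_PAGE_MASK;

	/* Without the mask, every entry would carry the stray offset. */
	for (int i = 0; i < 3; i++, base += IO_PAGE_SIZE)
		printf("iopte %d -> %#llx\n", i, (unsigned long long)base);
	return 0;
}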
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 7e41574..1603d50 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c
@@ -355,6 +355,13 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, dev->error_state = pci_channel_io_normal; dev->dma_mask = 0xffffffff; + /* + * Assume 64-bit addresses for MSI initially. Will be changed to 32-bit + * if MSI (rather than MSI-X) capability does not have + * PCI_MSI_FLAGS_64BIT. Can also be overridden by driver. + */ + dev->msi_addr_mask = DMA_BIT_MASK(64); + if (of_node_name_eq(node, "pci")) { /* a PCI-PCI bridge */ dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 440284c..61f14b4 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c
@@ -410,6 +410,8 @@ static dma_addr_t dma_4v_map_phys(struct device *dev, phys_addr_t phys,
 
 	iommu_batch_start(dev, prot, entry);
 
+	phys &= IO_PAGE_MASK;
+
 	for (i = 0; i < npages; i++, phys += IO_PAGE_SIZE) {
 		long err = iommu_batch_add(phys, mask);
 		if (unlikely(err < 0L))
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index f1b86eb..7ea510d 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -191,6 +191,7 @@
 
 	STABS_DEBUG
 	DWARF_DEBUG
+	MODINFO
 	ELF_DETAILS
 
 	DISCARDS
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 012b2bc..20fc333 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c
@@ -69,11 +69,11 @@ struct io_thread_req { }; -static struct io_thread_req * (*irq_req_buffer)[]; +static struct io_thread_req **irq_req_buffer; static struct io_thread_req *irq_remainder; static int irq_remainder_size; -static struct io_thread_req * (*io_req_buffer)[]; +static struct io_thread_req **io_req_buffer; static struct io_thread_req *io_remainder; static int io_remainder_size; @@ -398,7 +398,7 @@ static int thread_fd = -1; static int bulk_req_safe_read( int fd, - struct io_thread_req * (*request_buffer)[], + struct io_thread_req **request_buffer, struct io_thread_req **remainder, int *remainder_size, int max_recs @@ -465,7 +465,7 @@ static irqreturn_t ubd_intr(int irq, void *dev) &irq_remainder, &irq_remainder_size, UBD_REQ_BUFFER_SIZE)) >= 0) { for (i = 0; i < len / sizeof(struct io_thread_req *); i++) - ubd_end_request((*irq_req_buffer)[i]); + ubd_end_request(irq_req_buffer[i]); } if (len < 0 && len != -EAGAIN) @@ -1512,7 +1512,7 @@ void *io_thread(void *arg) } for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { - struct io_thread_req *req = (*io_req_buffer)[count]; + struct io_thread_req *req = io_req_buffer[count]; int i; io_count++;
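The ubd change replaces "pointer to array of pointers" with the conventional "pointer to pointer", so elements are indexed as buf[i] instead of the awkward (*buf)[i]. Both declarations can address the same storage:

#include <stdio.h>

struct req { int id; };

int main(void)
{
	static struct req a = { 1 }, b = { 2 };
	struct req *storage[2] = { &a, &b };

	struct req **buf = storage;		/* new style: buf[i] */
	struct req *(*abuf)[] = &storage;	/* old style: (*abuf)[i] */

	printf("%d %d\n", buf[1]->id, (*abuf)[1]->id);	/* same element */
	return 0;
}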
diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index a36b791..ad3cefe 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S
@@ -172,6 +172,7 @@
 
 	STABS_DEBUG
 	DWARF_DEBUG
+	MODINFO
 	ELF_DETAILS
 
 	DISCARDS
diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index a409d4b..30aa2434 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S
@@ -113,6 +113,7 @@
 
 	STABS_DEBUG
 	DWARF_DEBUG
+	MODINFO
 	ELF_DETAILS
 
 	DISCARDS
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 68f9d7a..b8b2b7b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile
@@ -113,6 +113,7 @@ ifdef CONFIG_EFI_SBAT $(obj)/sbat.o: $(CONFIG_EFI_SBAT_FILE) +AFLAGS_sbat.o += -I $(srctree) endif $(obj)/vmlinux: $(vmlinux-objs-y) $(vmlinux-libs-y) FORCE
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index c8c1464..e468476 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c
@@ -28,17 +28,17 @@ #include "sev.h" static struct ghcb boot_ghcb_page __aligned(PAGE_SIZE); -struct ghcb *boot_ghcb; +struct ghcb *boot_ghcb __section(".data"); #undef __init #define __init #define __BOOT_COMPRESSED -u8 snp_vmpl; -u16 ghcb_version; +u8 snp_vmpl __section(".data"); +u16 ghcb_version __section(".data"); -u64 boot_svsm_caa_pa; +u64 boot_svsm_caa_pa __section(".data"); /* Include code for early handlers */ #include "../../boot/startup/sev-shared.c" @@ -188,6 +188,7 @@ bool sev_es_check_ghcb_fault(unsigned long address) MSR_AMD64_SNP_RESERVED_BIT13 | \ MSR_AMD64_SNP_RESERVED_BIT15 | \ MSR_AMD64_SNP_SECURE_AVIC | \ + MSR_AMD64_SNP_RESERVED_BITS19_22 | \ MSR_AMD64_SNP_RESERVED_MASK) #ifdef CONFIG_AMD_SECURE_AVIC
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index 587ce3e..e0b1527 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -88,7 +88,7 @@
 	/DISCARD/ : {
 		*(.dynamic) *(.dynsym) *(.dynstr) *(.dynbss)
 		*(.hash) *(.gnu.hash)
-		*(.note.*)
+		*(.note.*) *(.modinfo)
 	}
 
 	.got.plt (INFO) : {
diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c index a0fa8bb..d9ac3a9 100644 --- a/arch/x86/boot/startup/sev-shared.c +++ b/arch/x86/boot/startup/sev-shared.c
@@ -31,7 +31,7 @@ static u32 cpuid_std_range_max __ro_after_init; static u32 cpuid_hyp_range_max __ro_after_init; static u32 cpuid_ext_range_max __ro_after_init; -bool sev_snp_needs_sfw; +bool sev_snp_needs_sfw __section(".data"); void __noreturn sev_es_terminate(unsigned int set, unsigned int reason)
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 907981b..7ed3da9 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c
@@ -89,6 +89,7 @@ static const char * const sev_status_feat_names[] = {
 	[MSR_AMD64_SNP_VMSA_REG_PROT_BIT]	= "VMSARegProt",
 	[MSR_AMD64_SNP_SMT_PROT_BIT]		= "SMTProt",
 	[MSR_AMD64_SNP_SECURE_AVIC_BIT]		= "SecureAVIC",
+	[MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT]	= "IBPBOnEntry",
 };
 
 /*
diff --git a/arch/x86/coco/sev/noinstr.c b/arch/x86/coco/sev/noinstr.c index 9d94aca..5afd663 100644 --- a/arch/x86/coco/sev/noinstr.c +++ b/arch/x86/coco/sev/noinstr.c
@@ -121,6 +121,9 @@ noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) WARN_ON(!irqs_disabled()); + if (!sev_cfg.ghcbs_initialized) + return boot_ghcb; + data = this_cpu_read(runtime_data); ghcb = &data->ghcb_page; @@ -164,6 +167,9 @@ noinstr void __sev_put_ghcb(struct ghcb_state *state) WARN_ON(!irqs_disabled()); + if (!sev_cfg.ghcbs_initialized) + return; + data = this_cpu_read(runtime_data); ghcb = &data->ghcb_page;
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c index a9b7299..fbe2d10 100644 --- a/arch/x86/entry/entry_fred.c +++ b/arch/x86/entry/entry_fred.c
@@ -160,8 +160,6 @@ void __init fred_complete_exception_setup(void) static noinstr void fred_extint(struct pt_regs *regs) { unsigned int vector = regs->fred_ss.vector; - unsigned int index = array_index_nospec(vector - FIRST_SYSTEM_VECTOR, - NR_SYSTEM_VECTORS); if (WARN_ON_ONCE(vector < FIRST_EXTERNAL_VECTOR)) return; @@ -170,7 +168,8 @@ static noinstr void fred_extint(struct pt_regs *regs) irqentry_state_t state = irqentry_enter(regs); instrumentation_begin(); - sysvec_table[index](regs); + sysvec_table[array_index_nospec(vector - FIRST_SYSTEM_VECTOR, + NR_SYSTEM_VECTORS)](regs); instrumentation_end(); irqentry_exit(regs, state); } else { @@ -178,6 +177,16 @@ static noinstr void fred_extint(struct pt_regs *regs) } } +#ifdef CONFIG_AMD_MEM_ENCRYPT +noinstr void exc_vmm_communication(struct pt_regs *regs, unsigned long error_code) +{ + if (user_mode(regs)) + return user_exc_vmm_communication(regs, error_code); + else + return kernel_exc_vmm_communication(regs, error_code); +} +#endif + static noinstr void fred_hwexc(struct pt_regs *regs, unsigned long error_code) { /* Optimize for #PF. That's the only exception which matters performance wise */ @@ -208,6 +217,10 @@ static noinstr void fred_hwexc(struct pt_regs *regs, unsigned long error_code) #ifdef CONFIG_X86_CET case X86_TRAP_CP: return exc_control_protection(regs, error_code); #endif +#ifdef CONFIG_AMD_MEM_ENCRYPT + case X86_TRAP_VC: return exc_vmm_communication(regs, error_code); +#endif + default: return fred_bad_type(regs, error_code); }
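fred_extint() now applies the speculation clamp only on the path where the bounds check has succeeded, at the point of use, instead of computing the index unconditionally up front. A model of the pattern (array_index_nospec() is stood in for by a plain conditional; the real helper is a branchless clamp, and all names here are invented):

#include <stdio.h>

#define NR_HANDLERS 8

/* Stand-in for array_index_nospec(): never illustrate it as the real thing. */
static unsigned int index_nospec(unsigned int idx, unsigned int size)
{
	return idx < size ? idx : 0;
}

static void handler(unsigned int n) { printf("handler %u\n", n); }

static void dispatch(unsigned int vector, unsigned int first)
{
	if (vector < first)
		return;		/* bounds check first ... */
	handler(index_nospec(vector - first, NR_HANDLERS)); /* ... clamp at use */
}

int main(void)
{
	dispatch(5, 2);
	return 0;
}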
diff --git a/arch/x86/entry/vdso/common/vclock_gettime.c b/arch/x86/entry/vdso/common/vclock_gettime.c index 027b7e88..57066f3 100644 --- a/arch/x86/entry/vdso/common/vclock_gettime.c +++ b/arch/x86/entry/vdso/common/vclock_gettime.c
@@ -13,7 +13,7 @@
 #include <linux/types.h>
 #include <vdso/gettime.h>
 
-#include "../../../../lib/vdso/gettimeofday.c"
+#include "lib/vdso/gettimeofday.c"
 
 int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
 {
diff --git a/arch/x86/entry/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S index b433353..b33fcc5 100644 --- a/arch/x86/entry/vdso/vdso32/sigreturn.S +++ b/arch/x86/entry/vdso/vdso32/sigreturn.S
@@ -35,9 +35,38 @@
 #endif
 .endm
 
+/*
+ * WARNING:
+ *
+ * A bug in the libgcc unwinder as of at least gcc 15.2 (2026) means that
+ * the unwinder fails to recognize the signal frame flag.
+ *
+ * There is a hacky legacy fallback path in libgcc which ends up
+ * getting invoked instead. It happens to work as long as BOTH of the
+ * following conditions are true:
+ *
+ * 1. There is at least one byte before each of the sigreturn
+ *    functions which falls outside any function. This is enforced by
+ *    an explicit nop instruction before the ALIGN.
+ * 2. The code sequences from the entry point up to and including
+ *    the int $0x80 below need to match EXACTLY. Do not change them
+ *    in any way. The exact byte sequences are:
+ *
+ * __kernel_sigreturn:
+ *    0: 58             pop %eax
+ *    1: b8 77 00 00 00 mov $0x77,%eax
+ *    6: cd 80          int $0x80
+ *
+ * __kernel_rt_sigreturn:
+ *    0: b8 ad 00 00 00 mov $0xad,%eax
+ *    5: cd 80          int $0x80
+ *
+ * For details, see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=124050
+ */
 .text
 .globl __kernel_sigreturn
 .type __kernel_sigreturn,@function
+	nop /* libgcc hack: see comment above */
 	ALIGN
 __kernel_sigreturn:
 	STARTPROC_SIGNAL_FRAME IA32_SIGFRAME_sigcontext
@@ -52,6 +81,7 @@
 
 .globl __kernel_rt_sigreturn
 .type __kernel_rt_sigreturn,@function
+	nop /* libgcc hack: see comment above */
 	ALIGN
 __kernel_rt_sigreturn:
 	STARTPROC_SIGNAL_FRAME IA32_RT_SIGFRAME_sigcontext
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 03ce1bc..810ab21 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c
@@ -1372,14 +1372,17 @@ static void x86_pmu_enable(struct pmu *pmu) else if (i < n_running) continue; - if (hwc->state & PERF_HES_ARCH) + cpuc->events[hwc->idx] = event; + + if (hwc->state & PERF_HES_ARCH) { + static_call(x86_pmu_set_period)(event); continue; + } /* * if cpuc->enabled = 0, then no wrmsr as * per x86_pmu_enable_event() */ - cpuc->events[hwc->idx] = event; x86_pmu_start(event, PERF_EF_RELOAD); } cpuc->n_added = 0;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index cf3a4fe..793335c 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c
@@ -4628,6 +4628,19 @@ static inline void intel_pmu_set_acr_caused_constr(struct perf_event *event, event->hw.dyn_constraint &= hybrid(event->pmu, acr_cause_mask64); } +static inline int intel_set_branch_counter_constr(struct perf_event *event, + int *num) +{ + if (branch_sample_call_stack(event)) + return -EINVAL; + if (branch_sample_counters(event)) { + (*num)++; + event->hw.dyn_constraint &= x86_pmu.lbr_counters; + } + + return 0; +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); @@ -4698,21 +4711,19 @@ static int intel_pmu_hw_config(struct perf_event *event) * group, which requires the extra space to store the counters. */ leader = event->group_leader; - if (branch_sample_call_stack(leader)) + if (intel_set_branch_counter_constr(leader, &num)) return -EINVAL; - if (branch_sample_counters(leader)) { - num++; - leader->hw.dyn_constraint &= x86_pmu.lbr_counters; - } leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS; for_each_sibling_event(sibling, leader) { - if (branch_sample_call_stack(sibling)) + if (intel_set_branch_counter_constr(sibling, &num)) return -EINVAL; - if (branch_sample_counters(sibling)) { - num++; - sibling->hw.dyn_constraint &= x86_pmu.lbr_counters; - } + } + + /* event isn't installed as a sibling yet. */ + if (event != leader) { + if (intel_set_branch_counter_constr(event, &num)) + return -EINVAL; } if (num > fls(x86_pmu.lbr_counters)) @@ -4844,8 +4855,10 @@ static int intel_pmu_hw_config(struct perf_event *event) intel_pmu_set_acr_caused_constr(leader, idx++, cause_mask); if (leader->nr_siblings) { - for_each_sibling_event(sibling, leader) - intel_pmu_set_acr_caused_constr(sibling, idx++, cause_mask); + for_each_sibling_event(sibling, leader) { + if (is_x86_event(sibling)) + intel_pmu_set_acr_caused_constr(sibling, idx++, cause_mask); + } } if (leader != event)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 5027afc..7f0d515 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c
@@ -345,12 +345,12 @@ static u64 parse_omr_data_source(u8 dse) if (omr.omr_remote) val |= REM; - val |= omr.omr_hitm ? P(SNOOP, HITM) : P(SNOOP, HIT); - if (omr.omr_source == 0x2) { - u8 snoop = omr.omr_snoop | omr.omr_promoted; + u8 snoop = omr.omr_snoop | (omr.omr_promoted << 1); - if (snoop == 0x0) + if (omr.omr_hitm) + val |= P(SNOOP, HITM); + else if (snoop == 0x0) val |= P(SNOOP, NA); else if (snoop == 0x1) val |= P(SNOOP, MISS); @@ -359,7 +359,10 @@ static u64 parse_omr_data_source(u8 dse) else if (snoop == 0x3) val |= P(SNOOP, NONE); } else if (omr.omr_source > 0x2 && omr.omr_source < 0x7) { + val |= omr.omr_hitm ? P(SNOOP, HITM) : P(SNOOP, HIT); val |= omr.omr_snoop ? P(SNOOPX, FWD) : 0; + } else { + val |= P(SNOOP, NONE); } return val;
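The ds.c fix assembles a 2-bit snoop code from omr_snoop (bit 0) and omr_promoted (bit 1) and gives HITM precedence in the source == 0x2 case. A table-style model of that decode (function and variable names are descriptive only, not the kernel's):

#include <stdio.h>

static const char *snoop_name(unsigned int snoop, unsigned int hitm)
{
	if (hitm)
		return "HITM";		/* takes precedence, as above */
	switch (snoop) {
	case 0x0: return "NA";
	case 0x1: return "MISS";
	case 0x2: return "HIT";
	case 0x3: return "NONE";
	}
	return "?";
}

int main(void)
{
	unsigned int omr_snoop = 1, omr_promoted = 1, omr_hitm = 0;
	unsigned int snoop = omr_snoop | (omr_promoted << 1);

	printf("%s\n", snoop_name(snoop, omr_hitm));	/* NONE */
	return 0;
}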
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 5ed6e0b..0a1d081 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c
@@ -6497,6 +6497,32 @@ static struct intel_uncore_type gnr_uncore_ubox = { .attr_update = uncore_alias_groups, }; +static struct uncore_event_desc gnr_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x01,umask=0x00"), + INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0, "event=0x05,umask=0xcf"), + INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1, "event=0x06,umask=0xcf"), + INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0, "event=0x05,umask=0xf0"), + INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1, "event=0x06,umask=0xf0"), + INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type gnr_uncore_imc = { + SPR_UNCORE_MMIO_COMMON_FORMAT(), + .name = "imc", + .fixed_ctr_bits = 48, + .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, + .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, + .event_descs = gnr_uncore_imc_events, +}; + static struct intel_uncore_type gnr_uncore_pciex8 = { SPR_UNCORE_PCI_COMMON_FORMAT(), .name = "pciex8", @@ -6544,7 +6570,7 @@ static struct intel_uncore_type *gnr_uncores[UNCORE_GNR_NUM_UNCORE_TYPES] = { NULL, &spr_uncore_pcu, &gnr_uncore_ubox, - &spr_uncore_imc, + &gnr_uncore_imc, NULL, &gnr_uncore_upi, NULL,
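The .scale strings above are not arbitrary: each CAS event moves one 64-byte cache line, and 64 / 2^20 = 6.103515625e-5 converts a count of lines into MiB. Checking the constant:

#include <stdio.h>

int main(void)
{
	double scale = 64.0 / (1024.0 * 1024.0);	/* bytes per CAS / bytes per MiB */

	printf("%.12g\n", scale);	/* 6.103515625e-05 */
	return 0;
}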
diff --git a/arch/x86/hyperv/hv_crash.c b/arch/x86/hyperv/hv_crash.c index 92da1b4..5ffcc23 100644 --- a/arch/x86/hyperv/hv_crash.c +++ b/arch/x86/hyperv/hv_crash.c
@@ -107,14 +107,12 @@ static void __noreturn hv_panic_timeout_reboot(void) cpu_relax(); } -/* This cannot be inlined as it needs stack */ -static noinline __noclone void hv_crash_restore_tss(void) +static void hv_crash_restore_tss(void) { load_TR_desc(); } -/* This cannot be inlined as it needs stack */ -static noinline void hv_crash_clear_kernpt(void) +static void hv_crash_clear_kernpt(void) { pgd_t *pgd; p4d_t *p4d; @@ -125,50 +123,9 @@ static noinline void hv_crash_clear_kernpt(void) native_p4d_clear(p4d); } -/* - * This is the C entry point from the asm glue code after the disable hypercall. - * We enter here in IA32-e long mode, ie, full 64bit mode running on kernel - * page tables with our below 4G page identity mapped, but using a temporary - * GDT. ds/fs/gs/es are null. ss is not usable. bp is null. stack is not - * available. We restore kernel GDT, and rest of the context, and continue - * to kexec. - */ -static asmlinkage void __noreturn hv_crash_c_entry(void) + +static void __noreturn hv_crash_handle(void) { - struct hv_crash_ctxt *ctxt = &hv_crash_ctxt; - - /* first thing, restore kernel gdt */ - native_load_gdt(&ctxt->gdtr); - - asm volatile("movw %%ax, %%ss" : : "a"(ctxt->ss)); - asm volatile("movq %0, %%rsp" : : "m"(ctxt->rsp)); - - asm volatile("movw %%ax, %%ds" : : "a"(ctxt->ds)); - asm volatile("movw %%ax, %%es" : : "a"(ctxt->es)); - asm volatile("movw %%ax, %%fs" : : "a"(ctxt->fs)); - asm volatile("movw %%ax, %%gs" : : "a"(ctxt->gs)); - - native_wrmsrq(MSR_IA32_CR_PAT, ctxt->pat); - asm volatile("movq %0, %%cr0" : : "r"(ctxt->cr0)); - - asm volatile("movq %0, %%cr8" : : "r"(ctxt->cr8)); - asm volatile("movq %0, %%cr4" : : "r"(ctxt->cr4)); - asm volatile("movq %0, %%cr2" : : "r"(ctxt->cr4)); - - native_load_idt(&ctxt->idtr); - native_wrmsrq(MSR_GS_BASE, ctxt->gsbase); - native_wrmsrq(MSR_EFER, ctxt->efer); - - /* restore the original kernel CS now via far return */ - asm volatile("movzwq %0, %%rax\n\t" - "pushq %%rax\n\t" - "pushq $1f\n\t" - "lretq\n\t" - "1:nop\n\t" : : "m"(ctxt->cs) : "rax"); - - /* We are in asmlinkage without stack frame, hence make C function - * calls which will buy stack frames. - */ hv_crash_restore_tss(); hv_crash_clear_kernpt(); @@ -177,7 +134,54 @@ static asmlinkage void __noreturn hv_crash_c_entry(void) hv_panic_timeout_reboot(); } -/* Tell gcc we are using lretq long jump in the above function intentionally */ + +/* + * __naked functions do not permit function calls, not even to __always_inline + * functions that only contain asm() blocks themselves. So use a macro instead. + */ +#define hv_wrmsr(msr, val) \ + asm volatile("wrmsr" :: "c"(msr), "a"((u32)val), "d"((u32)(val >> 32)) : "memory") + +/* + * This is the C entry point from the asm glue code after the disable hypercall. + * We enter here in IA32-e long mode, ie, full 64bit mode running on kernel + * page tables with our below 4G page identity mapped, but using a temporary + * GDT. ds/fs/gs/es are null. ss is not usable. bp is null. stack is not + * available. We restore kernel GDT, and rest of the context, and continue + * to kexec. 
+ */ +static void __naked hv_crash_c_entry(void) +{ + /* first thing, restore kernel gdt */ + asm volatile("lgdt %0" : : "m" (hv_crash_ctxt.gdtr)); + + asm volatile("movw %0, %%ss\n\t" + "movq %1, %%rsp" + :: "m"(hv_crash_ctxt.ss), "m"(hv_crash_ctxt.rsp)); + + asm volatile("movw %0, %%ds" : : "m"(hv_crash_ctxt.ds)); + asm volatile("movw %0, %%es" : : "m"(hv_crash_ctxt.es)); + asm volatile("movw %0, %%fs" : : "m"(hv_crash_ctxt.fs)); + asm volatile("movw %0, %%gs" : : "m"(hv_crash_ctxt.gs)); + + hv_wrmsr(MSR_IA32_CR_PAT, hv_crash_ctxt.pat); + asm volatile("movq %0, %%cr0" : : "r"(hv_crash_ctxt.cr0)); + + asm volatile("movq %0, %%cr8" : : "r"(hv_crash_ctxt.cr8)); + asm volatile("movq %0, %%cr4" : : "r"(hv_crash_ctxt.cr4)); + asm volatile("movq %0, %%cr2" : : "r"(hv_crash_ctxt.cr2)); + + asm volatile("lidt %0" : : "m" (hv_crash_ctxt.idtr)); + hv_wrmsr(MSR_GS_BASE, hv_crash_ctxt.gsbase); + hv_wrmsr(MSR_EFER, hv_crash_ctxt.efer); + + /* restore the original kernel CS now via far return */ + asm volatile("pushq %q0\n\t" + "pushq %q1\n\t" + "lretq" + :: "r"(hv_crash_ctxt.cs), "r"(hv_crash_handle)); +} +/* Tell objtool we are using lretq long jump in the above function intentionally */ STACK_FRAME_NON_STANDARD(hv_crash_c_entry); static void hv_mark_tss_not_busy(void) @@ -195,20 +199,20 @@ static void hv_hvcrash_ctxt_save(void) { struct hv_crash_ctxt *ctxt = &hv_crash_ctxt; - asm volatile("movq %%rsp,%0" : "=m"(ctxt->rsp)); + ctxt->rsp = current_stack_pointer; ctxt->cr0 = native_read_cr0(); ctxt->cr4 = native_read_cr4(); - asm volatile("movq %%cr2, %0" : "=a"(ctxt->cr2)); - asm volatile("movq %%cr8, %0" : "=a"(ctxt->cr8)); + asm volatile("movq %%cr2, %0" : "=r"(ctxt->cr2)); + asm volatile("movq %%cr8, %0" : "=r"(ctxt->cr8)); - asm volatile("movl %%cs, %%eax" : "=a"(ctxt->cs)); - asm volatile("movl %%ss, %%eax" : "=a"(ctxt->ss)); - asm volatile("movl %%ds, %%eax" : "=a"(ctxt->ds)); - asm volatile("movl %%es, %%eax" : "=a"(ctxt->es)); - asm volatile("movl %%fs, %%eax" : "=a"(ctxt->fs)); - asm volatile("movl %%gs, %%eax" : "=a"(ctxt->gs)); + asm volatile("movw %%cs, %0" : "=m"(ctxt->cs)); + asm volatile("movw %%ss, %0" : "=m"(ctxt->ss)); + asm volatile("movw %%ds, %0" : "=m"(ctxt->ds)); + asm volatile("movw %%es, %0" : "=m"(ctxt->es)); + asm volatile("movw %%fs, %0" : "=m"(ctxt->fs)); + asm volatile("movw %%gs, %0" : "=m"(ctxt->gs)); native_store_gdt(&ctxt->gdtr); store_idt(&ctxt->idtr);
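The hv_wrmsr() macro above exists because __naked functions may not make calls, and the WRMSR instruction takes its 64-bit payload split across EDX:EAX. The split itself, in plain C:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t val = 0x0123456789abcdefULL;
	uint32_t eax = (uint32_t)val;		/* WRMSR: low 32 bits in EAX */
	uint32_t edx = (uint32_t)(val >> 32);	/* high 32 bits in EDX */

	printf("eax=%#x edx=%#x\n", eax, edx);
	return 0;
}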
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 9b4e046..80c1696 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h
@@ -7,7 +7,7 @@ #include <linux/objtool.h> #include <asm/asm.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct bug_entry; extern void __WARN_trap(struct bug_entry *bug, ...); #endif @@ -137,7 +137,7 @@ do { \ #ifdef HAVE_ARCH_BUG_FORMAT_ARGS -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/static_call_types.h> DECLARE_STATIC_CALL(WARN_trap, __WARN_trap); @@ -153,7 +153,7 @@ struct arch_va_list { struct sysv_va_list args; }; extern void *__warn_args(struct arch_va_list *args, struct pt_regs *regs); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define __WARN_bug_entry(flags, format) ({ \ struct bug_entry *bug; \
diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h index c40b9eb..ab3fbbd 100644 --- a/arch/x86/include/asm/cfi.h +++ b/arch/x86/include/asm/cfi.h
@@ -111,6 +111,12 @@ extern bhi_thunk __bhi_args_end[]; struct pt_regs; +#ifdef CONFIG_CALL_PADDING +#define CFI_OFFSET (CONFIG_FUNCTION_PADDING_CFI+5) +#else +#define CFI_OFFSET 5 +#endif + #ifdef CONFIG_CFI enum bug_trap_type handle_cfi_failure(struct pt_regs *regs); #define __bpfcall @@ -119,11 +125,9 @@ static inline int cfi_get_offset(void) { switch (cfi_mode) { case CFI_FINEIBT: - return 16; + return /* fineibt_prefix_size */ 16; case CFI_KCFI: - if (IS_ENABLED(CONFIG_CALL_PADDING)) - return 16; - return 5; + return CFI_OFFSET; default: return 0; }
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index f227a70..51b4cdb 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h
@@ -138,7 +138,7 @@ extern void __init efi_apply_memmap_quirks(void); extern int __init efi_reuse_config(u64 tables, int nr_tables); extern void efi_delete_dummy_variable(void); extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr); -extern void efi_free_boot_services(void); +extern void efi_unmap_boot_services(void); void arch_efi_call_virt_setup(void); void arch_efi_call_virt_teardown(void);
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index a1193e9..462754b 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h
@@ -77,7 +77,7 @@ static __always_inline void native_local_irq_restore(unsigned long flags) #endif #ifndef CONFIG_PARAVIRT -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Used in the idle loop; sti takes one instruction cycle * to complete: @@ -95,7 +95,7 @@ static __always_inline void halt(void) { native_halt(); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_PARAVIRT */ #ifdef CONFIG_PARAVIRT_XXL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ff07c45..6e4e3ef 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h
@@ -2485,7 +2485,8 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \ KVM_X86_QUIRK_SLOT_ZAP_ALL | \ KVM_X86_QUIRK_STUFF_FEATURE_MSRS | \ - KVM_X86_QUIRK_IGNORE_GUEST_PAT) + KVM_X86_QUIRK_IGNORE_GUEST_PAT | \ + KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM) #define KVM_X86_CONDITIONAL_QUIRKS \ (KVM_X86_QUIRK_CD_NW_CLEARED | \
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 9d38ae7..a729465 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h
@@ -68,7 +68,7 @@ * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the * CFI symbol layout changes. * - * Without CALL_THUNKS: + * Without CALL_PADDING: * * .align FUNCTION_ALIGNMENT * __cfi_##name: @@ -77,7 +77,7 @@ * .long __kcfi_typeid_##name * name: * - * With CALL_THUNKS: + * With CALL_PADDING: * * .align FUNCTION_ALIGNMENT * __cfi_##name:
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index da5275d..6673601 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h
@@ -740,7 +740,10 @@ #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) #define MSR_AMD64_SNP_SECURE_AVIC_BIT 18 #define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) -#define MSR_AMD64_SNP_RESV_BIT 19 +#define MSR_AMD64_SNP_RESERVED_BITS19_22 GENMASK_ULL(22, 19) +#define MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT 23 +#define MSR_AMD64_SNP_IBPB_ON_ENTRY BIT_ULL(MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT) +#define MSR_AMD64_SNP_RESV_BIT 24 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) #define MSR_AMD64_SAVIC_CONTROL 0xc0010138 #define MSR_AMD64_SAVIC_EN_BIT 0
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 53ba39c..a9063f3 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h
@@ -22,6 +22,7 @@ extern int numa_off; */ extern s16 __apicid_to_node[MAX_LOCAL_APIC]; extern nodemask_t numa_nodes_parsed __initdata; +extern nodemask_t numa_phys_nodes_parsed __initdata; static inline void set_apicid_to_node(int apicid, s16 node) { @@ -48,6 +49,7 @@ extern void __init init_cpu_to_node(void); extern void numa_add_cpu(unsigned int cpu); extern void numa_remove_cpu(unsigned int cpu); extern void init_gi_nodes(void); +extern int num_phys_nodes(void); #else /* CONFIG_NUMA */ static inline void numa_set_node(int cpu, int node) { } static inline void numa_clear_node(int cpu) { } @@ -55,6 +57,10 @@ static inline void init_cpu_to_node(void) { } static inline void numa_add_cpu(unsigned int cpu) { } static inline void numa_remove_cpu(unsigned int cpu) { } static inline void init_gi_nodes(void) { } +static inline int num_phys_nodes(void) +{ + return 1; +} #endif /* CONFIG_NUMA */ #ifdef CONFIG_DEBUG_PER_CPU_MAPS
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index c55058f..4099814 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h
@@ -20,7 +20,7 @@ #define PER_CPU_VAR(var) __percpu(var)__percpu_rel -#else /* !__ASSEMBLY__: */ +#else /* !__ASSEMBLER__: */ #include <linux/args.h> #include <linux/bits.h>
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index f06e5d6..ce45882 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h
@@ -19,10 +19,8 @@ extern p4d_t level4_kernel_pgt[512]; extern p4d_t level4_ident_pgt[512]; extern pud_t level3_kernel_pgt[512]; -extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pmd_t level2_fixmap_pgt[512]; -extern pmd_t level2_ident_pgt[512]; extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM]; extern pgd_t init_top_pgt[];
diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h index e5a13dc..4cd94fdc 100644 --- a/arch/x86/include/asm/runtime-const.h +++ b/arch/x86/include/asm/runtime-const.h
@@ -6,7 +6,7 @@ #error "Cannot use runtime-const infrastructure from modules" #endif -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro RUNTIME_CONST_PTR sym reg movq $0x0123456789abcdef, %\reg @@ -16,7 +16,7 @@ .popsection .endm -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define runtime_const_ptr(sym) ({ \ typeof(sym) __ret; \ @@ -74,5 +74,5 @@ static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), } } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 1fadf0c..0ba9bdb 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h
@@ -155,6 +155,7 @@ extern unsigned int __max_logical_packages; extern unsigned int __max_threads_per_core; extern unsigned int __num_threads_per_package; extern unsigned int __num_cores_per_package; +extern unsigned int __num_nodes_per_package; const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c); enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c); @@ -179,6 +180,11 @@ static inline unsigned int topology_num_threads_per_package(void) return __num_threads_per_package; } +static inline unsigned int topology_num_nodes_per_package(void) +{ + return __num_nodes_per_package; +} + #ifdef CONFIG_X86_LOCAL_APIC int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level); #else
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 869b880..3f24cc4 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h
@@ -25,6 +25,8 @@ extern int ibt_selftest_noendbr(void); void handle_invalid_op(struct pt_regs *regs); #endif +noinstr bool handle_bug(struct pt_regs *regs); + static inline int get_si_code(unsigned long condition) { if (condition & DR_STEP)
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 846a632..0d4538f 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h
@@ -476,6 +476,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) #define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8) #define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9) +#define KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM (1 << 10) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index e9aeeea..47a32f5 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile
@@ -44,6 +44,20 @@ KCOV_INSTRUMENT_unwind_frame.o := n KCOV_INSTRUMENT_unwind_guess.o := n +# Disable KCOV to prevent crashes during kexec: load_segments() invalidates +# the GS base, which KCOV relies on for per-CPU data. +# +# As KCOV and KEXEC compatibility should be preserved (e.g. syzkaller is +# using it to collect crash dumps during kernel fuzzing), disabling +# KCOV for KEXEC kernels is not an option. Selectively disabling KCOV +# instrumentation for individual affected functions can be fragile, while +# adding more checks to KCOV would slow it down. +# +# As a compromise solution, disable KCOV instrumentation for the whole +# source code file. If its coverage is ever needed, other approaches +# should be considered. +KCOV_INSTRUMENT_machine_kexec_64.o := n + CFLAGS_head32.o := -fno-stack-protector CFLAGS_head64.o := -fno-stack-protector CFLAGS_irq.o := -I $(src)/../include/asm/trace
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a888ae0..e87da25 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c
@@ -1182,7 +1182,7 @@ void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end) poison_endbr(addr); if (IS_ENABLED(CONFIG_FINEIBT)) - poison_cfi(addr - 16); + poison_cfi(addr - CFI_OFFSET); } } @@ -1389,6 +1389,8 @@ extern u8 fineibt_preamble_end[]; #define fineibt_preamble_ud 0x13 #define fineibt_preamble_hash 5 +#define fineibt_prefix_size (fineibt_preamble_size - ENDBR_INSN_SIZE) + /* * <fineibt_caller_start>: * 0: b8 78 56 34 12 mov $0x12345678, %eax @@ -1634,7 +1636,7 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end) * have determined there are no indirect calls to it and we * don't need no CFI either. */ - if (!is_endbr(addr + 16)) + if (!is_endbr(addr + CFI_OFFSET)) continue; hash = decode_preamble_hash(addr, &arity); @@ -1642,6 +1644,15 @@ addr, addr, 5, addr)) return -EINVAL; + /* + * FineIBT relies on being at func-16, so if the preamble is + * actually larger than that, place it at the tail end. + * + * NOTE: this is possible with things like DEBUG_CALL_THUNKS + * and DEBUG_FORCE_FUNCTION_ALIGN_64B. + */ + addr += CFI_OFFSET - fineibt_prefix_size; + text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size); WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678); text_poke_early(addr + fineibt_preamble_hash, &hash, 4); @@ -1664,10 +1675,10 @@ static void cfi_rewrite_endbr(s32 *start, s32 *end) for (s = start; s < end; s++) { void *addr = (void *)s + *s; - if (!exact_endbr(addr + 16)) + if (!exact_endbr(addr + CFI_OFFSET)) continue; - poison_endbr(addr + 16); + poison_endbr(addr + CFI_OFFSET); } } @@ -1772,7 +1783,8 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, if (FINEIBT_WARN(fineibt_preamble_size, 20) || FINEIBT_WARN(fineibt_preamble_bhi + fineibt_bhi1_size, 20) || FINEIBT_WARN(fineibt_caller_size, 14) || - FINEIBT_WARN(fineibt_paranoid_size, 20)) + FINEIBT_WARN(fineibt_paranoid_size, 20) || + WARN_ON_ONCE(CFI_OFFSET < fineibt_prefix_size)) return; if (cfi_mode == CFI_AUTO) { @@ -1886,6 +1898,11 @@ static void poison_cfi(void *addr) switch (cfi_mode) { case CFI_FINEIBT: /* + * FineIBT preamble is at func-16. + */ + addr += CFI_OFFSET - fineibt_prefix_size; + + /* * FineIBT prefix should start with an ENDBR. */ if (!is_endbr(addr)) @@ -1923,8 +1940,6 @@ } } -#define fineibt_prefix_size (fineibt_preamble_size - ENDBR_INSN_SIZE) - /* * When regs->ip points to a 0xD6 byte in the FineIBT preamble, * return true and fill out target and type.
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index d93f87f..961714e 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c
@@ -1894,6 +1894,7 @@ void __init check_x2apic(void) static inline void try_to_enable_x2apic(int remap_mode) { } static inline void __x2apic_enable(void) { } +static inline void __x2apic_disable(void) { } #endif /* !CONFIG_X86_X2APIC */ void __init enable_IR_x2apic(void) @@ -2456,6 +2457,11 @@ static void lapic_resume(void *data) if (x2apic_mode) { __x2apic_enable(); } else { + if (x2apic_enabled()) { + pr_warn_once("x2apic: re-enabled by firmware during resume. Disabling\n"); + __x2apic_disable(); + } + /* * Make sure the APICBASE points to the right address *
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 15209f2..42568ce 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -1708,8 +1708,22 @@ static void __init uv_system_init_hub(void) struct uv_hub_info_s *new_hub; /* Allocate & fill new per hub info list */ - new_hub = (bid == 0) ? &uv_hub_info_node0 - : kzalloc_node(bytes, GFP_KERNEL, uv_blade_to_node(bid)); + if (bid == 0) { + new_hub = &uv_hub_info_node0; + } else { + int nid; + + /* + * Deconfigured sockets are mapped to SOCK_EMPTY. Use + * NUMA_NO_NODE to allocate on a valid node. + */ + nid = uv_blade_to_node(bid); + if (nid == SOCK_EMPTY) + nid = NUMA_NO_NODE; + + new_hub = kzalloc_node(bytes, GFP_KERNEL, nid); + } + if (WARN_ON_ONCE(!new_hub)) { /* do not kfree() bid 0, which is statically allocated */ while (--bid > 0)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1c3261c..ec06701 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c
@@ -95,6 +95,9 @@ EXPORT_SYMBOL(__max_dies_per_package); unsigned int __max_logical_packages __ro_after_init = 1; EXPORT_SYMBOL(__max_logical_packages); +unsigned int __num_nodes_per_package __ro_after_init = 1; +EXPORT_SYMBOL(__num_nodes_per_package); + unsigned int __num_cores_per_package __ro_after_init = 1; EXPORT_SYMBOL(__num_cores_per_package); @@ -430,7 +433,20 @@ static __always_inline void setup_lass(struct cpuinfo_x86 *c) /* These bits should not change their value after CPU init is finished. */ static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | - X86_CR4_FSGSBASE | X86_CR4_CET | X86_CR4_FRED; + X86_CR4_FSGSBASE | X86_CR4_CET; + +/* + * The CR pinning protects against ROP on the 'mov %reg, %CRn' instruction(s). + * Since you can ROP directly to these instructions (barring shadow stack), + * any protection must follow immediately and unconditionally after that. + * + * Specifically, the CR[04] write functions below will have the value + * validation controlled by the @cr_pinning static_branch which is + * __ro_after_init, just like the cr4_pinned_bits value. + * + * Once set, an attacker will have to defeat page-tables to get around these + * restrictions. Which is a much bigger ask than 'simple' ROP. + */ static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); static unsigned long cr4_pinned_bits __ro_after_init; @@ -2047,12 +2063,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) setup_umip(c); setup_lass(c); - /* Enable FSGSBASE instructions if available. */ - if (cpu_has(c, X86_FEATURE_FSGSBASE)) { - cr4_set_bits(X86_CR4_FSGSBASE); - elf_hwcap2 |= HWCAP2_FSGSBASE; - } - /* * The vendor-specific functions might have changed features. * Now we do "generic changes." @@ -2413,6 +2423,18 @@ void cpu_init_exception_handling(bool boot_cpu) /* GHCB needs to be setup to handle #VC. */ setup_ghcb(); + /* + * On CPUs with FSGSBASE support, paranoid_entry() uses + * ALTERNATIVE-patched RDGSBASE/WRGSBASE instructions. Secondary CPUs + * boot after alternatives are patched globally, so early exceptions + * execute patched code that depends on FSGSBASE. Enable the feature + * before any exceptions occur. + */ + if (cpu_feature_enabled(X86_FEATURE_FSGSBASE)) { + cr4_set_bits(X86_CR4_FSGSBASE); + elf_hwcap2 |= HWCAP2_FSGSBASE; + } + if (cpu_feature_enabled(X86_FEATURE_FRED)) { /* The boot CPU has enabled FRED during early boot */ if (!boot_cpu)
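The new comment is about the write-side enforcement. A toy model of the pinning it describes, using stand-in values (CR4.SMEP is bit 20 and CR4.SMAP bit 21, but nothing below is the kernel's actual implementation, which warns and rewrites the register):

#include <assert.h>

static unsigned long cr4_shadow;
static const unsigned long cr4_pinned_bits = (1UL << 20) | (1UL << 21);
static const int cr_pinning = 1;	/* models the __ro_after_init static branch */

static void toy_write_cr4(unsigned long val)
{
	/* validation follows the write path immediately and unconditionally */
	if (cr_pinning && (val & cr4_pinned_bits) != cr4_pinned_bits)
		val |= cr4_pinned_bits;	/* re-assert the pinned bits */
	cr4_shadow = val;
}

int main(void)
{
	toy_write_cr4(0);			/* a ROP'd write dropping SMEP/SMAP */
	assert(cr4_shadow == cr4_pinned_bits);	/* the pins survive */
	return 0;
}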
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index da13c1e..a030ee4 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -875,13 +875,18 @@ void amd_clear_bank(struct mce *m) { amd_reset_thr_limit(m->bank); - /* Clear MCA_DESTAT for all deferred errors even those logged in MCA_STATUS. */ - if (m->status & MCI_STATUS_DEFERRED) - mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0); + if (mce_flags.smca) { + /* + * Clear MCA_DESTAT for all deferred errors even those + * logged in MCA_STATUS. + */ + if (m->status & MCI_STATUS_DEFERRED) + mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0); - /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */ - if (m->kflags & MCE_CHECK_DFR_REGS) - return; + /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */ + if (m->kflags & MCE_CHECK_DFR_REGS) + return; + } mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0); }
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 89a2eb8..9befdc5 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -496,8 +496,9 @@ static void hv_reserve_irq_vectors(void) test_and_set_bit(HYPERV_DBG_FASTFAIL_VECTOR, system_vectors)) BUG(); - pr_info("Hyper-V: reserve vectors: %d %d %d\n", HYPERV_DBG_ASSERT_VECTOR, - HYPERV_DBG_SERVICE_VECTOR, HYPERV_DBG_FASTFAIL_VECTOR); + pr_info("Hyper-V: reserve vectors: 0x%x 0x%x 0x%x\n", + HYPERV_DBG_ASSERT_VECTOR, HYPERV_DBG_SERVICE_VECTOR, + HYPERV_DBG_FASTFAIL_VECTOR); } static void __init ms_hyperv_init_platform(void)
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index e6a1542..9bd87ba 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -364,7 +364,7 @@ void arch_mon_domain_online(struct rdt_resource *r, struct rdt_l3_mon_domain *d) msr_clear_bit(MSR_RMID_SNC_CONFIG, 0); } -/* CPU models that support MSR_RMID_SNC_CONFIG */ +/* CPU models that support SNC and MSR_RMID_SNC_CONFIG */ static const struct x86_cpu_id snc_cpu_ids[] __initconst = { X86_MATCH_VFM(INTEL_ICELAKE_X, 0), X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, 0), @@ -375,40 +375,14 @@ static const struct x86_cpu_id snc_cpu_ids[] __initconst = { {} }; -/* - * There isn't a simple hardware bit that indicates whether a CPU is running - * in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing the - * number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in - * the same NUMA node as CPU0. - * It is not possible to accurately determine SNC state if the system is - * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes - * to L3 caches. It will be OK if system is booted with hyperthreading - * disabled (since this doesn't affect the ratio). - */ static __init int snc_get_config(void) { - struct cacheinfo *ci = get_cpu_cacheinfo_level(0, RESCTRL_L3_CACHE); - const cpumask_t *node0_cpumask; - int cpus_per_node, cpus_per_l3; - int ret; + int ret = topology_num_nodes_per_package(); - if (!x86_match_cpu(snc_cpu_ids) || !ci) + if (ret > 1 && !x86_match_cpu(snc_cpu_ids)) { + pr_warn("CoD enabled system? Resctrl not supported\n"); return 1; - - cpus_read_lock(); - if (num_online_cpus() != num_present_cpus()) - pr_warn("Some CPUs offline, SNC detection may be incorrect\n"); - cpus_read_unlock(); - - node0_cpumask = cpumask_of_node(cpu_to_node(0)); - - cpus_per_node = cpumask_weight(node0_cpumask); - cpus_per_l3 = cpumask_weight(&ci->shared_cpu_map); - - if (!cpus_per_node || !cpus_per_l3) - return 1; - - ret = cpus_per_l3 / cpus_per_node; + } /* sanity check: Only valid results are 1, 2, 3, 4, 6 */ switch (ret) {
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 23190a7..eafcb1f 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c
@@ -31,6 +31,7 @@ #include <asm/mpspec.h> #include <asm/msr.h> #include <asm/smp.h> +#include <asm/numa.h> #include "cpu.h" @@ -492,11 +493,19 @@ void __init topology_init_possible_cpus(void) set_nr_cpu_ids(allowed); cnta = domain_weight(TOPO_PKG_DOMAIN); - cntb = domain_weight(TOPO_DIE_DOMAIN); __max_logical_packages = cnta; + + pr_info("Max. logical packages: %3u\n", __max_logical_packages); + + cntb = num_phys_nodes(); + __num_nodes_per_package = DIV_ROUND_UP(cntb, cnta); + + pr_info("Max. logical nodes: %3u\n", cntb); + pr_info("Num. nodes per package:%3u\n", __num_nodes_per_package); + + cntb = domain_weight(TOPO_DIE_DOMAIN); __max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta)); - pr_info("Max. logical packages: %3u\n", cnta); pr_info("Max. logical dies: %3u\n", cntb); pr_info("Max. dies per package: %3u\n", __max_dies_per_package);
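The derivation above is plain integer arithmetic: physical nodes parsed from the SRAT divided by package count, rounded up. A worked example with assumed counts for a dual-socket SNC-3 box:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int packages = 2, phys_nodes = 6;	/* assumed topology */

	/* prints 3: three NUMA nodes per package under SNC-3 */
	printf("nodes per package: %u\n", DIV_ROUND_UP(phys_nodes, packages));
	return 0;
}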
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 21816b4..85d4a50 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S
@@ -616,38 +616,10 @@ .data -#if defined(CONFIG_XEN_PV) || defined(CONFIG_PVH) -SYM_DATA_START_PTI_ALIGNED(init_top_pgt) - .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC - .org init_top_pgt + L4_PAGE_OFFSET*8, 0 - .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC - .org init_top_pgt + L4_START_KERNEL*8, 0 - /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ - .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC - .fill PTI_USER_PGD_FILL,8,0 -SYM_DATA_END(init_top_pgt) - -SYM_DATA_START_PAGE_ALIGNED(level3_ident_pgt) - .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC - .fill 511, 8, 0 -SYM_DATA_END(level3_ident_pgt) -SYM_DATA_START_PAGE_ALIGNED(level2_ident_pgt) - /* - * Since I easily can, map the first 1G. - * Don't set NX because code runs from these pages. - * - * Note: This sets _PAGE_GLOBAL despite whether - * the CPU supports it or it is enabled. But, - * the CPU should ignore the bit. - */ - PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) -SYM_DATA_END(level2_ident_pgt) -#else SYM_DATA_START_PTI_ALIGNED(init_top_pgt) .fill 512,8,0 .fill PTI_USER_PGD_FILL,8,0 SYM_DATA_END(init_top_pgt) -#endif SYM_DATA_START_PAGE_ALIGNED(level4_kernel_pgt) .fill 511,8,0
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5cd6950..294a8ea 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c
@@ -468,13 +468,6 @@ static int x86_cluster_flags(void) } #endif -/* - * Set if a package/die has multiple NUMA nodes inside. - * AMD Magny-Cours, Intel Cluster-on-Die, and Intel - * Sub-NUMA Clustering have this. - */ -static bool x86_has_numa_in_package; - static struct sched_domain_topology_level x86_topology[] = { SDTL_INIT(tl_smt_mask, cpu_smt_flags, SMT), #ifdef CONFIG_SCHED_CLUSTER @@ -496,7 +489,7 @@ static void __init build_sched_topology(void) * PKG domain since the NUMA domains will auto-magically create the * right spanning domains based on the SLIT. */ - if (x86_has_numa_in_package) { + if (topology_num_nodes_per_package() > 1) { unsigned int pkgdom = ARRAY_SIZE(x86_topology) - 2; memset(&x86_topology[pkgdom], 0, sizeof(x86_topology[pkgdom])); @@ -513,33 +506,149 @@ } #ifdef CONFIG_NUMA -static int sched_avg_remote_distance; -static int avg_remote_numa_distance(void) +/* + * Test if the on-trace cluster at (N,N) is symmetric. + * Uses upper triangle iteration to avoid obvious duplicates. + */ +static bool slit_cluster_symmetric(int N) { - int i, j; - int distance, nr_remote, total_distance; + int u = topology_num_nodes_per_package(); - if (sched_avg_remote_distance > 0) - return sched_avg_remote_distance; - - nr_remote = 0; - total_distance = 0; - for_each_node_state(i, N_CPU) { - for_each_node_state(j, N_CPU) { - distance = node_distance(i, j); - - if (distance >= REMOTE_DISTANCE) { - nr_remote++; - total_distance += distance; - } + for (int k = 0; k < u; k++) { + for (int l = k; l < u; l++) { + if (node_distance(N + k, N + l) != + node_distance(N + l, N + k)) + return false; } } - if (nr_remote) - sched_avg_remote_distance = total_distance / nr_remote; - else - sched_avg_remote_distance = REMOTE_DISTANCE; - return sched_avg_remote_distance; + return true; +} + +/* + * Return the package-id of the cluster, or ~0 if indeterminate. + * Each node in the on-trace cluster should have the same package-id. + */ +static u32 slit_cluster_package(int N) +{ + int u = topology_num_nodes_per_package(); + u32 pkg_id = ~0; + + for (int n = 0; n < u; n++) { + const struct cpumask *cpus = cpumask_of_node(N + n); + int cpu; + + for_each_cpu(cpu, cpus) { + u32 id = topology_logical_package_id(cpu); + + if (pkg_id == ~0) + pkg_id = id; + if (pkg_id != id) + return ~0; + } + } + + return pkg_id; +} + +/* + * Validate the SLIT table is of the form expected for SNC, specifically: + * + * - each on-trace cluster should be symmetric, + * - each on-trace cluster should have a unique package-id. + * + * If you NUMA_EMU on top of SNC, you get to keep the pieces. + */ +static bool slit_validate(void) +{ + int u = topology_num_nodes_per_package(); + u32 pkg_id, prev_pkg_id = ~0; + + for (int pkg = 0; pkg < topology_max_packages(); pkg++) { + int n = pkg * u; + + /* + * Ensure the on-trace cluster is symmetric and each cluster + * has a different package id. + */ + if (!slit_cluster_symmetric(n)) + return false; + pkg_id = slit_cluster_package(n); + if (pkg_id == ~0) + return false; + if (pkg && pkg_id == prev_pkg_id) + return false; + + prev_pkg_id = pkg_id; + } + + return true; +} + +/* + * Compute a sanitized SLIT table for SNC; notably SNC-3 can end up with + * asymmetric off-trace clusters, reflecting physical asymmetries. However, + * this leads to 'unfortunate' sched_domain configurations.
+ * + * For example dual socket GNR with SNC-3: + * + * node distances: + * node 0 1 2 3 4 5 + * 0: 10 15 17 21 28 26 + * 1: 15 10 15 23 26 23 + * 2: 17 15 10 26 23 21 + * 3: 21 28 26 10 15 17 + * 4: 23 26 23 15 10 15 + * 5: 26 23 21 17 15 10 + * + * Fix things up by averaging out the off-trace clusters; resulting in: + * + * node 0 1 2 3 4 5 + * 0: 10 15 17 24 24 24 + * 1: 15 10 15 24 24 24 + * 2: 17 15 10 24 24 24 + * 3: 24 24 24 10 15 17 + * 4: 24 24 24 15 10 15 + * 5: 24 24 24 17 15 10 + */ +static int slit_cluster_distance(int i, int j) +{ + static int slit_valid = -1; + int u = topology_num_nodes_per_package(); + long d = 0; + int x, y; + + if (slit_valid < 0) { + slit_valid = slit_validate(); + if (!slit_valid) + pr_err(FW_BUG "SLIT table doesn't have the expected form for SNC -- fixup disabled!\n"); + else + pr_info("Fixing up SNC SLIT table.\n"); + } + + /* + * Is this a unit cluster on the trace? + */ + if ((i / u) == (j / u) || !slit_valid) + return node_distance(i, j); + + /* + * Off-trace cluster. + * + * Notably average out the symmetric pair of off-trace clusters to + * ensure the resulting SLIT table is symmetric. + */ + x = i - (i % u); + y = j - (j % u); + + for (i = x; i < x + u; i++) { + for (j = y; j < y + u; j++) { + d += node_distance(i, j); + d += node_distance(j, i); + } + } + + return d / (2*u*u); } int arch_sched_node_distance(int from, int to) @@ -549,34 +658,14 @@ int arch_sched_node_distance(int from, int to) switch (boot_cpu_data.x86_vfm) { case INTEL_GRANITERAPIDS_X: case INTEL_ATOM_DARKMONT_X: - - if (!x86_has_numa_in_package || topology_max_packages() == 1 || - d < REMOTE_DISTANCE) + if (topology_max_packages() == 1 || + topology_num_nodes_per_package() < 3) return d; /* - * With SNC enabled, there could be too many levels of remote - * NUMA node distances, creating NUMA domain levels - * including local nodes and partial remote nodes. - * - * Trim finer distance tuning for NUMA nodes in remote package - * for the purpose of building sched domains. Group NUMA nodes - * in the remote package in the same sched group. - * Simplify NUMA domains and avoid extra NUMA levels including - * different remote NUMA nodes and local nodes. - * - * GNR and CWF don't expect systems with more than 2 packages - * and more than 2 hops between packages. Single average remote - * distance won't be appropriate if there are more than 2 - * packages as average distance to different remote packages - * could be different. + * Handle SNC-3 asymmetries. */ - WARN_ONCE(topology_max_packages() > 2, - "sched: Expect only up to 2 packages for GNR or CWF, " - "but saw %d packages when building sched domains.", - topology_max_packages()); - - d = avg_remote_numa_distance(); + return slit_cluster_distance(from, to); } return d; } @@ -606,7 +695,7 @@ void set_cpu_sibling_map(int cpu) o = &cpu_data(i); if (match_pkg(c, o) && !topology_same_node(c, o)) - x86_has_numa_in_package = true; + WARN_ON_ONCE(topology_num_nodes_per_package() == 1); if ((i == cpu) || (has_smt && match_smt(c, o))) link_mask(topology_sibling_cpumask, cpu, i);
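The fixup can be checked against the GNR/SNC-3 table quoted in the comment: summing both symmetric 3x3 off-trace blocks and dividing by 2*u*u reproduces the 24 in the sanitized table. A standalone sketch of that arithmetic:

#include <stdio.h>

static const int slit[6][6] = {
	{ 10, 15, 17, 21, 28, 26 },
	{ 15, 10, 15, 23, 26, 23 },
	{ 17, 15, 10, 26, 23, 21 },
	{ 21, 28, 26, 10, 15, 17 },
	{ 23, 26, 23, 15, 10, 15 },
	{ 26, 23, 21, 17, 15, 10 },
};

int main(void)
{
	int u = 3;	/* SNC-3: three nodes per package */
	long d = 0;

	/* sum the off-trace block and its mirror, as the kernel loop does */
	for (int i = 0; i < u; i++)
		for (int j = u; j < 2 * u; j++)
			d += slit[i][j] + slit[j][i];

	/* (217 + 217) / 18 == 24 */
	printf("off-trace distance: %ld\n", d / (2L * u * u));
	return 0;
}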
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5a6a772..4dbff8e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c
@@ -397,7 +397,7 @@ static inline void handle_invalid_op(struct pt_regs *regs) ILL_ILLOPN, error_get_trap_addr(regs)); } -static noinstr bool handle_bug(struct pt_regs *regs) +noinstr bool handle_bug(struct pt_regs *regs) { unsigned long addr = regs->ip; bool handled = false;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3a24a3f..4711a35 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S
@@ -427,6 +427,7 @@ .llvm_bb_addr_map : { *(.llvm_bb_addr_map) } #endif + MODINFO ELF_DETAILS DISCARDS
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index d916bd7..801bf9e 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig
@@ -20,7 +20,6 @@ config KVM_X86 def_tristate KVM if (KVM_INTEL != n || KVM_AMD != n) select KVM_COMMON - select KVM_GENERIC_MMU_NOTIFIER select KVM_ELIDE_TLB_FLUSH_IF_YOUNG select KVM_MMU_LOCKLESS_AGING select HAVE_KVM_IRQCHIP
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index d248650..8137927 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c
@@ -776,7 +776,10 @@ do { \ #define SYNTHESIZED_F(name) \ ({ \ kvm_cpu_cap_synthesized |= feature_bit(name); \ - F(name); \ + \ + BUILD_BUG_ON(X86_FEATURE_##name >= MAX_CPU_FEATURES); \ + if (boot_cpu_has(X86_FEATURE_##name)) \ + F(name); \ }) /*
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 3020294..9b140bb 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c
@@ -1981,16 +1981,17 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY) goto out_flush_all; - if (is_noncanonical_invlpg_address(entries[i], vcpu)) - continue; - /* * Lower 12 bits of 'address' encode the number of additional * pages to flush. */ gva = entries[i] & PAGE_MASK; - for (j = 0; j < (entries[i] & ~PAGE_MASK) + 1; j++) + for (j = 0; j < (entries[i] & ~PAGE_MASK) + 1; j++) { + if (is_noncanonical_invlpg_address(gva + j * PAGE_SIZE, vcpu)) + continue; + kvm_x86_call(flush_tlb_gva)(vcpu, gva + j * PAGE_SIZE); + } ++vcpu->stat.tlb_flush; }
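The reason the canonicality check moves inside the loop: each flush entry packs a page-aligned GVA in the upper bits and a count of additional pages in the low 12 bits, so canonicality is a per-page property, not a per-entry one. A sketch of the decoding, with a made-up entry value:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096ULL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	uint64_t entry = 0x7f1234000ULL | 3;	/* assumed GVA, 3 extra pages */
	uint64_t gva = entry & PAGE_MASK;
	uint64_t npages = (entry & ~PAGE_MASK) + 1;

	for (uint64_t j = 0; j < npages; j++)	/* flushes 4 pages total */
		printf("flush gva 0x%llx\n",
		       (unsigned long long)(gva + j * PAGE_SIZE));
	return 0;
}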
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index bb25779..eed96ff 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c
@@ -321,7 +321,8 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, idx = srcu_read_lock(&kvm->irq_srcu); gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) - hlist_for_each_entry_rcu(kimn, &ioapic->mask_notifier_list, link) + hlist_for_each_entry_srcu(kimn, &ioapic->mask_notifier_list, link, + srcu_read_lock_held(&kvm->irq_srcu)) if (kimn->irq == gsi) kimn->func(kimn, mask); srcu_read_unlock(&kvm->irq_srcu, idx);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index b922a8b..dd06453 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c
@@ -3044,12 +3044,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, bool prefetch = !fault || fault->prefetch; bool write_fault = fault && fault->write; - if (unlikely(is_noslot_pfn(pfn))) { - vcpu->stat.pf_mmio_spte_created++; - mark_mmio_spte(vcpu, sptep, gfn, pte_access); - return RET_PF_EMULATE; - } - if (is_shadow_present_pte(*sptep)) { if (prefetch && is_last_spte(*sptep, level) && pfn == spte_to_pfn(*sptep)) @@ -3066,13 +3060,22 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, child = spte_to_child_sp(pte); drop_parent_pte(vcpu->kvm, child, sptep); flush = true; - } else if (WARN_ON_ONCE(pfn != spte_to_pfn(*sptep))) { + } else if (pfn != spte_to_pfn(*sptep)) { + WARN_ON_ONCE(vcpu->arch.mmu->root_role.direct); drop_spte(vcpu->kvm, sptep); flush = true; } else was_rmapped = 1; } + if (unlikely(is_noslot_pfn(pfn))) { + vcpu->stat.pf_mmio_spte_created++; + mark_mmio_spte(vcpu, sptep, gfn, pte_access); + if (flush) + kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level); + return RET_PF_EMULATE; + } + wrprot = make_spte(vcpu, sp, slot, pte_access, gfn, pfn, *sptep, prefetch, false, host_writable, &spte);
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index f92214b..f7ec791 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c
@@ -189,12 +189,12 @@ static void avic_activate_vmcb(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK); - vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK; vmcb->control.avic_physical_id |= avic_get_max_physical_id(vcpu); - vmcb->control.int_ctl |= AVIC_ENABLE_MASK; + svm_clr_intercept(svm, INTERCEPT_CR8_WRITE); + /* * Note: KVM supports hybrid-AVIC mode, where KVM emulates x2APIC MSR * accesses, while interrupt injection to a running vCPU can be @@ -226,6 +226,9 @@ static void avic_deactivate_vmcb(struct vcpu_svm *svm) vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK); vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK; + if (!sev_es_guest(svm->vcpu.kvm)) + svm_set_intercept(svm, INTERCEPT_CR8_WRITE); + /* * If running nested and the guest uses its own MSR bitmap, there * is no need to update L0's msr bitmap @@ -368,7 +371,7 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb) vmcb->control.avic_physical_id = __sme_set(__pa(kvm_svm->avic_physical_id_table)); vmcb->control.avic_vapic_bar = APIC_DEFAULT_PHYS_BASE; - if (kvm_apicv_activated(svm->vcpu.kvm)) + if (kvm_vcpu_apicv_active(&svm->vcpu)) avic_activate_vmcb(svm); else avic_deactivate_vmcb(svm);
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 53ab6ce..b36c332 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c
@@ -418,6 +418,15 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu) return __nested_vmcb_check_controls(vcpu, ctl); } +int nested_svm_check_cached_vmcb12(struct kvm_vcpu *vcpu) +{ + if (!nested_vmcb_check_save(vcpu) || + !nested_vmcb_check_controls(vcpu)) + return -EINVAL; + + return 0; +} + /* * If a feature is not advertised to L1, clear the corresponding vmcb12 * intercept. @@ -1028,8 +1037,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); - if (!nested_vmcb_check_save(vcpu) || - !nested_vmcb_check_controls(vcpu)) { + if (nested_svm_check_cached_vmcb12(vcpu) < 0) { vmcb12->control.exit_code = SVM_EXIT_ERR; vmcb12->control.exit_info_1 = 0; vmcb12->control.exit_info_2 = 0;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8f8bc86..e6477af 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c
@@ -1077,8 +1077,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu, bool init_event) svm_set_intercept(svm, INTERCEPT_CR0_WRITE); svm_set_intercept(svm, INTERCEPT_CR3_WRITE); svm_set_intercept(svm, INTERCEPT_CR4_WRITE); - if (!kvm_vcpu_apicv_active(vcpu)) - svm_set_intercept(svm, INTERCEPT_CR8_WRITE); + svm_set_intercept(svm, INTERCEPT_CR8_WRITE); set_dr_intercepts(svm); @@ -1189,7 +1188,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu, bool init_event) if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS)) svm->vmcb->control.erap_ctl |= ERAP_CONTROL_ALLOW_LARGER_RAP; - if (kvm_vcpu_apicv_active(vcpu)) + if (enable_apicv && irqchip_in_kernel(vcpu->kvm)) avic_init_vmcb(svm, vmcb); if (vnmi) @@ -2674,9 +2673,11 @@ static int dr_interception(struct kvm_vcpu *vcpu) static int cr8_write_interception(struct kvm_vcpu *vcpu) { + u8 cr8_prev = kvm_get_cr8(vcpu); int r; - u8 cr8_prev = kvm_get_cr8(vcpu); + WARN_ON_ONCE(kvm_vcpu_apicv_active(vcpu)); + /* instruction emulation calls kvm_set_cr8() */ r = cr_interception(vcpu); if (lapic_in_kernel(vcpu)) @@ -4879,11 +4880,15 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) vmcb12 = map.hva; nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); - ret = enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, vmcb12, false); - if (ret) + if (nested_svm_check_cached_vmcb12(vcpu) < 0) goto unmap_save; + if (enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, + vmcb12, false) != 0) + goto unmap_save; + + ret = 0; svm->nested.nested_run_pending = 1; unmap_save:
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index ebd7b36..6942e6b 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h
@@ -797,6 +797,7 @@ static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code) int nested_svm_exit_handled(struct vcpu_svm *svm); int nested_svm_check_permissions(struct kvm_vcpu *vcpu); +int nested_svm_check_cached_vmcb12(struct kvm_vcpu *vcpu); int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); int nested_svm_exit_special(struct vcpu_svm *svm);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 248635d..937aeb4 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c
@@ -3300,10 +3300,24 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, if (CC(vmcs12->guest_cr4 & X86_CR4_CET && !(vmcs12->guest_cr0 & X86_CR0_WP))) return -EINVAL; - if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) && - (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) || - CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false)))) - return -EINVAL; + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) { + u64 debugctl = vmcs12->guest_ia32_debugctl; + + /* + * FREEZE_IN_SMM is not virtualized, but allow L1 to set it in + * vmcs12's DEBUGCTL under a quirk for backwards compatibility. + * Note that the quirk only relaxes the consistency check. The + * vmcs02 bit is still under the control of the host. In + * particular, if a host administrator decides to clear the bit, + * then L1 has no say in the matter. + */ + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM)) + debugctl &= ~DEBUGCTLMSR_FREEZE_IN_SMM; + + if (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) || + CC(!vmx_is_valid_debugctl(vcpu, debugctl, false))) + return -EINVAL; + } if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && CC(!kvm_pat_valid(vmcs12->guest_ia32_pat))) @@ -6842,13 +6856,34 @@ void vmx_leave_nested(struct kvm_vcpu *vcpu) free_nested(vcpu); } +int nested_vmx_check_restored_vmcs12(struct kvm_vcpu *vcpu) +{ + enum vm_entry_failure_code ignored; + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + + if (nested_cpu_has_shadow_vmcs(vmcs12) && + vmcs12->vmcs_link_pointer != INVALID_GPA) { + struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); + + if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION || + !shadow_vmcs12->hdr.shadow_vmcs) + return -EINVAL; + } + + if (nested_vmx_check_controls(vcpu, vmcs12) || + nested_vmx_check_host_state(vcpu, vmcs12) || + nested_vmx_check_guest_state(vcpu, vmcs12, &ignored)) + return -EINVAL; + + return 0; +} + static int vmx_set_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, struct kvm_nested_state *kvm_state) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12; - enum vm_entry_failure_code ignored; struct kvm_vmx_nested_state_data __user *user_vmx_nested_state = &user_kvm_nested_state->data.vmx[0]; int ret; @@ -6979,25 +7014,20 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, vmx->nested.mtf_pending = !!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING); - ret = -EINVAL; if (nested_cpu_has_shadow_vmcs(vmcs12) && vmcs12->vmcs_link_pointer != INVALID_GPA) { struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); + ret = -EINVAL; if (kvm_state->size < sizeof(*kvm_state) + sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12)) goto error_guest_mode; + ret = -EFAULT; if (copy_from_user(shadow_vmcs12, user_vmx_nested_state->shadow_vmcs12, - sizeof(*shadow_vmcs12))) { - ret = -EFAULT; - goto error_guest_mode; - } - - if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION || - !shadow_vmcs12->hdr.shadow_vmcs) + sizeof(*shadow_vmcs12))) goto error_guest_mode; } @@ -7008,9 +7038,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, kvm_state->hdr.vmx.preemption_timer_deadline; } - if (nested_vmx_check_controls(vcpu, vmcs12) || - nested_vmx_check_host_state(vcpu, vmcs12) || - nested_vmx_check_guest_state(vcpu, vmcs12, &ignored)) + ret = nested_vmx_check_restored_vmcs12(vcpu); + if (ret < 0) goto error_guest_mode; vmx->nested.dirty_vmcs12 = true;
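A reduced model of the relaxed consistency check: DEBUGCTLMSR_FREEZE_IN_SMM is bit 14 of IA32_DEBUGCTL, and the quirk merely masks it off before validation. The validity helper below is a stand-in for vmx_is_valid_debugctl(), modelling only this one bit:

#include <stdbool.h>
#include <stdio.h>

#define DEBUGCTLMSR_FREEZE_IN_SMM (1ULL << 14)

static bool debugctl_valid(unsigned long long val, bool quirk_enabled)
{
	if (quirk_enabled)
		val &= ~DEBUGCTLMSR_FREEZE_IN_SMM;	/* quirk: ignore the bit */
	return !(val & DEBUGCTLMSR_FREEZE_IN_SMM);	/* bit is host-owned */
}

int main(void)
{
	/* prints "1 0": quirk on accepts the vmcs12 value, quirk off rejects */
	printf("%d %d\n",
	       debugctl_valid(DEBUGCTLMSR_FREEZE_IN_SMM, true),
	       debugctl_valid(DEBUGCTLMSR_FREEZE_IN_SMM, false));
	return 0;
}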
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index b844c5d..213a448 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h
@@ -22,6 +22,7 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps); void nested_vmx_hardware_unsetup(void); __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)); void nested_vmx_set_vmcs_shadowing_bitmap(void); +int nested_vmx_check_restored_vmcs12(struct kvm_vcpu *vcpu); void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu); enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 967b58a..8b24e68 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c
@@ -1149,7 +1149,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, } vmx_add_auto_msr(&m->guest, msr, guest_val, VM_ENTRY_MSR_LOAD_COUNT, kvm); - vmx_add_auto_msr(&m->guest, msr, host_val, VM_EXIT_MSR_LOAD_COUNT, kvm); + vmx_add_auto_msr(&m->host, msr, host_val, VM_EXIT_MSR_LOAD_COUNT, kvm); } static bool update_transition_efer(struct vcpu_vmx *vmx) @@ -8528,9 +8528,13 @@ int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) } if (vmx->nested.smm.guest_mode) { + /* Triple fault if the state is invalid. */ + if (nested_vmx_check_restored_vmcs12(vcpu) < 0) + return 1; + ret = nested_vmx_enter_non_root_mode(vcpu, false); - if (ret) - return ret; + if (ret != NVMX_VMENTRY_SUCCESS) + return 1; vmx->nested.nested_run_pending = 1; vmx->nested.smm.guest_mode = false;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3fb64905..fd1c4a3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c
@@ -243,7 +243,7 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_ipiv); bool __read_mostly enable_device_posted_irqs = true; EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_device_posted_irqs); -const struct _kvm_stats_desc kvm_vm_stats_desc[] = { +const struct kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_COUNTER(VM, mmu_shadow_zapped), STATS_DESC_COUNTER(VM, mmu_pte_write), @@ -269,7 +269,7 @@ const struct kvm_stats_header kvm_vm_stats_header = { sizeof(kvm_vm_stats_desc), }; -const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, pf_taken), STATS_DESC_COUNTER(VCPU, pf_fixed), @@ -4805,7 +4805,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #endif case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: - case KVM_CAP_SYNC_MMU: case KVM_CAP_USER_NMI: case KVM_CAP_IRQ_INJECT_STATUS: case KVM_CAP_IOEVENTFD:
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 5b9908f..3a53648 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile
@@ -4,6 +4,8 @@ KCOV_INSTRUMENT_mem_encrypt.o := n KCOV_INSTRUMENT_mem_encrypt_amd.o := n KCOV_INSTRUMENT_pgprot.o := n +# See the "Disable KCOV" comment in arch/x86/kernel/Makefile. +KCOV_INSTRUMENT_physaddr.o := n KASAN_SANITIZE_mem_encrypt.o := n KASAN_SANITIZE_mem_encrypt_amd.o := n
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 2fdc1f1f..6b9ff1c 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c
@@ -411,14 +411,11 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) return; if (trapnr == X86_TRAP_UD) { - if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { - /* Skip the ud2. */ - regs->ip += LEN_UD2; + if (handle_bug(regs)) return; - } /* - * If this was a BUG and report_bug returns or if this + * If this was a BUG and handle_bug returns or if this * was just a normal #UD, we want to continue onward and * crash. */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 7a97327..99d0a93 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c
@@ -48,6 +48,8 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] = { [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE }; +nodemask_t numa_phys_nodes_parsed __initdata; + int numa_cpu_node(int cpu) { u32 apicid = early_per_cpu(x86_cpu_to_apicid, cpu); @@ -57,6 +59,11 @@ int numa_cpu_node(int cpu) return NUMA_NO_NODE; } +int __init num_phys_nodes(void) +{ + return bitmap_weight(numa_phys_nodes_parsed.bits, MAX_NUMNODES); +} + cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; EXPORT_SYMBOL(node_to_cpumask_map); @@ -210,6 +217,7 @@ static int __init dummy_numa_init(void) 0LLU, PFN_PHYS(max_pfn) - 1); node_set(0, numa_nodes_parsed); + node_set(0, numa_phys_nodes_parsed); numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); return 0;
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 6f8e0f2..44ca666 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c
@@ -57,6 +57,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) } set_apicid_to_node(apic_id, node); node_set(node, numa_nodes_parsed); + node_set(node, numa_phys_nodes_parsed); pr_debug("SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", pxm, apic_id, node); } @@ -97,6 +98,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) set_apicid_to_node(apic_id, node); node_set(node, numa_nodes_parsed); + node_set(node, numa_phys_nodes_parsed); pr_debug("SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", pxm, apic_id, node); }
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8f10080..e9b7804 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c
@@ -438,17 +438,8 @@ static void emit_kcfi(u8 **pprog, u32 hash) EMIT1_off32(0xb8, hash); /* movl $hash, %eax */ #ifdef CONFIG_CALL_PADDING - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); - EMIT1(0x90); + for (int i = 0; i < CONFIG_FUNCTION_PADDING_CFI; i++) + EMIT1(0x90); #endif EMIT_ENDBR();
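The eleven back-to-back EMIT1(0x90) calls collapse into a loop bounded by CONFIG_FUNCTION_PADDING_CFI, keeping the JIT'ed preamble in sync with the compiler's layout. A user-space mock of the emission, assuming a padding value of 11 (the common x86-64 value; the real JIT writes into its image buffer rather than a local array):

#include <stdio.h>

#define CONFIG_FUNCTION_PADDING_CFI 11	/* assumed config value */

static void emit1(unsigned char **p, unsigned char b)
{
	*(*p)++ = b;
}

int main(void)
{
	unsigned char buf[32], *prog = buf;

	emit1(&prog, 0xb8);			/* movl $hash, %eax */
	for (int i = 0; i < 4; i++)
		emit1(&prog, 0x00);		/* 32-bit hash immediate */
	for (int i = 0; i < CONFIG_FUNCTION_PADDING_CFI; i++)
		emit1(&prog, 0x90);		/* nop padding, was 11 EMIT1()s */

	printf("preamble size: %td bytes\n", prog - buf);	/* 16 */
	return 0;
}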
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index d00c6de..d84c602 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c
@@ -836,7 +836,7 @@ static void __init __efi_enter_virtual_mode(void) } efi_check_for_embedded_firmwares(); - efi_free_boot_services(); + efi_unmap_boot_services(); if (!efi_is_mixed()) efi_native_runtime_setup();
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 553f330..79f0818 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c
@@ -341,7 +341,7 @@ void __init efi_reserve_boot_services(void) /* * Because the following memblock_reserve() is paired - * with memblock_free_late() for this region in + * with free_reserved_area() for this region in * efi_free_boot_services(), we must be extremely * careful not to reserve, and subsequently free, * critical regions of memory (like the kernel image) or @@ -404,17 +404,33 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md) pr_err("Failed to unmap VA mapping for 0x%llx\n", va); } -void __init efi_free_boot_services(void) +struct efi_freeable_range { + u64 start; + u64 end; +}; + +static struct efi_freeable_range *ranges_to_free; + +void __init efi_unmap_boot_services(void) { struct efi_memory_map_data data = { 0 }; efi_memory_desc_t *md; int num_entries = 0; + int idx = 0; + size_t sz; void *new, *new_md; /* Keep all regions for /sys/kernel/debug/efi */ if (efi_enabled(EFI_DBG)) return; + sz = sizeof(*ranges_to_free) * (efi.memmap.nr_map + 1); + ranges_to_free = kzalloc(sz, GFP_KERNEL); + if (!ranges_to_free) { + pr_err("Failed to allocate storage for freeable EFI regions\n"); + return; + } + for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; @@ -471,7 +487,15 @@ void __init efi_free_boot_services(void) start = SZ_1M; } - memblock_free_late(start, size); + /* + * With CONFIG_DEFERRED_STRUCT_PAGE_INIT parts of the memory + * map are still not initialized and we can't reliably free + * memory here. + * Queue the ranges to free at a later point. + */ + ranges_to_free[idx].start = start; + ranges_to_free[idx].end = start + size; + idx++; } if (!num_entries) @@ -512,6 +536,31 @@ void __init efi_free_boot_services(void) } } +static int __init efi_free_boot_services(void) +{ + struct efi_freeable_range *range = ranges_to_free; + unsigned long freed = 0; + + if (!ranges_to_free) + return 0; + + while (range->start) { + void *start = phys_to_virt(range->start); + void *end = phys_to_virt(range->end); + + free_reserved_area(start, end, -1, NULL); + freed += (end - start); + range++; + } + kfree(ranges_to_free); + + if (freed) + pr_info("Freeing EFI boot services memory: %ldK\n", freed / SZ_1K); + + return 0; +} +arch_initcall(efi_free_boot_services); + /* * A number of config table entries get remapped to virtual addresses * after entering EFI virtual mode. However, the kexec kernel requires
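The quirks.c change splits the freeing into two stages because, with CONFIG_DEFERRED_STRUCT_PAGE_INIT, parts of the memory map may not have initialized struct pages yet when boot services are unmapped. A minimal model of the queue-now/free-later pattern, with illustrative names and addresses (the kernel's version uses kzalloc()/free_reserved_area() and an arch_initcall):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct range { unsigned long long start, end; };
static struct range *ranges_to_free;

static void unmap_stage(const struct range *src, int n)
{
	/* n + 1 zeroed entries: the all-zero tail acts as the sentinel */
	ranges_to_free = calloc(n + 1, sizeof(*ranges_to_free));
	if (ranges_to_free)
		memcpy(ranges_to_free, src, n * sizeof(*src));
}

static void late_free_stage(void)
{
	unsigned long long freed = 0;

	for (struct range *r = ranges_to_free; r && r->start; r++)
		freed += r->end - r->start;
	free(ranges_to_free);
	printf("would free %lluK\n", freed / 1024);	/* prints 3072K */
}

int main(void)
{
	struct range boot[] = {
		{ 0x1000000, 0x1200000 },	/* made-up 2M region */
		{ 0x2000000, 0x2100000 },	/* made-up 1M region */
	};

	unmap_stage(boot, 2);
	late_free_stage();
	return 0;
}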
diff --git a/arch/x86/platform/geode/geode-common.c b/arch/x86/platform/geode/geode-common.c index 05189c5..1843ae3 100644 --- a/arch/x86/platform/geode/geode-common.c +++ b/arch/x86/platform/geode/geode-common.c
@@ -28,8 +28,10 @@ static const struct software_node geode_gpio_keys_node = { .properties = geode_gpio_keys_props, }; -static struct property_entry geode_restart_key_props[] = { - { /* Placeholder for GPIO property */ }, +static struct software_node_ref_args geode_restart_gpio_ref; + +static const struct property_entry geode_restart_key_props[] = { + PROPERTY_ENTRY_REF_ARRAY_LEN("gpios", &geode_restart_gpio_ref, 1), PROPERTY_ENTRY_U32("linux,code", KEY_RESTART), PROPERTY_ENTRY_STRING("label", "Reset button"), PROPERTY_ENTRY_U32("debounce-interval", 100), @@ -64,8 +66,7 @@ int __init geode_create_restart_key(unsigned int pin) struct platform_device *pd; int err; - geode_restart_key_props[0] = PROPERTY_ENTRY_GPIO("gpios", - &geode_gpiochip_node, + geode_restart_gpio_ref = SOFTWARE_NODE_REFERENCE(&geode_gpiochip_node, pin, GPIO_ACTIVE_LOW); err = software_node_register_node_group(geode_gpio_keys_swnodes); @@ -99,6 +100,7 @@ int __init geode_create_leds(const char *label, const struct geode_led *leds, const struct software_node *group[MAX_LEDS + 2] = { 0 }; struct software_node *swnodes; struct property_entry *props; + struct software_node_ref_args *gpio_refs; struct platform_device_info led_info = { .name = "leds-gpio", .id = PLATFORM_DEVID_NONE, @@ -127,6 +129,12 @@ int __init geode_create_leds(const char *label, const struct geode_led *leds, goto err_free_swnodes; } + gpio_refs = kzalloc_objs(*gpio_refs, n_leds); + if (!gpio_refs) { + err = -ENOMEM; + goto err_free_props; + } + group[0] = &geode_gpio_leds_node; for (i = 0; i < n_leds; i++) { node_name = kasprintf(GFP_KERNEL, "%s:%d", label, i); @@ -135,9 +143,11 @@ int __init geode_create_leds(const char *label, const struct geode_led *leds, goto err_free_names; } + gpio_refs[i] = SOFTWARE_NODE_REFERENCE(&geode_gpiochip_node, + leds[i].pin, + GPIO_ACTIVE_LOW); props[i * 3 + 0] = - PROPERTY_ENTRY_GPIO("gpios", &geode_gpiochip_node, - leds[i].pin, GPIO_ACTIVE_LOW); + PROPERTY_ENTRY_REF_ARRAY_LEN("gpios", &gpio_refs[i], 1); props[i * 3 + 1] = PROPERTY_ENTRY_STRING("linux,default-trigger", leds[i].default_on ? @@ -171,6 +181,8 @@ int __init geode_create_leds(const char *label, const struct geode_led *leds, err_free_names: while (--i >= 0) kfree(swnodes[i].name); + kfree(gpio_refs); +err_free_props: kfree(props); err_free_swnodes: kfree(swnodes);
diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c index 2263885..f2053cb 100644 --- a/arch/x86/platform/pvh/enlighten.c +++ b/arch/x86/platform/pvh/enlighten.c
@@ -25,11 +25,6 @@ struct hvm_start_info __initdata pvh_start_info; const unsigned int __initconst pvh_start_info_sz = sizeof(pvh_start_info); -static u64 __init pvh_get_root_pointer(void) -{ - return pvh_start_info.rsdp_paddr; -} - /* * Xen guests are able to obtain the memory map from the hypervisor via the * HYPERVISOR_memory_op hypercall. @@ -95,7 +90,7 @@ static void __init init_pvh_bootparams(bool xen_guest) pvh_bootparams.hdr.version = (2 << 8) | 12; pvh_bootparams.hdr.type_of_loader = ((xen_guest ? 0x9 : 0xb) << 4) | 0; - x86_init.acpi.get_root_pointer = pvh_get_root_pointer; + pvh_bootparams.acpi_rsdp_addr = pvh_start_info.rsdp_paddr; } /*
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 6e459e4..eaad22b 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c
@@ -392,7 +392,7 @@ static void __init xen_init_capabilities(void) /* * Xen PV would need some work to support PCID: CR3 handling as well - * as xen_flush_tlb_others() would need updating. + * as xen_flush_tlb_multi() would need updating. */ setup_clear_cpu_cap(X86_FEATURE_PCID);
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 3254eaa..c80d005 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c
@@ -105,6 +105,9 @@ pte_t xen_make_pte_init(pteval_t pte); static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; #endif +static pud_t level3_ident_pgt[PTRS_PER_PUD] __page_aligned_bss; +static pmd_t level2_ident_pgt[PTRS_PER_PMD] __page_aligned_bss; + /* * Protects atomic reservation decrease/increase against concurrent increases. * Also protects non-atomic updates of current_pages and balloon lists. @@ -1777,6 +1780,12 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* Zap identity mapping */ init_top_pgt[0] = __pgd(0); + init_top_pgt[pgd_index(__PAGE_OFFSET_BASE_L4)].pgd = + __pa_symbol(level3_ident_pgt) + _KERNPG_TABLE_NOENC; + init_top_pgt[pgd_index(__START_KERNEL_map)].pgd = + __pa_symbol(level3_kernel_pgt) + _PAGE_TABLE_NOENC; + level3_ident_pgt[0].pud = __pa_symbol(level2_ident_pgt) + _KERNPG_TABLE_NOENC; + /* Pre-constructed entries are in pfn, so convert to mfn */ /* L4[273] -> level3_ident_pgt */ /* L4[511] -> level3_kernel_pgt */
diff --git a/block/blk-map.c b/block/blk-map.c index 53bdd10..768549f 100644 --- a/block/blk-map.c +++ b/block/blk-map.c
@@ -398,8 +398,7 @@ static struct bio *bio_copy_kern(struct request *rq, void *data, unsigned int le if (op_is_write(op)) memcpy(page_address(page), p, bytes); - if (bio_add_page(bio, page, bytes, 0) < bytes) - break; + __bio_add_page(bio, page, bytes, 0); len -= bytes; p += bytes;
diff --git a/block/blk-mq.c b/block/blk-mq.c index 9af8c3d..3da2215 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c
@@ -4793,38 +4793,45 @@ static void blk_mq_update_queue_map(struct blk_mq_tag_set *set) } } -static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, - int new_nr_hw_queues) +static struct blk_mq_tags **blk_mq_prealloc_tag_set_tags( + struct blk_mq_tag_set *set, + int new_nr_hw_queues) { struct blk_mq_tags **new_tags; int i; if (set->nr_hw_queues >= new_nr_hw_queues) - goto done; + return NULL; new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *), GFP_KERNEL, set->numa_node); if (!new_tags) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (set->tags) memcpy(new_tags, set->tags, set->nr_hw_queues * sizeof(*set->tags)); - kfree(set->tags); - set->tags = new_tags; for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) { - if (!__blk_mq_alloc_map_and_rqs(set, i)) { - while (--i >= set->nr_hw_queues) - __blk_mq_free_map_and_rqs(set, i); - return -ENOMEM; + if (blk_mq_is_shared_tags(set->flags)) { + new_tags[i] = set->shared_tags; + } else { + new_tags[i] = blk_mq_alloc_map_and_rqs(set, i, + set->queue_depth); + if (!new_tags[i]) + goto out_unwind; } cond_resched(); } -done: - set->nr_hw_queues = new_nr_hw_queues; - return 0; + return new_tags; +out_unwind: + while (--i >= set->nr_hw_queues) { + if (!blk_mq_is_shared_tags(set->flags)) + blk_mq_free_map_and_rqs(set, new_tags[i], i); + } + kfree(new_tags); + return ERR_PTR(-ENOMEM); } /* @@ -5113,6 +5120,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, unsigned int memflags; int i; struct xarray elv_tbl; + struct blk_mq_tags **new_tags; bool queues_frozen = false; lockdep_assert_held(&set->tag_list_lock); @@ -5147,11 +5155,18 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (blk_mq_elv_switch_none(q, &elv_tbl)) goto switch_back; + new_tags = blk_mq_prealloc_tag_set_tags(set, nr_hw_queues); + if (IS_ERR(new_tags)) + goto switch_back; + list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_freeze_queue_nomemsave(q); queues_frozen = true; - if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0) - goto switch_back; + if (new_tags) { + kfree(set->tags); + set->tags = new_tags; + } + set->nr_hw_queues = nr_hw_queues; fallback: blk_mq_update_queue_map(set);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index f3b1968c..55a1bbf 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c
@@ -78,8 +78,14 @@ queue_requests_store(struct gendisk *disk, const char *page, size_t count) /* * Serialize updating nr_requests with concurrent queue_requests_store() * and switching elevator. + * + * Use trylock to avoid circular lock dependency with kernfs active + * reference during concurrent disk deletion: + * update_nr_hwq_lock -> kn->active (via del_gendisk -> kobject_del) + * kn->active -> update_nr_hwq_lock (via this sysfs write path) */ - down_write(&set->update_nr_hwq_lock); + if (!down_write_trylock(&set->update_nr_hwq_lock)) + return -EBUSY; if (nr == q->nr_requests) goto unlock;
diff --git a/block/elevator.c b/block/elevator.c index ebe2a1f..3bcd37c 100644 --- a/block/elevator.c +++ b/block/elevator.c
@@ -807,7 +807,16 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf, elv_iosched_load_module(ctx.name); ctx.type = elevator_find_get(ctx.name); - down_read(&set->update_nr_hwq_lock); + /* + * Use trylock to avoid circular lock dependency with kernfs active + * reference during concurrent disk deletion: + * update_nr_hwq_lock -> kn->active (via del_gendisk -> kobject_del) + * kn->active -> update_nr_hwq_lock (via this sysfs write path) + */ + if (!down_read_trylock(&set->update_nr_hwq_lock)) { + ret = -EBUSY; + goto out; + } if (!blk_queue_no_elv_switch(q)) { ret = elevator_change(q, &ctx); if (!ret) @@ -817,6 +826,7 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf, } up_read(&set->update_nr_hwq_lock); +out: if (ctx.type) elevator_put(ctx.type); return ret;
diff --git a/crypto/Kconfig b/crypto/Kconfig index e2b4106..b4bb85e 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig
@@ -876,8 +876,6 @@ - blake2b-384 - blake2b-512 - Used by the btrfs filesystem. - See https://blake2.net for further information. config CRYPTO_CMAC @@ -965,7 +963,6 @@ 10118-3), including HMAC support. This is required for IPsec AH (XFRM_AH) and IPsec ESP (XFRM_ESP). - Used by the btrfs filesystem, Ceph, NFS, and SMB. config CRYPTO_SHA512 tristate "SHA-384 and SHA-512" @@ -1039,8 +1036,6 @@ Extremely fast, working at speeds close to RAM limits. - Used by the btrfs filesystem. - endmenu menu "CRCs (cyclic redundancy checks)" @@ -1058,8 +1053,6 @@ on Communications, Vol. 41, No. 6, June 1993, selected for use with iSCSI. - Used by btrfs, ext4, jbd2, NVMeoF/TCP, and iSCSI. - config CRYPTO_CRC32 tristate "CRC32" select CRYPTO_HASH @@ -1067,8 +1060,6 @@ help CRC32 CRC algorithm (IEEE 802.3) - Used by RoCEv2 and f2fs. - endmenu menu "Compression"
diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 0bb609f..8e01993 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c
@@ -623,8 +623,10 @@ static int af_alg_alloc_tsgl(struct sock *sk) sg_init_table(sgl->sg, MAX_SGL_ENTS + 1); sgl->cur = 0; - if (sg) + if (sg) { + sg_unmark_end(sg + MAX_SGL_ENTS - 1); sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg); + } list_add_tail(&sgl->list, &ctx->tsgl_list); } @@ -635,15 +637,13 @@ static int af_alg_alloc_tsgl(struct sock *sk) /** * af_alg_count_tsgl - Count number of TX SG entries * - * The counting starts from the beginning of the SGL to @bytes. If - * an @offset is provided, the counting of the SG entries starts at the @offset. + * The counting starts from the beginning of the SGL to @bytes. * * @sk: socket of connection to user space * @bytes: Count the number of SG entries holding given number of bytes. - * @offset: Start the counting of SG entries from the given offset. * Return: Number of TX SG entries found given the constraints */ -unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset) +unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes) { const struct alg_sock *ask = alg_sk(sk); const struct af_alg_ctx *ctx = ask->private; @@ -658,25 +658,11 @@ unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset) const struct scatterlist *sg = sgl->sg; for (i = 0; i < sgl->cur; i++) { - size_t bytes_count; - - /* Skip offset */ - if (offset >= sg[i].length) { - offset -= sg[i].length; - bytes -= sg[i].length; - continue; - } - - bytes_count = sg[i].length - offset; - - offset = 0; sgl_count++; - - /* If we have seen requested number of bytes, stop */ - if (bytes_count >= bytes) + if (sg[i].length >= bytes) return sgl_count; - bytes -= bytes_count; + bytes -= sg[i].length; } } @@ -688,19 +674,14 @@ EXPORT_SYMBOL_GPL(af_alg_count_tsgl); * af_alg_pull_tsgl - Release the specified buffers from TX SGL * * If @dst is non-null, reassign the pages to @dst. The caller must release - * the pages. If @dst_offset is given only reassign the pages to @dst starting - * at the @dst_offset (byte). The caller must ensure that @dst is large - * enough (e.g. by using af_alg_count_tsgl with the same offset). + * the pages. * * @sk: socket of connection to user space * @used: Number of bytes to pull from TX SGL * @dst: If non-NULL, buffer is reassigned to dst SGL instead of releasing. The * caller must release the buffers in dst. - * @dst_offset: Reassign the TX SGL from given offset. All buffers before - * reaching the offset is released. */ -void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst, - size_t dst_offset) +void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst) { struct alg_sock *ask = alg_sk(sk); struct af_alg_ctx *ctx = ask->private; @@ -725,18 +706,10 @@ void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst, * SG entries in dst. */ if (dst) { - if (dst_offset >= plen) { - /* discard page before offset */ - dst_offset -= plen; - } else { - /* reassign page to dst after offset */ - get_page(page); - sg_set_page(dst + j, page, - plen - dst_offset, - sg[i].offset + dst_offset); - dst_offset = 0; - j++; - } + /* reassign page to dst after offset */ + get_page(page); + sg_set_page(dst + j, page, plen, sg[i].offset); + j++; } sg[i].length -= plen;
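With the unused @offset parameter gone, the counting loop reduces to walking entries until the requested byte total is covered. A self-contained sketch of the simplified loop over a plain length array rather than the kernel scatterlist type:

#include <stddef.h>

/* Count how many entries cover the first `bytes` bytes of the list. */
static unsigned int count_entries(const unsigned int *lengths,
				  unsigned int n, size_t bytes)
{
	unsigned int i, count = 0;

	for (i = 0; i < n; i++) {
		count++;
		if (lengths[i] >= bytes)
			break;		/* this entry completes the range */
		bytes -= lengths[i];
	}
	return count;
}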
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 79b016a..dda15bb 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c
@@ -26,7 +26,6 @@ #include <crypto/internal/aead.h> #include <crypto/scatterwalk.h> #include <crypto/if_alg.h> -#include <crypto/skcipher.h> #include <linux/init.h> #include <linux/list.h> #include <linux/kernel.h> @@ -72,9 +71,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, struct alg_sock *pask = alg_sk(psk); struct af_alg_ctx *ctx = ask->private; struct crypto_aead *tfm = pask->private; - unsigned int i, as = crypto_aead_authsize(tfm); + unsigned int as = crypto_aead_authsize(tfm); struct af_alg_async_req *areq; - struct af_alg_tsgl *tsgl, *tmp; struct scatterlist *rsgl_src, *tsgl_src = NULL; int err = 0; size_t used = 0; /* [in] TX bufs to be en/decrypted */ @@ -154,23 +152,24 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, outlen -= less; } + /* + * Create a per request TX SGL for this request which tracks the + * SG entries from the global TX SGL. + */ processed = used + ctx->aead_assoclen; - list_for_each_entry_safe(tsgl, tmp, &ctx->tsgl_list, list) { - for (i = 0; i < tsgl->cur; i++) { - struct scatterlist *process_sg = tsgl->sg + i; - - if (!(process_sg->length) || !sg_page(process_sg)) - continue; - tsgl_src = process_sg; - break; - } - if (tsgl_src) - break; - } - if (processed && !tsgl_src) { - err = -EFAULT; + areq->tsgl_entries = af_alg_count_tsgl(sk, processed); + if (!areq->tsgl_entries) + areq->tsgl_entries = 1; + areq->tsgl = sock_kmalloc(sk, array_size(sizeof(*areq->tsgl), + areq->tsgl_entries), + GFP_KERNEL); + if (!areq->tsgl) { + err = -ENOMEM; goto free; } + sg_init_table(areq->tsgl, areq->tsgl_entries); + af_alg_pull_tsgl(sk, processed, areq->tsgl); + tsgl_src = areq->tsgl; /* * Copy of AAD from source to destination @@ -179,76 +178,15 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, * when user space uses an in-place cipher operation, the kernel * will copy the data as it does not see whether such in-place operation * is initiated. - * - * To ensure efficiency, the following implementation ensure that the - * ciphers are invoked to perform a crypto operation in-place. This - * is achieved by memory management specified as follows. */ /* Use the RX SGL as source (and destination) for crypto op. */ rsgl_src = areq->first_rsgl.sgl.sgt.sgl; - if (ctx->enc) { - /* - * Encryption operation - The in-place cipher operation is - * achieved by the following operation: - * - * TX SGL: AAD || PT - * | | - * | copy | - * v v - * RX SGL: AAD || PT || Tag - */ - memcpy_sglist(areq->first_rsgl.sgl.sgt.sgl, tsgl_src, - processed); - af_alg_pull_tsgl(sk, processed, NULL, 0); - } else { - /* - * Decryption operation - To achieve an in-place cipher - * operation, the following SGL structure is used: - * - * TX SGL: AAD || CT || Tag - * | | ^ - * | copy | | Create SGL link. - * v v | - * RX SGL: AAD || CT ----+ - */ - - /* Copy AAD || CT to RX SGL buffer for in-place operation. */ - memcpy_sglist(areq->first_rsgl.sgl.sgt.sgl, tsgl_src, outlen); - - /* Create TX SGL for tag and chain it to RX SGL. */ - areq->tsgl_entries = af_alg_count_tsgl(sk, processed, - processed - as); - if (!areq->tsgl_entries) - areq->tsgl_entries = 1; - areq->tsgl = sock_kmalloc(sk, array_size(sizeof(*areq->tsgl), - areq->tsgl_entries), - GFP_KERNEL); - if (!areq->tsgl) { - err = -ENOMEM; - goto free; - } - sg_init_table(areq->tsgl, areq->tsgl_entries); - - /* Release TX SGL, except for tag data and reassign tag data. 
*/ - af_alg_pull_tsgl(sk, processed, areq->tsgl, processed - as); - - /* chain the areq TX SGL holding the tag with RX SGL */ - if (usedpages) { - /* RX SGL present */ - struct af_alg_sgl *sgl_prev = &areq->last_rsgl->sgl; - struct scatterlist *sg = sgl_prev->sgt.sgl; - - sg_unmark_end(sg + sgl_prev->sgt.nents - 1); - sg_chain(sg, sgl_prev->sgt.nents + 1, areq->tsgl); - } else - /* no RX SGL present (e.g. authentication only) */ - rsgl_src = areq->tsgl; - } + memcpy_sglist(rsgl_src, tsgl_src, ctx->aead_assoclen); /* Initialize the crypto operation */ - aead_request_set_crypt(&areq->cra_u.aead_req, rsgl_src, + aead_request_set_crypt(&areq->cra_u.aead_req, tsgl_src, areq->first_rsgl.sgl.sgt.sgl, used, ctx->iv); aead_request_set_ad(&areq->cra_u.aead_req, ctx->aead_assoclen); aead_request_set_tfm(&areq->cra_u.aead_req, tfm); @@ -450,7 +388,7 @@ static void aead_sock_destruct(struct sock *sk) struct crypto_aead *tfm = pask->private; unsigned int ivlen = crypto_aead_ivsize(tfm); - af_alg_pull_tsgl(sk, ctx->used, NULL, 0); + af_alg_pull_tsgl(sk, ctx->used, NULL); sock_kzfree_s(sk, ctx->iv, ivlen); sock_kfree_s(sk, ctx, ctx->len); af_alg_release_parent(sk);
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 125d395..82735e5 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c
@@ -138,7 +138,7 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, * Create a per request TX SGL for this request which tracks the * SG entries from the global TX SGL. */ - areq->tsgl_entries = af_alg_count_tsgl(sk, len, 0); + areq->tsgl_entries = af_alg_count_tsgl(sk, len); if (!areq->tsgl_entries) areq->tsgl_entries = 1; areq->tsgl = sock_kmalloc(sk, array_size(sizeof(*areq->tsgl), @@ -149,7 +149,7 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, goto free; } sg_init_table(areq->tsgl, areq->tsgl_entries); - af_alg_pull_tsgl(sk, len, areq->tsgl, 0); + af_alg_pull_tsgl(sk, len, areq->tsgl); /* Initialize the crypto operation */ skcipher_request_set_tfm(&areq->cra_u.skcipher_req, tfm); @@ -363,7 +363,7 @@ static void skcipher_sock_destruct(struct sock *sk) struct alg_sock *pask = alg_sk(psk); struct crypto_skcipher *tfm = pask->private; - af_alg_pull_tsgl(sk, ctx->used, NULL, 0); + af_alg_pull_tsgl(sk, ctx->used, NULL); sock_kzfree_s(sk, ctx->iv, crypto_skcipher_ivsize(tfm)); if (ctx->state) sock_kzfree_s(sk, ctx->state, crypto_skcipher_statesize(tfm));
diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 542a978..c0a01d7 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c
@@ -207,6 +207,7 @@ static int crypto_authenc_esn_decrypt_tail(struct aead_request *req, u8 *ohash = areq_ctx->tail; unsigned int cryptlen = req->cryptlen - authsize; unsigned int assoclen = req->assoclen; + struct scatterlist *src = req->src; struct scatterlist *dst = req->dst; u8 *ihash = ohash + crypto_ahash_digestsize(auth); u32 tmp[2]; @@ -214,23 +215,27 @@ static int crypto_authenc_esn_decrypt_tail(struct aead_request *req, if (!authsize) goto decrypt; - /* Move high-order bits of sequence number back. */ - scatterwalk_map_and_copy(tmp, dst, 4, 4, 0); - scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen, 4, 0); - scatterwalk_map_and_copy(tmp, dst, 0, 8, 1); + if (src == dst) { + /* Move high-order bits of sequence number back. */ + scatterwalk_map_and_copy(tmp, dst, 4, 4, 0); + scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen, 4, 0); + scatterwalk_map_and_copy(tmp, dst, 0, 8, 1); + } else + memcpy_sglist(dst, src, assoclen); if (crypto_memneq(ihash, ohash, authsize)) return -EBADMSG; decrypt: - sg_init_table(areq_ctx->dst, 2); + if (src != dst) + src = scatterwalk_ffwd(areq_ctx->src, src, assoclen); dst = scatterwalk_ffwd(areq_ctx->dst, dst, assoclen); skcipher_request_set_tfm(skreq, ctx->enc); skcipher_request_set_callback(skreq, flags, req->base.complete, req->base.data); - skcipher_request_set_crypt(skreq, dst, dst, cryptlen, req->iv); + skcipher_request_set_crypt(skreq, src, dst, cryptlen, req->iv); return crypto_skcipher_decrypt(skreq); } @@ -255,6 +260,7 @@ static int crypto_authenc_esn_decrypt(struct aead_request *req) unsigned int assoclen = req->assoclen; unsigned int cryptlen = req->cryptlen; u8 *ihash = ohash + crypto_ahash_digestsize(auth); + struct scatterlist *src = req->src; struct scatterlist *dst = req->dst; u32 tmp[2]; int err; @@ -262,24 +268,28 @@ static int crypto_authenc_esn_decrypt(struct aead_request *req) if (assoclen < 8) return -EINVAL; - cryptlen -= authsize; - - if (req->src != dst) - memcpy_sglist(dst, req->src, assoclen + cryptlen); - - scatterwalk_map_and_copy(ihash, req->src, assoclen + cryptlen, - authsize, 0); - if (!authsize) goto tail; - /* Move high-order bits of sequence number to the end. */ - scatterwalk_map_and_copy(tmp, dst, 0, 8, 0); - scatterwalk_map_and_copy(tmp, dst, 4, 4, 1); - scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen, 4, 1); + cryptlen -= authsize; + scatterwalk_map_and_copy(ihash, req->src, assoclen + cryptlen, + authsize, 0); - sg_init_table(areq_ctx->dst, 2); - dst = scatterwalk_ffwd(areq_ctx->dst, dst, 4); + /* Move high-order bits of sequence number to the end. */ + scatterwalk_map_and_copy(tmp, src, 0, 8, 0); + if (src == dst) { + scatterwalk_map_and_copy(tmp, dst, 4, 4, 1); + scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen, 4, 1); + dst = scatterwalk_ffwd(areq_ctx->dst, dst, 4); + } else { + scatterwalk_map_and_copy(tmp, dst, 0, 4, 1); + scatterwalk_map_and_copy(tmp + 1, dst, assoclen + cryptlen - 4, 4, 1); + + src = scatterwalk_ffwd(areq_ctx->src, src, 8); + dst = scatterwalk_ffwd(areq_ctx->dst, dst, 4); + memcpy_sglist(dst, src, assoclen + cryptlen - 8); + dst = req->dst; + } ahash_request_set_tfm(ahreq, auth); ahash_request_set_crypt(ahreq, dst, ohash, assoclen + cryptlen);
diff --git a/crypto/deflate.c b/crypto/deflate.c index 46fc7de..710ebba 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c
@@ -164,18 +164,21 @@ static int deflate_decompress_one(struct acomp_req *req, do { unsigned int dcur; + unsigned long avail_in; dcur = acomp_walk_next_dst(&walk); - if (!dcur) { - out_of_space = true; - break; - } stream->avail_out = dcur; stream->next_out = walk.dst.virt.addr; + avail_in = stream->avail_in; ret = zlib_inflate(stream, Z_NO_FLUSH); + if (!dcur && avail_in == stream->avail_in) { + out_of_space = true; + break; + } + dcur -= stream->avail_out; acomp_walk_done_dst(&walk, dcur); } while (ret == Z_OK && stream->avail_in);
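The deflate fix changes the termination condition from "no output space" to "no forward progress": inflate can legitimately consume input (headers, internal state) even when avail_out is zero, so the loop only bails out when a zero-sized output step also consumed nothing. A sketch against stock zlib that mirrors the new check:

#include <stdbool.h>
#include <zlib.h>

/* One decompress step; returns true while the loop should continue. */
static bool inflate_step(z_stream *zs, unsigned char *dst,
			 unsigned int dcur, bool *out_of_space)
{
	unsigned long avail_in = zs->avail_in;
	int ret;

	zs->next_out = dst;
	zs->avail_out = dcur;

	ret = inflate(zs, Z_NO_FLUSH);

	/* no room to write AND no input consumed: genuinely stuck */
	if (!dcur && avail_in == zs->avail_in) {
		*out_of_space = true;
		return false;
	}
	return ret == Z_OK && zs->avail_in;
}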
diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 49b607f..4985411 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c
@@ -4132,7 +4132,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, }, { .alg = "authenc(hmac(sha224),cbc(aes))", - .generic_driver = "authenc(hmac-sha224-lib,cbc(aes-generic))", + .generic_driver = "authenc(hmac-sha224-lib,cbc(aes-lib))", .test = alg_test_aead, .suite = { .aead = __VECS(hmac_sha224_aes_cbc_tv_temp) @@ -4194,7 +4194,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, }, { .alg = "authenc(hmac(sha384),cbc(aes))", - .generic_driver = "authenc(hmac-sha384-lib,cbc(aes-generic))", + .generic_driver = "authenc(hmac-sha384-lib,cbc(aes-lib))", .test = alg_test_aead, .suite = { .aead = __VECS(hmac_sha384_aes_cbc_tv_temp)
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index 4503c7c..c0d3488 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -23,9 +23,9 @@ #include "amdxdna_pci_drv.h" #include "amdxdna_pm.h" -static bool force_cmdlist; +static bool force_cmdlist = true; module_param(force_cmdlist, bool, 0600); -MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)"); +MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default true)"); #define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */ @@ -53,6 +53,7 @@ static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwct { drm_sched_stop(&hwctx->priv->sched, bad_job); aie2_destroy_context(xdna->dev_handle, hwctx); + drm_sched_start(&hwctx->priv->sched, 0); } static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx) @@ -80,7 +81,6 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw } out: - drm_sched_start(&hwctx->priv->sched, 0); XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret); return ret; } @@ -165,7 +165,6 @@ aie2_sched_notify(struct amdxdna_sched_job *job) trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq); - amdxdna_pm_suspend_put(job->hwctx->client->xdna); job->hwctx->priv->completed++; dma_fence_signal(fence); @@ -186,13 +185,13 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size) cmd_abo = job->cmd_bo; if (unlikely(job->job_timeout)) { - amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT); + amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT); ret = -EINVAL; goto out; } if (unlikely(!data) || unlikely(size != sizeof(u32))) { - amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); + amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT); ret = -EINVAL; goto out; } @@ -202,7 +201,7 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size) if (status == AIE2_STATUS_SUCCESS) amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); else - amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR); + amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ERROR); out: aie2_sched_notify(job); @@ -244,13 +243,13 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size) cmd_abo = job->cmd_bo; if (unlikely(job->job_timeout)) { - amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT); + amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT); ret = -EINVAL; goto out; } if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) { - amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); + amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT); ret = -EINVAL; goto out; } @@ -270,19 +269,12 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size) fail_cmd_idx, fail_cmd_status); if (fail_cmd_status == AIE2_STATUS_SUCCESS) { - amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); + amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ABORT); ret = -EINVAL; - goto out; + } else { + amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ERROR); } - amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR); - if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) { - struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL); - - cc->error_index = fail_cmd_idx; - if (cc->error_index >= cc->command_count) - cc->error_index = 0; - } out: aie2_sched_notify(job); return ret; @@ -306,10 +298,6 @@ aie2_sched_job_run(struct drm_sched_job *sched_job) kref_get(&job->refcnt); fence = dma_fence_get(job->fence); - ret = amdxdna_pm_resume_get(hwctx->client->xdna); - if (ret) - goto out; - if (job->drv_cmd) { switch (job->drv_cmd->opcode) { case SYNC_DEBUG_BO: 
@@ -336,7 +324,6 @@ aie2_sched_job_run(struct drm_sched_job *sched_job) out: if (ret) { - amdxdna_pm_suspend_put(hwctx->client->xdna); dma_fence_put(job->fence); aie2_job_put(job); mmput(job->mm); @@ -497,7 +484,7 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx) if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) { ret = aie2_destroy_context(xdna->dev_handle, hwctx); - if (ret) + if (ret && ret != -ENODEV) XDNA_ERR(xdna, "Destroy temporal only context failed, ret %d", ret); } else { ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx); @@ -629,7 +616,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) goto free_entity; } - ret = amdxdna_pm_resume_get(xdna); + ret = amdxdna_pm_resume_get_locked(xdna); if (ret) goto free_col_list; @@ -760,7 +747,7 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size if (!hwctx->cus) return -ENOMEM; - ret = amdxdna_pm_resume_get(xdna); + ret = amdxdna_pm_resume_get_locked(xdna); if (ret) goto free_cus; @@ -1070,6 +1057,8 @@ void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP, true, MAX_SCHEDULE_TIMEOUT); - if (!ret || ret == -ERESTARTSYS) + if (!ret) XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret); + else if (ret == -ERESTARTSYS) + XDNA_DBG(xdna, "Wait for bo interrupted by signal"); }
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index 7d7dcfe..ffcf3be 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c
@@ -40,11 +40,8 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev, return -ENODEV; ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg); - if (ret == -ETIME) { - xdna_mailbox_stop_channel(ndev->mgmt_chann); - xdna_mailbox_destroy_channel(ndev->mgmt_chann); - ndev->mgmt_chann = NULL; - } + if (ret == -ETIME) + aie2_destroy_mgmt_chann(ndev); if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) { XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x", @@ -216,8 +213,10 @@ static int aie2_destroy_context_req(struct amdxdna_dev_hdl *ndev, u32 id) req.context_id = id; ret = aie2_send_mgmt_msg_wait(ndev, &msg); - if (ret) + if (ret && ret != -ENODEV) XDNA_WARN(xdna, "Destroy context failed, ret %d", ret); + else if (ret == -ENODEV) + XDNA_DBG(xdna, "Destroy context: device already stopped"); return ret; } @@ -294,13 +293,20 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct } intr_reg = i2x.mb_head_ptr_reg + 4; - hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x, - intr_reg, ret); + hwctx->priv->mbox_chann = xdna_mailbox_alloc_channel(ndev->mbox); if (!hwctx->priv->mbox_chann) { XDNA_ERR(xdna, "Not able to create channel"); ret = -EINVAL; goto del_ctx_req; } + + ret = xdna_mailbox_start_channel(hwctx->priv->mbox_chann, &x2i, &i2x, + intr_reg, ret); + if (ret) { + XDNA_ERR(xdna, "Not able to create channel"); + ret = -EINVAL; + goto free_channel; + } ndev->hwctx_num++; XDNA_DBG(xdna, "Mailbox channel irq: %d, msix_id: %d", ret, resp.msix_id); @@ -308,6 +314,8 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct return 0; +free_channel: + xdna_mailbox_free_channel(hwctx->priv->mbox_chann); del_ctx_req: aie2_destroy_context_req(ndev, hwctx->fw_ctx_id); return ret; @@ -318,9 +326,12 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwc struct amdxdna_dev *xdna = ndev->xdna; int ret; + if (!hwctx->priv->mbox_chann) + return 0; + xdna_mailbox_stop_channel(hwctx->priv->mbox_chann); ret = aie2_destroy_context_req(ndev, hwctx->fw_ctx_id); - xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann); + xdna_mailbox_free_channel(hwctx->priv->mbox_chann); XDNA_DBG(xdna, "Destroyed fw ctx %d", hwctx->fw_ctx_id); hwctx->priv->mbox_chann = NULL; hwctx->fw_ctx_id = -1; @@ -694,11 +705,11 @@ aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *siz u32 cmd_len; void *cmd; - memset(npu_slot, 0, sizeof(*npu_slot)); cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); if (*size < sizeof(*npu_slot) + cmd_len) return -EINVAL; + memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); if (npu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; @@ -719,7 +730,6 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si u32 cmd_len; u32 arg_sz; - memset(npu_slot, 0, sizeof(*npu_slot)); sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); arg_sz = cmd_len - sizeof(*sn); if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE) @@ -728,6 +738,7 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si if (*size < sizeof(*npu_slot) + arg_sz) return -EINVAL; + memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); if (npu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; @@ -751,7 +762,6 @@ aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t u32 cmd_len; u32 arg_sz; - memset(npu_slot, 0, sizeof(*npu_slot)); pd = 
amdxdna_cmd_get_payload(cmd_bo, &cmd_len); arg_sz = cmd_len - sizeof(*pd); if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) @@ -760,6 +770,7 @@ aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t if (*size < sizeof(*npu_slot) + arg_sz) return -EINVAL; + memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); if (npu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; @@ -787,7 +798,6 @@ aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si u32 cmd_len; u32 arg_sz; - memset(npu_slot, 0, sizeof(*npu_slot)); pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); arg_sz = cmd_len - sizeof(*pd); if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) @@ -796,6 +806,7 @@ aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si if (*size < sizeof(*npu_slot) + arg_sz) return -EINVAL; + memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->type = EXEC_NPU_TYPE_ELF; npu_slot->inst_buf_addr = pd->inst_buf; npu_slot->save_buf_addr = pd->save_buf; @@ -909,6 +920,20 @@ void aie2_msg_init(struct amdxdna_dev_hdl *ndev) ndev->exec_msg_ops = &legacy_exec_message_ops; } +void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev) +{ + struct amdxdna_dev *xdna = ndev->xdna; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + if (!ndev->mgmt_chann) + return; + + xdna_mailbox_stop_channel(ndev->mgmt_chann); + xdna_mailbox_free_channel(ndev->mgmt_chann); + ndev->mgmt_chann = NULL; +} + static inline struct amdxdna_gem_obj * aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job) {
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 2a51b26..4924a9d 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -32,6 +32,11 @@ static int aie2_max_col = XRS_MAX_COL; module_param(aie2_max_col, uint, 0600); MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used"); +static char *npu_fw[] = { + "npu_7.sbin", + "npu.sbin" +}; + /* * The management mailbox channel is allocated by firmware. * The related register and ring buffer information is on SRAM BAR. @@ -323,10 +328,9 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna) return; } + aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, NULL); aie2_mgmt_fw_fini(ndev); - xdna_mailbox_stop_channel(ndev->mgmt_chann); - xdna_mailbox_destroy_channel(ndev->mgmt_chann); - ndev->mgmt_chann = NULL; + aie2_destroy_mgmt_chann(ndev); drmm_kfree(&xdna->ddev, ndev->mbox); ndev->mbox = NULL; aie2_psp_stop(ndev->psp_hdl); @@ -357,10 +361,29 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) } pci_set_master(pdev); + mbox_res.ringbuf_base = ndev->sram_base; + mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar); + mbox_res.mbox_base = ndev->mbox_base; + mbox_res.mbox_size = MBOX_SIZE(ndev); + mbox_res.name = "xdna_mailbox"; + ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res); + if (!ndev->mbox) { + XDNA_ERR(xdna, "failed to create mailbox device"); + ret = -ENODEV; + goto disable_dev; + } + + ndev->mgmt_chann = xdna_mailbox_alloc_channel(ndev->mbox); + if (!ndev->mgmt_chann) { + XDNA_ERR(xdna, "failed to alloc channel"); + ret = -ENODEV; + goto disable_dev; + } + ret = aie2_smu_init(ndev); if (ret) { XDNA_ERR(xdna, "failed to init smu, ret %d", ret); - goto disable_dev; + goto free_channel; } ret = aie2_psp_start(ndev->psp_hdl); @@ -375,18 +398,6 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) goto stop_psp; } - mbox_res.ringbuf_base = ndev->sram_base; - mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar); - mbox_res.mbox_base = ndev->mbox_base; - mbox_res.mbox_size = MBOX_SIZE(ndev); - mbox_res.name = "xdna_mailbox"; - ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res); - if (!ndev->mbox) { - XDNA_ERR(xdna, "failed to create mailbox device"); - ret = -ENODEV; - goto stop_psp; - } - mgmt_mb_irq = pci_irq_vector(pdev, ndev->mgmt_chan_idx); if (mgmt_mb_irq < 0) { ret = mgmt_mb_irq; @@ -395,52 +406,55 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) } xdna_mailbox_intr_reg = ndev->mgmt_i2x.mb_head_ptr_reg + 4; - ndev->mgmt_chann = xdna_mailbox_create_channel(ndev->mbox, - &ndev->mgmt_x2i, - &ndev->mgmt_i2x, - xdna_mailbox_intr_reg, - mgmt_mb_irq); - if (!ndev->mgmt_chann) { - XDNA_ERR(xdna, "failed to create management mailbox channel"); + ret = xdna_mailbox_start_channel(ndev->mgmt_chann, + &ndev->mgmt_x2i, + &ndev->mgmt_i2x, + xdna_mailbox_intr_reg, + mgmt_mb_irq); + if (ret) { + XDNA_ERR(xdna, "failed to start management mailbox channel"); ret = -EINVAL; goto stop_psp; } - ret = aie2_pm_init(ndev); - if (ret) { - XDNA_ERR(xdna, "failed to init pm, ret %d", ret); - goto destroy_mgmt_chann; - } - ret = aie2_mgmt_fw_init(ndev); if (ret) { XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret); - goto destroy_mgmt_chann; + goto stop_fw; + } + + ret = aie2_pm_init(ndev); + if (ret) { + XDNA_ERR(xdna, "failed to init pm, ret %d", ret); + goto stop_fw; } ret = aie2_mgmt_fw_query(ndev); if (ret) { XDNA_ERR(xdna, "failed to query fw, ret %d", ret); - goto destroy_mgmt_chann; + goto stop_fw; } ret = aie2_error_async_events_alloc(ndev); if (ret) { XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret); - goto destroy_mgmt_chann; + goto stop_fw; } ndev->dev_status = AIE2_DEV_START; return 0; 
-destroy_mgmt_chann: +stop_fw: + aie2_suspend_fw(ndev); xdna_mailbox_stop_channel(ndev->mgmt_chann); - xdna_mailbox_destroy_channel(ndev->mgmt_chann); stop_psp: aie2_psp_stop(ndev->psp_hdl); fini_smu: aie2_smu_fini(ndev); +free_channel: + xdna_mailbox_free_channel(ndev->mgmt_chann); + ndev->mgmt_chann = NULL; disable_dev: pci_disable_device(pdev); @@ -451,7 +465,6 @@ static int aie2_hw_suspend(struct amdxdna_dev *xdna) { struct amdxdna_client *client; - guard(mutex)(&xdna->dev_lock); list_for_each_entry(client, &xdna->client_list, node) aie2_hwctx_suspend(client); @@ -489,6 +502,7 @@ static int aie2_init(struct amdxdna_dev *xdna) struct psp_config psp_conf; const struct firmware *fw; unsigned long bars = 0; + char *fw_full_path; int i, nvec, ret; if (!hypervisor_is_type(X86_HYPER_NATIVE)) { @@ -503,7 +517,19 @@ static int aie2_init(struct amdxdna_dev *xdna) ndev->priv = xdna->dev_info->dev_priv; ndev->xdna = xdna; - ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev); + for (i = 0; i < ARRAY_SIZE(npu_fw); i++) { + fw_full_path = kasprintf(GFP_KERNEL, "%s%s", ndev->priv->fw_path, npu_fw[i]); + if (!fw_full_path) + return -ENOMEM; + + ret = firmware_request_nowarn(&fw, fw_full_path, &pdev->dev); + kfree(fw_full_path); + if (!ret) { + XDNA_INFO(xdna, "Load firmware %s%s", ndev->priv->fw_path, npu_fw[i]); + break; + } + } + if (ret) { XDNA_ERR(xdna, "failed to request_firmware %s, ret %d", ndev->priv->fw_path, ret); @@ -951,7 +977,7 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i if (!drm_dev_enter(&xdna->ddev, &idx)) return -ENODEV; - ret = amdxdna_pm_resume_get(xdna); + ret = amdxdna_pm_resume_get_locked(xdna); if (ret) goto dev_exit; @@ -1044,7 +1070,7 @@ static int aie2_get_array(struct amdxdna_client *client, if (!drm_dev_enter(&xdna->ddev, &idx)) return -ENODEV; - ret = amdxdna_pm_resume_get(xdna); + ret = amdxdna_pm_resume_get_locked(xdna); if (ret) goto dev_exit; @@ -1134,7 +1160,7 @@ static int aie2_set_state(struct amdxdna_client *client, if (!drm_dev_enter(&xdna->ddev, &idx)) return -ENODEV; - ret = amdxdna_pm_resume_get(xdna); + ret = amdxdna_pm_resume_get_locked(xdna); if (ret) goto dev_exit;
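aie2_init() now builds the firmware path from a directory prefix plus a preference-ordered name list, using the no-warning request variant so a missing npu_7.sbin falls back silently to npu.sbin. A userspace sketch of the same fallback loop, with fopen() standing in for firmware_request_nowarn():

#include <errno.h>
#include <stdio.h>

static const char *const fw_names[] = { "npu_7.sbin", "npu.sbin" };

static FILE *open_first_fw(const char *dir)
{
	char path[256];
	size_t i;

	for (i = 0; i < sizeof(fw_names) / sizeof(fw_names[0]); i++) {
		FILE *f;

		snprintf(path, sizeof(path), "%s%s", dir, fw_names[i]);
		f = fopen(path, "rb");
		if (f)
			return f;	/* first image found wins */
	}
	errno = ENOENT;
	return NULL;
}

This is why the per-NPU .fw_path entries below change from full file names to directory prefixes.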
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h index b20a366..e72311c 100644 --- a/drivers/accel/amdxdna/aie2_pci.h +++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -303,6 +303,7 @@ int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev, /* aie2_message.c */ void aie2_msg_init(struct amdxdna_dev_hdl *ndev); +void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev); int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev); int aie2_resume_fw(struct amdxdna_dev_hdl *ndev); int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c index 579b8be..29bd440 100644 --- a/drivers/accel/amdxdna/aie2_pm.c +++ b/drivers/accel/amdxdna/aie2_pm.c
@@ -31,7 +31,7 @@ int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) { int ret; - ret = amdxdna_pm_resume_get(ndev->xdna); + ret = amdxdna_pm_resume_get_locked(ndev->xdna); if (ret) return ret;
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index 59fa380..8384309 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -17,6 +17,7 @@ #include "amdxdna_ctx.h" #include "amdxdna_gem.h" #include "amdxdna_pci_drv.h" +#include "amdxdna_pm.h" #define MAX_HWCTX_ID 255 #define MAX_ARG_COUNT 4095 @@ -104,7 +105,10 @@ void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size) if (size) { count = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header); - if (unlikely(count <= num_masks)) { + if (unlikely(count <= num_masks || + count * sizeof(u32) + + offsetof(struct amdxdna_cmd, data[0]) > + abo->mem.size)) { *size = 0; return NULL; } @@ -132,6 +136,33 @@ u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo) return INVALID_CU_IDX; } +int amdxdna_cmd_set_error(struct amdxdna_gem_obj *abo, + struct amdxdna_sched_job *job, u32 cmd_idx, + enum ert_cmd_state error_state) +{ + struct amdxdna_client *client = job->hwctx->client; + struct amdxdna_cmd *cmd = abo->mem.kva; + struct amdxdna_cmd_chain *cc = NULL; + + cmd->header &= ~AMDXDNA_CMD_STATE; + cmd->header |= FIELD_PREP(AMDXDNA_CMD_STATE, error_state); + + if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN) { + cc = amdxdna_cmd_get_payload(abo, NULL); + cc->error_index = (cmd_idx < cc->command_count) ? cmd_idx : 0; + abo = amdxdna_gem_get_obj(client, cc->data[0], AMDXDNA_BO_CMD); + if (!abo) + return -EINVAL; + cmd = abo->mem.kva; + } + + memset(cmd->data, 0xff, abo->mem.size - sizeof(*cmd)); + if (cc) + amdxdna_gem_put_obj(abo); + + return 0; +} + /* * This should be called in close() and remove(). DO NOT call in other syscalls. * This guarantee that when hwctx and resources will be released, if user @@ -266,9 +297,9 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr struct amdxdna_drm_config_hwctx *args = data; struct amdxdna_dev *xdna = to_xdna_dev(dev); struct amdxdna_hwctx *hwctx; - int ret, idx; u32 buf_size; void *buf; + int ret; u64 val; if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad))) @@ -310,20 +341,17 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr return -EINVAL; } - mutex_lock(&xdna->dev_lock); - idx = srcu_read_lock(&client->hwctx_srcu); + guard(mutex)(&xdna->dev_lock); hwctx = xa_load(&client->hwctx_xa, args->handle); if (!hwctx) { XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle); ret = -EINVAL; - goto unlock_srcu; + goto free_buf; } ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size); -unlock_srcu: - srcu_read_unlock(&client->hwctx_srcu, idx); - mutex_unlock(&xdna->dev_lock); +free_buf: kfree(buf); return ret; } @@ -334,7 +362,7 @@ int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl) struct amdxdna_hwctx *hwctx; struct amdxdna_gem_obj *abo; struct drm_gem_object *gobj; - int ret, idx; + int ret; if (!xdna->dev_info->ops->hwctx_sync_debug_bo) return -EOPNOTSUPP; @@ -345,17 +373,15 @@ int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl) abo = to_xdna_obj(gobj); guard(mutex)(&xdna->dev_lock); - idx = srcu_read_lock(&client->hwctx_srcu); hwctx = xa_load(&client->hwctx_xa, abo->assigned_hwctx); if (!hwctx) { ret = -EINVAL; - goto unlock_srcu; + goto put_obj; } ret = xdna->dev_info->ops->hwctx_sync_debug_bo(hwctx, debug_bo_hdl); -unlock_srcu: - srcu_read_unlock(&client->hwctx_srcu, idx); +put_obj: drm_gem_object_put(gobj); return ret; } @@ -420,6 +446,7 @@ amdxdna_arg_bos_lookup(struct amdxdna_client *client, void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job) { trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release"); + 
amdxdna_pm_suspend_put(job->hwctx->client->xdna); amdxdna_arg_bos_put(job); amdxdna_gem_put_obj(job->cmd_bo); dma_fence_put(job->fence); @@ -457,6 +484,12 @@ int amdxdna_cmd_submit(struct amdxdna_client *client, goto cmd_put; } + ret = amdxdna_pm_resume_get(xdna); + if (ret) { + XDNA_ERR(xdna, "Resume failed, ret %d", ret); + goto put_bos; + } + idx = srcu_read_lock(&client->hwctx_srcu); hwctx = xa_load(&client->hwctx_xa, hwctx_hdl); if (!hwctx) { @@ -497,6 +530,8 @@ int amdxdna_cmd_submit(struct amdxdna_client *client, dma_fence_put(job->fence); unlock_srcu: srcu_read_unlock(&client->hwctx_srcu, idx); + amdxdna_pm_suspend_put(xdna); +put_bos: amdxdna_arg_bos_put(job); cmd_put: amdxdna_gem_put_obj(job->cmd_bo);
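The submit path now takes the runtime-PM reference in amdxdna_cmd_submit() and drops it in amdxdna_sched_job_cleanup(), so the device stays resumed for exactly as long as the job object exists, rather than per scheduler run. A schematic sketch of that pairing; pm_get()/pm_put() are hypothetical stand-ins for the driver helpers, which can fail in the real code:

#include <stdatomic.h>

static atomic_int pm_refs;	/* device stays awake while > 0 */

static int pm_get(void)  { atomic_fetch_add(&pm_refs, 1); return 0; }
static void pm_put(void) { atomic_fetch_sub(&pm_refs, 1); }

struct job { int pm_held; };

static int submit(struct job *j)
{
	if (pm_get() < 0)		/* resume + count the reference */
		return -EIO;
	j->pm_held = 1;
	/* ... queue the job; every error path must end in release(j) ... */
	return 0;
}

static void release(struct job *j)	/* runs once, when the job dies */
{
	if (j->pm_held)
		pm_put();		/* balances the submit-time get */
}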
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h index 16c85f0..fbdf9d0 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.h +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -167,6 +167,9 @@ amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo) void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size); u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo); +int amdxdna_cmd_set_error(struct amdxdna_gem_obj *abo, + struct amdxdna_sched_job *job, u32 cmd_idx, + enum ert_cmd_state error_state); void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job); void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c index 8c290dd..d60db49 100644 --- a/drivers/accel/amdxdna/amdxdna_gem.c +++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -21,8 +21,6 @@ #include "amdxdna_pci_drv.h" #include "amdxdna_ubuf.h" -#define XDNA_MAX_CMD_BO_SIZE SZ_32K - MODULE_IMPORT_NS("DMA_BUF"); static int @@ -745,12 +743,6 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev, { struct amdxdna_dev *xdna = to_xdna_dev(dev); struct amdxdna_gem_obj *abo; - int ret; - - if (args->size > XDNA_MAX_CMD_BO_SIZE) { - XDNA_ERR(xdna, "Command bo size 0x%llx too large", args->size); - return ERR_PTR(-EINVAL); - } if (args->size < sizeof(struct amdxdna_cmd)) { XDNA_DBG(xdna, "Command BO size 0x%llx too small", args->size); @@ -764,17 +756,7 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev, abo->type = AMDXDNA_BO_CMD; abo->client = filp->driver_priv; - ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva); - if (ret) { - XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret); - goto release_obj; - } - return abo; - -release_obj: - drm_gem_object_put(to_gobj(abo)); - return ERR_PTR(ret); } int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) @@ -871,6 +853,7 @@ struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client, struct amdxdna_dev *xdna = client->xdna; struct amdxdna_gem_obj *abo; struct drm_gem_object *gobj; + int ret; gobj = drm_gem_object_lookup(client->filp, bo_hdl); if (!gobj) { @@ -879,9 +862,26 @@ struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client, } abo = to_xdna_obj(gobj); - if (bo_type == AMDXDNA_BO_INVALID || abo->type == bo_type) + if (bo_type != AMDXDNA_BO_INVALID && abo->type != bo_type) + goto put_obj; + + if (bo_type != AMDXDNA_BO_CMD || abo->mem.kva) return abo; + if (abo->mem.size > SZ_32K) { + XDNA_ERR(xdna, "Cmd bo is too big %ld", abo->mem.size); + goto put_obj; + } + + ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva); + if (ret) { + XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret); + goto put_obj; + } + + return abo; + +put_obj: drm_gem_object_put(gobj); return NULL; }
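Instead of vmapping every command BO at creation, the GEM change maps lazily on first lookup and moves the 32K size cap to that point, so command BOs that are never submitted cost nothing. A sketch of the map-on-first-use shape; types and the map_bo() helper are illustrative, and real code must also serialize concurrent first lookups:

#include <errno.h>
#include <stddef.h>
#include <stdlib.h>

struct bo { void *kva; size_t size; };

static void *map_bo(struct bo *bo) { return malloc(bo->size); } /* stand-in */

static int get_cmd_bo(struct bo *bo)
{
	if (bo->kva)
		return 0;		/* already mapped by an earlier lookup */

	if (bo->size > 32 * 1024)	/* cap enforced at map time */
		return -EINVAL;

	bo->kva = map_bo(bo);
	return bo->kva ? 0 : -ENOMEM;
}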
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c index 235a940..46d844a 100644 --- a/drivers/accel/amdxdna/amdxdna_mailbox.c +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -460,26 +460,49 @@ int xdna_mailbox_send_msg(struct mailbox_channel *mb_chann, return ret; } -struct mailbox_channel * -xdna_mailbox_create_channel(struct mailbox *mb, - const struct xdna_mailbox_chann_res *x2i, - const struct xdna_mailbox_chann_res *i2x, - u32 iohub_int_addr, - int mb_irq) +struct mailbox_channel *xdna_mailbox_alloc_channel(struct mailbox *mb) { struct mailbox_channel *mb_chann; - int ret; - - if (!is_power_of_2(x2i->rb_size) || !is_power_of_2(i2x->rb_size)) { - pr_err("Ring buf size must be power of 2"); - return NULL; - } mb_chann = kzalloc_obj(*mb_chann); if (!mb_chann) return NULL; + INIT_WORK(&mb_chann->rx_work, mailbox_rx_worker); + mb_chann->work_q = create_singlethread_workqueue(MAILBOX_NAME); + if (!mb_chann->work_q) { + MB_ERR(mb_chann, "Create workqueue failed"); + goto free_chann; + } mb_chann->mb = mb; + + return mb_chann; + +free_chann: + kfree(mb_chann); + return NULL; +} + +void xdna_mailbox_free_channel(struct mailbox_channel *mb_chann) +{ + destroy_workqueue(mb_chann->work_q); + kfree(mb_chann); +} + +int +xdna_mailbox_start_channel(struct mailbox_channel *mb_chann, + const struct xdna_mailbox_chann_res *x2i, + const struct xdna_mailbox_chann_res *i2x, + u32 iohub_int_addr, + int mb_irq) +{ + int ret; + + if (!is_power_of_2(x2i->rb_size) || !is_power_of_2(i2x->rb_size)) { + pr_err("Ring buf size must be power of 2"); + return -EINVAL; + } + mb_chann->msix_irq = mb_irq; mb_chann->iohub_int_addr = iohub_int_addr; memcpy(&mb_chann->res[CHAN_RES_X2I], x2i, sizeof(*x2i)); @@ -489,61 +512,37 @@ xdna_mailbox_create_channel(struct mailbox *mb, mb_chann->x2i_tail = mailbox_get_tailptr(mb_chann, CHAN_RES_X2I); mb_chann->i2x_head = mailbox_get_headptr(mb_chann, CHAN_RES_I2X); - INIT_WORK(&mb_chann->rx_work, mailbox_rx_worker); - mb_chann->work_q = create_singlethread_workqueue(MAILBOX_NAME); - if (!mb_chann->work_q) { - MB_ERR(mb_chann, "Create workqueue failed"); - goto free_and_out; - } - /* Everything look good. Time to enable irq handler */ ret = request_irq(mb_irq, mailbox_irq_handler, 0, MAILBOX_NAME, mb_chann); if (ret) { MB_ERR(mb_chann, "Failed to request irq %d ret %d", mb_irq, ret); - goto destroy_wq; + return ret; } mb_chann->bad_state = false; mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0); - MB_DBG(mb_chann, "Mailbox channel created (irq: %d)", mb_chann->msix_irq); - return mb_chann; - -destroy_wq: - destroy_workqueue(mb_chann->work_q); -free_and_out: - kfree(mb_chann); - return NULL; -} - -int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann) -{ - struct mailbox_msg *mb_msg; - unsigned long msg_id; - - MB_DBG(mb_chann, "IRQ disabled and RX work cancelled"); - free_irq(mb_chann->msix_irq, mb_chann); - destroy_workqueue(mb_chann->work_q); - /* We can clean up and release resources */ - - xa_for_each(&mb_chann->chan_xa, msg_id, mb_msg) - mailbox_release_msg(mb_chann, mb_msg); - - xa_destroy(&mb_chann->chan_xa); - - MB_DBG(mb_chann, "Mailbox channel destroyed, irq: %d", mb_chann->msix_irq); - kfree(mb_chann); + MB_DBG(mb_chann, "Mailbox channel started (irq: %d)", mb_chann->msix_irq); return 0; } void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann) { + struct mailbox_msg *mb_msg; + unsigned long msg_id; + /* Disable an irq and wait. This might sleep. 
*/ - disable_irq(mb_chann->msix_irq); + free_irq(mb_chann->msix_irq, mb_chann); /* Cancel RX work and wait for it to finish */ - cancel_work_sync(&mb_chann->rx_work); - MB_DBG(mb_chann, "IRQ disabled and RX work cancelled"); + drain_workqueue(mb_chann->work_q); + + /* We can clean up and release resources */ + xa_for_each(&mb_chann->chan_xa, msg_id, mb_msg) + mailbox_release_msg(mb_chann, mb_msg); + xa_destroy(&mb_chann->chan_xa); + + MB_DBG(mb_chann, "Mailbox channel stopped, irq: %d", mb_chann->msix_irq); } struct mailbox *xdnam_mailbox_create(struct drm_device *ddev,
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.h b/drivers/accel/amdxdna/amdxdna_mailbox.h index ea367f2..8b1e009 100644 --- a/drivers/accel/amdxdna/amdxdna_mailbox.h +++ b/drivers/accel/amdxdna/amdxdna_mailbox.h
@@ -74,9 +74,16 @@ struct mailbox *xdnam_mailbox_create(struct drm_device *ddev, const struct xdna_mailbox_res *res); /* - * xdna_mailbox_create_channel() -- Create a mailbox channel instance + * xdna_mailbox_alloc_channel() -- alloc a mailbox channel * - * @mailbox: the handle return from xdna_mailbox_create() + * @mb: mailbox handle + */ +struct mailbox_channel *xdna_mailbox_alloc_channel(struct mailbox *mb); + +/* + * xdna_mailbox_start_channel() -- start a mailbox channel instance + * + * @mb_chann: the handle returned from xdna_mailbox_alloc_channel() * @x2i: host to firmware mailbox resources * @i2x: firmware to host mailbox resources * @xdna_mailbox_intr_reg: register addr of MSI-X interrupt @@ -84,28 +91,24 @@ struct mailbox *xdnam_mailbox_create(struct drm_device *ddev, * * Return: If success, return a handle of mailbox channel. Otherwise, return NULL. */ -struct mailbox_channel * -xdna_mailbox_create_channel(struct mailbox *mailbox, - const struct xdna_mailbox_chann_res *x2i, - const struct xdna_mailbox_chann_res *i2x, - u32 xdna_mailbox_intr_reg, - int mb_irq); +int +xdna_mailbox_start_channel(struct mailbox_channel *mb_chann, + const struct xdna_mailbox_chann_res *x2i, + const struct xdna_mailbox_chann_res *i2x, + u32 xdna_mailbox_intr_reg, + int mb_irq); /* - * xdna_mailbox_destroy_channel() -- destroy mailbox channel + * xdna_mailbox_free_channel() -- free mailbox channel * * @mailbox_chann: the handle return from xdna_mailbox_create_channel() - * - * Return: if success, return 0. otherwise return error code */ -int xdna_mailbox_destroy_channel(struct mailbox_channel *mailbox_chann); +void xdna_mailbox_free_channel(struct mailbox_channel *mailbox_chann); /* * xdna_mailbox_stop_channel() -- stop mailbox channel * * @mailbox_chann: the handle return from xdna_mailbox_create_channel() - * - * Return: if success, return 0. otherwise return error code */ void xdna_mailbox_stop_channel(struct mailbox_channel *mailbox_chann);
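The create/destroy pair becomes a four-stage lifecycle: alloc and free manage only software state (channel object, workqueue), while start and stop bind and quiesce the hardware side (IRQ, ring buffers). That lets a channel object be allocated early and restarted across device resets. A schematic sketch of the ordering contract; irq_bind()/irq_unbind() and the struct layout are hypothetical:

#include <stdlib.h>

struct chan { int irq; int started; };

static int irq_bind(int irq, struct chan *c)    { (void)irq; (void)c; return 0; }
static void irq_unbind(int irq, struct chan *c) { (void)irq; (void)c; }

static struct chan *chan_alloc(void)	/* software state only */
{
	return calloc(1, sizeof(struct chan));
}

static int chan_start(struct chan *c, int irq)	/* attach hardware */
{
	int ret = irq_bind(irq, c);

	if (ret)
		return ret;
	c->irq = irq;
	c->started = 1;
	return 0;
}

static void chan_stop(struct chan *c)	/* quiesce hardware, flush work */
{
	irq_unbind(c->irq, c);
	c->started = 0;
}

static void chan_free(struct chan *c)	/* tear down software state */
{
	free(c);
}

The contract is alloc, then start, then stop, then free, where the start/stop pair may repeat any number of times between alloc and free.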
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index 4ada45d..a438459 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -23,6 +23,9 @@ MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin"); MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin"); MODULE_FIRMWARE("amdnpu/17f0_11/npu.sbin"); MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin"); +MODULE_FIRMWARE("amdnpu/1502_00/npu_7.sbin"); +MODULE_FIRMWARE("amdnpu/17f0_10/npu_7.sbin"); +MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin"); /* * 0.0: Initial version
diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/amdxdna/amdxdna_pm.c index d024d48..b1fafdd 100644 --- a/drivers/accel/amdxdna/amdxdna_pm.c +++ b/drivers/accel/amdxdna/amdxdna_pm.c
@@ -16,6 +16,7 @@ int amdxdna_pm_suspend(struct device *dev) struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev)); int ret = -EOPNOTSUPP; + guard(mutex)(&xdna->dev_lock); if (xdna->dev_info->ops->suspend) ret = xdna->dev_info->ops->suspend(xdna); @@ -28,6 +29,7 @@ int amdxdna_pm_resume(struct device *dev) struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev)); int ret = -EOPNOTSUPP; + guard(mutex)(&xdna->dev_lock); if (xdna->dev_info->ops->resume) ret = xdna->dev_info->ops->resume(xdna);
diff --git a/drivers/accel/amdxdna/amdxdna_pm.h b/drivers/accel/amdxdna/amdxdna_pm.h index 77b2d6e..3d26b97 100644 --- a/drivers/accel/amdxdna/amdxdna_pm.h +++ b/drivers/accel/amdxdna/amdxdna_pm.h
@@ -15,4 +15,15 @@ void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna); void amdxdna_pm_init(struct amdxdna_dev *xdna); void amdxdna_pm_fini(struct amdxdna_dev *xdna); +static inline int amdxdna_pm_resume_get_locked(struct amdxdna_dev *xdna) +{ + int ret; + + mutex_unlock(&xdna->dev_lock); + ret = amdxdna_pm_resume_get(xdna); + mutex_lock(&xdna->dev_lock); + + return ret; +} + #endif /* _AMDXDNA_PM_H_ */
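Why the helper drops the lock: the runtime suspend/resume callbacks themselves now take dev_lock (see the amdxdna_pm.c hunk above), so a caller that already holds dev_lock would deadlock on its own lock if it requested a resume directly. A hypothetical pthread illustration of the same shape:

#include <pthread.h>

static pthread_mutex_t dev_lock = PTHREAD_MUTEX_INITIALIZER;

static int resume(void)			/* takes dev_lock internally */
{
	pthread_mutex_lock(&dev_lock);
	/* ... power the device back up ... */
	pthread_mutex_unlock(&dev_lock);
	return 0;
}

static int resume_get_locked(void)	/* caller holds dev_lock */
{
	int ret;

	pthread_mutex_unlock(&dev_lock);	/* avoid self-deadlock */
	ret = resume();
	pthread_mutex_lock(&dev_lock);
	return ret;
}

Anything the caller computed under dev_lock before the drop must, of course, be revalidated after the lock is retaken.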
diff --git a/drivers/accel/amdxdna/amdxdna_ubuf.c b/drivers/accel/amdxdna/amdxdna_ubuf.c index b509f10..fb71d6e 100644 --- a/drivers/accel/amdxdna/amdxdna_ubuf.c +++ b/drivers/accel/amdxdna/amdxdna_ubuf.c
@@ -7,6 +7,7 @@ #include <drm/drm_device.h> #include <drm/drm_print.h> #include <linux/dma-buf.h> +#include <linux/overflow.h> #include <linux/pagemap.h> #include <linux/vmalloc.h> @@ -176,7 +177,10 @@ struct dma_buf *amdxdna_get_ubuf(struct drm_device *dev, goto free_ent; } - exp_info.size += va_ent[i].len; + if (check_add_overflow(exp_info.size, va_ent[i].len, &exp_info.size)) { + ret = -EINVAL; + goto free_ent; + } } ubuf->nr_pages = exp_info.size >> PAGE_SHIFT;
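The ubuf fix replaces an unchecked size accumulation with an overflow-checked add, rejecting the request as soon as the running total would wrap instead of silently truncating the export size. A minimal sketch using the GCC/Clang builtin that the kernel's check_add_overflow() wraps:

#include <errno.h>
#include <stddef.h>

static int sum_lengths(const size_t *len, size_t n, size_t *total)
{
	size_t i, sum = 0;

	for (i = 0; i < n; i++) {
		if (__builtin_add_overflow(sum, len[i], &sum))
			return -EINVAL;	/* would wrap: bail out */
	}
	*total = sum;
	return 0;
}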
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c index 6f36a27..1320e92 100644 --- a/drivers/accel/amdxdna/npu1_regs.c +++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -67,12 +67,12 @@ const struct dpm_clk_freq npu1_dpm_clk_table[] = { static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = { { .major = 5, .min_minor = 7 }, - { .features = BIT_U64(AIE2_NPU_COMMAND), .min_minor = 8 }, + { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 5, .min_minor = 8 }, { 0 } }; static const struct amdxdna_dev_priv npu1_dev_priv = { - .fw_path = "amdnpu/1502_00/npu.sbin", + .fw_path = "amdnpu/1502_00/", .rt_config = npu1_default_rt_cfg, .dpm_clk_tbl = npu1_dpm_clk_table, .fw_feature_tbl = npu1_fw_feature_table,
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c index a8d6f76..ce25eef 100644 --- a/drivers/accel/amdxdna/npu4_regs.c +++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -98,7 +98,7 @@ const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = { }; static const struct amdxdna_dev_priv npu4_dev_priv = { - .fw_path = "amdnpu/17f0_10/npu.sbin", + .fw_path = "amdnpu/17f0_10/", .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, .fw_feature_tbl = npu4_fw_feature_table,
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c index c0a35cf..c0ac5da 100644 --- a/drivers/accel/amdxdna/npu5_regs.c +++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -63,7 +63,7 @@ #define NPU5_SRAM_BAR_BASE MMNPU_APERTURE1_BASE static const struct amdxdna_dev_priv npu5_dev_priv = { - .fw_path = "amdnpu/17f0_11/npu.sbin", + .fw_path = "amdnpu/17f0_11/", .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, .fw_feature_tbl = npu4_fw_feature_table,
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c index 1fb07df..ce591ed 100644 --- a/drivers/accel/amdxdna/npu6_regs.c +++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -63,7 +63,7 @@ #define NPU6_SRAM_BAR_BASE MMNPU_APERTURE1_BASE static const struct amdxdna_dev_priv npu6_dev_priv = { - .fw_path = "amdnpu/17f0_10/npu.sbin", + .fw_path = "amdnpu/17f0_10/", .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, .fw_feature_tbl = npu4_fw_feature_table,
diff --git a/drivers/accel/ethosu/ethosu_gem.c b/drivers/accel/ethosu/ethosu_gem.c index 4fbd4bb..7994e70 100644 --- a/drivers/accel/ethosu/ethosu_gem.c +++ b/drivers/accel/ethosu/ethosu_gem.c
@@ -154,7 +154,7 @@ static void cmd_state_init(struct cmd_state *st) static u64 cmd_to_addr(u32 *cmd) { - return ((u64)((cmd[0] & 0xff0000) << 16)) | cmd[1]; + return (((u64)cmd[0] & 0xff0000) << 16) | cmd[1]; } static u64 dma_length(struct ethosu_validated_cmdstream_info *info, @@ -245,11 +245,14 @@ static int calc_sizes(struct drm_device *ddev, ((st->ifm.stride_kernel >> 1) & 0x1) + 1; u32 stride_x = ((st->ifm.stride_kernel >> 5) & 0x2) + (st->ifm.stride_kernel & 0x1) + 1; - u32 ifm_height = st->ofm.height[2] * stride_y + + s32 ifm_height = st->ofm.height[2] * stride_y + st->ifm.height[2] - (st->ifm.pad_top + st->ifm.pad_bottom); - u32 ifm_width = st->ofm.width * stride_x + + s32 ifm_width = st->ofm.width * stride_x + st->ifm.width - (st->ifm.pad_left + st->ifm.pad_right); + if (ifm_height < 0 || ifm_width < 0) + return -EINVAL; + len = feat_matrix_length(info, &st->ifm, ifm_width, ifm_height, st->ifm.depth); dev_dbg(ddev->dev, "op %d: IFM:%d:0x%llx-0x%llx\n", @@ -417,7 +420,10 @@ static int ethosu_gem_cmdstream_copy_and_validate(struct drm_device *ddev, return ret; break; case NPU_OP_ELEMENTWISE: - use_ifm2 = !((st.ifm2.broadcast == 8) || (param == 5) || + use_scale = ethosu_is_u65(edev) ? + (st.ifm2.broadcast & 0x80) : + (st.ifm2.broadcast == 8); + use_ifm2 = !(use_scale || (param == 5) || (param == 6) || (param == 7) || (param == 0x24)); use_ifm = st.ifm.broadcast != 8; ret = calc_sizes_elemwise(ddev, info, cmd, &st, use_ifm, use_ifm2);
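The cmd_to_addr() one-liner is a classic widen-before-shift fix: in the old expression the AND and the shift were evaluated in 32-bit arithmetic, so bits 16..23 shifted into positions 32..39 were discarded before the u64 cast ever applied, and the high address byte always read back as zero. A standalone demonstration:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t cmd0 = 0x00ab0000, cmd1 = 0x12345678;

	/* buggy: shift happens in 32 bits, high byte lost */
	uint64_t bad  = ((uint64_t)((cmd0 & 0xff0000) << 16)) | cmd1;
	/* fixed: widen to 64 bits first, then shift */
	uint64_t good = (((uint64_t)cmd0 & 0xff0000) << 16) | cmd1;

	assert(bad  == 0x0000000012345678ULL);
	assert(good == 0x000000ab12345678ULL);
	return 0;
}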
diff --git a/drivers/accel/ethosu/ethosu_job.c b/drivers/accel/ethosu/ethosu_job.c index 8598a36..ec85f41 100644 --- a/drivers/accel/ethosu/ethosu_job.c +++ b/drivers/accel/ethosu/ethosu_job.c
@@ -143,17 +143,10 @@ static int ethosu_job_push(struct ethosu_job *job) return ret; } -static void ethosu_job_cleanup(struct kref *ref) +static void ethosu_job_err_cleanup(struct ethosu_job *job) { - struct ethosu_job *job = container_of(ref, struct ethosu_job, - refcount); unsigned int i; - pm_runtime_put_autosuspend(job->dev->base.dev); - - dma_fence_put(job->done_fence); - dma_fence_put(job->inference_done_fence); - for (i = 0; i < job->region_cnt; i++) drm_gem_object_put(job->region_bo[i]); @@ -162,6 +155,19 @@ static void ethosu_job_cleanup(struct kref *ref) kfree(job); } +static void ethosu_job_cleanup(struct kref *ref) +{ + struct ethosu_job *job = container_of(ref, struct ethosu_job, + refcount); + + pm_runtime_put_autosuspend(job->dev->base.dev); + + dma_fence_put(job->done_fence); + dma_fence_put(job->inference_done_fence); + + ethosu_job_err_cleanup(job); +} + static void ethosu_job_put(struct ethosu_job *job) { kref_put(&job->refcount, ethosu_job_cleanup); @@ -454,12 +460,16 @@ static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file } } ret = ethosu_job_push(ejob); + if (!ret) { + ethosu_job_put(ejob); + return 0; + } out_cleanup_job: if (ret) drm_sched_job_cleanup(&ejob->base); out_put_job: - ethosu_job_put(ejob); + ethosu_job_err_cleanup(ejob); return ret; }
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 5b34b6f..f1b6155 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -35,6 +35,7 @@ #define IVPU_HW_IP_60XX 60 #define IVPU_HW_IP_REV_LNL_B0 4 +#define IVPU_HW_IP_REV_NVL_A0 0 #define IVPU_HW_BTRS_MTL 1 #define IVPU_HW_BTRS_LNL 2
diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c index d69cd0d..d4a9bcd 100644 --- a/drivers/accel/ivpu/ivpu_hw.c +++ b/drivers/accel/ivpu/ivpu_hw.c
@@ -70,8 +70,10 @@ static void wa_init(struct ivpu_device *vdev) if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) vdev->wa.interrupt_clear_with_0 = ivpu_hw_btrs_irqs_clear_with_0_mtl(vdev); - if (ivpu_device_id(vdev) == PCI_DEVICE_ID_LNL && - ivpu_revision(vdev) < IVPU_HW_IP_REV_LNL_B0) + if ((ivpu_device_id(vdev) == PCI_DEVICE_ID_LNL && + ivpu_revision(vdev) < IVPU_HW_IP_REV_LNL_B0) || + (ivpu_device_id(vdev) == PCI_DEVICE_ID_NVL && + ivpu_revision(vdev) == IVPU_HW_IP_REV_NVL_A0)) vdev->wa.disable_clock_relinquish = true; if (ivpu_test_mode & IVPU_TEST_MODE_CLK_RELINQ_ENABLE)
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h index 421242ac..fc0ee8d 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h +++ b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h
@@ -121,12 +121,6 @@ #define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY 0x0003006cu #define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY_STATUS_DLY_MASK GENMASK(7, 0) -#define VPU_40XX_HOST_SS_AON_RETENTION0 0x0003000cu -#define VPU_40XX_HOST_SS_AON_RETENTION1 0x00030010u -#define VPU_40XX_HOST_SS_AON_RETENTION2 0x00030014u -#define VPU_40XX_HOST_SS_AON_RETENTION3 0x00030018u -#define VPU_40XX_HOST_SS_AON_RETENTION4 0x0003001cu - #define VPU_40XX_HOST_SS_AON_IDLE_GEN 0x00030200u #define VPU_40XX_HOST_SS_AON_IDLE_GEN_EN_MASK BIT_MASK(0) #define VPU_40XX_HOST_SS_AON_IDLE_GEN_HW_PG_EN_MASK BIT_MASK(1)
diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c index 959984c..37f95a0 100644 --- a/drivers/accel/ivpu/ivpu_hw_ip.c +++ b/drivers/accel/ivpu/ivpu_hw_ip.c
@@ -931,7 +931,6 @@ static int soc_cpu_boot_40xx(struct ivpu_device *vdev) static int soc_cpu_boot_60xx(struct ivpu_device *vdev) { - REGV_WR64(VPU_40XX_HOST_SS_AON_RETENTION1, vdev->fw->mem_bp->vpu_addr); soc_cpu_set_entry_point_40xx(vdev, vdev->fw->cold_boot_entry_point); return 0;
diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c index f698d5d..43f84d4 100644 --- a/drivers/accel/qaic/qaic_control.c +++ b/drivers/accel/qaic/qaic_control.c
@@ -914,7 +914,7 @@ static int decode_deactivate(struct qaic_device *qdev, void *trans, u32 *msg_len */ return -ENODEV; - if (status) { + if (usr && status) { /* * Releasing resources failed on the device side, which puts * us in a bind since they may still be in use, so enable the @@ -1109,6 +1109,9 @@ static void *msg_xfer(struct qaic_device *qdev, struct wrapper_list *wrappers, u mutex_lock(&qdev->cntl_mutex); if (!list_empty(&elem.list)) list_del(&elem.list); + /* resp_worker() processed the response but the wait was interrupted */ + else if (ret == -ERESTARTSYS) + ret = 0; if (!ret && !elem.buf) ret = -ETIMEDOUT; else if (ret > 0 && !elem.buf) @@ -1419,9 +1422,49 @@ static void resp_worker(struct work_struct *work) } mutex_unlock(&qdev->cntl_mutex); - if (!found) + if (!found) { + /* + * The user might have gone away at this point without waiting + * for QAIC_TRANS_DEACTIVATE_FROM_DEV transaction coming from + * the device. If this is not handled correctly, the host will + * not know that the DBC[n] has been freed on the device. + * Due to this failure in synchronization between the device and + * the host, if another user requests to activate a network, and + * the device assigns DBC[n] again, save_dbc_buf() will hang, + * waiting for dbc[n]->in_use to be set to false, which will not + * happen unless the qaic_dev_reset_clean_local_state() gets + * called by resetting the device (or re-inserting the module). + * + * As a solution, we look for QAIC_TRANS_DEACTIVATE_FROM_DEV + * transactions in the message before disposing of it, then + * handle releasing the DBC resources. + * + * Since the user has gone away, if the device could not + * deactivate the network (status != 0), there is no way to + * enable and reassign the DBC to the user. We can put trust in + * the device that it will release all the active DBCs in + * response to the QAIC_TRANS_TERMINATE_TO_DEV transaction, + * otherwise, the user can issue an soc_reset to the device. + */ + u32 msg_count = le32_to_cpu(msg->hdr.count); + u32 msg_len = le32_to_cpu(msg->hdr.len); + u32 len = 0; + int j; + + for (j = 0; j < msg_count && len < msg_len; ++j) { + struct wire_trans_hdr *trans_hdr; + + trans_hdr = (struct wire_trans_hdr *)(msg->data + len); + if (le32_to_cpu(trans_hdr->type) == QAIC_TRANS_DEACTIVATE_FROM_DEV) { + if (decode_deactivate(qdev, trans_hdr, &len, NULL)) + len += le32_to_cpu(trans_hdr->len); + } else { + len += le32_to_cpu(trans_hdr->len); + } + } /* request must have timed out, drop packet */ kfree(msg); + } kfree(resp); }
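The recovery path added to resp_worker() walks the packed transactions by their self-described lengths, looking for deactivate records whose DBC resources would otherwise leak. A simplified sketch of that wire-format walk; the structure layout and the type constant here are illustrative, not the qaic wire protocol definitions:

#include <stdint.h>

struct trans_hdr { uint32_t type; uint32_t len; /* len includes this header */ };

#define TRANS_DEACTIVATE_FROM_DEV 5U	/* illustrative value */

static void handle_deactivate(const struct trans_hdr *hdr) { (void)hdr; }

static void scan_transactions(const uint8_t *data, uint32_t msg_len,
			      uint32_t count)
{
	uint32_t off = 0, i;

	for (i = 0; i < count && off < msg_len; i++) {
		const struct trans_hdr *hdr =
			(const struct trans_hdr *)(data + off);

		if (hdr->type == TRANS_DEACTIVATE_FROM_DEV)
			handle_deactivate(hdr);	/* release the DBC */

		off += hdr->len;	/* advance by the record's own size */
	}
}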
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index df0ff07..6f4b545 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig
@@ -9,6 +9,7 @@ menuconfig ACPI bool "ACPI (Advanced Configuration and Power Interface) Support" depends on ARCH_SUPPORTS_ACPI + select AUXILIARY_BUS select PNP select NLS select CRC32
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index 64199b1..a09636a 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c
@@ -135,7 +135,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev, } } - if (adev->device_type == ACPI_BUS_TYPE_DEVICE && !adev->pnp.type.backlight) { + if (adev->device_type == ACPI_BUS_TYPE_DEVICE) { LIST_HEAD(resource_list); count = acpi_dev_get_resources(adev, &resource_list, NULL, NULL);
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index b34a480..b1652ca 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c
@@ -113,6 +113,10 @@ static int acpi_processor_errata_piix4(struct pci_dev *dev) PCI_ANY_ID, PCI_ANY_ID, NULL); if (ide_dev) { errata.piix4.bmisx = pci_resource_start(ide_dev, 4); + if (errata.piix4.bmisx) + dev_dbg(&ide_dev->dev, + "Bus master activity detection (BM-IDE) erratum enabled\n"); + pci_dev_put(ide_dev); } @@ -131,20 +135,17 @@ static int acpi_processor_errata_piix4(struct pci_dev *dev) if (isa_dev) { pci_read_config_byte(isa_dev, 0x76, &value1); pci_read_config_byte(isa_dev, 0x77, &value2); - if ((value1 & 0x80) || (value2 & 0x80)) + if ((value1 & 0x80) || (value2 & 0x80)) { errata.piix4.fdma = 1; + dev_dbg(&isa_dev->dev, + "Type-F DMA livelock erratum (C3 disabled)\n"); + } pci_dev_put(isa_dev); } break; } - if (ide_dev) - dev_dbg(&ide_dev->dev, "Bus master activity detection (BM-IDE) erratum enabled\n"); - - if (isa_dev) - dev_dbg(&isa_dev->dev, "Type-F DMA livelock erratum (C3 disabled)\n"); - return 0; }
diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 3fa28f1..adbaf02 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c
@@ -9,6 +9,7 @@ #define pr_fmt(fmt) "ACPI: video: " fmt +#include <linux/auxiliary_bus.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -21,7 +22,6 @@ #include <linux/sort.h> #include <linux/pci.h> #include <linux/pci_ids.h> -#include <linux/platform_device.h> #include <linux/slab.h> #include <linux/dmi.h> #include <linux/suspend.h> @@ -77,8 +77,9 @@ static int register_count; static DEFINE_MUTEX(register_count_mutex); static DEFINE_MUTEX(video_list_lock); static LIST_HEAD(video_bus_head); -static int acpi_video_bus_probe(struct platform_device *pdev); -static void acpi_video_bus_remove(struct platform_device *pdev); +static int acpi_video_bus_probe(struct auxiliary_device *aux_dev, + const struct auxiliary_device_id *id); +static void acpi_video_bus_remove(struct auxiliary_device *aux); static void acpi_video_bus_notify(acpi_handle handle, u32 event, void *data); /* @@ -93,19 +94,16 @@ enum acpi_video_level_idx { ACPI_VIDEO_FIRST_LEVEL, /* actual supported levels begin here */ }; -static const struct acpi_device_id video_device_ids[] = { - {ACPI_VIDEO_HID, 0}, - {"", 0}, +static const struct auxiliary_device_id video_bus_auxiliary_id_table[] = { + { .name = "acpi.video_bus" }, + {}, }; -MODULE_DEVICE_TABLE(acpi, video_device_ids); +MODULE_DEVICE_TABLE(auxiliary, video_bus_auxiliary_id_table); -static struct platform_driver acpi_video_bus = { +static struct auxiliary_driver acpi_video_bus = { .probe = acpi_video_bus_probe, .remove = acpi_video_bus_remove, - .driver = { - .name = "acpi-video", - .acpi_match_table = video_device_ids, - }, + .id_table = video_bus_auxiliary_id_table, }; struct acpi_video_bus_flags { @@ -1885,7 +1883,7 @@ static void acpi_video_dev_add_notify_handler(struct acpi_video_device *device) } static int acpi_video_bus_add_notify_handler(struct acpi_video_bus *video, - struct platform_device *pdev) + struct device *parent) { struct input_dev *input; struct acpi_video_device *dev; @@ -1908,7 +1906,7 @@ static int acpi_video_bus_add_notify_handler(struct acpi_video_bus *video, input->phys = video->phys; input->id.bustype = BUS_HOST; input->id.product = 0x06; - input->dev.parent = &pdev->dev; + input->dev.parent = parent; input->evbit[0] = BIT(EV_KEY); set_bit(KEY_SWITCHVIDEOMODE, input->keybit); set_bit(KEY_VIDEO_NEXT, input->keybit); @@ -1980,9 +1978,10 @@ static int acpi_video_bus_put_devices(struct acpi_video_bus *video) static int instance; -static int acpi_video_bus_probe(struct platform_device *pdev) +static int acpi_video_bus_probe(struct auxiliary_device *aux_dev, + const struct auxiliary_device_id *id_unused) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); + struct acpi_device *device = ACPI_COMPANION(&aux_dev->dev); struct acpi_video_bus *video; bool auto_detect; int error; @@ -2019,7 +2018,7 @@ static int acpi_video_bus_probe(struct platform_device *pdev) instance++; } - platform_set_drvdata(pdev, video); + auxiliary_set_drvdata(aux_dev, video); video->device = device; strscpy(acpi_device_name(device), ACPI_VIDEO_BUS_NAME); @@ -2068,7 +2067,7 @@ static int acpi_video_bus_probe(struct platform_device *pdev) !auto_detect) acpi_video_bus_register_backlight(video); - error = acpi_video_bus_add_notify_handler(video, pdev); + error = acpi_video_bus_add_notify_handler(video, &aux_dev->dev); if (error) goto err_del; @@ -2096,10 +2095,10 @@ static int acpi_video_bus_probe(struct platform_device *pdev) return error; } -static void acpi_video_bus_remove(struct platform_device *pdev) +static void acpi_video_bus_remove(struct 
auxiliary_device *aux_dev) { - struct acpi_video_bus *video = platform_get_drvdata(pdev); - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); + struct acpi_video_bus *video = auxiliary_get_drvdata(aux_dev); + struct acpi_device *device = ACPI_COMPANION(&aux_dev->dev); acpi_dev_remove_notify_handler(device, ACPI_DEVICE_NOTIFY, acpi_video_bus_notify); @@ -2163,7 +2162,7 @@ int acpi_video_register(void) dmi_check_system(video_dmi_table); - ret = platform_driver_register(&acpi_video_bus); + ret = auxiliary_driver_register(&acpi_video_bus); if (ret) goto leave; @@ -2183,7 +2182,7 @@ void acpi_video_unregister(void) { mutex_lock(®ister_count_mutex); if (register_count) { - platform_driver_unregister(&acpi_video_bus); + auxiliary_driver_unregister(&acpi_video_bus); register_count = 0; may_report_brightness_keys = false; }
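For context, the conversion above follows the standard auxiliary-bus driver shape. A minimal skeleton of such a driver (demo_* names are hypothetical; the id string follows the "<registering module>.<device name>" convention that the "acpi.video_bus" match above relies on):

#include <linux/auxiliary_bus.h>
#include <linux/module.h>

static int demo_probe(struct auxiliary_device *aux_dev,
		      const struct auxiliary_device_id *id)
{
	/* aux_dev->dev is a regular struct device: drvdata, devres and PM work here */
	auxiliary_set_drvdata(aux_dev, NULL);
	return 0;
}

static void demo_remove(struct auxiliary_device *aux_dev)
{
	/* mirror of probe; drvdata is still valid at this point */
}

static const struct auxiliary_device_id demo_id_table[] = {
	{ .name = "acpi.video_bus" },
	{}
};
MODULE_DEVICE_TABLE(auxiliary, demo_id_table);

static struct auxiliary_driver demo_driver = {
	.probe = demo_probe,
	.remove = demo_remove,
	.id_table = demo_id_table,
};
module_auxiliary_driver(demo_driver);

MODULE_LICENSE("GPL");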
diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h index 5f70b19..07d5790 100644 --- a/drivers/acpi/acpica/acpredef.h +++ b/drivers/acpi/acpica/acpredef.h
@@ -379,8 +379,9 @@ const union acpi_predefined_info acpi_gbl_predefined_methods[] = { {{"_CPC", METHOD_0ARGS, METHOD_RETURNS(ACPI_RTYPE_PACKAGE)}}, /* Variable-length (Ints/Bufs) */ - PACKAGE_INFO(ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER | ACPI_RTYPE_BUFFER, 0, - 0, 0, 0), + PACKAGE_INFO(ACPI_PTYPE1_VAR, + ACPI_RTYPE_INTEGER | ACPI_RTYPE_BUFFER | + ACPI_RTYPE_PACKAGE, 0, 0, 0, 0), {{"_CR3", METHOD_0ARGS, /* ACPI 6.0 */ METHOD_RETURNS(ACPI_RTYPE_INTEGER)}}, @@ -450,7 +451,7 @@ const union acpi_predefined_info acpi_gbl_predefined_methods[] = { {{"_DSM", METHOD_4ARGS(ACPI_TYPE_BUFFER, ACPI_TYPE_INTEGER, ACPI_TYPE_INTEGER, - ACPI_TYPE_ANY | ACPI_TYPE_PACKAGE) | + ACPI_TYPE_PACKAGE | ACPI_TYPE_ANY) | ARG_COUNT_IS_MINIMUM, METHOD_RETURNS(ACPI_RTYPE_ALL)}}, /* Must return a value, but it can be of any type */
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index f6707325..2ec095e2 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c
@@ -818,9 +818,6 @@ const struct acpi_device *acpi_companion_match(const struct device *dev) if (list_empty(&adev->pnp.ids)) return NULL; - if (adev->pnp.type.backlight) - return adev; - return acpi_primary_dev_companion(adev, dev); }
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index f257961..aa55ecf 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c
@@ -1457,15 +1457,6 @@ int acpi_dev_pm_attach(struct device *dev, bool power_on) return 0; /* - * Skip devices whose ACPI companions don't support power management and - * don't have a wakeup GPE. - */ - if (!acpi_device_power_manageable(adev) && !acpi_device_can_wakeup(adev)) { - dev_dbg(dev, "No ACPI power management or wakeup GPE\n"); - return 0; - } - - /* * Only attach the power domain to the first device if the * companion is shared by multiple. This is to prevent doing power * management twice.
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 5f63ed1..6f00652 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c
@@ -1656,6 +1656,8 @@ static int acpi_ec_setup(struct acpi_ec *ec, struct acpi_device *device, bool ca ret = ec_install_handlers(ec, device, call_reg); if (ret) { + ec_remove_handlers(ec); + if (ec == first_ec) first_ec = NULL;
diff --git a/drivers/acpi/osi.c b/drivers/acpi/osi.c index f2c943b..9470f18 100644 --- a/drivers/acpi/osi.c +++ b/drivers/acpi/osi.c
@@ -390,6 +390,19 @@ static const struct dmi_system_id acpi_osi_dmi_table[] __initconst = { }, /* + * The screen backlight turns off during udev device creation + * when returning true for _OSI("Windows 2009") + */ + { + .callback = dmi_disable_osi_win7, + .ident = "Acer Aspire One D255", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "AOD255"), + }, + }, + + /* * The wireless hotkey does not work on those machines when * returning true for _OSI("Windows 2012") */
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 5b777316..62b9c83 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c
@@ -1681,7 +1681,7 @@ acpi_status __init acpi_os_initialize(void) * Use acpi_os_map_generic_address to pre-map the reset * register if it's in system memory. */ - void *rv; + void __iomem *rv; rv = acpi_os_map_generic_address(&acpi_gbl_FADT.reset_register); pr_debug("%s: Reset register mapping %s\n", __func__,
diff --git a/drivers/acpi/riscv/rimt.c b/drivers/acpi/riscv/rimt.c index 229c4a0..906282b 100644 --- a/drivers/acpi/riscv/rimt.c +++ b/drivers/acpi/riscv/rimt.c
@@ -263,6 +263,13 @@ static int rimt_iommu_xlate(struct device *dev, struct acpi_rimt_node *node, u32 if (!rimt_fwnode) return -EPROBE_DEFER; + /* + * EPROBE_DEFER ensures the IOMMU is probed before the devices that + * depend on it. During shutdown, however, the IOMMU may be removed + * first, leading to issues. To avoid this, a device link is added + * which enforces the correct removal order. + */ + device_link_add(dev, rimt_fwnode->dev, DL_FLAG_AUTOREMOVE_CONSUMER); return acpi_iommu_fwspec_init(dev, deviceid, rimt_fwnode); }
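A device link with DL_FLAG_AUTOREMOVE_CONSUMER makes the driver core unbind the consumer before its supplier and drops the link automatically once the consumer device goes away. A hedged sketch of the same pattern outside RIMT (names hypothetical):

static int demo_link_to_iommu(struct device *consumer, struct device *iommu_dev)
{
	struct device_link *link;

	/* consumer now unbinds before iommu_dev; the link is torn down
	 * automatically when the consumer device is removed */
	link = device_link_add(consumer, iommu_dev, DL_FLAG_AUTOREMOVE_CONSUMER);
	if (!link)
		return -ENODEV;

	return 0;
}

Unlike this sketch, the hunk above ignores device_link_add()'s return value, treating link creation as best effort.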
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index dfdd004..e8cdbdb 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c
@@ -6,6 +6,7 @@ #define pr_fmt(fmt) "ACPI: " fmt #include <linux/async.h> +#include <linux/auxiliary_bus.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> @@ -2192,6 +2193,44 @@ static acpi_status acpi_bus_check_add_2(acpi_handle handle, u32 lvl_not_used, return acpi_bus_check_add(handle, false, (struct acpi_device **)ret_p); } +static void acpi_video_bus_device_release(struct device *dev) +{ + struct auxiliary_device *aux_dev = to_auxiliary_dev(dev); + + kfree(aux_dev); +} + +static void acpi_create_video_bus_device(struct acpi_device *adev, + struct acpi_device *parent) +{ + struct auxiliary_device *aux_dev; + static unsigned int aux_dev_id; + + aux_dev = kzalloc_obj(*aux_dev); + if (!aux_dev) + return; + + aux_dev->id = aux_dev_id++; + aux_dev->name = "video_bus"; + aux_dev->dev.parent = acpi_get_first_physical_node(parent); + if (!aux_dev->dev.parent) + goto err; + + aux_dev->dev.release = acpi_video_bus_device_release; + + if (auxiliary_device_init(aux_dev)) + goto err; + + ACPI_COMPANION_SET(&aux_dev->dev, adev); + if (__auxiliary_device_add(aux_dev, "acpi")) + auxiliary_device_uninit(aux_dev); + + return; + +err: + kfree(aux_dev); +} + struct acpi_scan_system_dev { struct list_head node; struct acpi_device *adev; @@ -2229,6 +2268,12 @@ static void acpi_default_enumeration(struct acpi_device *device) sd->adev = device; list_add_tail(&sd->node, &acpi_scan_system_dev_list); } + } else if (device->pnp.type.backlight) { + struct acpi_device *parent; + + parent = acpi_dev_parent(device); + if (parent) + acpi_create_video_bus_device(device, parent); } else { /* For a regular device object, create a platform device. */ acpi_create_platform_device(device, NULL);
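The device-side half of the auxiliary-bus contract has a two-phase error discipline, which the scan.c hunk above follows: before auxiliary_device_init() succeeds the memory is freed directly, afterwards only auxiliary_device_uninit() may be used, since the release callback now owns the final kfree(). A condensed sketch of that pattern (demo_* names are hypothetical):

static void demo_adev_release(struct device *dev)
{
	kfree(to_auxiliary_dev(dev));
}

static struct auxiliary_device *demo_adev_create(struct device *parent)
{
	struct auxiliary_device *aux_dev;

	aux_dev = kzalloc(sizeof(*aux_dev), GFP_KERNEL);
	if (!aux_dev)
		return NULL;

	aux_dev->name = "video_bus";
	aux_dev->dev.parent = parent;
	aux_dev->dev.release = demo_adev_release;

	if (auxiliary_device_init(aux_dev)) {
		kfree(aux_dev);			/* not yet refcounted: plain kfree() */
		return NULL;
	}

	if (auxiliary_device_add(aux_dev)) {
		auxiliary_device_uninit(aux_dev); /* release() performs the kfree() */
		return NULL;
	}

	return aux_dev;
}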
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 66ec81e..132a9df 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c
@@ -386,6 +386,14 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "80E1"), }, }, + { + .callback = init_nvs_save_s3, + .ident = "Lenovo G70-35", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "80Q5"), + }, + }, /* * ThinkPad X1 Tablet(2016) cannot do suspend-to-idle using * the Low Power S0 Idle firmware interface (see
diff --git a/drivers/android/binder/page_range.rs b/drivers/android/binder/page_range.rs index fdd9711..b57e0c7 100644 --- a/drivers/android/binder/page_range.rs +++ b/drivers/android/binder/page_range.rs
@@ -13,6 +13,8 @@ // // The shrinker will use trylock methods because it locks them in a different order. +use crate::AssertSync; + use core::{ marker::PhantomPinned, mem::{size_of, size_of_val, MaybeUninit}, @@ -142,6 +144,30 @@ pub(crate) struct ShrinkablePageRange { _pin: PhantomPinned, } +// We do not define any ops. For now, used only to check identity of vmas. +static BINDER_VM_OPS: AssertSync<bindings::vm_operations_struct> = AssertSync(pin_init::zeroed()); + +// To ensure that we do not accidentally install pages into or zap pages from the wrong vma, we +// check its vm_ops and private data before using it. +fn check_vma(vma: &virt::VmaRef, owner: *const ShrinkablePageRange) -> Option<&virt::VmaMixedMap> { + // SAFETY: Just reading the vm_ops pointer of any active vma is safe. + let vm_ops = unsafe { (*vma.as_ptr()).vm_ops }; + if !ptr::eq(vm_ops, &BINDER_VM_OPS.0) { + return None; + } + + // SAFETY: Reading the vm_private_data pointer of a binder-owned vma is safe. + let vm_private_data = unsafe { (*vma.as_ptr()).vm_private_data }; + // The ShrinkablePageRange is only dropped when the Process is dropped, which only happens once + // the file's ->release handler is invoked, which means the ShrinkablePageRange outlives any + // VMA associated with it, so there can't be any false positives due to pointer reuse here. + if !ptr::eq(vm_private_data, owner.cast()) { + return None; + } + + vma.as_mixedmap_vma() +} + struct Inner { /// Array of pages. /// @@ -308,6 +334,18 @@ pub(crate) fn register_with_vma(&self, vma: &virt::VmaNew) -> Result<usize> { inner.size = num_pages; inner.vma_addr = vma.start(); + // This pointer is only used for comparison - it's not dereferenced. + // + // SAFETY: We own the vma, and we don't use any methods on VmaNew that rely on + // `vm_private_data`. + unsafe { + (*vma.as_ptr()).vm_private_data = ptr::from_ref(self).cast_mut().cast::<c_void>() + }; + + // SAFETY: We own the vma, and we don't use any methods on VmaNew that rely on + // `vm_ops`. + unsafe { (*vma.as_ptr()).vm_ops = &BINDER_VM_OPS.0 }; + Ok(num_pages) } @@ -399,22 +437,25 @@ unsafe fn use_page_slow(&self, i: usize) -> Result<()> { // // Using `mmput_async` avoids this, because then the `mm` cleanup is instead queued to a // workqueue. - MmWithUser::into_mmput_async(self.mm.mmget_not_zero().ok_or(ESRCH)?) - .mmap_read_lock() - .vma_lookup(vma_addr) - .ok_or(ESRCH)? - .as_mixedmap_vma() - .ok_or(ESRCH)? - .vm_insert_page(user_page_addr, &new_page) - .inspect_err(|err| { - pr_warn!( - "Failed to vm_insert_page({}): vma_addr:{} i:{} err:{:?}", - user_page_addr, - vma_addr, - i, - err - ) - })?; + let mm = MmWithUser::into_mmput_async(self.mm.mmget_not_zero().ok_or(ESRCH)?); + { + let vma_read; + let mmap_read; + let vma = if let Some(ret) = mm.lock_vma_under_rcu(vma_addr) { + vma_read = ret; + check_vma(&vma_read, self) + } else { + mmap_read = mm.mmap_read_lock(); + mmap_read + .vma_lookup(vma_addr) + .and_then(|vma| check_vma(vma, self)) + }; + + match vma { + Some(vma) => vma.vm_insert_page(user_page_addr, &new_page)?, + None => return Err(ESRCH), + } + } let inner = self.lock.lock(); @@ -667,12 +708,15 @@ fn drop(self: Pin<&mut Self>) { let mmap_read; let mm_mutex; let vma_addr; + let range_ptr; { // CAST: The `list_head` field is first in `PageInfo`. let info = item as *mut PageInfo; // SAFETY: The `range` field of `PageInfo` is immutable. - let range = unsafe { &*((*info).range) }; + range_ptr = unsafe { (*info).range }; + // SAFETY: The `range` outlives its `PageInfo` values. 
+ let range = unsafe { &*range_ptr }; mm = match range.mm.mmget_not_zero() { Some(mm) => MmWithUser::into_mmput_async(mm), @@ -717,9 +761,11 @@ fn drop(self: Pin<&mut Self>) { // SAFETY: The lru lock is locked when this method is called. unsafe { bindings::spin_unlock(&raw mut (*lru).lock) }; - if let Some(vma) = mmap_read.vma_lookup(vma_addr) { - let user_page_addr = vma_addr + (page_index << PAGE_SHIFT); - vma.zap_page_range_single(user_page_addr, PAGE_SIZE); + if let Some(unchecked_vma) = mmap_read.vma_lookup(vma_addr) { + if let Some(vma) = check_vma(unchecked_vma, range_ptr) { + let user_page_addr = vma_addr + (page_index << PAGE_SHIFT); + vma.zap_page_range_single(user_page_addr, PAGE_SIZE); + } } drop(mmap_read);
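The identity check above has a direct C analogue: a driver-owned vm_operations_struct whose address doubles as a brand, plus a private-data pointer naming the owner. A sketch of that C-side pattern (demo_* names are hypothetical):

/* address used only as an identity tag; no callbacks are needed */
static const struct vm_operations_struct demo_vm_ops;

static bool demo_vma_is_ours(struct vm_area_struct *vma, const void *owner)
{
	return vma->vm_ops == &demo_vm_ops &&
	       vma->vm_private_data == (void *)owner;
}

As in check_vma(), the owner pointer is only compared, never dereferenced, so stale-pointer reuse is the only hazard that has to be ruled out separately.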
diff --git a/drivers/android/binder/process.rs b/drivers/android/binder/process.rs index 41de559..f064981 100644 --- a/drivers/android/binder/process.rs +++ b/drivers/android/binder/process.rs
@@ -1295,7 +1295,8 @@ pub(crate) fn clear_death(&self, reader: &mut UserSliceReader, thread: &Thread) } pub(crate) fn dead_binder_done(&self, cookie: u64, thread: &Thread) { - if let Some(death) = self.inner.lock().pull_delivered_death(cookie) { + let death = self.inner.lock().pull_delivered_death(cookie); + if let Some(death) = death { death.set_notification_done(thread); } }
diff --git a/drivers/android/binder/range_alloc/array.rs b/drivers/android/binder/range_alloc/array.rs index 07e1dec..ada1d1b 100644 --- a/drivers/android/binder/range_alloc/array.rs +++ b/drivers/android/binder/range_alloc/array.rs
@@ -118,7 +118,7 @@ pub(crate) fn reserve_new( size: usize, is_oneway: bool, pid: Pid, - ) -> Result<usize> { + ) -> Result<(usize, bool)> { // Compute new value of free_oneway_space, which is set only on success. let new_oneway_space = if is_oneway { match self.free_oneway_space.checked_sub(size) { @@ -146,7 +146,38 @@ pub(crate) fn reserve_new( .ok() .unwrap(); - Ok(insert_at_offset) + // Start detecting spammers once we have less than 20% + // of async space left (which is less than 10% of total + // buffer size). + // + // (This will short-circuit, so `low_oneway_space` is + // only called when necessary.) + let oneway_spam_detected = + is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid); + + Ok((insert_at_offset, oneway_spam_detected)) + } + + /// Find the amount and size of buffers allocated by the current caller. + /// + /// The idea is that once we cross the threshold, whoever is responsible + /// for the low async space is likely to try to send another async transaction, + /// and at some point we'll catch them in the act. This is more efficient + /// than keeping a map per pid. + fn low_oneway_space(&self, calling_pid: Pid) -> bool { + let mut total_alloc_size = 0; + let mut num_buffers = 0; + + // Warn if this pid has more than 50 transactions, or more than 50% of + // async space (which is 25% of total buffer size). Oneway spam is only + // detected when the threshold is exceeded. + for range in &self.ranges { + if range.state.is_oneway() && range.state.pid() == calling_pid { + total_alloc_size += range.size; + num_buffers += 1; + } + } + num_buffers > 50 || total_alloc_size > self.size / 4 } pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> {
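Concretely, with the oneway half of a mapping being size / 2, the two thresholds in the comments above work out as follows (a worked sketch, assuming a 1 MiB mapping):

static void demo_spam_thresholds(void)
{
	const unsigned long size = 1UL << 20;		/* total buffer: 1 MiB */
	const unsigned long async_space = size / 2;	/* oneway half: 512 KiB */
	const unsigned long detect_below = size / 10;	/* ~102 KiB, i.e. <20% of async_space */
	const unsigned long per_pid_cap = size / 4;	/* 256 KiB, i.e. 50% of async_space */

	/* detection runs once free oneway space drops below detect_below; a pid
	 * is then flagged when it holds more than 50 buffers or per_pid_cap bytes */
	(void)async_space; (void)detect_below; (void)per_pid_cap;
}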
diff --git a/drivers/android/binder/range_alloc/mod.rs b/drivers/android/binder/range_alloc/mod.rs index 2301e2b..1f47344 100644 --- a/drivers/android/binder/range_alloc/mod.rs +++ b/drivers/android/binder/range_alloc/mod.rs
@@ -188,11 +188,11 @@ pub(crate) fn reserve_new(&mut self, mut args: ReserveNewArgs<T>) -> Result<Rese self.reserve_new(args) } Impl::Array(array) => { - let offset = + let (offset, oneway_spam_detected) = array.reserve_new(args.debug_id, args.size, args.is_oneway, args.pid)?; Ok(ReserveNew::Success(ReserveNewSuccess { offset, - oneway_spam_detected: false, + oneway_spam_detected, _empty_array_alloc: args.empty_array_alloc, _new_tree_alloc: args.new_tree_alloc, _tree_alloc: args.tree_alloc,
diff --git a/drivers/android/binder/range_alloc/tree.rs b/drivers/android/binder/range_alloc/tree.rs index 838fdd2..48796fc 100644 --- a/drivers/android/binder/range_alloc/tree.rs +++ b/drivers/android/binder/range_alloc/tree.rs
@@ -164,15 +164,6 @@ pub(crate) fn reserve_new( self.free_oneway_space }; - // Start detecting spammers once we have less than 20% - // of async space left (which is less than 10% of total - // buffer size). - // - // (This will short-circut, so `low_oneway_space` is - // only called when necessary.) - let oneway_spam_detected = - is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid); - let (found_size, found_off, tree_node, free_tree_node) = match self.find_best_match(size) { None => { pr_warn!("ENOSPC from range_alloc.reserve_new - size: {}", size); @@ -203,6 +194,15 @@ pub(crate) fn reserve_new( self.free_tree.insert(free_tree_node); } + // Start detecting spammers once we have less than 20% + // of async space left (which is less than 10% of total + // buffer size). + // + // (This will short-circuit, so `low_oneway_space` is + // only called when necessary.) + let oneway_spam_detected = + is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid); + Ok((found_off, oneway_spam_detected)) }
diff --git a/drivers/android/binder/rust_binder_main.rs b/drivers/android/binder/rust_binder_main.rs index aa5f2a7..0140106 100644 --- a/drivers/android/binder/rust_binder_main.rs +++ b/drivers/android/binder/rust_binder_main.rs
@@ -306,7 +306,7 @@ fn init(_module: &'static kernel::ThisModule) -> Result<Self> { /// Makes the inner type Sync. #[repr(transparent)] pub struct AssertSync<T>(T); -// SAFETY: Used only to insert `file_operations` into a global, which is safe. +// SAFETY: Used only to insert C bindings types into globals, which is safe. unsafe impl<T> Sync for AssertSync<T> {} /// File operations that rust_binderfs.c can use.
diff --git a/drivers/android/binder/thread.rs b/drivers/android/binder/thread.rs index 0b62d24b..c004214 100644 --- a/drivers/android/binder/thread.rs +++ b/drivers/android/binder/thread.rs
@@ -1015,12 +1015,9 @@ pub(crate) fn copy_transaction_data( // Copy offsets if there are any. if offsets_size > 0 { - { - let mut reader = - UserSlice::new(UserPtr::from_addr(trd_data_ptr.offsets as _), offsets_size) - .reader(); - alloc.copy_into(&mut reader, aligned_data_size, offsets_size)?; - } + let mut offsets_reader = + UserSlice::new(UserPtr::from_addr(trd_data_ptr.offsets as _), offsets_size) + .reader(); let offsets_start = aligned_data_size; let offsets_end = aligned_data_size + offsets_size; @@ -1041,11 +1038,9 @@ pub(crate) fn copy_transaction_data( .step_by(size_of::<u64>()) .enumerate() { - let offset: usize = view - .alloc - .read::<u64>(index_offset)? - .try_into() - .map_err(|_| EINVAL)?; + let offset = offsets_reader.read::<u64>()?; + view.alloc.write(index_offset, &offset)?; + let offset: usize = offset.try_into().map_err(|_| EINVAL)?; if offset < end_of_previous_object || !is_aligned(offset, size_of::<u32>()) { pr_warn!("Got transaction with invalid offset.");
diff --git a/drivers/auxdisplay/lcd2s.c b/drivers/auxdisplay/lcd2s.c index defb057..c7a9627 100644 --- a/drivers/auxdisplay/lcd2s.c +++ b/drivers/auxdisplay/lcd2s.c
@@ -99,8 +99,13 @@ static int lcd2s_print(struct charlcd *lcd, int c) { struct lcd2s_data *lcd2s = lcd->drvdata; u8 buf[2] = { LCD2S_CMD_WRITE, c }; + int ret; - lcd2s_i2c_master_send(lcd2s->i2c, buf, sizeof(buf)); + ret = lcd2s_i2c_master_send(lcd2s->i2c, buf, sizeof(buf)); + if (ret < 0) + return ret; + if (ret != sizeof(buf)) + return -EIO; return 0; } @@ -108,9 +113,13 @@ static int lcd2s_gotoxy(struct charlcd *lcd, unsigned int x, unsigned int y) { struct lcd2s_data *lcd2s = lcd->drvdata; u8 buf[3] = { LCD2S_CMD_CUR_POS, y + 1, x + 1 }; + int ret; - lcd2s_i2c_master_send(lcd2s->i2c, buf, sizeof(buf)); - + ret = lcd2s_i2c_master_send(lcd2s->i2c, buf, sizeof(buf)); + if (ret < 0) + return ret; + if (ret != sizeof(buf)) + return -EIO; return 0; }
diff --git a/drivers/auxdisplay/line-display.c b/drivers/auxdisplay/line-display.c index 81b4aac..fb6d9294 100644 --- a/drivers/auxdisplay/line-display.c +++ b/drivers/auxdisplay/line-display.c
@@ -365,7 +365,7 @@ static DEFINE_IDA(linedisp_id); static void linedisp_release(struct device *dev) { - struct linedisp *linedisp = to_linedisp(dev); + struct linedisp *linedisp = container_of(dev, struct linedisp, dev); kfree(linedisp->map); kfree(linedisp->message);
diff --git a/drivers/base/base.h b/drivers/base/base.h index 79d031d..1af95ac 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h
@@ -179,19 +179,10 @@ void device_release_driver_internal(struct device *dev, const struct device_driv void driver_detach(const struct device_driver *drv); void driver_deferred_probe_del(struct device *dev); void device_set_deferred_probe_reason(const struct device *dev, struct va_format *vaf); -static inline int driver_match_device_locked(const struct device_driver *drv, - struct device *dev) -{ - device_lock_assert(dev); - - return drv->bus->match ? drv->bus->match(dev, drv) : 1; -} - static inline int driver_match_device(const struct device_driver *drv, struct device *dev) { - guard(device)(dev); - return driver_match_device_locked(drv, dev); + return drv->bus->match ? drv->bus->match(dev, drv) : 1; } static inline void dev_sync_state(struct device *dev)
diff --git a/drivers/base/bus.c b/drivers/base/bus.c index bb61d8a..8b6722f 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c
@@ -504,6 +504,36 @@ int bus_for_each_drv(const struct bus_type *bus, struct device_driver *start, } EXPORT_SYMBOL_GPL(bus_for_each_drv); +static ssize_t driver_override_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + + ret = __device_set_driver_override(dev, buf, count); + if (ret) + return ret; + + return count; +} + +static ssize_t driver_override_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + guard(spinlock)(&dev->driver_override.lock); + return sysfs_emit(buf, "%s\n", dev->driver_override.name); +} +static DEVICE_ATTR_RW(driver_override); + +static struct attribute *driver_override_dev_attrs[] = { + &dev_attr_driver_override.attr, + NULL, +}; + +static const struct attribute_group driver_override_dev_group = { + .attrs = driver_override_dev_attrs, +}; + /** * bus_add_device - add device to bus * @dev: device being added @@ -537,9 +567,15 @@ int bus_add_device(struct device *dev) if (error) goto out_put; + if (dev->bus->driver_override) { + error = device_add_group(dev, &driver_override_dev_group); + if (error) + goto out_groups; + } + error = sysfs_create_link(&sp->devices_kset->kobj, &dev->kobj, dev_name(dev)); if (error) - goto out_groups; + goto out_override; error = sysfs_create_link(&dev->kobj, &sp->subsys.kobj, "subsystem"); if (error) @@ -550,6 +586,9 @@ int bus_add_device(struct device *dev) out_subsys: sysfs_remove_link(&sp->devices_kset->kobj, dev_name(dev)); +out_override: + if (dev->bus->driver_override) + device_remove_group(dev, &driver_override_dev_group); out_groups: device_remove_groups(dev, sp->bus->dev_groups); out_put: @@ -607,6 +646,8 @@ void bus_remove_device(struct device *dev) sysfs_remove_link(&dev->kobj, "subsystem"); sysfs_remove_link(&sp->devices_kset->kobj, dev_name(dev)); + if (dev->bus->driver_override) + device_remove_group(dev, &driver_override_dev_group); device_remove_groups(dev, dev->bus->dev_groups); if (klist_node_attached(&dev->p->knode_bus)) klist_del(&dev->p->knode_bus);
diff --git a/drivers/base/core.c b/drivers/base/core.c index 791f9e4..09b98f0 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c
@@ -2556,6 +2556,7 @@ static void device_release(struct kobject *kobj) devres_release_all(dev); kfree(dev->dma_range_map); + kfree(dev->driver_override.name); if (dev->release) dev->release(dev); @@ -3159,6 +3160,7 @@ void device_initialize(struct device *dev) kobject_init(&dev->kobj, &device_ktype); INIT_LIST_HEAD(&dev->dma_pools); mutex_init(&dev->mutex); + spin_lock_init(&dev->driver_override.lock); lockdep_set_novalidate_class(&dev->mutex); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head);
diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 0354f20..37c7e54 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c
@@ -381,6 +381,66 @@ static void __exit deferred_probe_exit(void) } __exitcall(deferred_probe_exit); +int __device_set_driver_override(struct device *dev, const char *s, size_t len) +{ + const char *new, *old; + char *cp; + + if (!s) + return -EINVAL; + + /* + * The stored value will be used in sysfs show callback (sysfs_emit()), + * which has a length limit of PAGE_SIZE and adds a trailing newline. + * Thus we can store one character less to avoid truncation during sysfs + * show. + */ + if (len >= (PAGE_SIZE - 1)) + return -EINVAL; + + /* + * Compute the real length of the string in case userspace sends us a + * bunch of \0 characters like python likes to do. + */ + len = strlen(s); + + if (!len) { + /* Empty string passed - clear override */ + spin_lock(&dev->driver_override.lock); + old = dev->driver_override.name; + dev->driver_override.name = NULL; + spin_unlock(&dev->driver_override.lock); + kfree(old); + + return 0; + } + + cp = strnchr(s, len, '\n'); + if (cp) + len = cp - s; + + new = kstrndup(s, len, GFP_KERNEL); + if (!new) + return -ENOMEM; + + spin_lock(&dev->driver_override.lock); + old = dev->driver_override.name; + if (cp != s) { + dev->driver_override.name = new; + spin_unlock(&dev->driver_override.lock); + } else { + /* "\n" passed - clear override */ + dev->driver_override.name = NULL; + spin_unlock(&dev->driver_override.lock); + + kfree(new); + } + kfree(old); + + return 0; +} +EXPORT_SYMBOL_GPL(__device_set_driver_override); + /** * device_is_bound() - Check if device is bound to a driver * @dev: device to check @@ -928,7 +988,7 @@ static int __device_attach_driver(struct device_driver *drv, void *_data) bool async_allowed; int ret; - ret = driver_match_device_locked(drv, dev); + ret = driver_match_device(drv, dev); if (ret == 0) { /* no match */ return 0;
diff --git a/drivers/base/platform.c b/drivers/base/platform.c index b45d41b..d44591d 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c
@@ -603,7 +603,6 @@ static void platform_device_release(struct device *dev) kfree(pa->pdev.dev.platform_data); kfree(pa->pdev.mfd_cell); kfree(pa->pdev.resource); - kfree(pa->pdev.driver_override); kfree(pa); } @@ -1306,38 +1305,9 @@ static ssize_t numa_node_show(struct device *dev, } static DEVICE_ATTR_RO(numa_node); -static ssize_t driver_override_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct platform_device *pdev = to_platform_device(dev); - ssize_t len; - - device_lock(dev); - len = sysfs_emit(buf, "%s\n", pdev->driver_override); - device_unlock(dev); - - return len; -} - -static ssize_t driver_override_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct platform_device *pdev = to_platform_device(dev); - int ret; - - ret = driver_set_override(dev, &pdev->driver_override, buf, count); - if (ret) - return ret; - - return count; -} -static DEVICE_ATTR_RW(driver_override); - static struct attribute *platform_dev_attrs[] = { &dev_attr_modalias.attr, &dev_attr_numa_node.attr, - &dev_attr_driver_override.attr, NULL, }; @@ -1377,10 +1347,12 @@ static int platform_match(struct device *dev, const struct device_driver *drv) { struct platform_device *pdev = to_platform_device(dev); struct platform_driver *pdrv = to_platform_driver(drv); + int ret; /* When driver_override is set, only bind to the matching driver */ - if (pdev->driver_override) - return !strcmp(pdev->driver_override, drv->name); + ret = device_match_driver_override(dev, drv); + if (ret >= 0) + return ret; /* Attempt an OF style match first */ if (of_driver_match_device(dev, drv)) @@ -1516,6 +1488,7 @@ static const struct dev_pm_ops platform_dev_pm_ops = { const struct bus_type platform_bus_type = { .name = "platform", .dev_groups = platform_dev_groups, + .driver_override = true, .match = platform_match, .uevent = platform_uevent, .probe = platform_probe,
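Per the platform_bus_type change above, any bus can now opt in to the shared override handling: set .driver_override in its bus_type and consult device_match_driver_override() first in its match callback. A hedged sketch for a hypothetical bus (demo_* names are assumptions):

static int demo_bus_match(struct device *dev, const struct device_driver *drv)
{
	int ret;

	/* >= 0: an override string decides the match outright;
	 * < 0: no override set, fall back to normal matching */
	ret = device_match_driver_override(dev, drv);
	if (ret >= 0)
		return ret;

	return !strcmp(dev_name(dev), drv->name);
}

static const struct bus_type demo_bus_type = {
	.name = "demo",
	.driver_override = true,	/* opt in: the core creates the sysfs file */
	.match = demo_bus_match,
};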
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 0ee8ea9..335288e 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c
@@ -1895,6 +1895,7 @@ void pm_runtime_reinit(struct device *dev) void pm_runtime_remove(struct device *dev) { __pm_runtime_disable(dev, false); + flush_work(&dev->power.work); pm_runtime_reinit(dev); }
diff --git a/drivers/base/property.c b/drivers/base/property.c index 6a638605..8d9a34b 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c
@@ -797,7 +797,18 @@ struct fwnode_handle * fwnode_get_next_child_node(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { - return fwnode_call_ptr_op(fwnode, get_next_child_node, child); + struct fwnode_handle *next; + + if (IS_ERR_OR_NULL(fwnode)) + return NULL; + + /* Try to find a child in primary fwnode */ + next = fwnode_call_ptr_op(fwnode, get_next_child_node, child); + if (next) + return next; + + /* When no more children in primary, continue with secondary */ + return fwnode_call_ptr_op(fwnode->secondary, get_next_child_node, child); } EXPORT_SYMBOL_GPL(fwnode_get_next_child_node); @@ -841,19 +852,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_next_available_child_node); struct fwnode_handle *device_get_next_child_node(const struct device *dev, struct fwnode_handle *child) { - const struct fwnode_handle *fwnode = dev_fwnode(dev); - struct fwnode_handle *next; - - if (IS_ERR_OR_NULL(fwnode)) - return NULL; - - /* Try to find a child in primary fwnode */ - next = fwnode_get_next_child_node(fwnode, child); - if (next) - return next; - - /* When no more children in primary, continue with secondary */ - return fwnode_get_next_child_node(fwnode->secondary, child); + return fwnode_get_next_child_node(dev_fwnode(dev), child); } EXPORT_SYMBOL_GPL(device_get_next_child_node);
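Callers keep using the same iterator; only the primary-then-secondary fallback moved down a level, so it now also applies when walking a bare fwnode rather than a device. A sketch of the usual iteration (process_child() is a hypothetical consumer):

static void demo_walk_children(const struct fwnode_handle *fwnode)
{
	struct fwnode_handle *child = NULL;

	while ((child = fwnode_get_next_child_node(fwnode, child))) {
		/* visits primary children first, then the secondary
		 * fwnode's children (e.g. software nodes) */
		process_child(child);
	}
}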
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 607c124..e388b19 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c
@@ -1545,6 +1545,7 @@ static int _regmap_select_page(struct regmap *map, unsigned int *reg, unsigned int val_num) { void *orig_work_buf; + unsigned int selector_reg; unsigned int win_offset; unsigned int win_page; bool page_chg; @@ -1563,10 +1564,31 @@ static int _regmap_select_page(struct regmap *map, unsigned int *reg, return -EINVAL; } - /* It is possible to have selector register inside data window. In that case, selector register is located on every page and it needs no page switching, when accessed alone. */ + /* + * Calculate the address of the selector register in the corresponding + * data window if it is located on every page. + */ + page_chg = in_range(range->selector_reg, range->window_start, range->window_len); + if (page_chg) + selector_reg = range->range_min + win_page * range->window_len + + range->selector_reg - range->window_start; + + /* + * It is possible to have the selector register inside the data window. + * In that case, the selector register is located on every page and + * needs no page switching when accessed alone. + * + * Nevertheless, we should synchronize the cache values for it. + * This can't be properly achieved if the selector register is + * the first and the only one to be read inside the data window. + * That's why we update it in that case as well. + * + * However, we specifically avoid updating it for the default page, + * when it's overlapped with the real data window, to prevent + * infinite looping. + */ if (val_num > 1 || + (page_chg && selector_reg != range->selector_reg) || range->window_start + win_offset != range->selector_reg) { /* Use separate work_buf during page switching */ orig_work_buf = map->work_buf; @@ -1575,7 +1597,7 @@ static int _regmap_select_page(struct regmap *map, unsigned int *reg, ret = _regmap_update_bits(map, range->selector_reg, range->selector_mask, win_page << range->selector_shift, - &page_chg, false); + NULL, false); map->work_buf = orig_work_buf;
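A worked instance of the selector-alias arithmetic introduced above (all register values hypothetical):

static unsigned int demo_selector_alias(void)
{
	const unsigned int range_min = 0x100;	/* first paged register */
	const unsigned int window_start = 0x00;	/* window offset within a page */
	const unsigned int window_len = 0x80;	/* one page worth of registers */
	const unsigned int selector_reg = 0x7f;	/* inside the window */
	const unsigned int win_page = 2;

	/* same formula as in _regmap_select_page():
	 * 0x100 + 2 * 0x80 + (0x7f - 0x00) = 0x27f,
	 * the page-2 alias of the selector register */
	return range_min + win_page * window_len + selector_reg - window_start;
}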
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 742b290..b3dbf6c 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c
@@ -483,38 +483,20 @@ void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i) int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i) { - struct lru_cache *al = device->act_log; /* for bios crossing activity log extent boundaries, * we may need to activate two extents in one go */ unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); - unsigned nr_al_extents; - unsigned available_update_slots; unsigned enr; - D_ASSERT(device, first <= last); - - nr_al_extents = 1 + last - first; /* worst case: all touched extends are cold. */ - available_update_slots = min(al->nr_elements - al->used, - al->max_pending_changes - al->pending_changes); - - /* We want all necessary updates for a given request within the same transaction - * We could first check how many updates are *actually* needed, - * and use that instead of the worst-case nr_al_extents */ - if (available_update_slots < nr_al_extents) { - /* Too many activity log extents are currently "hot". - * - * If we have accumulated pending changes already, - * we made progress. - * - * If we cannot get even a single pending change through, - * stop the fast path until we made some progress, - * or requests to "cold" extents could be starved. */ - if (!al->pending_changes) - __set_bit(__LC_STARVING, &device->act_log->flags); - return -ENOBUFS; + if (i->partially_in_al_next_enr) { + D_ASSERT(device, first < i->partially_in_al_next_enr); + D_ASSERT(device, last >= i->partially_in_al_next_enr); + first = i->partially_in_al_next_enr; } + D_ASSERT(device, first <= last); + /* Is resync active in this area? */ for (enr = first; enr <= last; enr++) { struct lc_element *tmp; @@ -529,14 +511,21 @@ int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval * } } - /* Checkout the refcounts. - * Given that we checked for available elements and update slots above, - * this has to be successful. */ + /* Try to checkout the refcounts. */ for (enr = first; enr <= last; enr++) { struct lc_element *al_ext; al_ext = lc_get_cumulative(device->act_log, enr); - if (!al_ext) - drbd_info(device, "LOGIC BUG for enr=%u\n", enr); + + if (!al_ext) { + /* Did not work. We may have exhausted the possible + * changes per transaction. Or raced with someone + * "locking" it against changes. + * Remember where to continue from. + */ + if (enr > first) + i->partially_in_al_next_enr = enr; + return -ENOBUFS; + } } return 0; } @@ -556,7 +545,11 @@ void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i) for (enr = first; enr <= last; enr++) { extent = lc_find(device->act_log, enr); - if (!extent) { + /* Yes, this masks a bug elsewhere. However, during normal + * operation this is harmless, so no need to crash the kernel + * by the BUG_ON(refcount == 0) in lc_put(). + */ + if (!extent || extent->refcnt == 0) { drbd_err(device, "al_complete_io() called on inactive extent %u\n", enr); continue; }
diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index 366489b..5d3213b 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h
@@ -8,12 +8,15 @@ struct drbd_interval { struct rb_node rb; sector_t sector; /* start sector of the interval */ - unsigned int size; /* size in bytes */ sector_t end; /* highest interval end in subtree */ + unsigned int size; /* size in bytes */ unsigned int local:1 /* local or remote request? */; unsigned int waiting:1; /* someone is waiting for completion */ unsigned int completed:1; /* this has been completed already; * ignore for conflict detection */ + + /* to resume a partially successful drbd_al_begin_io_nonblock(); */ + unsigned int partially_in_al_next_enr; }; static inline void drbd_clear_interval(struct drbd_interval *i)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b8f0edd..200d464 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c
@@ -32,6 +32,7 @@ #include <linux/memcontrol.h> #include <linux/mm_inline.h> #include <linux/slab.h> +#include <linux/string.h> #include <linux/random.h> #include <linux/reboot.h> #include <linux/notifier.h> @@ -732,9 +733,9 @@ int drbd_send_sync_param(struct drbd_peer_device *peer_device) } if (apv >= 88) - strcpy(p->verify_alg, nc->verify_alg); + strscpy(p->verify_alg, nc->verify_alg); if (apv >= 89) - strcpy(p->csums_alg, nc->csums_alg); + strscpy(p->csums_alg, nc->csums_alg); rcu_read_unlock(); return drbd_send_command(peer_device, sock, cmd, size, NULL, 0); @@ -745,6 +746,7 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm struct drbd_socket *sock; struct p_protocol *p; struct net_conf *nc; + size_t integrity_alg_len; int size, cf; sock = &connection->data; @@ -762,8 +764,10 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm } size = sizeof(*p); - if (connection->agreed_pro_version >= 87) - size += strlen(nc->integrity_alg) + 1; + if (connection->agreed_pro_version >= 87) { + integrity_alg_len = strlen(nc->integrity_alg) + 1; + size += integrity_alg_len; + } p->protocol = cpu_to_be32(nc->wire_protocol); p->after_sb_0p = cpu_to_be32(nc->after_sb_0p); @@ -778,7 +782,7 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm p->conn_flags = cpu_to_be32(cf); if (connection->agreed_pro_version >= 87) - strcpy(p->integrity_alg, nc->integrity_alg); + strscpy(p->integrity_alg, nc->integrity_alg, integrity_alg_len); rcu_read_unlock(); return __conn_send_command(connection, sock, cmd, size, NULL, 0);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 3cfef45..58b95bf 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c
@@ -3801,14 +3801,14 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i *new_net_conf = *old_net_conf; if (verify_tfm) { - strcpy(new_net_conf->verify_alg, p->verify_alg); + strscpy(new_net_conf->verify_alg, p->verify_alg); new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1; crypto_free_shash(peer_device->connection->verify_tfm); peer_device->connection->verify_tfm = verify_tfm; drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg); } if (csums_tfm) { - strcpy(new_net_conf->csums_alg, p->csums_alg); + strscpy(new_net_conf->csums_alg, p->csums_alg); new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1; crypto_free_shash(peer_device->connection->csums_tfm); peer_device->connection->csums_tfm = csums_tfm;
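The two-argument strscpy() form used in these drbd conversions infers the bound from the destination array at compile time, always NUL-terminates, and reports truncation instead of silently overrunning as strcpy() could. A hedged sketch (the struct and its field size are hypothetical stand-ins for the wire-packet layouts above):

struct demo_params {
	char verify_alg[64];
};

static void demo_copy_alg(struct demo_params *p, const char *name)
{
	/* bound comes from sizeof(p->verify_alg); returns -E2BIG on truncation */
	if (strscpy(p->verify_alg, name) < 0)
		pr_warn("verify_alg \"%s\" truncated\n", name);
}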
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d15826f..70f75ef 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c
@@ -621,7 +621,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case READ_COMPLETED_WITH_ERROR: - drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size); + drbd_set_out_of_sync(first_peer_device(device), + req->i.sector, req->i.size); drbd_report_io_error(device, req); __drbd_chk_io_error(device, DRBD_READ_ERROR); fallthrough;
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 004f367..63aeb7a 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c
@@ -4443,7 +4443,9 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, /* Skip partition scan if disabled by user */ if (ub->dev_info.flags & UBLK_F_NO_AUTO_PART_SCAN) { - clear_bit(GD_SUPPRESS_PART_SCAN, &disk->state); + /* Not clear for unprivileged daemons, see comment above */ + if (!ub->unprivileged_daemons) + clear_bit(GD_SUPPRESS_PART_SCAN, &disk->state); } else { /* Schedule async partition scan for trusted daemons */ if (!ub->unprivileged_daemons) @@ -5006,15 +5008,22 @@ static int ublk_ctrl_get_features(const struct ublksrv_ctrl_cmd *header) return 0; } -static void ublk_ctrl_set_size(struct ublk_device *ub, const struct ublksrv_ctrl_cmd *header) +static int ublk_ctrl_set_size(struct ublk_device *ub, const struct ublksrv_ctrl_cmd *header) { struct ublk_param_basic *p = &ub->params.basic; u64 new_size = header->data[0]; + int ret = 0; mutex_lock(&ub->mutex); + if (!ub->ub_disk) { + ret = -ENODEV; + goto out; + } p->dev_sectors = new_size; set_capacity_and_notify(ub->ub_disk, p->dev_sectors); +out: mutex_unlock(&ub->mutex); + return ret; } struct count_busy { @@ -5335,8 +5344,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, ret = ublk_ctrl_end_recovery(ub, &header); break; case UBLK_CMD_UPDATE_SIZE: - ublk_ctrl_set_size(ub, &header); - ret = 0; + ret = ublk_ctrl_set_size(ub, &header); break; case UBLK_CMD_QUIESCE_DEV: ret = ublk_ctrl_quiesce_dev(ub, &header);
diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c index 65a4026..51c0433 100644 --- a/drivers/block/zloop.c +++ b/drivers/block/zloop.c
@@ -542,6 +542,21 @@ static void zloop_rw(struct zloop_cmd *cmd) zloop_put_cmd(cmd); } +/* + * Sync the entire FS containing the zone files instead of walking all files. + */ +static int zloop_flush(struct zloop_device *zlo) +{ + struct super_block *sb = file_inode(zlo->data_dir)->i_sb; + int ret; + + down_read(&sb->s_umount); + ret = sync_filesystem(sb); + up_read(&sb->s_umount); + + return ret; +} + static void zloop_handle_cmd(struct zloop_cmd *cmd) { struct request *rq = blk_mq_rq_from_pdu(cmd); @@ -562,11 +577,7 @@ static void zloop_handle_cmd(struct zloop_cmd *cmd) zloop_rw(cmd); return; case REQ_OP_FLUSH: - /* - * Sync the entire FS containing the zone files instead of - * walking all files - */ - cmd->ret = sync_filesystem(file_inode(zlo->data_dir)->i_sb); + cmd->ret = zloop_flush(zlo); break; case REQ_OP_ZONE_RESET: cmd->ret = zloop_reset_zone(zlo, rq_zone_no(rq)); @@ -981,7 +992,8 @@ static int zloop_ctl_add(struct zloop_options *opts) struct queue_limits lim = { .max_hw_sectors = SZ_1M >> SECTOR_SHIFT, .chunk_sectors = opts->zone_size, - .features = BLK_FEAT_ZONED, + .features = BLK_FEAT_ZONED | BLK_FEAT_WRITE_CACHE, + }; unsigned int nr_zones, i, j; struct zloop_device *zlo; @@ -1162,7 +1174,12 @@ static int zloop_ctl_remove(struct zloop_options *opts) int ret; if (!(opts->mask & ZLOOP_OPT_ID)) { - pr_err("No ID specified\n"); + pr_err("No ID specified for remove\n"); + return -EINVAL; + } + + if (opts->mask & ~ZLOOP_OPT_ID) { + pr_err("Invalid option specified for remove\n"); return -EINVAL; }
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index bca3340..af67937 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c
@@ -549,7 +549,7 @@ static ssize_t bd_stat_show(struct device *dev, struct device_attribute *attr, return ret; } -static ssize_t writeback_compressed_store(struct device *dev, +static ssize_t compressed_writeback_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -564,12 +564,12 @@ static ssize_t writeback_compressed_store(struct device *dev, return -EBUSY; } - zram->wb_compressed = val; + zram->compressed_wb = val; return len; } -static ssize_t writeback_compressed_show(struct device *dev, +static ssize_t compressed_writeback_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -577,7 +577,7 @@ static ssize_t writeback_compressed_show(struct device *dev, struct zram *zram = dev_to_zram(dev); guard(rwsem_read)(&zram->dev_lock); - val = zram->wb_compressed; + val = zram->compressed_wb; return sysfs_emit(buf, "%d\n", val); } @@ -917,9 +917,8 @@ static void zram_account_writeback_submit(struct zram *zram) static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) { - u32 size, index = req->pps->index; - int err, prio; - bool huge; + u32 index = req->pps->index; + int err; err = blk_status_to_errno(req->bio.bi_status); if (err) { @@ -946,28 +945,13 @@ static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) goto out; } - if (zram->wb_compressed) { - /* - * ZRAM_WB slots get freed, we need to preserve data required - * for read decompression. - */ - size = get_slot_size(zram, index); - prio = get_slot_comp_priority(zram, index); - huge = test_slot_flag(zram, index, ZRAM_HUGE); - } - - slot_free(zram, index); - set_slot_flag(zram, index, ZRAM_WB); + clear_slot_flag(zram, index, ZRAM_IDLE); + if (test_slot_flag(zram, index, ZRAM_HUGE)) + atomic64_dec(&zram->stats.huge_pages); + atomic64_sub(get_slot_size(zram, index), &zram->stats.compr_data_size); + zs_free(zram->mem_pool, get_slot_handle(zram, index)); set_slot_handle(zram, index, req->blk_idx); - - if (zram->wb_compressed) { - if (huge) - set_slot_flag(zram, index, ZRAM_HUGE); - set_slot_size(zram, index, size); - set_slot_comp_priority(zram, index, prio); - } - - atomic64_inc(&zram->stats.pages_stored); + set_slot_flag(zram, index, ZRAM_WB); out: slot_unlock(zram, index); @@ -1100,7 +1084,7 @@ static int zram_writeback_slots(struct zram *zram, */ if (!test_slot_flag(zram, index, ZRAM_PP_SLOT)) goto next; - if (zram->wb_compressed) + if (zram->compressed_wb) err = read_from_zspool_raw(zram, req->page, index); else err = read_from_zspool(zram, req->page, index); @@ -1429,7 +1413,7 @@ static void zram_async_read_endio(struct bio *bio) * * Keep the existing behavior for now. */ - if (zram->wb_compressed == false) { + if (zram->compressed_wb == false) { /* No decompression needed, complete the parent IO */ bio_endio(req->parent); bio_put(bio); @@ -1508,7 +1492,7 @@ static int read_from_bdev_sync(struct zram *zram, struct page *page, u32 index, flush_work(&req.work); destroy_work_on_stack(&req.work); - if (req.error || zram->wb_compressed == false) + if (req.error || zram->compressed_wb == false) return req.error; return decompress_bdev_page(zram, page, index); @@ -2010,8 +1994,13 @@ static void slot_free(struct zram *zram, u32 index) set_slot_comp_priority(zram, index, 0); if (test_slot_flag(zram, index, ZRAM_HUGE)) { + /* + * Writeback completion decrements ->huge_pages but keeps + * ZRAM_HUGE flag for deferred decompression path. 
+ */ + if (!test_slot_flag(zram, index, ZRAM_WB)) + atomic64_dec(&zram->stats.huge_pages); clear_slot_flag(zram, index, ZRAM_HUGE); - atomic64_dec(&zram->stats.huge_pages); } if (test_slot_flag(zram, index, ZRAM_WB)) { @@ -3007,7 +2996,7 @@ static DEVICE_ATTR_WO(writeback); static DEVICE_ATTR_RW(writeback_limit); static DEVICE_ATTR_RW(writeback_limit_enable); static DEVICE_ATTR_RW(writeback_batch_size); -static DEVICE_ATTR_RW(writeback_compressed); +static DEVICE_ATTR_RW(compressed_writeback); #endif #ifdef CONFIG_ZRAM_MULTI_COMP static DEVICE_ATTR_RW(recomp_algorithm); @@ -3031,7 +3020,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_writeback_limit.attr, &dev_attr_writeback_limit_enable.attr, &dev_attr_writeback_batch_size.attr, - &dev_attr_writeback_compressed.attr, + &dev_attr_compressed_writeback.attr, #endif &dev_attr_io_stat.attr, &dev_attr_mm_stat.attr, @@ -3091,7 +3080,7 @@ static int zram_add(void) init_rwsem(&zram->dev_lock); #ifdef CONFIG_ZRAM_WRITEBACK zram->wb_batch_size = 32; - zram->wb_compressed = false; + zram->compressed_wb = false; #endif /* gendisk structure */
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 515a72d9..f0de8f8 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h
@@ -133,7 +133,7 @@ struct zram { #ifdef CONFIG_ZRAM_WRITEBACK struct file *backing_dev; bool wb_limit_enable; - bool wb_compressed; + bool compressed_wb; u32 wb_batch_size; u64 bd_wb_limit; struct block_device *bdev;
diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 246b620..ab14689 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c
@@ -251,11 +251,13 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code) bt_dev_err(hdev, "Hardware error 0x%2.2x", code); + hci_req_sync_lock(hdev); + skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { bt_dev_err(hdev, "Reset after hardware error failed (%ld)", PTR_ERR(skb)); - return; + goto unlock; } kfree_skb(skb); @@ -263,18 +265,21 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code) if (IS_ERR(skb)) { bt_dev_err(hdev, "Retrieving Intel exception info failed (%ld)", PTR_ERR(skb)); - return; + goto unlock; } if (skb->len != 13) { bt_dev_err(hdev, "Exception info size mismatch"); kfree_skb(skb); - return; + goto unlock; } bt_dev_err(hdev, "Exception info %s", (char *)(skb->data + 1)); kfree_skb(skb); + +unlock: + hci_req_sync_unlock(hdev); } EXPORT_SYMBOL_GPL(btintel_hw_error);
diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 74f820e..3b06269 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c
@@ -787,6 +787,8 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, */ if (soc_type == QCA_WCN3988) rom_ver = ((soc_ver & 0x00000f00) >> 0x05) | (soc_ver & 0x0000000f); + else if (soc_type == QCA_WCN3998) + rom_ver = ((soc_ver & 0x0000f000) >> 0x07) | (soc_ver & 0x0000000f); else rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f);
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index a1c5eb9..5c535f3 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c
@@ -2376,8 +2376,11 @@ static void btusb_work(struct work_struct *work) if (data->air_mode == HCI_NOTIFY_ENABLE_SCO_CVSD) { if (hdev->voice_setting & 0x0020) { static const int alts[3] = { 2, 4, 5 }; + unsigned int sco_idx; - new_alts = alts[data->sco_num - 1]; + sco_idx = min_t(unsigned int, data->sco_num - 1, + ARRAY_SIZE(alts) - 1); + new_alts = alts[sco_idx]; } else { new_alts = data->sco_num; }
diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index 07cc2a7..a889a66 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c
@@ -109,9 +109,6 @@ static int h4_recv(struct hci_uart *hu, const void *data, int count) { struct h4_struct *h4 = hu->priv; - if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) - return -EUNATCH; - h4->rx_skb = h4_recv_buf(hu, h4->rx_skb, data, count, h4_recv_pkts, ARRAY_SIZE(h4_recv_pkts)); if (IS_ERR(h4->rx_skb)) {
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c index 91acf24..91c96ad 100644 --- a/drivers/bluetooth/hci_ll.c +++ b/drivers/bluetooth/hci_ll.c
@@ -541,6 +541,8 @@ static int download_firmware(struct ll_device *lldev) if (err || !fw->data || !fw->size) { bt_dev_err(lldev->hu.hdev, "request_firmware failed(errno %d) for %s", err, bts_scr_name); + if (!err) + release_firmware(fw); return -EINVAL; } ptr = (void *)fw->data;
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 5b02e7c..bb9f002 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c
@@ -2046,19 +2046,23 @@ static int qca_setup(struct hci_uart *hu) } out: - if (ret && retries < MAX_INIT_RETRIES) { - bt_dev_warn(hdev, "Retry BT power ON:%d", retries); + if (ret) { qca_power_shutdown(hu); - if (hu->serdev) { - serdev_device_close(hu->serdev); - ret = serdev_device_open(hu->serdev); - if (ret) { - bt_dev_err(hdev, "failed to open port"); - return ret; + + if (retries < MAX_INIT_RETRIES) { + bt_dev_warn(hdev, "Retry BT power ON:%d", retries); + if (hu->serdev) { + serdev_device_close(hu->serdev); + ret = serdev_device_open(hu->serdev); + if (ret) { + bt_dev_err(hdev, "failed to open port"); + return ret; + } } + retries++; + goto retry; } - retries++; - goto retry; + return ret; } /* Setup bdaddr */
diff --git a/drivers/bus/simple-pm-bus.c b/drivers/bus/simple-pm-bus.c index 3f00d95..c920bd6 100644 --- a/drivers/bus/simple-pm-bus.c +++ b/drivers/bus/simple-pm-bus.c
@@ -36,7 +36,7 @@ static int simple_pm_bus_probe(struct platform_device *pdev) * that's not listed in simple_pm_bus_of_match. We don't want to do any * of the simple-pm-bus tasks for these devices, so return early. */ - if (pdev->driver_override) + if (device_has_driver_override(&pdev->dev)) return 0; match = of_match_device(dev->driver->of_match_table, dev); @@ -78,7 +78,7 @@ static void simple_pm_bus_remove(struct platform_device *pdev) { const void *data = of_device_get_match_data(&pdev->dev); - if (pdev->driver_override || data) + if (device_has_driver_override(&pdev->dev) || data) return; dev_dbg(&pdev->dev, "%s\n", __func__);
diff --git a/drivers/cache/ax45mp_cache.c b/drivers/cache/ax45mp_cache.c index 1d7dd3d..934c508 100644 --- a/drivers/cache/ax45mp_cache.c +++ b/drivers/cache/ax45mp_cache.c
@@ -178,11 +178,11 @@ static const struct of_device_id ax45mp_cache_ids[] = { static int __init ax45mp_cache_init(void) { - struct device_node *np; struct resource res; int ret; - np = of_find_matching_node(NULL, ax45mp_cache_ids); + struct device_node *np __free(device_node) = + of_find_matching_node(NULL, ax45mp_cache_ids); if (!of_device_is_available(np)) return -ENODEV;
diff --git a/drivers/cache/starfive_starlink_cache.c b/drivers/cache/starfive_starlink_cache.c index 24c7d07..3a25d2d 100644 --- a/drivers/cache/starfive_starlink_cache.c +++ b/drivers/cache/starfive_starlink_cache.c
@@ -102,11 +102,11 @@ static const struct of_device_id starlink_cache_ids[] = { static int __init starlink_cache_init(void) { - struct device_node *np; u32 block_size; int ret; - np = of_find_matching_node(NULL, starlink_cache_ids); + struct device_node *np __free(device_node) = + of_find_matching_node(NULL, starlink_cache_ids); if (!of_device_is_available(np)) return -ENODEV;
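Both cache drivers convert a manually managed of_find_matching_node() result to the scope-based cleanup attribute, so the node reference is dropped automatically on every return path instead of requiring an explicit of_node_put(). A minimal sketch of the pattern, assuming the device_node cleanup class from <linux/of.h>:

#include <linux/cleanup.h>
#include <linux/of.h>

static int __init find_my_node(const struct of_device_id *ids)
{
	/*
	 * __free(device_node) invokes of_node_put() when np goes out
	 * of scope, covering the early return below as well.
	 */
	struct device_node *np __free(device_node) =
		of_find_matching_node(NULL, ids);

	if (!of_device_is_available(np))
		return -ENODEV;

	/* ... use np ... */
	return 0;
}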
diff --git a/drivers/char/ipmi/ipmi_ipmb.c b/drivers/char/ipmi/ipmi_ipmb.c index 3a51e58..2881895 100644 --- a/drivers/char/ipmi/ipmi_ipmb.c +++ b/drivers/char/ipmi/ipmi_ipmb.c
@@ -202,11 +202,16 @@ static int ipmi_ipmb_slave_cb(struct i2c_client *client, break; case I2C_SLAVE_READ_REQUESTED: + *val = 0xff; + ipmi_ipmb_check_msg_done(iidev); + break; + case I2C_SLAVE_STOP: ipmi_ipmb_check_msg_done(iidev); break; case I2C_SLAVE_READ_PROCESSED: + *val = 0xff; break; }
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index c530966..c41f51c 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -602,6 +602,22 @@ static int __ipmi_bmc_register(struct ipmi_smi *intf, static int __scan_channels(struct ipmi_smi *intf, struct ipmi_device_id *id, bool rescan); +static void ipmi_lock_xmit_msgs(struct ipmi_smi *intf, int run_to_completion, + unsigned long *flags) +{ + if (run_to_completion) + return; + spin_lock_irqsave(&intf->xmit_msgs_lock, *flags); +} + +static void ipmi_unlock_xmit_msgs(struct ipmi_smi *intf, int run_to_completion, + unsigned long *flags) +{ + if (run_to_completion) + return; + spin_unlock_irqrestore(&intf->xmit_msgs_lock, *flags); +} + static void free_ipmi_user(struct kref *ref) { struct ipmi_user *user = container_of(ref, struct ipmi_user, refcount); @@ -1869,21 +1885,32 @@ static struct ipmi_smi_msg *smi_add_send_msg(struct ipmi_smi *intf, return smi_msg; } -static void smi_send(struct ipmi_smi *intf, +static int smi_send(struct ipmi_smi *intf, const struct ipmi_smi_handlers *handlers, struct ipmi_smi_msg *smi_msg, int priority) { int run_to_completion = READ_ONCE(intf->run_to_completion); unsigned long flags = 0; + int rv = 0; - if (!run_to_completion) - spin_lock_irqsave(&intf->xmit_msgs_lock, flags); + ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); smi_msg = smi_add_send_msg(intf, smi_msg, priority); - if (!run_to_completion) - spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags); + ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); - if (smi_msg) - handlers->sender(intf->send_info, smi_msg); + if (smi_msg) { + rv = handlers->sender(intf->send_info, smi_msg); + if (rv) { + ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); + intf->curr_msg = NULL; + ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); + /* + * Something may have been added to the transmit + * queue, so schedule a check for that. + */ + queue_work(system_wq, &intf->smi_work); + } + } + return rv; } static bool is_maintenance_mode_cmd(struct kernel_ipmi_msg *msg) @@ -2296,6 +2323,7 @@ static int i_ipmi_request(struct ipmi_user *user, struct ipmi_recv_msg *recv_msg; int run_to_completion = READ_ONCE(intf->run_to_completion); int rv = 0; + bool in_seq_table = false; if (supplied_recv) { recv_msg = supplied_recv; @@ -2349,33 +2377,50 @@ static int i_ipmi_request(struct ipmi_user *user, rv = i_ipmi_req_ipmb(intf, addr, msgid, msg, smi_msg, recv_msg, source_address, source_lun, retries, retry_time_ms); + in_seq_table = true; } else if (is_ipmb_direct_addr(addr)) { rv = i_ipmi_req_ipmb_direct(intf, addr, msgid, msg, smi_msg, recv_msg, source_lun); } else if (is_lan_addr(addr)) { rv = i_ipmi_req_lan(intf, addr, msgid, msg, smi_msg, recv_msg, source_lun, retries, retry_time_ms); + in_seq_table = true; } else { - /* Unknown address type. */ + /* Unknown address type. */ ipmi_inc_stat(intf, sent_invalid_commands); rv = -EINVAL; } - if (rv) { + if (!rv) { + dev_dbg(intf->si_dev, "Send: %*ph\n", + smi_msg->data_size, smi_msg->data); + + rv = smi_send(intf, intf->handlers, smi_msg, priority); + if (rv != IPMI_CC_NO_ERROR) + /* smi_send() returns an IPMI err, return a Linux one. */ + rv = -EIO; + if (rv && in_seq_table) { + /* + * If it's in the sequence table, it will be + * retried later, so ignore errors. + */ + rv = 0; + /* But we need to fix the timeout. 
*/ + intf_start_seq_timer(intf, smi_msg->msgid); + ipmi_free_smi_msg(smi_msg); + smi_msg = NULL; + } + } out_err: + if (!run_to_completion) + mutex_unlock(&intf->users_mutex); + + if (rv) { if (!supplied_smi) ipmi_free_smi_msg(smi_msg); if (!supplied_recv) ipmi_free_recv_msg(recv_msg); - } else { - dev_dbg(intf->si_dev, "Send: %*ph\n", - smi_msg->data_size, smi_msg->data); - - smi_send(intf, intf->handlers, smi_msg, priority); } - if (!run_to_completion) - mutex_unlock(&intf->users_mutex); - return rv; } @@ -3949,12 +3994,12 @@ static int handle_ipmb_get_msg_cmd(struct ipmi_smi *intf, dev_dbg(intf->si_dev, "Invalid command: %*ph\n", msg->data_size, msg->data); - smi_send(intf, intf->handlers, msg, 0); - /* - * We used the message, so return the value that - * causes it to not be freed or queued. - */ - rv = -1; + if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR) + /* + * We used the message, so return the value that + * causes it to not be freed or queued. + */ + rv = -1; } else if (!IS_ERR(recv_msg)) { /* Extract the source address from the data. */ ipmb_addr = (struct ipmi_ipmb_addr *) &recv_msg->addr; @@ -4028,12 +4073,12 @@ static int handle_ipmb_direct_rcv_cmd(struct ipmi_smi *intf, msg->data[4] = IPMI_INVALID_CMD_COMPLETION_CODE; msg->data_size = 5; - smi_send(intf, intf->handlers, msg, 0); - /* - * We used the message, so return the value that - * causes it to not be freed or queued. - */ - rv = -1; + if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR) + /* + * We used the message, so return the value that + * causes it to not be freed or queued. + */ + rv = -1; } else if (!IS_ERR(recv_msg)) { /* Extract the source address from the data. */ daddr = (struct ipmi_ipmb_direct_addr *)&recv_msg->addr; @@ -4173,7 +4218,7 @@ static int handle_lan_get_msg_cmd(struct ipmi_smi *intf, struct ipmi_smi_msg *msg) { struct cmd_rcvr *rcvr; - int rv = 0; + int rv = 0; /* Free by default */ unsigned char netfn; unsigned char cmd; unsigned char chan; @@ -4226,12 +4271,12 @@ static int handle_lan_get_msg_cmd(struct ipmi_smi *intf, dev_dbg(intf->si_dev, "Invalid command: %*ph\n", msg->data_size, msg->data); - smi_send(intf, intf->handlers, msg, 0); - /* - * We used the message, so return the value that - * causes it to not be freed or queued. - */ - rv = -1; + if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR) + /* + * We used the message, so return the value that + * causes it to not be freed or queued. + */ + rv = -1; } else if (!IS_ERR(recv_msg)) { /* Extract the source address from the data. */ lan_addr = (struct ipmi_lan_addr *) &recv_msg->addr; @@ -4824,8 +4869,7 @@ static void smi_work(struct work_struct *t) * message delivery. 
*/ restart: - if (!run_to_completion) - spin_lock_irqsave(&intf->xmit_msgs_lock, flags); + ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); if (intf->curr_msg == NULL && !intf->in_shutdown) { struct list_head *entry = NULL; @@ -4841,8 +4885,7 @@ static void smi_work(struct work_struct *t) intf->curr_msg = newmsg; } } - if (!run_to_completion) - spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags); + ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); if (newmsg) { cc = intf->handlers->sender(intf->send_info, newmsg); @@ -4850,8 +4893,11 @@ static void smi_work(struct work_struct *t) if (newmsg->recv_msg) deliver_err_response(intf, newmsg->recv_msg, cc); - else - ipmi_free_smi_msg(newmsg); + ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); + intf->curr_msg = NULL; + ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); + ipmi_free_smi_msg(newmsg); + newmsg = NULL; goto restart; } } @@ -4919,16 +4965,14 @@ void ipmi_smi_msg_received(struct ipmi_smi *intf, spin_unlock_irqrestore(&intf->waiting_rcv_msgs_lock, flags); - if (!run_to_completion) - spin_lock_irqsave(&intf->xmit_msgs_lock, flags); + ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); /* * We can get an asynchronous event or receive message in addition * to commands we send. */ if (msg == intf->curr_msg) intf->curr_msg = NULL; - if (!run_to_completion) - spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags); + ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); if (run_to_completion) smi_work(&intf->smi_work); @@ -5041,7 +5085,12 @@ static void check_msg_timeout(struct ipmi_smi *intf, struct seq_table *ent, ipmi_inc_stat(intf, retransmitted_ipmb_commands); - smi_send(intf, intf->handlers, smi_msg, 0); + /* If this fails we'll retry later or timeout. */ + if (smi_send(intf, intf->handlers, smi_msg, 0) != IPMI_CC_NO_ERROR) { + /* But fix the timeout. */ + intf_start_seq_timer(intf, smi_msg->msgid); + ipmi_free_smi_msg(smi_msg); + } } else ipmi_free_smi_msg(smi_msg);
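The ipmi_lock_xmit_msgs()/ipmi_unlock_xmit_msgs() helpers introduced above centralize a conditional-locking idiom: when the interface runs to completion (a single-threaded context such as panic handling), taking the spinlock is unnecessary and could deadlock, so both helpers become no-ops and every call site stays symmetric. A generic sketch of the same idiom:

#include <linux/spinlock.h>
#include <linux/types.h>

struct xmit_queue {
	spinlock_t lock;
	bool run_to_completion;	/* true in single-threaded contexts */
};

static void xmit_lock(struct xmit_queue *q, unsigned long *flags)
{
	if (q->run_to_completion)
		return;			/* no concurrency; skip the lock */
	spin_lock_irqsave(&q->lock, *flags);
}

static void xmit_unlock(struct xmit_queue *q, unsigned long *flags)
{
	if (q->run_to_completion)
		return;
	spin_unlock_irqrestore(&q->lock, *flags);
}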
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 6bdf624..4a9e9de 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -809,6 +809,12 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, */ return_hosed_msg(smi_info, IPMI_BUS_ERR); } + if (smi_info->waiting_msg != NULL) { + /* Also handle if there was a message waiting. */ + smi_info->curr_msg = smi_info->waiting_msg; + smi_info->waiting_msg = NULL; + return_hosed_msg(smi_info, IPMI_BUS_ERR); + } smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_HOSED); goto out; } @@ -918,9 +924,14 @@ static int sender(void *send_info, struct ipmi_smi_msg *msg) { struct smi_info *smi_info = send_info; unsigned long flags; + int rv = IPMI_CC_NO_ERROR; debug_timestamp(smi_info, "Enqueue"); + /* + * Check here for run to completion mode. A check under lock is + * later. + */ if (smi_info->si_state == SI_HOSED) return IPMI_BUS_ERR; @@ -934,18 +945,15 @@ static int sender(void *send_info, struct ipmi_smi_msg *msg) } spin_lock_irqsave(&smi_info->si_lock, flags); - /* - * The following two lines don't need to be under the lock for - * the lock's sake, but they do need SMP memory barriers to - * avoid getting things out of order. We are already claiming - * the lock, anyway, so just do it under the lock to avoid the - * ordering problem. - */ - BUG_ON(smi_info->waiting_msg); - smi_info->waiting_msg = msg; - check_start_timer_thread(smi_info); + if (smi_info->si_state == SI_HOSED) { + rv = IPMI_BUS_ERR; + } else { + BUG_ON(smi_info->waiting_msg); + smi_info->waiting_msg = msg; + check_start_timer_thread(smi_info); + } spin_unlock_irqrestore(&smi_info->si_lock, flags); - return IPMI_CC_NO_ERROR; + return rv; } static void set_run_to_completion(void *send_info, bool i_run_to_completion) @@ -1113,7 +1121,9 @@ static void smi_timeout(struct timer_list *t) * SI_USEC_PER_JIFFY); smi_result = smi_event_handler(smi_info, time_diff); - if ((smi_info->io.irq) && (!smi_info->interrupt_disabled)) { + if (smi_info->si_state == SI_HOSED) { + timeout = jiffies + SI_TIMEOUT_HOSED; + } else if ((smi_info->io.irq) && (!smi_info->interrupt_disabled)) { /* Running with interrupts, only do long timeouts. */ timeout = jiffies + SI_TIMEOUT_JIFFIES; smi_inc_stat(smi_info, long_timeouts); @@ -2226,7 +2236,8 @@ static void wait_msg_processed(struct smi_info *smi_info) unsigned long jiffies_now; long time_diff; - while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)) { + while (smi_info->si_state != SI_HOSED && + (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL))) { jiffies_now = jiffies; time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies) * SI_USEC_PER_JIFFY);
diff --git a/drivers/char/ipmi/ipmi_si_ls2k.c b/drivers/char/ipmi/ipmi_si_ls2k.c index 45442c2..4c1da80 100644 --- a/drivers/char/ipmi/ipmi_si_ls2k.c +++ b/drivers/char/ipmi/ipmi_si_ls2k.c
@@ -168,7 +168,7 @@ static void ipmi_ls2k_remove(struct platform_device *pdev) ipmi_si_remove_by_dev(&pdev->dev); } -struct platform_driver ipmi_ls2k_platform_driver = { +static struct platform_driver ipmi_ls2k_platform_driver = { .driver = { .name = "ls2k-ipmi-si", },
diff --git a/drivers/char/random.c b/drivers/char/random.c index dcd002e..7ff4d29 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c
@@ -96,8 +96,7 @@ static ATOMIC_NOTIFIER_HEAD(random_ready_notifier); /* Control how we warn userspace. */ static struct ratelimit_state urandom_warning = RATELIMIT_STATE_INIT_FLAGS("urandom_warning", HZ, 3, RATELIMIT_MSG_ON_RELEASE); -static int ratelimit_disable __read_mostly = - IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM); +static int ratelimit_disable __read_mostly = 0; module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); @@ -168,12 +167,6 @@ int __cold execute_with_initialized_rng(struct notifier_block *nb) return ret; } -#define warn_unseeded_randomness() \ - if (IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM) && !crng_ready()) \ - printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", \ - __func__, (void *)_RET_IP_, crng_init) - - /********************************************************************* * * Fast key erasure RNG, the "crng". @@ -434,7 +427,6 @@ static void _get_random_bytes(void *buf, size_t len) */ void get_random_bytes(void *buf, size_t len) { - warn_unseeded_randomness(); _get_random_bytes(buf, len); } EXPORT_SYMBOL(get_random_bytes); @@ -523,8 +515,6 @@ type get_random_ ##type(void) \ struct batch_ ##type *batch; \ unsigned long next_gen; \ \ - warn_unseeded_randomness(); \ - \ if (!crng_ready()) { \ _get_random_bytes(&ret, sizeof(ret)); \ return ret; \
diff --git a/drivers/clk/imx/clk-imx8qxp.c b/drivers/clk/imx/clk-imx8qxp.c index 3ae1626..c781425 100644 --- a/drivers/clk/imx/clk-imx8qxp.c +++ b/drivers/clk/imx/clk-imx8qxp.c
@@ -346,7 +346,29 @@ static struct platform_driver imx8qxp_clk_driver = { }, .probe = imx8qxp_clk_probe, }; -module_platform_driver(imx8qxp_clk_driver); + +static int __init imx8qxp_clk_init(void) +{ + int ret; + + ret = platform_driver_register(&imx8qxp_clk_driver); + if (ret) + return ret; + + ret = imx_clk_scu_module_init(); + if (ret) + platform_driver_unregister(&imx8qxp_clk_driver); + + return ret; +} +module_init(imx8qxp_clk_init); + +static void __exit imx8qxp_clk_exit(void) +{ + imx_clk_scu_module_exit(); + platform_driver_unregister(&imx8qxp_clk_driver); +} +module_exit(imx8qxp_clk_exit); MODULE_AUTHOR("Aisheng Dong <aisheng.dong@nxp.com>"); MODULE_DESCRIPTION("NXP i.MX8QXP clock driver");
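The imx8qxp change replaces module_platform_driver() with an explicit init/exit pair because module load now performs two registrations; failure of the second must unwind the first, and unload tears down in reverse order. A sketch of the composite-init pattern, with second_stage_init()/exit() as hypothetical stand-ins for imx_clk_scu_module_init()/exit():

#include <linux/module.h>
#include <linux/platform_device.h>

static struct platform_driver first_driver = {
	.driver = { .name = "first-driver" },
};

static int second_stage_init(void) { return 0; }	/* hypothetical */
static void second_stage_exit(void) { }			/* hypothetical */

static int __init my_module_init(void)
{
	int ret = platform_driver_register(&first_driver);

	if (ret)
		return ret;

	ret = second_stage_init();
	if (ret)
		platform_driver_unregister(&first_driver); /* unwind */

	return ret;
}
module_init(my_module_init);

static void __exit my_module_exit(void)
{
	second_stage_exit();		/* reverse order of init */
	platform_driver_unregister(&first_driver);
}
module_exit(my_module_exit);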
diff --git a/drivers/clk/imx/clk-scu.c b/drivers/clk/imx/clk-scu.c index 33e637a..9b33df9 100644 --- a/drivers/clk/imx/clk-scu.c +++ b/drivers/clk/imx/clk-scu.c
@@ -191,6 +191,16 @@ static bool imx_scu_clk_is_valid(u32 rsrc_id) return p != NULL; } +int __init imx_clk_scu_module_init(void) +{ + return platform_driver_register(&imx_clk_scu_driver); +} + +void __exit imx_clk_scu_module_exit(void) +{ + return platform_driver_unregister(&imx_clk_scu_driver); +} + int imx_clk_scu_init(struct device_node *np, const struct imx_clk_scu_rsrc_table *data) { @@ -215,7 +225,7 @@ int imx_clk_scu_init(struct device_node *np, rsrc_table = data; } - return platform_driver_register(&imx_clk_scu_driver); + return 0; } /* @@ -696,8 +706,7 @@ struct clk_hw *imx_clk_scu_alloc_dev(const char *name, if (ret) goto put_device; - ret = driver_set_override(&pdev->dev, &pdev->driver_override, - "imx-scu-clk", strlen("imx-scu-clk")); + ret = device_set_driver_override(&pdev->dev, "imx-scu-clk"); if (ret) goto put_device;
diff --git a/drivers/clk/imx/clk-scu.h b/drivers/clk/imx/clk-scu.h index af7b697..ca82f2c 100644 --- a/drivers/clk/imx/clk-scu.h +++ b/drivers/clk/imx/clk-scu.h
@@ -25,6 +25,8 @@ extern const struct imx_clk_scu_rsrc_table imx_clk_scu_rsrc_imx8dxl; extern const struct imx_clk_scu_rsrc_table imx_clk_scu_rsrc_imx8qxp; extern const struct imx_clk_scu_rsrc_table imx_clk_scu_rsrc_imx8qm; +int __init imx_clk_scu_module_init(void); +void __exit imx_clk_scu_module_exit(void); int imx_clk_scu_init(struct device_node *np, const struct imx_clk_scu_rsrc_table *data); struct clk_hw *imx_scu_of_clk_src_get(struct of_phandle_args *clkspec,
diff --git a/drivers/comedi/comedi_fops.c b/drivers/comedi/comedi_fops.c index 48a8a60..0df9f46 100644 --- a/drivers/comedi/comedi_fops.c +++ b/drivers/comedi/comedi_fops.c
@@ -793,13 +793,15 @@ static void do_become_nonbusy(struct comedi_device *dev, __comedi_clear_subdevice_runflags(s, COMEDI_SRF_RUNNING | COMEDI_SRF_BUSY); spin_unlock_irqrestore(&s->spin_lock, flags); - if (comedi_is_runflags_busy(runflags)) { + if (async) { /* * "Run active" counter was set to 1 when setting up the * command. Decrement it and wait for it to become 0. */ - comedi_put_is_subdevice_running(s); - wait_for_completion(&async->run_complete); + if (comedi_is_runflags_busy(runflags)) { + comedi_put_is_subdevice_running(s); + wait_for_completion(&async->run_complete); + } comedi_buf_reset(s); async->inttrig = NULL; kfree(async->cmd.chanlist);
diff --git a/drivers/comedi/drivers.c b/drivers/comedi/drivers.c index db225a3..5ab96b5 100644 --- a/drivers/comedi/drivers.c +++ b/drivers/comedi/drivers.c
@@ -1063,6 +1063,14 @@ int comedi_device_attach(struct comedi_device *dev, struct comedi_devconfig *it) ret = -EIO; goto out; } + if (IS_ENABLED(CONFIG_LOCKDEP)) { + /* + * dev->spinlock is for private use by the attached low-level + * driver. Reinitialize it to stop lock-dependency tracking + * between attachments to different low-level drivers. + */ + spin_lock_init(&dev->spinlock); + } dev->driver = driv; dev->board_name = dev->board_ptr ? *(const char **)dev->board_ptr : dev->driver->driver_name;
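The comedi change re-initializes a lock that is embedded in a long-lived device but used privately by whichever low-level driver is currently attached; per the rationale in the hunk, this stops lockdep from chaining dependencies recorded under one attachment into the next. A minimal sketch of the idiom, under that same stated assumption:

#include <linux/kernel.h>
#include <linux/spinlock.h>

struct shared_dev {
	spinlock_t lock;	/* private to the attached backend */
};

static void on_attach(struct shared_dev *dev)
{
	/*
	 * Per the rationale above: re-initialize the lock on each
	 * attachment so lockdep does not carry dependency history
	 * across unrelated backends.
	 */
	if (IS_ENABLED(CONFIG_LOCKDEP))
		spin_lock_init(&dev->lock);
}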
diff --git a/drivers/comedi/drivers/dt2815.c b/drivers/comedi/drivers/dt2815.c index 03ba2fd1..d066dc3 100644 --- a/drivers/comedi/drivers/dt2815.c +++ b/drivers/comedi/drivers/dt2815.c
@@ -175,6 +175,18 @@ static int dt2815_attach(struct comedi_device *dev, struct comedi_devconfig *it) ? current_range_type : voltage_range_type; } + /* + * Check if hardware is present before attempting any I/O operations. + * Reading 0xff from status register typically indicates no hardware + * on the bus (floating bus reads as all 1s). + */ + if (inb(dev->iobase + DT2815_STATUS) == 0xff) { + dev_err(dev->class_dev, + "No hardware detected at I/O base 0x%lx\n", + dev->iobase); + return -ENODEV; + } + /* Init the 2815 */ outb(0x00, dev->iobase + DT2815_STATUS); for (i = 0; i < 100; i++) {
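The dt2815 check relies on a property of legacy port I/O: reading an address with no device behind it typically returns 0xff because the floating bus is pulled high. If the device's status register can never legitimately read 0xff, a single read is a cheap presence probe before any writes touch the bus. A sketch under that assumption:

#include <linux/io.h>

static bool board_present(unsigned long iobase, unsigned long status_off)
{
	/* 0xff is assumed impossible for a real status value here. */
	return inb(iobase + status_off) != 0xff;
}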
diff --git a/drivers/comedi/drivers/me4000.c b/drivers/comedi/drivers/me4000.c index 7dd3a00..effe9fd 100644 --- a/drivers/comedi/drivers/me4000.c +++ b/drivers/comedi/drivers/me4000.c
@@ -315,6 +315,18 @@ static int me4000_xilinx_download(struct comedi_device *dev, unsigned int val; unsigned int i; + /* Get data stream length from header. */ + if (size >= 4) { + file_length = (((unsigned int)data[0] & 0xff) << 24) + + (((unsigned int)data[1] & 0xff) << 16) + + (((unsigned int)data[2] & 0xff) << 8) + + ((unsigned int)data[3] & 0xff); + } + if (size < 16 || file_length > size - 16) { + dev_err(dev->class_dev, "Firmware length inconsistency\n"); + return -EINVAL; + } + if (!xilinx_iobase) return -ENODEV; @@ -346,10 +358,6 @@ static int me4000_xilinx_download(struct comedi_device *dev, outl(val, devpriv->plx_regbase + PLX9052_CNTRL); /* Download Xilinx firmware */ - file_length = (((unsigned int)data[0] & 0xff) << 24) + - (((unsigned int)data[1] & 0xff) << 16) + - (((unsigned int)data[2] & 0xff) << 8) + - ((unsigned int)data[3] & 0xff); usleep_range(10, 1000); for (i = 0; i < file_length; i++) {
diff --git a/drivers/comedi/drivers/me_daq.c b/drivers/comedi/drivers/me_daq.c index 076b150..2f2ea02 100644 --- a/drivers/comedi/drivers/me_daq.c +++ b/drivers/comedi/drivers/me_daq.c
@@ -344,6 +344,25 @@ static int me2600_xilinx_download(struct comedi_device *dev, unsigned int file_length; unsigned int i; + /* + * Format of the firmware + * Build longs from the byte-wise coded header + * Byte 1-3: length of the array + * Byte 4-7: version + * Byte 8-11: date + * Byte 12-15: reserved + */ + if (size >= 4) { + file_length = (((unsigned int)data[0] & 0xff) << 24) + + (((unsigned int)data[1] & 0xff) << 16) + + (((unsigned int)data[2] & 0xff) << 8) + + ((unsigned int)data[3] & 0xff); + } + if (size < 16 || file_length > size - 16) { + dev_err(dev->class_dev, "Firmware length inconsistency\n"); + return -EINVAL; + } + /* disable irq's on PLX */ writel(0x00, devpriv->plx_regbase + PLX9052_INTCSR); @@ -358,22 +377,6 @@ static int me2600_xilinx_download(struct comedi_device *dev, sleep(1); /* - * Format of the firmware - * Build longs from the byte-wise coded header - * Byte 1-3: length of the array - * Byte 4-7: version - * Byte 8-11: date - * Byte 12-15: reserved - */ - if (size < 16) - return -EINVAL; - - file_length = (((unsigned int)data[0] & 0xff) << 24) + - (((unsigned int)data[1] & 0xff) << 16) + - (((unsigned int)data[2] & 0xff) << 8) + - ((unsigned int)data[3] & 0xff); - - /* * Loop for writing firmware byte by byte to xilinx * Firmware data start at offset 16 */
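The me4000 and me_daq hunks fix the same class of bug: a length field parsed out of a firmware image was trusted before being checked against the buffer that was actually loaded. Both now reject the image before any hardware I/O. A sketch of the validation they add, for a 16-byte header with a big-endian 32-bit length in bytes 0-3:

#include <linux/errno.h>
#include <linux/types.h>

static int fw_payload_length(const u8 *data, size_t size,
			     unsigned int *file_length)
{
	unsigned int len;

	if (size < 16)			/* header alone needs 16 bytes */
		return -EINVAL;

	len = ((unsigned int)data[0] << 24) |
	      ((unsigned int)data[1] << 16) |
	      ((unsigned int)data[2] << 8) |
	       (unsigned int)data[3];

	if (len > size - 16)		/* claimed payload overruns buffer */
		return -EINVAL;

	*file_length = len;
	return 0;
}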
diff --git a/drivers/comedi/drivers/ni_atmio16d.c b/drivers/comedi/drivers/ni_atmio16d.c index e5e7cc4..b057b3b 100644 --- a/drivers/comedi/drivers/ni_atmio16d.c +++ b/drivers/comedi/drivers/ni_atmio16d.c
@@ -698,7 +698,8 @@ static int atmio16d_attach(struct comedi_device *dev, static void atmio16d_detach(struct comedi_device *dev) { - reset_atmio16d(dev); + if (dev->private) + reset_atmio16d(dev); comedi_legacy_detach(dev); }
diff --git a/drivers/counter/rz-mtu3-cnt.c b/drivers/counter/rz-mtu3-cnt.c index e755d54..7bfb6979 100644 --- a/drivers/counter/rz-mtu3-cnt.c +++ b/drivers/counter/rz-mtu3-cnt.c
@@ -107,9 +107,9 @@ static bool rz_mtu3_is_counter_invalid(struct counter_device *counter, int id) struct rz_mtu3_cnt *const priv = counter_priv(counter); unsigned long tmdr; - pm_runtime_get_sync(priv->ch->dev); + pm_runtime_get_sync(counter->parent); tmdr = rz_mtu3_shared_reg_read(priv->ch, RZ_MTU3_TMDR3); - pm_runtime_put(priv->ch->dev); + pm_runtime_put(counter->parent); if (id == RZ_MTU3_32_BIT_CH && test_bit(RZ_MTU3_TMDR3_LWA, &tmdr)) return false; @@ -165,12 +165,12 @@ static int rz_mtu3_count_read(struct counter_device *counter, if (ret) return ret; - pm_runtime_get_sync(ch->dev); + pm_runtime_get_sync(counter->parent); if (count->id == RZ_MTU3_32_BIT_CH) *val = rz_mtu3_32bit_ch_read(ch, RZ_MTU3_TCNTLW); else *val = rz_mtu3_16bit_ch_read(ch, RZ_MTU3_TCNT); - pm_runtime_put(ch->dev); + pm_runtime_put(counter->parent); mutex_unlock(&priv->lock); return 0; @@ -187,26 +187,26 @@ static int rz_mtu3_count_write(struct counter_device *counter, if (ret) return ret; - pm_runtime_get_sync(ch->dev); + pm_runtime_get_sync(counter->parent); if (count->id == RZ_MTU3_32_BIT_CH) rz_mtu3_32bit_ch_write(ch, RZ_MTU3_TCNTLW, val); else rz_mtu3_16bit_ch_write(ch, RZ_MTU3_TCNT, val); - pm_runtime_put(ch->dev); + pm_runtime_put(counter->parent); mutex_unlock(&priv->lock); return 0; } static int rz_mtu3_count_function_read_helper(struct rz_mtu3_channel *const ch, - struct rz_mtu3_cnt *const priv, + struct counter_device *const counter, enum counter_function *function) { u8 timer_mode; - pm_runtime_get_sync(ch->dev); + pm_runtime_get_sync(counter->parent); timer_mode = rz_mtu3_8bit_ch_read(ch, RZ_MTU3_TMDR1); - pm_runtime_put(ch->dev); + pm_runtime_put(counter->parent); switch (timer_mode & RZ_MTU3_TMDR1_PH_CNT_MODE_MASK) { case RZ_MTU3_TMDR1_PH_CNT_MODE_1: @@ -240,7 +240,7 @@ static int rz_mtu3_count_function_read(struct counter_device *counter, if (ret) return ret; - ret = rz_mtu3_count_function_read_helper(ch, priv, function); + ret = rz_mtu3_count_function_read_helper(ch, counter, function); mutex_unlock(&priv->lock); return ret; @@ -279,9 +279,9 @@ static int rz_mtu3_count_function_write(struct counter_device *counter, return -EINVAL; } - pm_runtime_get_sync(ch->dev); + pm_runtime_get_sync(counter->parent); rz_mtu3_8bit_ch_write(ch, RZ_MTU3_TMDR1, timer_mode); - pm_runtime_put(ch->dev); + pm_runtime_put(counter->parent); mutex_unlock(&priv->lock); return 0; @@ -300,9 +300,9 @@ static int rz_mtu3_count_direction_read(struct counter_device *counter, if (ret) return ret; - pm_runtime_get_sync(ch->dev); + pm_runtime_get_sync(counter->parent); tsr = rz_mtu3_8bit_ch_read(ch, RZ_MTU3_TSR); - pm_runtime_put(ch->dev); + pm_runtime_put(counter->parent); *direction = (tsr & RZ_MTU3_TSR_TCFD) ? 
COUNTER_COUNT_DIRECTION_FORWARD : COUNTER_COUNT_DIRECTION_BACKWARD; @@ -377,14 +377,14 @@ static int rz_mtu3_count_ceiling_write(struct counter_device *counter, return -EINVAL; } - pm_runtime_get_sync(ch->dev); + pm_runtime_get_sync(counter->parent); if (count->id == RZ_MTU3_32_BIT_CH) rz_mtu3_32bit_ch_write(ch, RZ_MTU3_TGRALW, ceiling); else rz_mtu3_16bit_ch_write(ch, RZ_MTU3_TGRA, ceiling); rz_mtu3_8bit_ch_write(ch, RZ_MTU3_TCR, RZ_MTU3_TCR_CCLR_TGRA); - pm_runtime_put(ch->dev); + pm_runtime_put(counter->parent); mutex_unlock(&priv->lock); return 0; @@ -495,25 +495,28 @@ static int rz_mtu3_count_enable_read(struct counter_device *counter, static int rz_mtu3_count_enable_write(struct counter_device *counter, struct counter_count *count, u8 enable) { - struct rz_mtu3_channel *const ch = rz_mtu3_get_ch(counter, count->id); struct rz_mtu3_cnt *const priv = counter_priv(counter); int ret = 0; + mutex_lock(&priv->lock); + + if (priv->count_is_enabled[count->id] == enable) + goto exit; + if (enable) { - mutex_lock(&priv->lock); - pm_runtime_get_sync(ch->dev); + pm_runtime_get_sync(counter->parent); ret = rz_mtu3_initialize_counter(counter, count->id); if (ret == 0) priv->count_is_enabled[count->id] = true; - mutex_unlock(&priv->lock); } else { - mutex_lock(&priv->lock); rz_mtu3_terminate_counter(counter, count->id); priv->count_is_enabled[count->id] = false; - pm_runtime_put(ch->dev); - mutex_unlock(&priv->lock); + pm_runtime_put(counter->parent); } +exit: + mutex_unlock(&priv->lock); + return ret; } @@ -540,9 +543,9 @@ static int rz_mtu3_cascade_counts_enable_get(struct counter_device *counter, if (ret) return ret; - pm_runtime_get_sync(priv->ch->dev); + pm_runtime_get_sync(counter->parent); tmdr = rz_mtu3_shared_reg_read(priv->ch, RZ_MTU3_TMDR3); - pm_runtime_put(priv->ch->dev); + pm_runtime_put(counter->parent); *cascade_enable = test_bit(RZ_MTU3_TMDR3_LWA, &tmdr); mutex_unlock(&priv->lock); @@ -559,10 +562,10 @@ static int rz_mtu3_cascade_counts_enable_set(struct counter_device *counter, if (ret) return ret; - pm_runtime_get_sync(priv->ch->dev); + pm_runtime_get_sync(counter->parent); rz_mtu3_shared_reg_update_bit(priv->ch, RZ_MTU3_TMDR3, RZ_MTU3_TMDR3_LWA, cascade_enable); - pm_runtime_put(priv->ch->dev); + pm_runtime_put(counter->parent); mutex_unlock(&priv->lock); return 0; @@ -579,9 +582,9 @@ static int rz_mtu3_ext_input_phase_clock_select_get(struct counter_device *count if (ret) return ret; - pm_runtime_get_sync(priv->ch->dev); + pm_runtime_get_sync(counter->parent); tmdr = rz_mtu3_shared_reg_read(priv->ch, RZ_MTU3_TMDR3); - pm_runtime_put(priv->ch->dev); + pm_runtime_put(counter->parent); *ext_input_phase_clock_select = test_bit(RZ_MTU3_TMDR3_PHCKSEL, &tmdr); mutex_unlock(&priv->lock); @@ -598,11 +601,11 @@ static int rz_mtu3_ext_input_phase_clock_select_set(struct counter_device *count if (ret) return ret; - pm_runtime_get_sync(priv->ch->dev); + pm_runtime_get_sync(counter->parent); rz_mtu3_shared_reg_update_bit(priv->ch, RZ_MTU3_TMDR3, RZ_MTU3_TMDR3_PHCKSEL, ext_input_phase_clock_select); - pm_runtime_put(priv->ch->dev); + pm_runtime_put(counter->parent); mutex_unlock(&priv->lock); return 0; @@ -640,7 +643,7 @@ static int rz_mtu3_action_read(struct counter_device *counter, if (ret) return ret; - ret = rz_mtu3_count_function_read_helper(ch, priv, &function); + ret = rz_mtu3_count_function_read_helper(ch, counter, &function); if (ret) { mutex_unlock(&priv->lock); return ret;
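The rz-mtu3 conversion consistently brackets register access with runtime-PM get/put on the one device that was registered for runtime PM (counter->parent) instead of ch->dev, keeping references balanced against the right device. A generic sketch of the bracketing pattern, including the error handling that pm_runtime_get_sync() requires:

#include <linux/device.h>
#include <linux/io.h>
#include <linux/pm_runtime.h>

static int read_powered_reg(struct device *dev, void __iomem *reg, u32 *val)
{
	int ret = pm_runtime_get_sync(dev);

	if (ret < 0) {
		/* get_sync bumps the usage count even on failure */
		pm_runtime_put_noidle(dev);
		return ret;
	}

	*val = readl(reg);
	pm_runtime_put(dev);		/* balanced with the get above */
	return 0;
}

pm_runtime_resume_and_get() is the modern spelling that folds the error handling into the get.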
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 277884d..1f79452 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c
@@ -1427,12 +1427,9 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy, * If there is a problem with its frequency table, take it * offline and drop it. */ - if (policy->freq_table_sorted != CPUFREQ_TABLE_SORTED_ASCENDING && - policy->freq_table_sorted != CPUFREQ_TABLE_SORTED_DESCENDING) { - ret = cpufreq_table_validate_and_sort(policy); - if (ret) - goto out_offline_policy; - } + ret = cpufreq_table_validate_and_sort(policy); + if (ret) + goto out_offline_policy; /* related_cpus should at least include policy->cpus. */ cpumask_copy(policy->related_cpus, policy->cpus);
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index e0e8477..df01d33 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -313,6 +313,17 @@ static void cs_start(struct cpufreq_policy *policy) dbs_info->requested_freq = policy->cur; } +static void cs_limits(struct cpufreq_policy *policy) +{ + struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); + + /* + * The limits have changed, so may have the current frequency. Reset + * requested_freq to avoid any unintended outcomes due to the mismatch. + */ + dbs_info->requested_freq = policy->cur; +} + static struct dbs_governor cs_governor = { .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), .kobj_type = { .default_groups = cs_groups }, @@ -322,6 +333,7 @@ static struct dbs_governor cs_governor = { .init = cs_init, .exit = cs_exit, .start = cs_start, + .limits = cs_limits, }; #define CPU_FREQ_GOV_CONSERVATIVE (cs_governor.gov)
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 36eb7ae..86f35e4 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c
@@ -468,13 +468,13 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) /* Failure, so roll back. */ pr_err("initialization failed (dbs_data kobject init error %d)\n", ret); - kobject_put(&dbs_data->attr_set.kobj); - policy->governor_data = NULL; if (!have_governor_per_policy()) gov->gdbs_data = NULL; - gov->exit(dbs_data); + + kobject_put(&dbs_data->attr_set.kobj); + goto free_policy_dbs_info; free_dbs_data: kfree(dbs_data); @@ -563,6 +563,7 @@ EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_stop); void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy) { + struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs; /* Protect gov->gdbs_data against cpufreq_dbs_governor_exit() */ @@ -574,6 +575,8 @@ void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy) mutex_lock(&policy_dbs->update_mutex); cpufreq_policy_apply_limits(policy); gov_update_sample_delay(policy_dbs, 0); + if (gov->limits) + gov->limits(policy); mutex_unlock(&policy_dbs->update_mutex); out:
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 168c23f..1462d59 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h
@@ -138,6 +138,7 @@ struct dbs_governor { int (*init)(struct dbs_data *dbs_data); void (*exit)(struct dbs_data *dbs_data); void (*start)(struct cpufreq_policy *policy); + void (*limits)(struct cpufreq_policy *policy); }; static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy)
diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 7f251daf0..5b364d8 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c
@@ -360,6 +360,10 @@ int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy) if (policy_has_boost_freq(policy)) policy->boost_supported = true; + if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING || + policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_DESCENDING) + return 0; + return set_freq_table_sorted(policy); }
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index a48af35..11c58af 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c
@@ -1476,13 +1476,13 @@ static void __intel_pstate_update_max_freq(struct cpufreq_policy *policy, refresh_frequency_limits(policy); } -static bool intel_pstate_update_max_freq(struct cpudata *cpudata) +static bool intel_pstate_update_max_freq(int cpu) { - struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); if (!policy) return false; - __intel_pstate_update_max_freq(policy, cpudata); + __intel_pstate_update_max_freq(policy, all_cpu_data[cpu]); return true; } @@ -1501,7 +1501,7 @@ static void intel_pstate_update_limits_for_all(void) int cpu; for_each_possible_cpu(cpu) - intel_pstate_update_max_freq(all_cpu_data[cpu]); + intel_pstate_update_max_freq(cpu); mutex_lock(&hybrid_capacity_lock); @@ -1647,8 +1647,8 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, static void update_cpu_qos_request(int cpu, enum freq_qos_req_type type) { struct cpudata *cpudata = all_cpu_data[cpu]; - unsigned int freq = cpudata->pstate.turbo_freq; struct freq_qos_request *req; + unsigned int freq; struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); if (!policy) @@ -1661,6 +1661,8 @@ static void update_cpu_qos_request(int cpu, enum freq_qos_req_type type) if (hwp_active) intel_pstate_get_hwp_cap(cpudata); + freq = cpudata->pstate.turbo_freq; + if (type == FREQ_QOS_MIN) { freq = DIV_ROUND_UP(freq * global.min_perf_pct, 100); } else { @@ -1908,7 +1910,7 @@ static void intel_pstate_notify_work(struct work_struct *work) struct cpudata *cpudata = container_of(to_delayed_work(work), struct cpudata, hwp_notify_work); - if (intel_pstate_update_max_freq(cpudata)) { + if (intel_pstate_update_max_freq(cpudata->cpu)) { /* * The driver will not be unregistered while this function is * running, so update the capacity without acquiring the driver
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 65fbb8e..c7876e9 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c
@@ -359,16 +359,6 @@ noinstr int cpuidle_enter_state(struct cpuidle_device *dev, int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, bool *stop_tick) { - /* - * If there is only a single idle state (or none), there is nothing - * meaningful for the governor to choose. Skip the governor and - * always use state 0 with the tick running. - */ - if (drv->state_count <= 1) { - *stop_tick = false; - return 0; - } - return cpuidle_curr_governor->select(drv, dev, stop_tick); }
diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index 8adc7fe..98d1023 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c
@@ -52,9 +52,10 @@ static int atmel_sha204a_rng_read_nonblocking(struct hwrng *rng, void *data, rng->priv = 0; } else { work_data = kmalloc_obj(*work_data, GFP_ATOMIC); - if (!work_data) + if (!work_data) { + atomic_dec(&i2c_priv->tfm_count); return -ENOMEM; - + } work_data->ctx = i2c_priv; work_data->client = i2c_priv->client;
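The atmel-sha204a fix backs out a counter bump on a late failure: tfm_count was incremented earlier in the path, so the -ENOMEM exit must decrement it again to keep the pairing exact. A minimal sketch of the undo-on-error pattern:

#include <linux/atomic.h>
#include <linux/slab.h>

static int start_request(atomic_t *inflight)
{
	void *work;

	atomic_inc(inflight);

	work = kmalloc(64, GFP_ATOMIC);
	if (!work) {
		atomic_dec(inflight);	/* undo the increment above */
		return -ENOMEM;
	}

	/*
	 * ... hand "work" off to the completion path, which frees it
	 * and performs the matching atomic_dec() ...
	 */
	return 0;
}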
diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index 1673729..78964e1 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c
@@ -3326,9 +3326,10 @@ static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key, if (aligned_len < keylen) return -EOVERFLOW; - hashed_key = kmemdup(key, aligned_len, GFP_KERNEL); + hashed_key = kmalloc(aligned_len, GFP_KERNEL); if (!hashed_key) return -ENOMEM; + memcpy(hashed_key, key, keylen); ret = hash_digest_key(ctx, &keylen, hashed_key, digestsize); if (ret) goto bad_free_key;
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 628c43a..4412220 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c
@@ -441,9 +441,10 @@ static int ahash_setkey(struct crypto_ahash *ahash, if (aligned_len < keylen) return -EOVERFLOW; - hashed_key = kmemdup(key, keylen, GFP_KERNEL); + hashed_key = kmalloc(aligned_len, GFP_KERNEL); if (!hashed_key) return -ENOMEM; + memcpy(hashed_key, key, keylen); ret = hash_digest_key(ctx, &keylen, hashed_key, digestsize); if (ret) goto bad_free_key;
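Both caam hunks fix the same out-of-bounds read: kmemdup(key, aligned_len, ...) copies aligned_len bytes from a buffer that only holds keylen of them. The fix allocates the padded size but copies only what the caller supplied. A sketch of the corrected duplication:

#include <linux/slab.h>
#include <linux/string.h>

static u8 *dup_key_padded(const u8 *key, size_t keylen,
			  size_t aligned_len, gfp_t gfp)
{
	u8 *buf;

	if (aligned_len < keylen)
		return NULL;

	buf = kmalloc(aligned_len, gfp);
	if (!buf)
		return NULL;

	memcpy(buf, key, keylen);	/* never read past the source */
	return buf;
}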
diff --git a/drivers/crypto/ccp/sev-dev-tsm.c b/drivers/crypto/ccp/sev-dev-tsm.c index adc9542..b07ae52 100644 --- a/drivers/crypto/ccp/sev-dev-tsm.c +++ b/drivers/crypto/ccp/sev-dev-tsm.c
@@ -378,9 +378,9 @@ void sev_tsm_init_locked(struct sev_device *sev, void *tio_status_page) return; error_exit: - kfree(t); pr_err("Failed to enable SEV-TIO: ret=%d en=%d initdone=%d SEV=%d\n", ret, t->tio_en, t->tio_init_done, boot_cpu_has(X86_FEATURE_SEV)); + kfree(t); } void sev_tsm_uninit(struct sev_device *sev)
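The sev-dev-tsm fix is a textbook use-after-free in an error path: the pr_err() format arguments dereference the object that the line before had just freed, so the fix simply reorders the two. A sketch of the rule, with struct tio_state as a hypothetical stand-in:

#include <linux/printk.h>
#include <linux/slab.h>

struct tio_state {			/* hypothetical */
	int tio_en;
	int tio_init_done;
};

static void fail_and_free(struct tio_state *t, int ret)
{
	/* Log first: the format arguments read fields of t. */
	pr_err("enable failed: ret=%d en=%d initdone=%d\n",
	       ret, t->tio_en, t->tio_init_done);
	kfree(t);	/* safe: nothing reads t past this point */
}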
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 096f993..aebf4da 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c
@@ -1105,15 +1105,12 @@ struct page *snp_alloc_hv_fixed_pages(unsigned int num_2mb_pages) { struct psp_device *psp_master = psp_get_master_device(); struct snp_hv_fixed_pages_entry *entry; - struct sev_device *sev; unsigned int order; struct page *page; - if (!psp_master || !psp_master->sev_data) + if (!psp_master) return NULL; - sev = psp_master->sev_data; - order = get_order(PMD_SIZE * num_2mb_pages); /* @@ -1126,7 +1123,8 @@ struct page *snp_alloc_hv_fixed_pages(unsigned int num_2mb_pages) * This API uses SNP_INIT_EX to transition allocated pages to HV_Fixed * page state, fail if SNP is already initialized. */ - if (sev->snp_initialized) + if (psp_master->sev_data && + ((struct sev_device *)psp_master->sev_data)->snp_initialized) return NULL; /* Re-use freed pages that match the request */ @@ -1162,7 +1160,7 @@ void snp_free_hv_fixed_pages(struct page *page) struct psp_device *psp_master = psp_get_master_device(); struct snp_hv_fixed_pages_entry *entry, *nentry; - if (!psp_master || !psp_master->sev_data) + if (!psp_master) return; /* @@ -2410,10 +2408,8 @@ static int sev_ioctl_do_snp_platform_status(struct sev_issue_cmd *argp) * in Firmware state on failure. Use snp_reclaim_pages() to * transition either case back to Hypervisor-owned state. */ - if (snp_reclaim_pages(__pa(data), 1, true)) { - snp_leak_pages(__page_to_pfn(status_page), 1); + if (snp_reclaim_pages(__pa(data), 1, true)) return -EFAULT; - } } if (ret)
diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index 329f60a..9214bbf 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c
@@ -332,6 +332,13 @@ static int __init padlock_init(void) if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN)) return -ENODEV; + /* + * Skip family 0x07 and newer used by Zhaoxin processors, + * as the driver's self-tests fail on these CPUs. + */ + if (c->x86 >= 0x07) + return -ENODEV; + /* Register the newly added algorithm module if on * * VIA Nano processor, or else just do as before */ if (c->x86_model < 0x0f) {
diff --git a/drivers/crypto/tegra/tegra-se-aes.c b/drivers/crypto/tegra/tegra-se-aes.c index 0e07d05..9210cce 100644 --- a/drivers/crypto/tegra/tegra-se-aes.c +++ b/drivers/crypto/tegra/tegra-se-aes.c
@@ -529,7 +529,7 @@ static struct tegra_se_alg tegra_aes_algs[] = { .cra_name = "cbc(aes)", .cra_driver_name = "cbc-aes-tegra", .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_aes_ctx), .cra_alignmask = 0xf, @@ -550,7 +550,7 @@ static struct tegra_se_alg tegra_aes_algs[] = { .cra_name = "ecb(aes)", .cra_driver_name = "ecb-aes-tegra", .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_aes_ctx), .cra_alignmask = 0xf, @@ -572,7 +572,7 @@ static struct tegra_se_alg tegra_aes_algs[] = { .cra_name = "ctr(aes)", .cra_driver_name = "ctr-aes-tegra", .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct tegra_aes_ctx), .cra_alignmask = 0xf, @@ -594,6 +594,7 @@ static struct tegra_se_alg tegra_aes_algs[] = { .cra_name = "xts(aes)", .cra_driver_name = "xts-aes-tegra", .cra_priority = 500, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_aes_ctx), .cra_alignmask = (__alignof__(u64) - 1), @@ -1922,6 +1923,7 @@ static struct tegra_se_alg tegra_aead_algs[] = { .cra_name = "gcm(aes)", .cra_driver_name = "gcm-aes-tegra", .cra_priority = 500, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct tegra_aead_ctx), .cra_alignmask = 0xf, @@ -1944,6 +1946,7 @@ static struct tegra_se_alg tegra_aead_algs[] = { .cra_name = "ccm(aes)", .cra_driver_name = "ccm-aes-tegra", .cra_priority = 500, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct tegra_aead_ctx), .cra_alignmask = 0xf, @@ -1971,7 +1974,7 @@ static struct tegra_se_alg tegra_cmac_algs[] = { .cra_name = "cmac(aes)", .cra_driver_name = "tegra-se-cmac", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_cmac_ctx), .cra_alignmask = 0,
diff --git a/drivers/crypto/tegra/tegra-se-hash.c b/drivers/crypto/tegra/tegra-se-hash.c index 4a298ac..06bb5bf 100644 --- a/drivers/crypto/tegra/tegra-se-hash.c +++ b/drivers/crypto/tegra/tegra-se-hash.c
@@ -761,7 +761,7 @@ static struct tegra_se_alg tegra_hash_algs[] = { .cra_name = "sha1", .cra_driver_name = "tegra-se-sha1", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_sha_ctx), .cra_alignmask = 0, @@ -786,7 +786,7 @@ static struct tegra_se_alg tegra_hash_algs[] = { .cra_name = "sha224", .cra_driver_name = "tegra-se-sha224", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = SHA224_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_sha_ctx), .cra_alignmask = 0, @@ -811,7 +811,7 @@ static struct tegra_se_alg tegra_hash_algs[] = { .cra_name = "sha256", .cra_driver_name = "tegra-se-sha256", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_sha_ctx), .cra_alignmask = 0, @@ -836,7 +836,7 @@ static struct tegra_se_alg tegra_hash_algs[] = { .cra_name = "sha384", .cra_driver_name = "tegra-se-sha384", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = SHA384_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_sha_ctx), .cra_alignmask = 0, @@ -861,7 +861,7 @@ static struct tegra_se_alg tegra_hash_algs[] = { .cra_name = "sha512", .cra_driver_name = "tegra-se-sha512", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_sha_ctx), .cra_alignmask = 0, @@ -886,7 +886,7 @@ static struct tegra_se_alg tegra_hash_algs[] = { .cra_name = "sha3-224", .cra_driver_name = "tegra-se-sha3-224", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = SHA3_224_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_sha_ctx), .cra_alignmask = 0, @@ -911,7 +911,7 @@ static struct tegra_se_alg tegra_hash_algs[] = { .cra_name = "sha3-256", .cra_driver_name = "tegra-se-sha3-256", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = SHA3_256_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_sha_ctx), .cra_alignmask = 0, @@ -936,7 +936,7 @@ static struct tegra_se_alg tegra_hash_algs[] = { .cra_name = "sha3-384", .cra_driver_name = "tegra-se-sha3-384", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = SHA3_384_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_sha_ctx), .cra_alignmask = 0, @@ -961,7 +961,7 @@ static struct tegra_se_alg tegra_hash_algs[] = { .cra_name = "sha3-512", .cra_driver_name = "tegra-se-sha3-512", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = SHA3_512_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_sha_ctx), .cra_alignmask = 0, @@ -988,7 +988,8 @@ static struct tegra_se_alg tegra_hash_algs[] = { .cra_name = "hmac(sha224)", .cra_driver_name = "tegra-se-hmac-sha224", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA224_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_sha_ctx), .cra_alignmask = 0, @@ -1015,7 +1016,8 @@ static struct tegra_se_alg tegra_hash_algs[] = { .cra_name = "hmac(sha256)", .cra_driver_name = "tegra-se-hmac-sha256", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_flags = 
CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_sha_ctx), .cra_alignmask = 0, @@ -1042,7 +1044,8 @@ static struct tegra_se_alg tegra_hash_algs[] = { .cra_name = "hmac(sha384)", .cra_driver_name = "tegra-se-hmac-sha384", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA384_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_sha_ctx), .cra_alignmask = 0, @@ -1069,7 +1072,8 @@ static struct tegra_se_alg tegra_hash_algs[] = { .cra_name = "hmac(sha512)", .cra_driver_name = "tegra-se-hmac-sha512", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_ctxsize = sizeof(struct tegra_sha_ctx), .cra_alignmask = 0,
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 4589bf1..80aeb0d 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig
@@ -59,6 +59,7 @@ tristate "CXL ACPI: Platform Support" depends on ACPI depends on ACPI_NUMA + depends on CXL_PMEM || !CXL_PMEM default CXL_BUS select ACPI_TABLE_LIB select ACPI_HMAT
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 007b8af..5b0570d 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h
@@ -152,6 +152,24 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); +static inline struct device *port_to_host(struct cxl_port *port) +{ + struct cxl_port *parent = is_cxl_root(port) ? NULL : + to_cxl_port(port->dev.parent); + + /* + * The host of CXL root port and the first level of ports is + * the platform firmware device, the host of all other ports + * is their parent port. + */ + if (!parent) + return port->uport_dev; + else if (is_cxl_root(parent)) + return parent->uport_dev; + else + return &parent->dev; +} + static inline struct device *dport_to_host(struct cxl_dport *dport) { struct cxl_port *port = dport->port;
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index e3f0c39..cb5d5a0 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c
@@ -94,7 +94,6 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) struct cxl_hdm *cxlhdm; void __iomem *hdm; u32 ctrl; - int i; if (!info) return false; @@ -113,22 +112,16 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) return false; /* - * If any decoders are committed already, there should not be any - * emulated DVSEC decoders. + * If HDM decoders are globally enabled, do not fall back to DVSEC + * range emulation. Zeroed decoder registers after region teardown + * do not imply absence of HDM capability. + * + * Falling back to DVSEC here would treat the decoder as AUTO and + * may incorrectly latch default interleave settings. */ - for (i = 0; i < cxlhdm->decoder_count; i++) { - ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); - dev_dbg(&info->port->dev, - "decoder%d.%d: committed: %ld base: %#x_%.8x size: %#x_%.8x\n", - info->port->id, i, - FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl), - readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i)), - readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(i)), - readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i)), - readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i))); - if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl)) - return false; - } + ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); + if (ctrl & CXL_HDM_DECODER_ENABLE) + return false; return true; } @@ -904,7 +897,7 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld) if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0) return; - if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags)) + if (cxld->flags & CXL_DECODER_F_LOCK) return; if (port->commit_end == id)
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index fa6dd0c..12386d9 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c
@@ -311,6 +311,7 @@ static bool cxl_mem_raw_command_allowed(u16 opcode) * cxl_payload_from_user_allowed() - Check contents of in_payload. * @opcode: The mailbox command opcode. * @payload_in: Pointer to the input payload passed in from user space. + * @in_size: Size of @payload_in in bytes. * * Return: * * true - payload_in passes check for @opcode. @@ -325,12 +326,15 @@ static bool cxl_mem_raw_command_allowed(u16 opcode) * * The specific checks are determined by the opcode. */ -static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in) +static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in, + size_t in_size) { switch (opcode) { case CXL_MBOX_OP_SET_PARTITION_INFO: { struct cxl_mbox_set_partition_info *pi = payload_in; + if (in_size < sizeof(*pi)) + return false; if (pi->flags & CXL_SET_PARTITION_IMMEDIATE_FLAG) return false; break; @@ -338,6 +342,8 @@ static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in) case CXL_MBOX_OP_CLEAR_LOG: { const uuid_t *uuid = (uuid_t *)payload_in; + if (in_size < sizeof(uuid_t)) + return false; /* * Restrict the ‘Clear log’ action to only apply to * Vendor debug logs. @@ -365,7 +371,8 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox_cmd, if (IS_ERR(mbox_cmd->payload_in)) return PTR_ERR(mbox_cmd->payload_in); - if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in)) { + if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in, + in_size)) { dev_dbg(cxl_mbox->host, "%s: input payload not allowed\n", cxl_mem_opcode_to_name(opcode)); kvfree(mbox_cmd->payload_in); @@ -1294,7 +1301,7 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) * Require an endpoint to be safe otherwise the driver can not * be sure that the device is unmapped. */ - if (endpoint && cxl_num_decoders_committed(endpoint) == 0) + if (cxlmd->dev.driver && cxl_num_decoders_committed(endpoint) == 0) return __cxl_mem_sanitize(mds, cmd); return -EBUSY;
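The mbox change threads the user-supplied payload size into the allow-list check so each opcode can verify the buffer is large enough before casting it to a structure and reading fields; without that, a short payload lets the check itself read past the allocation. A sketch of the per-opcode check, assuming the CXL mailbox definitions from the driver's headers:

#include <linux/types.h>

static bool set_partition_payload_ok(const void *payload_in, size_t in_size)
{
	const struct cxl_mbox_set_partition_info *pi = payload_in;

	if (in_size < sizeof(*pi))
		return false;	/* too short: fields would be garbage */

	return !(pi->flags & CXL_SET_PARTITION_IMMEDIATE_FLAG);
}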
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index f547d8a..273c221 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c
@@ -1089,10 +1089,8 @@ static int cxlmd_add(struct cxl_memdev *cxlmd, struct cxl_dev_state *cxlds) DEFINE_FREE(put_cxlmd, struct cxl_memdev *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) -static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd) +static bool cxl_memdev_attach_failed(struct cxl_memdev *cxlmd) { - int rc; - /* * If @attach is provided fail if the driver is not attached upon * return. Note that failure here could be the result of a race to @@ -1100,7 +1098,14 @@ static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd) * succeeded and then cxl_mem unbound before the lock is acquired. */ guard(device)(&cxlmd->dev); - if (cxlmd->attach && !cxlmd->dev.driver) { + return (cxlmd->attach && !cxlmd->dev.driver); +} + +static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd) +{ + int rc; + + if (cxl_memdev_attach_failed(cxlmd)) { cxl_memdev_unregister(cxlmd); return ERR_PTR(-ENXIO); }
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index 3c6e767..68462e3 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c
@@ -115,15 +115,17 @@ static void unregister_nvb(void *_cxl_nvb) device_unregister(&cxl_nvb->dev); } -/** - * devm_cxl_add_nvdimm_bridge() - add the root of a LIBNVDIMM topology - * @host: platform firmware root device - * @port: CXL port at the root of a CXL topology - * - * Return: bridge device that can host cxl_nvdimm objects - */ -struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, - struct cxl_port *port) +static bool cxl_nvdimm_bridge_failed_attach(struct cxl_nvdimm_bridge *cxl_nvb) +{ + struct device *dev = &cxl_nvb->dev; + + guard(device)(dev); + /* If the device has no driver, then it failed to attach. */ + return dev->driver == NULL; +} + +struct cxl_nvdimm_bridge *__devm_cxl_add_nvdimm_bridge(struct device *host, + struct cxl_port *port) { struct cxl_nvdimm_bridge *cxl_nvb; struct device *dev; @@ -145,6 +147,11 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, if (rc) goto err; + if (cxl_nvdimm_bridge_failed_attach(cxl_nvb)) { + unregister_nvb(cxl_nvb); + return ERR_PTR(-ENODEV); + } + rc = devm_add_action_or_reset(host, unregister_nvb, cxl_nvb); if (rc) return ERR_PTR(rc); @@ -155,7 +162,7 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, put_device(dev); return ERR_PTR(rc); } -EXPORT_SYMBOL_NS_GPL(devm_cxl_add_nvdimm_bridge, "CXL"); +EXPORT_SYMBOL_FOR_MODULES(__devm_cxl_add_nvdimm_bridge, "cxl_pmem"); static void cxl_nvdimm_release(struct device *dev) { @@ -255,6 +262,21 @@ int devm_cxl_add_nvdimm(struct device *host, struct cxl_port *port, if (!cxl_nvb) return -ENODEV; + /* + * Take the uport_dev lock to guard against race of nvdimm_bus object. + * cxl_acpi_probe() registers the nvdimm_bus and is done under the + * root port uport_dev lock. + * + * Take the cxl_nvb device lock to ensure that cxl_nvb driver is in a + * consistent state. And the driver registers nvdimm_bus. + */ + guard(device)(cxl_nvb->port->uport_dev); + guard(device)(&cxl_nvb->dev); + if (!cxl_nvb->nvdimm_bus) { + rc = -ENODEV; + goto err_alloc; + } + cxl_nvd = cxl_nvdimm_alloc(cxl_nvb, cxlmd); if (IS_ERR(cxl_nvd)) { rc = PTR_ERR(cxl_nvd);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index b69c252..c5aacd7 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c
@@ -552,10 +552,13 @@ static void cxl_port_release(struct device *dev) xa_destroy(&port->dports); xa_destroy(&port->regions); ida_free(&cxl_port_ida, port->id); - if (is_cxl_root(port)) + + if (is_cxl_root(port)) { kfree(to_cxl_root(port)); - else + } else { + put_device(dev->parent); kfree(port); + } } static ssize_t decoders_committed_show(struct device *dev, @@ -615,22 +618,8 @@ struct cxl_port *parent_port_of(struct cxl_port *port) static void unregister_port(void *_port) { struct cxl_port *port = _port; - struct cxl_port *parent = parent_port_of(port); - struct device *lock_dev; - /* - * CXL root port's and the first level of ports are unregistered - * under the platform firmware device lock, all other ports are - * unregistered while holding their parent port lock. - */ - if (!parent) - lock_dev = port->uport_dev; - else if (is_cxl_root(parent)) - lock_dev = parent->uport_dev; - else - lock_dev = &parent->dev; - - device_lock_assert(lock_dev); + device_lock_assert(port_to_host(port)); port->dead = true; device_unregister(&port->dev); } @@ -721,6 +710,7 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev, struct cxl_port *iter; dev->parent = &parent_port->dev; + get_device(dev->parent); port->depth = parent_port->depth + 1; port->parent_dport = parent_dport; @@ -1427,20 +1417,11 @@ static struct device *grandparent(struct device *dev) return NULL; } -static struct device *endpoint_host(struct cxl_port *endpoint) -{ - struct cxl_port *port = to_cxl_port(endpoint->dev.parent); - - if (is_cxl_root(port)) - return port->uport_dev; - return &port->dev; -} - static void delete_endpoint(void *data) { struct cxl_memdev *cxlmd = data; struct cxl_port *endpoint = cxlmd->endpoint; - struct device *host = endpoint_host(endpoint); + struct device *host = port_to_host(endpoint); scoped_guard(device, host) { if (host->driver && !endpoint->dead) { @@ -1456,7 +1437,7 @@ static void delete_endpoint(void *data) int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) { - struct device *host = endpoint_host(endpoint); + struct device *host = port_to_host(endpoint); struct device *dev = &cxlmd->dev; get_device(host); @@ -1790,7 +1771,16 @@ static struct cxl_dport *find_or_add_dport(struct cxl_port *port, { struct cxl_dport *dport; - device_lock_assert(&port->dev); + /* + * The port is already visible in CXL hierarchy, but it may still + * be in the process of binding to the CXL port driver at this point. + * + * port creation and driver binding are protected by the port's host + * lock, so acquire the host lock here to ensure the port has completed + * driver binding before proceeding with dport addition. + */ + guard(device)(port_to_host(port)); + guard(device)(&port->dev); dport = cxl_find_dport_by_dev(port, dport_dev); if (!dport) { dport = probe_dport(port, dport_dev); @@ -1857,13 +1847,11 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) * RP port enumerated by cxl_acpi without dport will * have the dport added here. */ - scoped_guard(device, &port->dev) { - dport = find_or_add_dport(port, dport_dev); - if (IS_ERR(dport)) { - if (PTR_ERR(dport) == -EAGAIN) - goto retry; - return PTR_ERR(dport); - } + dport = find_or_add_dport(port, dport_dev); + if (IS_ERR(dport)) { + if (PTR_ERR(dport) == -EAGAIN) + goto retry; + return PTR_ERR(dport); } rc = cxl_add_ep(dport, &cxlmd->dev);
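The port change pins the parent with get_device() when a child port is allocated and drops that reference in the child's release callback, so a release path that dereferences dev->parent can never race the parent's teardown. A generic sketch of the pinning pattern, with struct my_child as a hypothetical container:

#include <linux/device.h>
#include <linux/slab.h>

struct my_child {			/* hypothetical */
	struct device dev;
};

static void my_child_release(struct device *dev)
{
	struct my_child *child = container_of(dev, struct my_child, dev);

	put_device(dev->parent);	/* pairs with get_device() below */
	kfree(child);
}

static void my_child_init(struct my_child *child, struct device *parent)
{
	device_initialize(&child->dev);
	child->dev.parent = parent;
	child->dev.release = my_child_release;
	get_device(parent);		/* pin until release runs */
}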
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index fec37af..c37ae0b 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c
@@ -1100,12 +1100,12 @@ static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr, static void cxl_region_setup_flags(struct cxl_region *cxlr, struct cxl_decoder *cxld) { - if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags)) { + if (cxld->flags & CXL_DECODER_F_LOCK) { set_bit(CXL_REGION_F_LOCK, &cxlr->flags); clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); } - if (test_bit(CXL_DECODER_F_NORMALIZED_ADDRESSING, &cxld->flags)) + if (cxld->flags & CXL_DECODER_F_NORMALIZED_ADDRESSING) set_bit(CXL_REGION_F_NORMALIZED_ADDRESSING, &cxlr->flags); } @@ -3854,8 +3854,10 @@ static int __construct_region(struct cxl_region *cxlr, } rc = sysfs_update_group(&cxlr->dev.kobj, &cxl_region_group); - if (rc) + if (rc) { + kfree(res); return rc; + } rc = insert_resource(cxlrd->res, res); if (rc) {
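The switch from test_bit() to a plain bitwise AND matters because test_bit() takes a bit *number*, while the CXL_DECODER_F_* constants appear to be BIT() masks; passing a mask where a number is expected probes the wrong bit. Illustrated with a hypothetical flag:

#include <linux/bits.h>
#include <linux/bitops.h>

#define EX_F_LOCK       BIT(0)  /* mask-style flag: value 0x1 */

static bool example_is_locked(unsigned long flags)
{
        /* WRONG: test_bit(EX_F_LOCK, &flags) would test bit number 1 */
        /* return test_bit(EX_F_LOCK, &flags); */

        /* RIGHT for mask-style flags */
        return flags & EX_F_LOCK;
}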
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 04c673e..9b94728 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h
@@ -574,11 +574,16 @@ struct cxl_nvdimm_bridge { #define CXL_DEV_ID_LEN 19 +enum { + CXL_NVD_F_INVALIDATED = 0, +}; + struct cxl_nvdimm { struct device dev; struct cxl_memdev *cxlmd; u8 dev_id[CXL_DEV_ID_LEN]; /* for nvdimm, string of 'serial' */ u64 dirty_shutdowns; + unsigned long flags; }; struct cxl_pmem_region_mapping { @@ -920,6 +925,8 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv); struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev); struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, struct cxl_port *port); +struct cxl_nvdimm_bridge *__devm_cxl_add_nvdimm_bridge(struct device *host, + struct cxl_port *port); struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev); bool is_cxl_nvdimm(struct device *dev); int devm_cxl_add_nvdimm(struct device *host, struct cxl_port *port,
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 6a97e4e..261dff7 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c
@@ -13,6 +13,20 @@ static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX); +/** + * devm_cxl_add_nvdimm_bridge() - add the root of a LIBNVDIMM topology + * @host: platform firmware root device + * @port: CXL port at the root of a CXL topology + * + * Return: bridge device that can host cxl_nvdimm objects + */ +struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, + struct cxl_port *port) +{ + return __devm_cxl_add_nvdimm_bridge(host, port); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_nvdimm_bridge, "CXL"); + static void clear_exclusive(void *mds) { clear_exclusive_cxl_commands(mds, exclusive_cmds); @@ -129,6 +143,9 @@ static int cxl_nvdimm_probe(struct device *dev) struct nvdimm *nvdimm; int rc; + if (test_bit(CXL_NVD_F_INVALIDATED, &cxl_nvd->flags)) + return -EBUSY; + set_exclusive_cxl_commands(mds, exclusive_cmds); rc = devm_add_action_or_reset(dev, clear_exclusive, mds); if (rc) @@ -309,8 +326,10 @@ static int detach_nvdimm(struct device *dev, void *data) scoped_guard(device, dev) { if (dev->driver) { cxl_nvd = to_cxl_nvdimm(dev); - if (cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data) + if (cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data) { release = true; + set_bit(CXL_NVD_F_INVALIDATED, &cxl_nvd->flags); + } } } if (release) @@ -353,6 +372,7 @@ static struct cxl_driver cxl_nvdimm_bridge_driver = { .probe = cxl_nvdimm_bridge_probe, .id = CXL_DEVICE_NVDIMM_BRIDGE, .drv = { + .probe_type = PROBE_FORCE_SYNCHRONOUS, .suppress_bind_attrs = true, }, }; @@ -534,7 +554,7 @@ static __exit void cxl_pmem_exit(void) MODULE_DESCRIPTION("CXL PMEM: Persistent Memory Support"); MODULE_LICENSE("GPL v2"); -module_init(cxl_pmem_init); +subsys_initcall(cxl_pmem_init); module_exit(cxl_pmem_exit); MODULE_IMPORT_NS("CXL"); MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM_BRIDGE);
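The export split above follows a two-level pattern: the core exposes the raw constructor only to the cxl_pmem module via EXPORT_SYMBOL_FOR_MODULES(), and cxl_pmem re-exports the documented entry point in the "CXL" namespace. A schematic sketch, assuming a kernel that provides EXPORT_SYMBOL_FOR_MODULES() (as the hunks above do) and using hypothetical names:

#include <linux/export.h>

/* core module: visible only to the named consumer module */
int __example_core_op(void)
{
        return 0;
}
EXPORT_SYMBOL_FOR_MODULES(__example_core_op, "example_consumer");

/* example_consumer module: the public, namespaced wrapper */
int example_op(void)
{
        return __example_core_op();
}
EXPORT_SYMBOL_NS_GPL(example_op, "EXAMPLE");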
diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c index e7d698b..5397dbd 100644 --- a/drivers/dma/dw-edma/dw-edma-core.c +++ b/drivers/dma/dw-edma/dw-edma-core.c
@@ -844,6 +844,7 @@ static int dw_edma_irq_request(struct dw_edma *dw, { struct dw_edma_chip *chip = dw->chip; struct device *dev = dw->chip->dev; + struct msi_desc *msi_desc; u32 wr_mask = 1; u32 rd_mask = 1; int i, err = 0; @@ -895,9 +896,12 @@ static int dw_edma_irq_request(struct dw_edma *dw, &dw->irq[i]); if (err) goto err_irq_free; - - if (irq_get_msi_desc(irq)) + msi_desc = irq_get_msi_desc(irq); + if (msi_desc) { get_cached_msi_msg(irq, &dw->irq[i].msi); + if (!msi_desc->pci.msi_attrib.is_msix) + dw->irq[i].msi.data = dw->irq[0].msi.data + i; + } } dw->nr_irqs = i;
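The fix above relies on how multi-message MSI differs from MSI-X: with plain MSI all vectors of a function share one address/data pair and vector i signals with the base data value plus i, whereas each MSI-X vector caches its own message. A sketch of the adjustment, with a hypothetical helper:

#include <linux/msi.h>

/*
 * Derive the message for vector 'i' of a multi-message MSI block from
 * the message cached for vector 0.
 */
static void example_msi_msg_for_vector(const struct msi_msg *base,
                                       unsigned int i, struct msi_msg *out)
{
        *out = *base;
        out->data = base->data + i;     /* MSI encodes the vector in data */
}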
diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c index e3f8db4..ce8f725 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-core.c +++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c
@@ -252,10 +252,10 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first) lower_32_bits(chunk->ll_region.paddr)); SET_CH_32(dw, chan->dir, chan->id, llp.msb, upper_32_bits(chunk->ll_region.paddr)); + /* Set consumer cycle */ + SET_CH_32(dw, chan->dir, chan->id, cycle_sync, + HDMA_V0_CONSUMER_CYCLE_STAT | HDMA_V0_CONSUMER_CYCLE_BIT); } - /* Set consumer cycle */ - SET_CH_32(dw, chan->dir, chan->id, cycle_sync, - HDMA_V0_CONSUMER_CYCLE_STAT | HDMA_V0_CONSUMER_CYCLE_BIT); dw_hdma_v0_sync_ll_data(chunk);
diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index dbcdd1e..b596baa 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c
@@ -317,10 +317,8 @@ static struct dma_chan *fsl_edma3_xlate(struct of_phandle_args *dma_spec, return NULL; i = fsl_chan - fsl_edma->chans; - fsl_chan->priority = dma_spec->args[1]; - fsl_chan->is_rxchan = dma_spec->args[2] & FSL_EDMA_RX; - fsl_chan->is_remote = dma_spec->args[2] & FSL_EDMA_REMOTE; - fsl_chan->is_multi_fifo = dma_spec->args[2] & FSL_EDMA_MULTI_FIFO; + if (!b_chmux && i != dma_spec->args[0]) + continue; if ((dma_spec->args[2] & FSL_EDMA_EVEN_CH) && (i & 0x1)) continue; @@ -328,17 +326,15 @@ static struct dma_chan *fsl_edma3_xlate(struct of_phandle_args *dma_spec, if ((dma_spec->args[2] & FSL_EDMA_ODD_CH) && !(i & 0x1)) continue; - if (!b_chmux && i == dma_spec->args[0]) { - chan = dma_get_slave_channel(chan); - chan->device->privatecnt++; - return chan; - } else if (b_chmux && !fsl_chan->srcid) { - /* if controller support channel mux, choose a free channel */ - chan = dma_get_slave_channel(chan); - chan->device->privatecnt++; - fsl_chan->srcid = dma_spec->args[0]; - return chan; - } + fsl_chan->srcid = dma_spec->args[0]; + fsl_chan->priority = dma_spec->args[1]; + fsl_chan->is_rxchan = dma_spec->args[2] & FSL_EDMA_RX; + fsl_chan->is_remote = dma_spec->args[2] & FSL_EDMA_REMOTE; + fsl_chan->is_multi_fifo = dma_spec->args[2] & FSL_EDMA_MULTI_FIFO; + + chan = dma_get_slave_channel(chan); + chan->device->privatecnt++; + return chan; } return NULL; }
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index c37d233..0366c7c 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c
@@ -158,11 +158,7 @@ static const struct device_type idxd_cdev_file_type = { static void idxd_cdev_dev_release(struct device *dev) { struct idxd_cdev *idxd_cdev = dev_to_cdev(dev); - struct idxd_cdev_context *cdev_ctx; - struct idxd_wq *wq = idxd_cdev->wq; - cdev_ctx = &ictx[wq->idxd->data->type]; - ida_free(&cdev_ctx->minor_ida, idxd_cdev->minor); kfree(idxd_cdev); } @@ -582,11 +578,15 @@ int idxd_wq_add_cdev(struct idxd_wq *wq) void idxd_wq_del_cdev(struct idxd_wq *wq) { + struct idxd_cdev_context *cdev_ctx; struct idxd_cdev *idxd_cdev; idxd_cdev = wq->idxd_cdev; wq->idxd_cdev = NULL; cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev)); + + cdev_ctx = &ictx[wq->idxd->data->type]; + ida_free(&cdev_ctx->minor_ida, idxd_cdev->minor); put_device(cdev_dev(idxd_cdev)); }
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index c261285..1311384 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c
@@ -175,6 +175,7 @@ void idxd_wq_free_resources(struct idxd_wq *wq) free_descs(wq); dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr); sbitmap_queue_free(&wq->sbq); + wq->type = IDXD_WQT_NONE; } EXPORT_SYMBOL_NS_GPL(idxd_wq_free_resources, "IDXD"); @@ -382,7 +383,6 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) lockdep_assert_held(&wq->wq_lock); wq->state = IDXD_WQ_DISABLED; memset(wq->wqcfg, 0, idxd->wqcfg_size); - wq->type = IDXD_WQT_NONE; wq->threshold = 0; wq->priority = 0; wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; @@ -831,8 +831,7 @@ static void idxd_device_evl_free(struct idxd_device *idxd) struct device *dev = &idxd->pdev->dev; struct idxd_evl *evl = idxd->evl; - gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); - if (!gencfg.evl_en) + if (!evl) return; mutex_lock(&evl->lock); @@ -1125,7 +1124,11 @@ int idxd_device_config(struct idxd_device *idxd) { int rc; - lockdep_assert_held(&idxd->dev_lock); + guard(spinlock)(&idxd->dev_lock); + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return 0; + rc = idxd_wqs_setup(idxd); if (rc < 0) return rc; @@ -1332,6 +1335,11 @@ void idxd_wq_free_irq(struct idxd_wq *wq) free_irq(ie->vector, ie); idxd_flush_pending_descs(ie); + + /* The interrupt might have been already released by FLR */ + if (ie->int_handle == INVALID_INT_HANDLE) + return; + if (idxd->request_int_handles) idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); idxd_device_clear_perm_entry(idxd, ie); @@ -1340,6 +1348,23 @@ void idxd_wq_free_irq(struct idxd_wq *wq) ie->pasid = IOMMU_PASID_INVALID; } +void idxd_wq_flush_descs(struct idxd_wq *wq) +{ + struct idxd_irq_entry *ie = &wq->ie; + struct idxd_device *idxd = wq->idxd; + + guard(mutex)(&wq->wq_lock); + + if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) + return; + + idxd_flush_pending_descs(ie); + if (idxd->request_int_handles) + idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); + idxd_device_clear_perm_entry(idxd, ie); + ie->int_handle = INVALID_INT_HANDLE; +} + int idxd_wq_request_irq(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; @@ -1454,11 +1479,7 @@ int idxd_drv_enable_wq(struct idxd_wq *wq) } } - rc = 0; - spin_lock(&idxd->dev_lock); - if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) - rc = idxd_device_config(idxd); - spin_unlock(&idxd->dev_lock); + rc = idxd_device_config(idxd); if (rc < 0) { dev_dbg(dev, "Writing wq %d config failed: %d\n", wq->id, rc); goto err; @@ -1533,7 +1554,6 @@ void idxd_drv_disable_wq(struct idxd_wq *wq) idxd_wq_reset(wq); idxd_wq_free_resources(wq); percpu_ref_exit(&wq->wq_active); - wq->type = IDXD_WQT_NONE; wq->client_count = 0; } EXPORT_SYMBOL_NS_GPL(idxd_drv_disable_wq, "IDXD"); @@ -1554,10 +1574,7 @@ int idxd_device_drv_probe(struct idxd_dev *idxd_dev) } /* Device configuration */ - spin_lock(&idxd->dev_lock); - if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) - rc = idxd_device_config(idxd); - spin_unlock(&idxd->dev_lock); + rc = idxd_device_config(idxd); if (rc < 0) return -ENXIO;
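Centralizing the lock and the CONFIGURABLE check inside idxd_device_config() removes the repeated lock/test/unlock dance at every call site; guard(spinlock)() drops the lock on every return path. A minimal sketch with hypothetical types:

#include <linux/bitops.h>
#include <linux/cleanup.h>
#include <linux/spinlock.h>

struct example_device {
        spinlock_t dev_lock;
        unsigned long flags;
};

#define EXAMPLE_FLAG_CONFIGURABLE 0     /* bit number, used with test_bit() */

static int example_device_config(struct example_device *edev)
{
        guard(spinlock)(&edev->dev_lock);       /* unlocked on any return */

        if (!test_bit(EXAMPLE_FLAG_CONFIGURABLE, &edev->flags))
                return 0;       /* nothing to write back */

        /* ... write the configuration registers here ... */
        return 0;
}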
diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index dbecd69..9937b67 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c
@@ -194,6 +194,22 @@ static void idxd_dma_release(struct dma_device *device) kfree(idxd_dma); } +static int idxd_dma_terminate_all(struct dma_chan *c) +{ + struct idxd_wq *wq = to_idxd_wq(c); + + idxd_wq_flush_descs(wq); + + return 0; +} + +static void idxd_dma_synchronize(struct dma_chan *c) +{ + struct idxd_wq *wq = to_idxd_wq(c); + + idxd_wq_drain(wq); +} + int idxd_register_dma_device(struct idxd_device *idxd) { struct idxd_dma_dev *idxd_dma; @@ -224,6 +240,8 @@ int idxd_register_dma_device(struct idxd_device *idxd) dma->device_issue_pending = idxd_dma_issue_pending; dma->device_alloc_chan_resources = idxd_dma_alloc_chan_resources; dma->device_free_chan_resources = idxd_dma_free_chan_resources; + dma->device_terminate_all = idxd_dma_terminate_all; + dma->device_synchronize = idxd_dma_synchronize; rc = dma_async_device_register(dma); if (rc < 0) {
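Wiring up device_terminate_all and device_synchronize makes the standard dmaengine teardown sequence usable on these channels. From a client's point of view the pairing looks like this (a sketch, not idxd-specific code):

#include <linux/dmaengine.h>

static void example_client_teardown(struct dma_chan *chan)
{
        /* request abort of in-flight descriptors -> device_terminate_all() */
        dmaengine_terminate_async(chan);

        /* wait until completion callbacks have finished -> device_synchronize() */
        dmaengine_synchronize(chan);
}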
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index ea8c4da..ce78b9a 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h
@@ -803,6 +803,7 @@ void idxd_wq_quiesce(struct idxd_wq *wq); int idxd_wq_init_percpu_ref(struct idxd_wq *wq); void idxd_wq_free_irq(struct idxd_wq *wq); int idxd_wq_request_irq(struct idxd_wq *wq); +void idxd_wq_flush_descs(struct idxd_wq *wq); /* submission */ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc);
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index fb80803..f1cfc77 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c
@@ -973,7 +973,8 @@ static void idxd_device_config_restore(struct idxd_device *idxd, idxd->rdbuf_limit = idxd_saved->saved_idxd.rdbuf_limit; - idxd->evl->size = saved_evl->size; + if (idxd->evl) + idxd->evl->size = saved_evl->size; for (i = 0; i < idxd->max_groups; i++) { struct idxd_group *saved_group, *group; @@ -1104,12 +1105,10 @@ static void idxd_reset_done(struct pci_dev *pdev) idxd_device_config_restore(idxd, idxd->idxd_saved); /* Re-configure IDXD device if allowed. */ - if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { - rc = idxd_device_config(idxd); - if (rc < 0) { - dev_err(dev, "HALT: %s config fails\n", idxd_name); - goto out; - } + rc = idxd_device_config(idxd); + if (rc < 0) { + dev_err(dev, "HALT: %s config fails\n", idxd_name); + goto out; } /* Bind IDXD device to driver. */ @@ -1147,6 +1146,7 @@ static void idxd_reset_done(struct pci_dev *pdev) } out: kfree(idxd->idxd_saved); + idxd->idxd_saved = NULL; } static const struct pci_error_handlers idxd_error_handler = {
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 7782f8c..6a25e1f 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c
@@ -397,6 +397,17 @@ static void idxd_device_flr(struct work_struct *work) dev_err(&idxd->pdev->dev, "FLR failed\n"); } +static void idxd_wqs_flush_descs(struct idxd_device *idxd) +{ + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + idxd_wq_flush_descs(wq); + } +} + static irqreturn_t idxd_halt(struct idxd_device *idxd) { union gensts_reg gensts; @@ -415,6 +426,11 @@ static irqreturn_t idxd_halt(struct idxd_device *idxd) } else if (gensts.reset_type == IDXD_DEVICE_RESET_FLR) { idxd->state = IDXD_DEV_HALTED; idxd_mask_error_interrupts(idxd); + /* Flush all pending descriptors and disable + * interrupts; they will be re-enabled when FLR + * concludes. + */ + idxd_wqs_flush_descs(idxd); dev_dbg(&idxd->pdev->dev, "idxd halted, doing FLR. After FLR, configs are restored\n"); INIT_WORK(&idxd->work, idxd_device_flr);
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 6db1c5f..0321704 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c
@@ -138,7 +138,7 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, */ list_for_each_entry_safe(d, t, &flist, list) { list_del_init(&d->list); - idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true, + idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true, NULL, NULL); } }
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index cc2c83d..6d251095 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c
@@ -1836,6 +1836,7 @@ static void idxd_conf_device_release(struct device *dev) { struct idxd_device *idxd = confdev_to_idxd(dev); + destroy_workqueue(idxd->wq); kfree(idxd->groups); bitmap_free(idxd->wq_enable_map); kfree(idxd->wqs);
diff --git a/drivers/dma/sh/rz-dmac.c b/drivers/dma/sh/rz-dmac.c index d84ca55..f30bdf6 100644 --- a/drivers/dma/sh/rz-dmac.c +++ b/drivers/dma/sh/rz-dmac.c
@@ -10,6 +10,7 @@ */ #include <linux/bitfield.h> +#include <linux/cleanup.h> #include <linux/dma-mapping.h> #include <linux/dmaengine.h> #include <linux/interrupt.h> @@ -296,13 +297,10 @@ static void rz_dmac_disable_hw(struct rz_dmac_chan *channel) { struct dma_chan *chan = &channel->vc.chan; struct rz_dmac *dmac = to_rz_dmac(chan->device); - unsigned long flags; dev_dbg(dmac->dev, "%s channel %d\n", __func__, channel->index); - local_irq_save(flags); rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1); - local_irq_restore(flags); } static void rz_dmac_set_dmars_register(struct rz_dmac *dmac, int nr, u32 dmars) @@ -447,6 +445,7 @@ static int rz_dmac_alloc_chan_resources(struct dma_chan *chan) if (!desc) break; + /* No need to lock. This is called only for the 1st client. */ list_add_tail(&desc->node, &channel->ld_free); channel->descs_allocated++; } @@ -502,18 +501,21 @@ rz_dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, dev_dbg(dmac->dev, "%s channel: %d src=0x%pad dst=0x%pad len=%zu\n", __func__, channel->index, &src, &dest, len); - if (list_empty(&channel->ld_free)) - return NULL; + scoped_guard(spinlock_irqsave, &channel->vc.lock) { + if (list_empty(&channel->ld_free)) + return NULL; - desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node); + desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node); - desc->type = RZ_DMAC_DESC_MEMCPY; - desc->src = src; - desc->dest = dest; - desc->len = len; - desc->direction = DMA_MEM_TO_MEM; + desc->type = RZ_DMAC_DESC_MEMCPY; + desc->src = src; + desc->dest = dest; + desc->len = len; + desc->direction = DMA_MEM_TO_MEM; - list_move_tail(channel->ld_free.next, &channel->ld_queue); + list_move_tail(channel->ld_free.next, &channel->ld_queue); + } + return vchan_tx_prep(&channel->vc, &desc->vd, flags); } @@ -529,27 +531,29 @@ rz_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, int dma_length = 0; int i = 0; - if (list_empty(&channel->ld_free)) - return NULL; + scoped_guard(spinlock_irqsave, &channel->vc.lock) { + if (list_empty(&channel->ld_free)) + return NULL; - desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node); + desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node); - for_each_sg(sgl, sg, sg_len, i) { - dma_length += sg_dma_len(sg); + for_each_sg(sgl, sg, sg_len, i) + dma_length += sg_dma_len(sg); + + desc->type = RZ_DMAC_DESC_SLAVE_SG; + desc->sg = sgl; + desc->sgcount = sg_len; + desc->len = dma_length; + desc->direction = direction; + + if (direction == DMA_DEV_TO_MEM) + desc->src = channel->src_per_address; + else + desc->dest = channel->dst_per_address; + + list_move_tail(channel->ld_free.next, &channel->ld_queue); } - desc->type = RZ_DMAC_DESC_SLAVE_SG; - desc->sg = sgl; - desc->sgcount = sg_len; - desc->len = dma_length; - desc->direction = direction; - - if (direction == DMA_DEV_TO_MEM) - desc->src = channel->src_per_address; - else - desc->dest = channel->dst_per_address; - - list_move_tail(channel->ld_free.next, &channel->ld_queue); return vchan_tx_prep(&channel->vc, &desc->vd, flags); } @@ -561,8 +565,8 @@ static int rz_dmac_terminate_all(struct dma_chan *chan) unsigned int i; LIST_HEAD(head); - rz_dmac_disable_hw(channel); spin_lock_irqsave(&channel->vc.lock, flags); + rz_dmac_disable_hw(channel); for (i = 0; i < DMAC_NR_LMDESC; i++) lmdesc[i].header = 0; @@ -699,7 +703,9 @@ static void rz_dmac_irq_handle_channel(struct rz_dmac_chan *channel) if (chstat & CHSTAT_ER) { dev_err(dmac->dev, "DMAC err CHSTAT_%d = %08X\n", 
channel->index, chstat); - rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1); + + scoped_guard(spinlock_irqsave, &channel->vc.lock) + rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1); goto done; }
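scoped_guard(spinlock_irqsave, lock) holds the lock, with interrupts saved and disabled, exactly for the statement or block that follows it, which is why the error path above can wrap a single register write. A minimal sketch with hypothetical names:

#include <linux/cleanup.h>
#include <linux/io.h>
#include <linux/spinlock.h>

struct example_chan {
        spinlock_t lock;
        void __iomem *base;
};

#define EXAMPLE_CTRL_RESET      0x1
#define EXAMPLE_REG_CTRL        0x0

static void example_reset_channel(struct example_chan *ch)
{
        /* lock is taken before the write and released at the statement end */
        scoped_guard(spinlock_irqsave, &ch->lock)
                writel(EXAMPLE_CTRL_RESET, ch->base + EXAMPLE_REG_CTRL);
}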
diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c index d02a4da..782a55e 100644 --- a/drivers/dma/xilinx/xdma.c +++ b/drivers/dma/xilinx/xdma.c
@@ -1234,8 +1234,8 @@ static int xdma_probe(struct platform_device *pdev) xdev->rmap = devm_regmap_init_mmio(&pdev->dev, reg_base, &xdma_regmap_config); - if (!xdev->rmap) { - xdma_err(xdev, "config regmap failed: %d", ret); + if (IS_ERR(xdev->rmap)) { + xdma_err(xdev, "config regmap failed: %pe", xdev->rmap); goto failed; } INIT_LIST_HEAD(&xdev->dma_dev.channels);
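The original check was wrong in two ways: devm_regmap_init_mmio() reports failure with an ERR_PTR()-encoded pointer rather than NULL, and ret was never assigned before being printed. The canonical check-and-report shape looks like this (hypothetical wrapper):

#include <linux/device.h>
#include <linux/err.h>
#include <linux/regmap.h>

static int example_init_regmap(struct device *dev, void __iomem *base,
                               const struct regmap_config *cfg)
{
        struct regmap *map = devm_regmap_init_mmio(dev, base, cfg);

        if (IS_ERR(map))        /* never NULL on failure */
                return dev_err_probe(dev, PTR_ERR(map),
                                     "config regmap failed\n");
        return 0;
}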
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index b53292e..e3a18ee 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -997,16 +997,16 @@ static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan, struct xilinx_cdma_tx_segment, node); cdma_hw = &cdma_seg->hw; - residue += (cdma_hw->control - cdma_hw->status) & - chan->xdev->max_buffer_len; + residue += (cdma_hw->control & chan->xdev->max_buffer_len) - + (cdma_hw->status & chan->xdev->max_buffer_len); } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { axidma_seg = list_entry(entry, struct xilinx_axidma_tx_segment, node); axidma_hw = &axidma_seg->hw; - residue += (axidma_hw->control - axidma_hw->status) & - chan->xdev->max_buffer_len; + residue += (axidma_hw->control & chan->xdev->max_buffer_len) - + (axidma_hw->status & chan->xdev->max_buffer_len); } else { aximcdma_seg = list_entry(entry, @@ -1014,8 +1014,8 @@ static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan, node); aximcdma_hw = &aximcdma_seg->hw; residue += - (aximcdma_hw->control - aximcdma_hw->status) & - chan->xdev->max_buffer_len; + (aximcdma_hw->control & chan->xdev->max_buffer_len) - + (aximcdma_hw->status & chan->xdev->max_buffer_len); } } @@ -1235,14 +1235,6 @@ static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan) dma_cookie_init(dchan); - if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { - /* For AXI DMA resetting once channel will reset the - * other channel as well so enable the interrupts here. - */ - dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, - XILINX_DMA_DMAXR_ALL_IRQ_MASK); - } - if ((chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) && chan->has_sg) dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, XILINX_CDMA_CR_SGMODE); @@ -1564,8 +1556,29 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan) if (chan->err) return; - if (list_empty(&chan->pending_list)) + if (list_empty(&chan->pending_list)) { + if (chan->cyclic) { + struct xilinx_dma_tx_descriptor *desc; + struct list_head *entry; + + desc = list_last_entry(&chan->done_list, + struct xilinx_dma_tx_descriptor, node); + list_for_each(entry, &desc->segments) { + struct xilinx_axidma_tx_segment *axidma_seg; + struct xilinx_axidma_desc_hw *axidma_hw; + axidma_seg = list_entry(entry, + struct xilinx_axidma_tx_segment, + node); + axidma_hw = &axidma_seg->hw; + axidma_hw->status = 0; + } + + list_splice_tail_init(&chan->done_list, &chan->active_list); + chan->desc_pendingcount = 0; + chan->idle = false; + } return; + } if (!chan->idle) return; @@ -1591,6 +1604,7 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan) head_desc->async_tx.phys); reg &= ~XILINX_DMA_CR_DELAY_MAX; reg |= chan->irq_delay << XILINX_DMA_CR_DELAY_SHIFT; + reg |= XILINX_DMA_DMAXR_ALL_IRQ_MASK; dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg); xilinx_dma_start(chan); @@ -3024,7 +3038,7 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, return -EINVAL; } - xdev->common.directions |= chan->direction; + xdev->common.directions |= BIT(chan->direction); /* Request the interrupt */ chan->irq = of_irq_get(node, chan->tdest);
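The directions fix at the end is easy to misread: dma_device.directions is a bitmask indexed by enum dma_transfer_direction, so the raw enum value must be converted with BIT() before being OR-ed in. For example:

#include <linux/bits.h>
#include <linux/dmaengine.h>

static void example_advertise_direction(struct dma_device *dma,
                                        enum dma_transfer_direction dir)
{
        /*
         * directions is a bitmask: DMA_MEM_TO_DEV (value 1) must become
         * BIT(1), not the literal value 1 (which is BIT(0)).
         */
        dma->directions |= BIT(dir);
}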
diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c index 63bd971..37f3c33 100644 --- a/drivers/dpll/zl3073x/core.c +++ b/drivers/dpll/zl3073x/core.c
@@ -981,11 +981,7 @@ zl3073x_devm_dpll_init(struct zl3073x_dev *zldev, u8 num_dplls) } /* Add devres action to release DPLL related resources */ - rc = devm_add_action_or_reset(zldev->dev, zl3073x_dev_dpll_fini, zldev); - if (rc) - goto error; - - return 0; + return devm_add_action_or_reset(zldev->dev, zl3073x_dev_dpll_fini, zldev); error: zl3073x_dev_dpll_fini(zldev); @@ -1026,6 +1022,7 @@ int zl3073x_dev_probe(struct zl3073x_dev *zldev, "Unknown or non-match chip ID: 0x%0x\n", id); } + zldev->chip_id = id; /* Read revision, firmware version and custom config version */ rc = zl3073x_read_u16(zldev, ZL_REG_REVISION, &revision);
diff --git a/drivers/dpll/zl3073x/core.h b/drivers/dpll/zl3073x/core.h index dddfcac..fd2af3c 100644 --- a/drivers/dpll/zl3073x/core.h +++ b/drivers/dpll/zl3073x/core.h
@@ -35,6 +35,7 @@ struct zl3073x_dpll; * @dev: pointer to device * @regmap: regmap to access device registers * @multiop_lock: to serialize multiple register operations + * @chip_id: chip ID read from hardware * @ref: array of input references' invariants * @out: array of outs' invariants * @synth: array of synths' invariants @@ -48,6 +49,7 @@ struct zl3073x_dev { struct device *dev; struct regmap *regmap; struct mutex multiop_lock; + u16 chip_id; /* Invariants */ struct zl3073x_ref ref[ZL3073X_NUM_REFS]; @@ -144,6 +146,32 @@ int zl3073x_write_hwreg_seq(struct zl3073x_dev *zldev, int zl3073x_ref_phase_offsets_update(struct zl3073x_dev *zldev, int channel); +/** + * zl3073x_dev_is_ref_phase_comp_32bit - check ref phase comp register size + * @zldev: pointer to zl3073x device + * + * Some chip IDs have a 32-bit wide ref_phase_offset_comp register instead + * of the default 48-bit. + * + * Return: true if the register is 32-bit, false if 48-bit + */ +static inline bool +zl3073x_dev_is_ref_phase_comp_32bit(struct zl3073x_dev *zldev) +{ + switch (zldev->chip_id) { + case 0x0E30: + case 0x0E93: + case 0x0E94: + case 0x0E95: + case 0x0E96: + case 0x0E97: + case 0x1F60: + return true; + default: + return false; + } +} + static inline bool zl3073x_is_n_pin(u8 id) {
diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index d33175d..aaa14ea 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c
@@ -475,8 +475,11 @@ zl3073x_dpll_input_pin_phase_adjust_get(const struct dpll_pin *dpll_pin, ref_id = zl3073x_input_pin_ref_get(pin->id); ref = zl3073x_ref_state_get(zldev, ref_id); - /* Perform sign extension for 48bit signed value */ - phase_comp = sign_extend64(ref->phase_comp, 47); + /* Perform sign extension based on register width */ + if (zl3073x_dev_is_ref_phase_comp_32bit(zldev)) + phase_comp = sign_extend64(ref->phase_comp, 31); + else + phase_comp = sign_extend64(ref->phase_comp, 47); /* Reverse two's complement negation applied during set and convert * to 32bit signed int
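sign_extend64(value, index) takes the bit *index* of the sign bit, so a 32-bit register uses index 31 and a 48-bit register uses index 47, matching the two branches above. A condensed form of the conversion, as a hypothetical helper:

#include <linux/bitops.h>
#include <linux/types.h>

static s64 example_phase_comp_to_s64(u64 raw, bool reg_is_32bit)
{
        /* the second argument is the index of the sign bit, not the width */
        return sign_extend64(raw, reg_is_32bit ? 31 : 47);
}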
diff --git a/drivers/dpll/zl3073x/ref.c b/drivers/dpll/zl3073x/ref.c index aa2de13..6b65e61 100644 --- a/drivers/dpll/zl3073x/ref.c +++ b/drivers/dpll/zl3073x/ref.c
@@ -121,8 +121,16 @@ int zl3073x_ref_state_fetch(struct zl3073x_dev *zldev, u8 index) return rc; /* Read phase compensation register */ - rc = zl3073x_read_u48(zldev, ZL_REG_REF_PHASE_OFFSET_COMP, - &ref->phase_comp); + if (zl3073x_dev_is_ref_phase_comp_32bit(zldev)) { + u32 val; + + rc = zl3073x_read_u32(zldev, ZL_REG_REF_PHASE_OFFSET_COMP_32, + &val); + ref->phase_comp = val; + } else { + rc = zl3073x_read_u48(zldev, ZL_REG_REF_PHASE_OFFSET_COMP, + &ref->phase_comp); + } if (rc) return rc; @@ -179,9 +187,16 @@ int zl3073x_ref_state_set(struct zl3073x_dev *zldev, u8 index, if (!rc && dref->sync_ctrl != ref->sync_ctrl) rc = zl3073x_write_u8(zldev, ZL_REG_REF_SYNC_CTRL, ref->sync_ctrl); - if (!rc && dref->phase_comp != ref->phase_comp) - rc = zl3073x_write_u48(zldev, ZL_REG_REF_PHASE_OFFSET_COMP, - ref->phase_comp); + if (!rc && dref->phase_comp != ref->phase_comp) { + if (zl3073x_dev_is_ref_phase_comp_32bit(zldev)) + rc = zl3073x_write_u32(zldev, + ZL_REG_REF_PHASE_OFFSET_COMP_32, + ref->phase_comp); + else + rc = zl3073x_write_u48(zldev, + ZL_REG_REF_PHASE_OFFSET_COMP, + ref->phase_comp); + } if (rc) return rc;
diff --git a/drivers/dpll/zl3073x/regs.h b/drivers/dpll/zl3073x/regs.h index d837bee..5573d71 100644 --- a/drivers/dpll/zl3073x/regs.h +++ b/drivers/dpll/zl3073x/regs.h
@@ -194,6 +194,7 @@ #define ZL_REF_CONFIG_DIFF_EN BIT(2) #define ZL_REG_REF_PHASE_OFFSET_COMP ZL_REG(10, 0x28, 6) +#define ZL_REG_REF_PHASE_OFFSET_COMP_32 ZL_REG(10, 0x28, 4) #define ZL_REG_REF_SYNC_CTRL ZL_REG(10, 0x2e, 1) #define ZL_REF_SYNC_CTRL_MODE GENMASK(2, 0)
diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index f1a2bee..82b3b6d 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c
@@ -257,9 +257,10 @@ static void fwnet_header_cache_update(struct hh_cache *hh, memcpy((u8 *)hh->hh_data + HH_DATA_OFF(FWNET_HLEN), haddr, net->addr_len); } -static int fwnet_header_parse(const struct sk_buff *skb, unsigned char *haddr) +static int fwnet_header_parse(const struct sk_buff *skb, const struct net_device *dev, + unsigned char *haddr) { - memcpy(haddr, skb->dev->dev_addr, FWNET_ALEN); + memcpy(haddr, dev->dev_addr, FWNET_ALEN); return FWNET_ALEN; }
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 1c868c1..8153d62 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c
@@ -848,7 +848,7 @@ static int ar_context_init(struct ar_context *ctx, struct fw_ohci *ohci, { struct device *dev = ohci->card.device; unsigned int i; - struct page *pages[AR_BUFFERS + AR_WRAPAROUND_PAGES]; + struct page *pages[AR_BUFFERS + AR_WRAPAROUND_PAGES] = { NULL }; dma_addr_t dma_addrs[AR_BUFFERS]; void *vaddr; struct descriptor *d;
diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 12a6253..f2f94d4 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c
@@ -205,12 +205,12 @@ static int ffa_rxtx_map(phys_addr_t tx_buf, phys_addr_t rx_buf, u32 pg_cnt) return 0; } -static int ffa_rxtx_unmap(u16 vm_id) +static int ffa_rxtx_unmap(void) { ffa_value_t ret; invoke_ffa_fn((ffa_value_t){ - .a0 = FFA_RXTX_UNMAP, .a1 = PACK_TARGET_INFO(vm_id, 0), + .a0 = FFA_RXTX_UNMAP, }, &ret); if (ret.a0 == FFA_ERROR) @@ -2097,7 +2097,7 @@ static int __init ffa_init(void) pr_err("failed to setup partitions\n"); ffa_notifications_cleanup(); - ffa_rxtx_unmap(drv_info->vm_id); + ffa_rxtx_unmap(); free_pages: if (drv_info->tx_buffer) free_pages_exact(drv_info->tx_buffer, rxtx_bufsz); @@ -2112,7 +2112,7 @@ static void __exit ffa_exit(void) { ffa_notifications_cleanup(); ffa_partitions_cleanup(); - ffa_rxtx_unmap(drv_info->vm_id); + ffa_rxtx_unmap(); free_pages_exact(drv_info->tx_buffer, drv_info->rxtx_bufsz); free_pages_exact(drv_info->rx_buffer, drv_info->rxtx_bufsz); kfree(drv_info);
diff --git a/drivers/firmware/arm_scmi/notify.c b/drivers/firmware/arm_scmi/notify.c index 9168794..40ec184 100644 --- a/drivers/firmware/arm_scmi/notify.c +++ b/drivers/firmware/arm_scmi/notify.c
@@ -1066,7 +1066,7 @@ static int scmi_register_event_handler(struct scmi_notify_instance *ni, * since at creation time we usually want to have all setup and ready before * events really start flowing. * - * Return: A properly refcounted handler on Success, NULL on Failure + * Return: A properly refcounted handler on Success, ERR_PTR on Failure */ static inline struct scmi_event_handler * __scmi_event_handler_get_ops(struct scmi_notify_instance *ni, @@ -1113,7 +1113,7 @@ __scmi_event_handler_get_ops(struct scmi_notify_instance *ni, } mutex_unlock(&ni->pending_mtx); - return hndl; + return hndl ?: ERR_PTR(-ENODEV); } static struct scmi_event_handler *
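The `hndl ?: ERR_PTR(-ENODEV)` form uses the GNU a ?: b extension (equivalent to a ? a : b with a evaluated once) to convert the internal NULL into the ERR_PTR() convention the kernel-doc now promises. A small sketch with hypothetical names:

#include <linux/err.h>

struct example_handler;

static struct example_handler *example_lookup(int key)
{
        /* stub standing in for a real table lookup; may return NULL */
        return NULL;
}

static struct example_handler *example_get(int key)
{
        struct example_handler *hndl = example_lookup(key);

        /* callers test IS_ERR(), never NULL */
        return hndl ?: ERR_PTR(-ENODEV);
}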
diff --git a/drivers/firmware/arm_scmi/protocols.h b/drivers/firmware/arm_scmi/protocols.h index 4c75970..f51245a 100644 --- a/drivers/firmware/arm_scmi/protocols.h +++ b/drivers/firmware/arm_scmi/protocols.h
@@ -189,13 +189,13 @@ struct scmi_protocol_handle { /** * struct scmi_iterator_state - Iterator current state descriptor - * @desc_index: Starting index for the current mulit-part request. + * @desc_index: Starting index for the current multi-part request. * @num_returned: Number of returned items in the last multi-part reply. * @num_remaining: Number of remaining items in the multi-part message. * @max_resources: Maximum acceptable number of items, configured by the caller * depending on the underlying resources that it is querying. * @loop_idx: The iterator loop index in the current multi-part reply. - * @rx_len: Size in bytes of the currenly processed message; it can be used by + * @rx_len: Size in bytes of the currently processed message; it can be used by * the user of the iterator to verify a reply size. * @priv: Optional pointer to some additional state-related private data setup * by the caller during the iterations.
diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c index 00e7444..2acad5f 100644 --- a/drivers/firmware/arm_scpi.c +++ b/drivers/firmware/arm_scpi.c
@@ -18,6 +18,7 @@ #include <linux/bitmap.h> #include <linux/bitfield.h> +#include <linux/cleanup.h> #include <linux/device.h> #include <linux/err.h> #include <linux/export.h> @@ -940,13 +941,13 @@ static int scpi_probe(struct platform_device *pdev) int idx = scpi_drvinfo->num_chans; struct scpi_chan *pchan = scpi_drvinfo->channels + idx; struct mbox_client *cl = &pchan->cl; - struct device_node *shmem = of_parse_phandle(np, "shmem", idx); + struct device_node *shmem __free(device_node) = + of_parse_phandle(np, "shmem", idx); if (!of_match_node(shmem_of_match, shmem)) return -ENXIO; ret = of_address_to_resource(shmem, 0, &res); - of_node_put(shmem); if (ret) { dev_err(dev, "failed to get SCPI payload mem resource\n"); return ret;
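The __free(device_node) annotation attaches of_node_put() to the variable's scope, which is what lets the patch drop the manual of_node_put() call even on the early-return paths. A standalone sketch:

#include <linux/cleanup.h>
#include <linux/of.h>

static int example_check_shmem(struct device_node *np, int idx)
{
        struct device_node *shmem __free(device_node) =
                of_parse_phandle(np, "shmem", idx);

        if (!shmem)
                return -ENODEV;

        /* of_node_put(shmem) runs automatically on every return path */
        return 0;
}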
diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c index b4f1c01..5d8be0a 100644 --- a/drivers/firmware/cirrus/cs_dsp.c +++ b/drivers/firmware/cirrus/cs_dsp.c
@@ -1610,11 +1610,17 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, region_name); if (reg) { + /* + * Although we expect the underlying bus does not require + * physically-contiguous buffers, we pessimistically use + * a temporary buffer instead of trusting that the + * alignment of region->data is ok. + */ region_len = le32_to_cpu(region->len); if (region_len > buf_len) { buf_len = round_up(region_len, PAGE_SIZE); - kfree(buf); - buf = kmalloc(buf_len, GFP_KERNEL | GFP_DMA); + vfree(buf); + buf = vmalloc(buf_len); if (!buf) { ret = -ENOMEM; goto out_fw; @@ -1643,7 +1649,7 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, ret = 0; out_fw: - kfree(buf); + vfree(buf); if (ret == -EOVERFLOW) cs_dsp_err(dsp, "%s: file content overflows file data\n", file); @@ -2331,11 +2337,17 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware } if (reg) { + /* + * Although we expect the underlying bus does not require + * physically-contiguous buffers, we pessimistically use + * a temporary buffer instead of trusting that the + * alignment of blk->data is ok. + */ region_len = le32_to_cpu(blk->len); if (region_len > buf_len) { buf_len = round_up(region_len, PAGE_SIZE); - kfree(buf); - buf = kmalloc(buf_len, GFP_KERNEL | GFP_DMA); + vfree(buf); + buf = vmalloc(buf_len); if (!buf) { ret = -ENOMEM; goto out_fw; @@ -2366,7 +2378,7 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware ret = 0; out_fw: - kfree(buf); + vfree(buf); if (ret == -EOVERFLOW) cs_dsp_err(dsp, "%s: file content overflows file data\n", file);
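The kmalloc(GFP_KERNEL | GFP_DMA) to vmalloc() switch fits a large, CPU-only staging buffer: vmalloc() needs no high-order physically-contiguous allocation (which can fail for big firmware regions), at the cost of being unsuitable for direct DMA. The allocate and free sides must change together:

#include <linux/vmalloc.h>

/* Large staging buffer that is only ever copied by the CPU */
static void *example_alloc_staging(size_t len)
{
        return vmalloc(len);    /* virtually contiguous is enough here */
}

static void example_free_staging(void *buf)
{
        vfree(buf);             /* never kfree() a vmalloc'd buffer */
}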
diff --git a/drivers/firmware/efi/mokvar-table.c b/drivers/firmware/efi/mokvar-table.c index 4ff0c29..6842aa9 100644 --- a/drivers/firmware/efi/mokvar-table.c +++ b/drivers/firmware/efi/mokvar-table.c
@@ -85,7 +85,7 @@ static struct kobject *mokvar_kobj; * as an alternative to ordinary EFI variables, due to platform-dependent * limitations. The memory occupied by this table is marked as reserved. * - * This routine must be called before efi_free_boot_services() in order + * This routine must be called before efi_unmap_boot_services() in order * to guarantee that it can mark the table as reserved. * * Implicit inputs:
diff --git a/drivers/firmware/microchip/mpfs-auto-update.c b/drivers/firmware/microchip/mpfs-auto-update.c index 46b19d8..1211fd8 100644 --- a/drivers/firmware/microchip/mpfs-auto-update.c +++ b/drivers/firmware/microchip/mpfs-auto-update.c
@@ -113,10 +113,6 @@ static enum fw_upload_err mpfs_auto_update_prepare(struct fw_upload *fw_uploader * be added here. */ - priv->flash = mpfs_sys_controller_get_flash(priv->sys_controller); - if (!priv->flash) - return FW_UPLOAD_ERR_HW_ERROR; - erase_size = round_up(erase_size, (u64)priv->flash->erasesize); /* @@ -427,6 +423,12 @@ static int mpfs_auto_update_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(priv->sys_controller), "Could not register as a sub device of the system controller\n"); + priv->flash = mpfs_sys_controller_get_flash(priv->sys_controller); + if (IS_ERR_OR_NULL(priv->flash)) { + dev_dbg(dev, "No flash connected to the system controller, auto-update not supported\n"); + return -ENODEV; + } + priv->dev = dev; platform_set_drvdata(pdev, priv);
diff --git a/drivers/firmware/stratix10-rsu.c b/drivers/firmware/stratix10-rsu.c index 41da07c..e191210 100644 --- a/drivers/firmware/stratix10-rsu.c +++ b/drivers/firmware/stratix10-rsu.c
@@ -768,7 +768,9 @@ static int stratix10_rsu_probe(struct platform_device *pdev) rsu_async_status_callback); if (ret) { dev_err(dev, "Error, getting RSU status %i\n", ret); + stratix10_svc_remove_async_client(priv->chan); stratix10_svc_free_channel(priv->chan); + return ret; } /* get DCMF version from firmware */
diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index 6f5c298..e9e35d6 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c
@@ -37,15 +37,14 @@ * service layer will return error to FPGA manager when timeout occurs, * timeout is set to 30 seconds (30 * 1000) at Intel Stratix10 SoC. */ -#define SVC_NUM_DATA_IN_FIFO 32 +#define SVC_NUM_DATA_IN_FIFO 8 #define SVC_NUM_CHANNEL 4 -#define FPGA_CONFIG_DATA_CLAIM_TIMEOUT_MS 200 +#define FPGA_CONFIG_DATA_CLAIM_TIMEOUT_MS 2000 #define FPGA_CONFIG_STATUS_TIMEOUT_SEC 30 #define BYTE_TO_WORD_SIZE 4 /* stratix10 service layer clients */ #define STRATIX10_RSU "stratix10-rsu" -#define INTEL_FCS "intel-fcs" /* Maximum number of SDM client IDs. */ #define MAX_SDM_CLIENT_IDS 16 @@ -105,11 +104,9 @@ struct stratix10_svc_chan; /** * struct stratix10_svc - svc private data * @stratix10_svc_rsu: pointer to stratix10 RSU device - * @intel_svc_fcs: pointer to the FCS device */ struct stratix10_svc { struct platform_device *stratix10_svc_rsu; - struct platform_device *intel_svc_fcs; }; /** @@ -251,12 +248,10 @@ struct stratix10_async_ctrl { * @num_active_client: number of active service client * @node: list management * @genpool: memory pool pointing to the memory region - * @task: pointer to the thread task which handles SMC or HVC call - * @svc_fifo: a queue for storing service message data * @complete_status: state for completion - * @svc_fifo_lock: protect access to service message data queue * @invoke_fn: function to issue secure monitor call or hypervisor call * @svc: manages the list of client svc drivers + * @sdm_lock: only allows a single command single response to SDM * @actrl: async control structure * * This struct is used to create communication channels for service clients, to @@ -269,12 +264,10 @@ struct stratix10_svc_controller { int num_active_client; struct list_head node; struct gen_pool *genpool; - struct task_struct *task; - struct kfifo svc_fifo; struct completion complete_status; - spinlock_t svc_fifo_lock; svc_invoke_fn *invoke_fn; struct stratix10_svc *svc; + struct mutex sdm_lock; struct stratix10_async_ctrl actrl; }; @@ -283,6 +276,9 @@ struct stratix10_svc_controller { * @ctrl: pointer to service controller which is the provider of this channel * @scl: pointer to service client which owns the channel * @name: service client name associated with the channel + * @task: pointer to the thread task which handles SMC or HVC call + * @svc_fifo: a queue for storing service message data (separate fifo for every channel) + * @svc_fifo_lock: protect access to service message data queue (locking pending fifo) * @lock: protect access to the channel * @async_chan: reference to asynchronous channel object for this channel * @@ -293,6 +289,9 @@ struct stratix10_svc_chan { struct stratix10_svc_controller *ctrl; struct stratix10_svc_client *scl; char *name; + struct task_struct *task; + struct kfifo svc_fifo; + spinlock_t svc_fifo_lock; spinlock_t lock; struct stratix10_async_chan *async_chan; }; @@ -527,10 +526,10 @@ static void svc_thread_recv_status_ok(struct stratix10_svc_data *p_data, */ static int svc_normal_to_secure_thread(void *data) { - struct stratix10_svc_controller - *ctrl = (struct stratix10_svc_controller *)data; - struct stratix10_svc_data *pdata; - struct stratix10_svc_cb_data *cbdata; + struct stratix10_svc_chan *chan = (struct stratix10_svc_chan *)data; + struct stratix10_svc_controller *ctrl = chan->ctrl; + struct stratix10_svc_data *pdata = NULL; + struct stratix10_svc_cb_data *cbdata = NULL; struct arm_smccc_res res; unsigned long a0, a1, a2, a3, a4, a5, a6, a7; int ret_fifo = 0; @@ -555,12 +554,12 @@ static int svc_normal_to_secure_thread(void *data) a6 = 
0; a7 = 0; - pr_debug("smc_hvc_shm_thread is running\n"); + pr_debug("%s: %s: Thread is running!\n", __func__, chan->name); while (!kthread_should_stop()) { - ret_fifo = kfifo_out_spinlocked(&ctrl->svc_fifo, + ret_fifo = kfifo_out_spinlocked(&chan->svc_fifo, pdata, sizeof(*pdata), - &ctrl->svc_fifo_lock); + &chan->svc_fifo_lock); if (!ret_fifo) continue; @@ -569,9 +568,25 @@ static int svc_normal_to_secure_thread(void *data) (unsigned int)pdata->paddr, pdata->command, (unsigned int)pdata->size); + /* SDM can only process one command at a time */ + pr_debug("%s: %s: Thread is waiting for mutex!\n", + __func__, chan->name); + if (mutex_lock_interruptible(&ctrl->sdm_lock)) { + /* item already dequeued; notify client to unblock it */ + cbdata->status = BIT(SVC_STATUS_ERROR); + cbdata->kaddr1 = NULL; + cbdata->kaddr2 = NULL; + cbdata->kaddr3 = NULL; + if (pdata->chan->scl) + pdata->chan->scl->receive_cb(pdata->chan->scl, + cbdata); + break; + } + switch (pdata->command) { case COMMAND_RECONFIG_DATA_CLAIM: svc_thread_cmd_data_claim(ctrl, pdata, cbdata); + mutex_unlock(&ctrl->sdm_lock); continue; case COMMAND_RECONFIG: a0 = INTEL_SIP_SMC_FPGA_CONFIG_START; @@ -700,10 +715,11 @@ static int svc_normal_to_secure_thread(void *data) break; default: pr_warn("it shouldn't happen\n"); - break; + mutex_unlock(&ctrl->sdm_lock); + continue; } - pr_debug("%s: before SMC call -- a0=0x%016x a1=0x%016x", - __func__, + pr_debug("%s: %s: before SMC call -- a0=0x%016x a1=0x%016x", + __func__, chan->name, (unsigned int)a0, (unsigned int)a1); pr_debug(" a2=0x%016x\n", (unsigned int)a2); @@ -712,8 +728,8 @@ static int svc_normal_to_secure_thread(void *data) pr_debug(" a5=0x%016x\n", (unsigned int)a5); ctrl->invoke_fn(a0, a1, a2, a3, a4, a5, a6, a7, &res); - pr_debug("%s: after SMC call -- res.a0=0x%016x", - __func__, (unsigned int)res.a0); + pr_debug("%s: %s: after SMC call -- res.a0=0x%016x", + __func__, chan->name, (unsigned int)res.a0); pr_debug(" res.a1=0x%016x, res.a2=0x%016x", (unsigned int)res.a1, (unsigned int)res.a2); pr_debug(" res.a3=0x%016x\n", (unsigned int)res.a3); @@ -728,6 +744,7 @@ static int svc_normal_to_secure_thread(void *data) cbdata->kaddr2 = NULL; cbdata->kaddr3 = NULL; pdata->chan->scl->receive_cb(pdata->chan->scl, cbdata); + mutex_unlock(&ctrl->sdm_lock); continue; } @@ -801,6 +818,8 @@ static int svc_normal_to_secure_thread(void *data) break; } + + mutex_unlock(&ctrl->sdm_lock); } kfree(cbdata); @@ -1696,22 +1715,33 @@ int stratix10_svc_send(struct stratix10_svc_chan *chan, void *msg) if (!p_data) return -ENOMEM; - /* first client will create kernel thread */ - if (!chan->ctrl->task) { - chan->ctrl->task = - kthread_run_on_cpu(svc_normal_to_secure_thread, - (void *)chan->ctrl, - cpu, "svc_smc_hvc_thread"); - if (IS_ERR(chan->ctrl->task)) { + /* first caller creates the per-channel kthread */ + if (!chan->task) { + struct task_struct *task; + + task = kthread_run_on_cpu(svc_normal_to_secure_thread, + (void *)chan, + cpu, "svc_smc_hvc_thread"); + if (IS_ERR(task)) { dev_err(chan->ctrl->dev, "failed to create svc_smc_hvc_thread\n"); kfree(p_data); return -EINVAL; } + + spin_lock(&chan->lock); + if (chan->task) { + /* another caller won the race; discard our thread */ + spin_unlock(&chan->lock); + kthread_stop(task); + } else { + chan->task = task; + spin_unlock(&chan->lock); + } } - pr_debug("%s: sent P-va=%p, P-com=%x, P-size=%u\n", __func__, - p_msg->payload, p_msg->command, + pr_debug("%s: %s: sent P-va=%p, P-com=%x, P-size=%u\n", __func__, + chan->name, p_msg->payload, p_msg->command, 
(unsigned int)p_msg->payload_length); if (list_empty(&svc_data_mem)) { @@ -1747,12 +1777,16 @@ int stratix10_svc_send(struct stratix10_svc_chan *chan, void *msg) p_data->arg[2] = p_msg->arg[2]; p_data->size = p_msg->payload_length; p_data->chan = chan; - pr_debug("%s: put to FIFO pa=0x%016x, cmd=%x, size=%u\n", __func__, - (unsigned int)p_data->paddr, p_data->command, - (unsigned int)p_data->size); - ret = kfifo_in_spinlocked(&chan->ctrl->svc_fifo, p_data, + pr_debug("%s: %s: put to FIFO pa=0x%016x, cmd=%x, size=%u\n", + __func__, + chan->name, + (unsigned int)p_data->paddr, + p_data->command, + (unsigned int)p_data->size); + + ret = kfifo_in_spinlocked(&chan->svc_fifo, p_data, sizeof(*p_data), - &chan->ctrl->svc_fifo_lock); + &chan->svc_fifo_lock); kfree(p_data); @@ -1773,11 +1807,12 @@ EXPORT_SYMBOL_GPL(stratix10_svc_send); */ void stratix10_svc_done(struct stratix10_svc_chan *chan) { - /* stop thread when thread is running AND only one active client */ - if (chan->ctrl->task && chan->ctrl->num_active_client <= 1) { - pr_debug("svc_smc_hvc_shm_thread is stopped\n"); - kthread_stop(chan->ctrl->task); - chan->ctrl->task = NULL; + /* stop thread when thread is running */ + if (chan->task) { + pr_debug("%s: %s: svc_smc_hvc_shm_thread is stopping\n", + __func__, chan->name); + kthread_stop(chan->task); + chan->task = NULL; } } EXPORT_SYMBOL_GPL(stratix10_svc_done); @@ -1817,8 +1852,8 @@ void *stratix10_svc_allocate_memory(struct stratix10_svc_chan *chan, pmem->paddr = pa; pmem->size = s; list_add_tail(&pmem->node, &svc_data_mem); - pr_debug("%s: va=%p, pa=0x%016x\n", __func__, - pmem->vaddr, (unsigned int)pmem->paddr); + pr_debug("%s: %s: va=%p, pa=0x%016x\n", __func__, + chan->name, pmem->vaddr, (unsigned int)pmem->paddr); return (void *)va; } @@ -1855,6 +1890,13 @@ static const struct of_device_id stratix10_svc_drv_match[] = { {}, }; +static const char * const chan_names[SVC_NUM_CHANNEL] = { + SVC_CLIENT_FPGA, + SVC_CLIENT_RSU, + SVC_CLIENT_FCS, + SVC_CLIENT_HWMON +}; + static int stratix10_svc_drv_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1862,11 +1904,11 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev) struct stratix10_svc_chan *chans; struct gen_pool *genpool; struct stratix10_svc_sh_memory *sh_memory; - struct stratix10_svc *svc; + struct stratix10_svc *svc = NULL; svc_invoke_fn *invoke_fn; size_t fifo_size; - int ret; + int ret, i = 0; /* get SMC or HVC function */ invoke_fn = get_invoke_func(dev); @@ -1905,8 +1947,8 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev) controller->num_active_client = 0; controller->chans = chans; controller->genpool = genpool; - controller->task = NULL; controller->invoke_fn = invoke_fn; + INIT_LIST_HEAD(&controller->node); init_completion(&controller->complete_status); ret = stratix10_svc_async_init(controller); @@ -1917,32 +1959,20 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev) } fifo_size = sizeof(struct stratix10_svc_data) * SVC_NUM_DATA_IN_FIFO; - ret = kfifo_alloc(&controller->svc_fifo, fifo_size, GFP_KERNEL); - if (ret) { - dev_err(dev, "failed to allocate FIFO\n"); - goto err_async_exit; + mutex_init(&controller->sdm_lock); + + for (i = 0; i < SVC_NUM_CHANNEL; i++) { + chans[i].scl = NULL; + chans[i].ctrl = controller; + chans[i].name = (char *)chan_names[i]; + spin_lock_init(&chans[i].lock); + ret = kfifo_alloc(&chans[i].svc_fifo, fifo_size, GFP_KERNEL); + if (ret) { + dev_err(dev, "failed to allocate FIFO %d\n", i); + goto err_free_fifos; + } + 
spin_lock_init(&chans[i].svc_fifo_lock); } - spin_lock_init(&controller->svc_fifo_lock); - - chans[0].scl = NULL; - chans[0].ctrl = controller; - chans[0].name = SVC_CLIENT_FPGA; - spin_lock_init(&chans[0].lock); - - chans[1].scl = NULL; - chans[1].ctrl = controller; - chans[1].name = SVC_CLIENT_RSU; - spin_lock_init(&chans[1].lock); - - chans[2].scl = NULL; - chans[2].ctrl = controller; - chans[2].name = SVC_CLIENT_FCS; - spin_lock_init(&chans[2].lock); - - chans[3].scl = NULL; - chans[3].ctrl = controller; - chans[3].name = SVC_CLIENT_HWMON; - spin_lock_init(&chans[3].lock); list_add_tail(&controller->node, &svc_ctrl); platform_set_drvdata(pdev, controller); @@ -1951,7 +1981,7 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev) svc = devm_kzalloc(dev, sizeof(*svc), GFP_KERNEL); if (!svc) { ret = -ENOMEM; - goto err_free_kfifo; + goto err_free_fifos; } controller->svc = svc; @@ -1959,51 +1989,43 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev) if (!svc->stratix10_svc_rsu) { dev_err(dev, "failed to allocate %s device\n", STRATIX10_RSU); ret = -ENOMEM; - goto err_free_kfifo; + goto err_free_fifos; } ret = platform_device_add(svc->stratix10_svc_rsu); - if (ret) { - platform_device_put(svc->stratix10_svc_rsu); - goto err_free_kfifo; - } - - svc->intel_svc_fcs = platform_device_alloc(INTEL_FCS, 1); - if (!svc->intel_svc_fcs) { - dev_err(dev, "failed to allocate %s device\n", INTEL_FCS); - ret = -ENOMEM; - goto err_unregister_rsu_dev; - } - - ret = platform_device_add(svc->intel_svc_fcs); - if (ret) { - platform_device_put(svc->intel_svc_fcs); - goto err_unregister_rsu_dev; - } + if (ret) + goto err_put_device; ret = of_platform_default_populate(dev_of_node(dev), NULL, dev); if (ret) - goto err_unregister_fcs_dev; + goto err_unregister_rsu_dev; pr_info("Intel Service Layer Driver Initialized\n"); return 0; -err_unregister_fcs_dev: - platform_device_unregister(svc->intel_svc_fcs); err_unregister_rsu_dev: platform_device_unregister(svc->stratix10_svc_rsu); -err_free_kfifo: - kfifo_free(&controller->svc_fifo); -err_async_exit: + goto err_free_fifos; +err_put_device: + platform_device_put(svc->stratix10_svc_rsu); +err_free_fifos: + /* only remove from list if list_add_tail() was reached */ + if (!list_empty(&controller->node)) + list_del(&controller->node); + /* free only the FIFOs that were successfully allocated */ + while (i--) + kfifo_free(&chans[i].svc_fifo); stratix10_svc_async_exit(controller); err_destroy_pool: gen_pool_destroy(genpool); + return ret; } static void stratix10_svc_drv_remove(struct platform_device *pdev) { + int i; struct stratix10_svc_controller *ctrl = platform_get_drvdata(pdev); struct stratix10_svc *svc = ctrl->svc; @@ -2011,14 +2033,16 @@ static void stratix10_svc_drv_remove(struct platform_device *pdev) of_platform_depopulate(ctrl->dev); - platform_device_unregister(svc->intel_svc_fcs); platform_device_unregister(svc->stratix10_svc_rsu); - kfifo_free(&ctrl->svc_fifo); - if (ctrl->task) { - kthread_stop(ctrl->task); - ctrl->task = NULL; + for (i = 0; i < SVC_NUM_CHANNEL; i++) { + if (ctrl->chans[i].task) { + kthread_stop(ctrl->chans[i].task); + ctrl->chans[i].task = NULL; + } + kfifo_free(&ctrl->chans[i].svc_fifo); } + if (ctrl->genpool) gen_pool_destroy(ctrl->genpool); list_del(&ctrl->node);
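The per-channel thread creation in stratix10_svc_send() uses an optimistic create-then-install pattern: the thread is created without the lock held, and the loser of the install race stops its redundant thread. Condensed, with hypothetical names:

#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

struct example_chan {
        spinlock_t lock;
        struct task_struct *task;
};

static int example_thread_fn(void *data)
{
        /* worker loop must poll kthread_should_stop() for kthread_stop() */
        while (!kthread_should_stop())
                schedule_timeout_interruptible(HZ);
        return 0;
}

static int example_start_worker(struct example_chan *chan)
{
        struct task_struct *task;

        if (chan->task)
                return 0;

        task = kthread_run(example_thread_fn, chan, "example_worker");
        if (IS_ERR(task))
                return PTR_ERR(task);

        spin_lock(&chan->lock);
        if (chan->task) {
                /* another caller won the race; discard our thread */
                spin_unlock(&chan->lock);
                kthread_stop(task);
        } else {
                chan->task = task;
                spin_unlock(&chan->lock);
        }
        return 0;
}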
diff --git a/drivers/gpib/Kconfig b/drivers/gpib/Kconfig index eeb5095..d43a28c 100644 --- a/drivers/gpib/Kconfig +++ b/drivers/gpib/Kconfig
@@ -122,6 +122,7 @@ depends on OF select GPIB_COMMON select GPIB_NEC7210 + depends on HAS_IOMEM help GPIB driver for Fluke based cda devices.
diff --git a/drivers/gpib/common/gpib_os.c b/drivers/gpib/common/gpib_os.c index be757db..97c98f0 100644 --- a/drivers/gpib/common/gpib_os.c +++ b/drivers/gpib/common/gpib_os.c
@@ -888,10 +888,6 @@ static int read_ioctl(struct gpib_file_private *file_priv, struct gpib_board *bo if (read_cmd.completed_transfer_count > read_cmd.requested_transfer_count) return -EINVAL; - desc = handle_to_descriptor(file_priv, read_cmd.handle); - if (!desc) - return -EINVAL; - if (WARN_ON_ONCE(sizeof(userbuf) > sizeof(read_cmd.buffer_ptr))) return -EFAULT; @@ -904,6 +900,17 @@ static int read_ioctl(struct gpib_file_private *file_priv, struct gpib_board *bo if (!access_ok(userbuf, remain)) return -EFAULT; + /* Lock descriptors to prevent concurrent close from freeing descriptor */ + if (mutex_lock_interruptible(&file_priv->descriptors_mutex)) + return -ERESTARTSYS; + desc = handle_to_descriptor(file_priv, read_cmd.handle); + if (!desc) { + mutex_unlock(&file_priv->descriptors_mutex); + return -EINVAL; + } + atomic_inc(&desc->descriptor_busy); + mutex_unlock(&file_priv->descriptors_mutex); + atomic_set(&desc->io_in_progress, 1); /* Read buffer loads till we fill the user supplied buffer */ @@ -937,6 +944,7 @@ static int read_ioctl(struct gpib_file_private *file_priv, struct gpib_board *bo retval = copy_to_user((void __user *)arg, &read_cmd, sizeof(read_cmd)); atomic_set(&desc->io_in_progress, 0); + atomic_dec(&desc->descriptor_busy); wake_up_interruptible(&board->wait); if (retval) @@ -964,10 +972,6 @@ static int command_ioctl(struct gpib_file_private *file_priv, if (cmd.completed_transfer_count > cmd.requested_transfer_count) return -EINVAL; - desc = handle_to_descriptor(file_priv, cmd.handle); - if (!desc) - return -EINVAL; - userbuf = (u8 __user *)(unsigned long)cmd.buffer_ptr; userbuf += cmd.completed_transfer_count; @@ -980,6 +984,17 @@ static int command_ioctl(struct gpib_file_private *file_priv, if (!access_ok(userbuf, remain)) return -EFAULT; + /* Lock descriptors to prevent concurrent close from freeing descriptor */ + if (mutex_lock_interruptible(&file_priv->descriptors_mutex)) + return -ERESTARTSYS; + desc = handle_to_descriptor(file_priv, cmd.handle); + if (!desc) { + mutex_unlock(&file_priv->descriptors_mutex); + return -EINVAL; + } + atomic_inc(&desc->descriptor_busy); + mutex_unlock(&file_priv->descriptors_mutex); + /* * Write buffer loads till we empty the user supplied buffer. 
* Call drivers at least once, even if remain is zero, in @@ -1003,6 +1018,7 @@ static int command_ioctl(struct gpib_file_private *file_priv, userbuf += bytes_written; if (retval < 0) { atomic_set(&desc->io_in_progress, 0); + atomic_dec(&desc->descriptor_busy); wake_up_interruptible(&board->wait); break; @@ -1022,6 +1038,7 @@ static int command_ioctl(struct gpib_file_private *file_priv, */ if (!no_clear_io_in_prog || fault) atomic_set(&desc->io_in_progress, 0); + atomic_dec(&desc->descriptor_busy); wake_up_interruptible(&board->wait); if (fault) @@ -1047,10 +1064,6 @@ static int write_ioctl(struct gpib_file_private *file_priv, struct gpib_board *b if (write_cmd.completed_transfer_count > write_cmd.requested_transfer_count) return -EINVAL; - desc = handle_to_descriptor(file_priv, write_cmd.handle); - if (!desc) - return -EINVAL; - userbuf = (u8 __user *)(unsigned long)write_cmd.buffer_ptr; userbuf += write_cmd.completed_transfer_count; @@ -1060,6 +1073,17 @@ static int write_ioctl(struct gpib_file_private *file_priv, struct gpib_board *b if (!access_ok(userbuf, remain)) return -EFAULT; + /* Lock descriptors to prevent concurrent close from freeing descriptor */ + if (mutex_lock_interruptible(&file_priv->descriptors_mutex)) + return -ERESTARTSYS; + desc = handle_to_descriptor(file_priv, write_cmd.handle); + if (!desc) { + mutex_unlock(&file_priv->descriptors_mutex); + return -EINVAL; + } + atomic_inc(&desc->descriptor_busy); + mutex_unlock(&file_priv->descriptors_mutex); + atomic_set(&desc->io_in_progress, 1); /* Write buffer loads till we empty the user supplied buffer */ @@ -1094,6 +1118,7 @@ static int write_ioctl(struct gpib_file_private *file_priv, struct gpib_board *b fault = copy_to_user((void __user *)arg, &write_cmd, sizeof(write_cmd)); atomic_set(&desc->io_in_progress, 0); + atomic_dec(&desc->descriptor_busy); wake_up_interruptible(&board->wait); if (fault) @@ -1276,6 +1301,9 @@ static int close_dev_ioctl(struct file *filep, struct gpib_board *board, unsigne { struct gpib_close_dev_ioctl cmd; struct gpib_file_private *file_priv = filep->private_data; + struct gpib_descriptor *desc; + unsigned int pad; + int sad; int retval; retval = copy_from_user(&cmd, (void __user *)arg, sizeof(cmd)); @@ -1284,19 +1312,27 @@ static int close_dev_ioctl(struct file *filep, struct gpib_board *board, unsigne if (cmd.handle >= GPIB_MAX_NUM_DESCRIPTORS) return -EINVAL; - if (!file_priv->descriptors[cmd.handle]) + + mutex_lock(&file_priv->descriptors_mutex); + desc = file_priv->descriptors[cmd.handle]; + if (!desc) { + mutex_unlock(&file_priv->descriptors_mutex); return -EINVAL; - - retval = decrement_open_device_count(board, &board->device_list, - file_priv->descriptors[cmd.handle]->pad, - file_priv->descriptors[cmd.handle]->sad); - if (retval < 0) - return retval; - - kfree(file_priv->descriptors[cmd.handle]); + } + if (atomic_read(&desc->descriptor_busy)) { + mutex_unlock(&file_priv->descriptors_mutex); + return -EBUSY; + } + /* Remove from table while holding lock to prevent new IO from starting */ file_priv->descriptors[cmd.handle] = NULL; + pad = desc->pad; + sad = desc->sad; + mutex_unlock(&file_priv->descriptors_mutex); - return 0; + retval = decrement_open_device_count(board, &board->device_list, pad, sad); + + kfree(desc); + return retval; } static int serial_poll_ioctl(struct gpib_board *board, unsigned long arg) @@ -1331,12 +1367,25 @@ static int wait_ioctl(struct gpib_file_private *file_priv, struct gpib_board *bo if (retval) return -EFAULT; + /* + * Lock descriptors to prevent concurrent 
close from freeing + * descriptor. ibwait() releases big_gpib_mutex when wait_mask + * is non-zero, so desc must be pinned with descriptor_busy. + */ + mutex_lock(&file_priv->descriptors_mutex); desc = handle_to_descriptor(file_priv, wait_cmd.handle); - if (!desc) + if (!desc) { + mutex_unlock(&file_priv->descriptors_mutex); return -EINVAL; + } + atomic_inc(&desc->descriptor_busy); + mutex_unlock(&file_priv->descriptors_mutex); retval = ibwait(board, wait_cmd.wait_mask, wait_cmd.clear_mask, wait_cmd.set_mask, &wait_cmd.ibsta, wait_cmd.usec_timeout, desc); + + atomic_dec(&desc->descriptor_busy); + if (retval < 0) return retval; @@ -2035,6 +2084,7 @@ void init_gpib_descriptor(struct gpib_descriptor *desc) desc->is_board = 0; desc->autopoll_enabled = 0; atomic_set(&desc->io_in_progress, 0); + atomic_set(&desc->descriptor_busy, 0); } int gpib_register_driver(struct gpib_interface *interface, struct module *provider_module)
diff --git a/drivers/gpib/include/gpib_types.h b/drivers/gpib/include/gpib_types.h index 5a0978a..28b7315 100644 --- a/drivers/gpib/include/gpib_types.h +++ b/drivers/gpib/include/gpib_types.h
@@ -364,6 +364,14 @@ struct gpib_descriptor { unsigned int pad; /* primary gpib address */ int sad; /* secondary gpib address (negative means disabled) */ atomic_t io_in_progress; + /* + * Kernel-only reference count to prevent descriptor from being + * freed while IO handlers hold a pointer to it. Incremented + * before each IO operation, decremented when done. Unlike + * io_in_progress, this cannot be modified from userspace via + * general_ibstatus(). + */ + atomic_t descriptor_busy; unsigned is_board : 1; unsigned autopoll_enabled : 1; };
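The descriptor_busy scheme above is a lookup-then-pin pattern: the handle table is only consulted under descriptors_mutex, the descriptor is pinned with an atomic count before the mutex is dropped for the (potentially sleeping) IO path, and close refuses to free a pinned descriptor with -EBUSY. A minimal sketch of the pattern with simplified, hypothetical types (pin_desc(), close_desc() and the fixed-size table are illustrative, not the driver's API):

#include <linux/atomic.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/mutex.h>
#include <linux/slab.h>

#define NUM_HANDLES 16

struct desc {
	atomic_t busy;			/* cf. descriptor_busy */
};

static DEFINE_MUTEX(table_lock);	/* cf. descriptors_mutex */
static struct desc *table[NUM_HANDLES];

static struct desc *pin_desc(unsigned int handle)
{
	struct desc *d;

	if (mutex_lock_interruptible(&table_lock))
		return ERR_PTR(-ERESTARTSYS);
	d = handle < NUM_HANDLES ? table[handle] : NULL;
	if (d)
		atomic_inc(&d->busy);	/* pinned before dropping the lock */
	mutex_unlock(&table_lock);
	return d ?: ERR_PTR(-EINVAL);
}

static void unpin_desc(struct desc *d)
{
	atomic_dec(&d->busy);		/* paired with atomic_inc() above */
}

static int close_desc(unsigned int handle)
{
	struct desc *d;

	mutex_lock(&table_lock);
	d = handle < NUM_HANDLES ? table[handle] : NULL;
	if (!d) {
		mutex_unlock(&table_lock);
		return -EINVAL;
	}
	if (atomic_read(&d->busy)) {
		mutex_unlock(&table_lock);
		return -EBUSY;		/* IO still holds a pin */
	}
	table[handle] = NULL;		/* no new IO can look it up */
	mutex_unlock(&table_lock);
	kfree(d);
	return 0;
}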
diff --git a/drivers/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c b/drivers/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c index 6fc4e34..0f9d385 100644 --- a/drivers/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c +++ b/drivers/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c
@@ -38,8 +38,10 @@ MODULE_DESCRIPTION("GPIB driver for LPVO usb devices"); /* * Table of devices that work with this driver. * - * Currently, only one device is known to be used in the - * lpvo_usb_gpib adapter (FTDI 0403:6001). + * Currently, only one device is known to be used in the lpvo_usb_gpib + * adapter (FTDI 0403:6001) but as this device id is already handled by the + * ftdi_sio USB serial driver the LPVO driver must not bind to it by default. + * * If your adapter uses a different chip, insert a line * in the following table with proper <Vendor-id>, <Product-id>. * @@ -50,7 +52,6 @@ MODULE_DESCRIPTION("GPIB driver for LPVO usb devices"); */ static const struct usb_device_id skel_table[] = { - { USB_DEVICE(0x0403, 0x6001) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, skel_table); @@ -405,7 +406,7 @@ static int usb_gpib_attach(struct gpib_board *board, const struct gpib_board_con for (j = 0 ; j < MAX_DEV ; j++) { if ((assigned_usb_minors & 1 << j) == 0) continue; - udev = usb_get_dev(interface_to_usbdev(lpvo_usb_interfaces[j])); + udev = interface_to_usbdev(lpvo_usb_interfaces[j]); device_path = kobject_get_path(&udev->dev.kobj, GFP_KERNEL); match = gpib_match_device_path(&lpvo_usb_interfaces[j]->dev, config->device_path); @@ -420,7 +421,7 @@ static int usb_gpib_attach(struct gpib_board *board, const struct gpib_board_con for (j = 0 ; j < MAX_DEV ; j++) { if ((assigned_usb_minors & 1 << j) == 0) continue; - udev = usb_get_dev(interface_to_usbdev(lpvo_usb_interfaces[j])); + udev = interface_to_usbdev(lpvo_usb_interfaces[j]); DIA_LOG(1, "dev. %d: bus %d -> %d dev: %d -> %d\n", j, udev->bus->busnum, config->pci_bus, udev->devnum, config->pci_slot); if (config->pci_bus == udev->bus->busnum &&
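For context on the usb_get_dev() removal above: usb_get_dev() takes a reference on the struct usb_device that must later be balanced with usb_put_dev(), and these loops never dropped it, so each pass leaked a device reference. A pointer that is only used within the loop body can simply borrow from interface_to_usbdev(). A sketch of the two idioms (peek_busnum() and hold_udev() are hypothetical helpers):

#include <linux/printk.h>
#include <linux/usb.h>

static void peek_busnum(struct usb_interface *intf)
{
	/* Borrowed pointer: valid while the interface is bound; no
	 * reference taken, so nothing to put. */
	struct usb_device *udev = interface_to_usbdev(intf);

	pr_debug("bus %d dev %d\n", udev->bus->busnum, udev->devnum);
}

static struct usb_device *hold_udev(struct usb_interface *intf)
{
	/* Long-lived pointer: take a reference the caller must balance
	 * with usb_put_dev() when done. */
	return usb_get_dev(interface_to_usbdev(intf));
}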
diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c index d7666fe..647b6f4 100644 --- a/drivers/gpio/gpio-mxc.c +++ b/drivers/gpio/gpio-mxc.c
@@ -584,12 +584,13 @@ static bool mxc_gpio_set_pad_wakeup(struct mxc_gpio_port *port, bool enable) unsigned long config; bool ret = false; int i, type; + bool is_imx8qm = of_device_is_compatible(port->dev->of_node, "fsl,imx8qm-gpio"); static const u32 pad_type_map[] = { IMX_SCU_WAKEUP_OFF, /* 0 */ IMX_SCU_WAKEUP_RISE_EDGE, /* IRQ_TYPE_EDGE_RISING */ IMX_SCU_WAKEUP_FALL_EDGE, /* IRQ_TYPE_EDGE_FALLING */ - IMX_SCU_WAKEUP_FALL_EDGE, /* IRQ_TYPE_EDGE_BOTH */ + IMX_SCU_WAKEUP_RISE_EDGE, /* IRQ_TYPE_EDGE_BOTH */ IMX_SCU_WAKEUP_HIGH_LVL, /* IRQ_TYPE_LEVEL_HIGH */ IMX_SCU_WAKEUP_OFF, /* 5 */ IMX_SCU_WAKEUP_OFF, /* 6 */ @@ -604,6 +605,13 @@ static bool mxc_gpio_set_pad_wakeup(struct mxc_gpio_port *port, bool enable) config = pad_type_map[type]; else config = IMX_SCU_WAKEUP_OFF; + + if (is_imx8qm && config == IMX_SCU_WAKEUP_FALL_EDGE) { + dev_warn_once(port->dev, + "No falling-edge support for wakeup on i.MX8QM\n"); + config = IMX_SCU_WAKEUP_OFF; + } + ret |= mxc_gpio_generic_config(port, i, config); } }
diff --git a/drivers/gpio/gpio-qixis-fpga.c b/drivers/gpio/gpio-qixis-fpga.c index 6e67f43..3ced47d 100644 --- a/drivers/gpio/gpio-qixis-fpga.c +++ b/drivers/gpio/gpio-qixis-fpga.c
@@ -60,8 +60,8 @@ static int qixis_cpld_gpio_probe(struct platform_device *pdev) return PTR_ERR(reg); regmap = devm_regmap_init_mmio(&pdev->dev, reg, ®map_config_8r_8v); - if (!regmap) - return -ENODEV; + if (IS_ERR(regmap)) + return PTR_ERR(regmap); /* In this case, the offset of our register is 0 inside the * regmap area that we just created.
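The qixis fix is the standard ERR_PTR convention: devm_regmap_init_mmio() signals failure by encoding the errno inside the returned pointer, never by returning NULL, so a NULL test lets every failure through. A minimal sketch of the convention (check_init_result() is illustrative):

#include <linux/err.h>
#include <linux/errno.h>

static int check_init_result(void *regmap)
{
	/* ERR_PTR(-E...) is a non-NULL pointer in the top error range;
	 * IS_ERR() detects it and PTR_ERR() recovers the errno. */
	if (IS_ERR(regmap))
		return PTR_ERR(regmap);
	return 0;
}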
diff --git a/drivers/gpio/gpiolib-shared.c b/drivers/gpio/gpiolib-shared.c index d2614ac..e02d6b9 100644 --- a/drivers/gpio/gpiolib-shared.c +++ b/drivers/gpio/gpiolib-shared.c
@@ -443,8 +443,8 @@ static bool gpio_shared_dev_is_reset_gpio(struct device *consumer, } #endif /* CONFIG_RESET_GPIO */ -int gpio_shared_add_proxy_lookup(struct device *consumer, const char *con_id, - unsigned long lflags) +int gpio_shared_add_proxy_lookup(struct device *consumer, struct fwnode_handle *fwnode, + const char *con_id, unsigned long lflags) { const char *dev_id = dev_name(consumer); struct gpiod_lookup_table *lookup; @@ -458,7 +458,7 @@ int gpio_shared_add_proxy_lookup(struct device *consumer, const char *con_id, if (!ref->fwnode && device_is_compatible(consumer, "reset-gpio")) { if (!gpio_shared_dev_is_reset_gpio(consumer, entry, ref)) continue; - } else if (!device_match_fwnode(consumer, ref->fwnode)) { + } else if (fwnode != ref->fwnode) { continue; } @@ -506,8 +506,9 @@ static void gpio_shared_remove_adev(struct auxiliary_device *adev) auxiliary_device_uninit(adev); } -int gpio_device_setup_shared(struct gpio_device *gdev) +int gpiochip_setup_shared(struct gpio_chip *gc) { + struct gpio_device *gdev = gc->gpiodev; struct gpio_shared_entry *entry; struct gpio_shared_ref *ref; struct gpio_desc *desc; @@ -538,20 +539,42 @@ int gpio_device_setup_shared(struct gpio_device *gdev) if (list_count_nodes(&entry->refs) <= 1) continue; - desc = &gdev->descs[entry->offset]; + scoped_guard(mutex, &entry->lock) { +#if IS_ENABLED(CONFIG_OF) + if (is_of_node(entry->fwnode) && gc->of_xlate) { + /* + * This is the earliest that we can translate the + * devicetree offset to the chip offset. + */ + struct of_phandle_args gpiospec = { }; - __set_bit(GPIOD_FLAG_SHARED, &desc->flags); - /* - * Shared GPIOs are not requested via the normal path. Make - * them inaccessible to anyone even before we register the - * chip. - */ - ret = gpiod_request_commit(desc, "shared"); - if (ret) - return ret; + gpiospec.np = to_of_node(entry->fwnode); + gpiospec.args_count = 2; + gpiospec.args[0] = entry->offset; - pr_debug("GPIO %u owned by %s is shared by multiple consumers\n", - entry->offset, gpio_device_get_label(gdev)); + ret = gc->of_xlate(gc, &gpiospec, NULL); + if (ret < 0) + return ret; + + entry->offset = ret; + } +#endif /* CONFIG_OF */ + + desc = &gdev->descs[entry->offset]; + + __set_bit(GPIOD_FLAG_SHARED, &desc->flags); + /* + * Shared GPIOs are not requested via the normal path. Make + * them inaccessible to anyone even before we register the + * chip. + */ + ret = gpiod_request_commit(desc, "shared"); + if (ret) + return ret; + + pr_debug("GPIO %u owned by %s is shared by multiple consumers\n", + entry->offset, gpio_device_get_label(gdev)); + } list_for_each_entry(ref, &entry->refs, list) { pr_debug("Setting up a shared GPIO entry for %s (con_id: '%s')\n", @@ -575,6 +598,8 @@ void gpio_device_teardown_shared(struct gpio_device *gdev) struct gpio_shared_ref *ref; list_for_each_entry(entry, &gpio_shared_list, list) { + guard(mutex)(&entry->lock); + if (!device_match_fwnode(&gdev->dev, entry->fwnode)) continue; @@ -748,14 +773,14 @@ static bool gpio_shared_entry_is_really_shared(struct gpio_shared_entry *entry) static void gpio_shared_free_exclusive(void) { struct gpio_shared_entry *entry, *epos; + struct gpio_shared_ref *ref, *rpos; list_for_each_entry_safe(entry, epos, &gpio_shared_list, list) { if (gpio_shared_entry_is_really_shared(entry)) continue; - gpio_shared_drop_ref(list_first_entry(&entry->refs, - struct gpio_shared_ref, - list)); + list_for_each_entry_safe(ref, rpos, &entry->refs, list) + gpio_shared_drop_ref(ref); gpio_shared_drop_entry(entry); } }
diff --git a/drivers/gpio/gpiolib-shared.h b/drivers/gpio/gpiolib-shared.h index 40568ef..15e72a8 100644 --- a/drivers/gpio/gpiolib-shared.h +++ b/drivers/gpio/gpiolib-shared.h
@@ -11,17 +11,19 @@ struct gpio_device; struct gpio_desc; struct device; +struct fwnode_handle; #if IS_ENABLED(CONFIG_GPIO_SHARED) -int gpio_device_setup_shared(struct gpio_device *gdev); +int gpiochip_setup_shared(struct gpio_chip *gc); void gpio_device_teardown_shared(struct gpio_device *gdev); -int gpio_shared_add_proxy_lookup(struct device *consumer, const char *con_id, - unsigned long lflags); +int gpio_shared_add_proxy_lookup(struct device *consumer, + struct fwnode_handle *fwnode, + const char *con_id, unsigned long lflags); #else -static inline int gpio_device_setup_shared(struct gpio_device *gdev) +static inline int gpiochip_setup_shared(struct gpio_chip *gc) { return 0; } @@ -29,6 +31,7 @@ static inline int gpio_device_setup_shared(struct gpio_device *gdev) static inline void gpio_device_teardown_shared(struct gpio_device *gdev) { } static inline int gpio_shared_add_proxy_lookup(struct device *consumer, + struct fwnode_handle *fwnode, const char *con_id, unsigned long lflags) {
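The of_xlate call added to gpiochip_setup_shared() converts a devicetree line offset into a chip-relative offset, since the two need not agree on every controller. A simplified sketch of that translation, assuming the common two-cell GPIO specifier (dt_offset_to_chip_offset() is a hypothetical helper; most chips use the identity mapping of of_gpio_simple_xlate):

#include <linux/gpio/driver.h>
#include <linux/of.h>

static int dt_offset_to_chip_offset(struct gpio_chip *gc,
				    struct device_node *np, u32 dt_offset)
{
	/* Two-cell specifier: args[0] = line offset, args[1] = flags */
	struct of_phandle_args spec = {
		.np = np,
		.args_count = 2,
		.args = { dt_offset, 0 },
	};

	if (!gc->of_xlate)
		return dt_offset;		/* identity when no translator */

	return gc->of_xlate(gc, &spec, NULL);	/* chip offset or -errno */
}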
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 86a171e..300de30 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c
@@ -892,13 +892,15 @@ static const struct device_type gpio_dev_type = { #define gcdev_unregister(gdev) device_del(&(gdev)->dev) #endif +/* + * An initial reference count has been held in gpiochip_add_data_with_key(). + * The caller should drop the reference via gpio_device_put() on errors. + */ static int gpiochip_setup_dev(struct gpio_device *gdev) { struct fwnode_handle *fwnode = dev_fwnode(&gdev->dev); int ret; - device_initialize(&gdev->dev); - /* * If fwnode doesn't belong to another device, it's safe to clear its * initialized flag. @@ -964,9 +966,11 @@ static void gpiochip_setup_devs(void) list_for_each_entry_srcu(gdev, &gpio_devices, list, srcu_read_lock_held(&gpio_devices_srcu)) { ret = gpiochip_setup_dev(gdev); - if (ret) + if (ret) { + gpio_device_put(gdev); dev_err(&gdev->dev, "Failed to initialize gpio device (%d)\n", ret); + } } } @@ -1047,33 +1051,65 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, int base = 0; int ret; - /* - * First: allocate and populate the internal stat container, and - * set up the struct device. - */ gdev = kzalloc(sizeof(*gdev), GFP_KERNEL); if (!gdev) return -ENOMEM; - - gdev->dev.type = &gpio_dev_type; - gdev->dev.bus = &gpio_bus_type; - gdev->dev.parent = gc->parent; - rcu_assign_pointer(gdev->chip, gc); - gc->gpiodev = gdev; gpiochip_set_data(gc, data); - device_set_node(&gdev->dev, gpiochip_choose_fwnode(gc)); - ret = ida_alloc(&gpio_ida, GFP_KERNEL); if (ret < 0) goto err_free_gdev; gdev->id = ret; - ret = dev_set_name(&gdev->dev, GPIOCHIP_NAME "%d", gdev->id); + ret = init_srcu_struct(&gdev->srcu); if (ret) goto err_free_ida; + rcu_assign_pointer(gdev->chip, gc); + ret = init_srcu_struct(&gdev->desc_srcu); + if (ret) + goto err_cleanup_gdev_srcu; + + ret = dev_set_name(&gdev->dev, GPIOCHIP_NAME "%d", gdev->id); + if (ret) + goto err_cleanup_desc_srcu; + + device_initialize(&gdev->dev); + /* + * After this point any allocated resources to `gdev` will be + * free():ed by gpiodev_release(). If you add new resources + * then make sure they get free():ed there. 
+ */ + gdev->dev.type = &gpio_dev_type; + gdev->dev.bus = &gpio_bus_type; + gdev->dev.parent = gc->parent; + device_set_node(&gdev->dev, gpiochip_choose_fwnode(gc)); + + ret = gpiochip_get_ngpios(gc, &gdev->dev); + if (ret) + goto err_put_device; + gdev->ngpio = gc->ngpio; + + gdev->descs = kcalloc(gc->ngpio, sizeof(*gdev->descs), GFP_KERNEL); + if (!gdev->descs) { + ret = -ENOMEM; + goto err_put_device; + } + + gdev->label = kstrdup_const(gc->label ?: "unknown", GFP_KERNEL); + if (!gdev->label) { + ret = -ENOMEM; + goto err_put_device; + } + + gdev->can_sleep = gc->can_sleep; + rwlock_init(&gdev->line_state_lock); + RAW_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier); + BLOCKING_INIT_NOTIFIER_HEAD(&gdev->device_notifier); +#ifdef CONFIG_PINCTRL + INIT_LIST_HEAD(&gdev->pin_ranges); +#endif if (gc->parent && gc->parent->driver) gdev->owner = gc->parent->driver->owner; else if (gc->owner) @@ -1082,37 +1118,6 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, else gdev->owner = THIS_MODULE; - ret = gpiochip_get_ngpios(gc, &gdev->dev); - if (ret) - goto err_free_dev_name; - - gdev->descs = kcalloc(gc->ngpio, sizeof(*gdev->descs), GFP_KERNEL); - if (!gdev->descs) { - ret = -ENOMEM; - goto err_free_dev_name; - } - - gdev->label = kstrdup_const(gc->label ?: "unknown", GFP_KERNEL); - if (!gdev->label) { - ret = -ENOMEM; - goto err_free_descs; - } - - gdev->ngpio = gc->ngpio; - gdev->can_sleep = gc->can_sleep; - - rwlock_init(&gdev->line_state_lock); - RAW_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier); - BLOCKING_INIT_NOTIFIER_HEAD(&gdev->device_notifier); - - ret = init_srcu_struct(&gdev->srcu); - if (ret) - goto err_free_label; - - ret = init_srcu_struct(&gdev->desc_srcu); - if (ret) - goto err_cleanup_gdev_srcu; - scoped_guard(mutex, &gpio_devices_lock) { /* * TODO: this allocates a Linux GPIO number base in the global @@ -1127,7 +1132,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, if (base < 0) { ret = base; base = 0; - goto err_cleanup_desc_srcu; + goto err_put_device; } /* @@ -1147,14 +1152,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, ret = gpiodev_add_to_list_unlocked(gdev); if (ret) { gpiochip_err(gc, "GPIO integer space overlap, cannot add chip\n"); - goto err_cleanup_desc_srcu; + goto err_put_device; } } -#ifdef CONFIG_PINCTRL - INIT_LIST_HEAD(&gdev->pin_ranges); -#endif - if (gc->names) gpiochip_set_desc_names(gc); @@ -1210,7 +1211,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, if (ret) goto err_remove_irqchip_mask; - ret = gpio_device_setup_shared(gdev); + ret = gpiochip_setup_shared(gc); if (ret) goto err_remove_irqchip; @@ -1248,25 +1249,19 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, scoped_guard(mutex, &gpio_devices_lock) list_del_rcu(&gdev->list); synchronize_srcu(&gpio_devices_srcu); - if (gdev->dev.release) { - /* release() has been registered by gpiochip_setup_dev() */ - gpio_device_put(gdev); - goto err_print_message; - } +err_put_device: + gpio_device_put(gdev); + goto err_print_message; + err_cleanup_desc_srcu: cleanup_srcu_struct(&gdev->desc_srcu); err_cleanup_gdev_srcu: cleanup_srcu_struct(&gdev->srcu); -err_free_label: - kfree_const(gdev->label); -err_free_descs: - kfree(gdev->descs); -err_free_dev_name: - kfree(dev_name(&gdev->dev)); err_free_ida: ida_free(&gpio_ida, gdev->id); err_free_gdev: kfree(gdev); + err_print_message: /* failures here can mean systems won't boot... 
*/ if (ret != -EPROBE_DEFER) { @@ -2465,8 +2460,10 @@ int gpiod_request_commit(struct gpio_desc *desc, const char *label) return -EBUSY; offset = gpiod_hwgpio(desc); - if (!gpiochip_line_is_valid(guard.gc, offset)) - return -EINVAL; + if (!gpiochip_line_is_valid(guard.gc, offset)) { + ret = -EINVAL; + goto out_clear_bit; + } /* NOTE: gpio_request() can be called in early boot, * before IRQs are enabled, for non-sleeping (SOC) GPIOs. @@ -3267,8 +3264,12 @@ static int gpiochip_get(struct gpio_chip *gc, unsigned int offset) /* Make sure this is called after checking for gc->get(). */ ret = gc->get(gc, offset); - if (ret > 1) - ret = -EBADE; + if (ret > 1) { + gpiochip_warn(gc, + "invalid return value from gc->get(): %d, consider fixing the driver\n", + ret); + ret = !!ret; + } return ret; } @@ -4713,8 +4714,8 @@ struct gpio_desc *gpiod_find_and_request(struct device *consumer, * lookup table for the proxy device as previously * we only knew the consumer's fwnode. */ - ret = gpio_shared_add_proxy_lookup(consumer, con_id, - lookupflags); + ret = gpio_shared_add_proxy_lookup(consumer, fwnode, + con_id, lookupflags); if (ret) return ERR_PTR(ret);
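The gpiolib reordering above leans on the driver-core ownership rule: once device_initialize() has run and a release callback is reachable (here via gpio_dev_type), every later failure can be unwound with a single gpio_device_put(), and the release function frees whatever has been attached so far. A minimal sketch of the idiom with a hypothetical struct foo:

#include <linux/device.h>
#include <linux/slab.h>

struct foo {
	struct device dev;
	char *label;
};

static void foo_release(struct device *dev)
{
	struct foo *f = container_of(dev, struct foo, dev);

	kfree(f->label);	/* anything hung off the device dies here */
	kfree(f);
}

static struct foo *foo_create(void)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (!f)
		return NULL;

	device_initialize(&f->dev);	/* refcount = 1, release armed below */
	f->dev.release = foo_release;

	f->label = kstrdup("foo", GFP_KERNEL);
	if (!f->label) {
		put_device(&f->dev);	/* single unwind path: foo_release() */
		return NULL;
	}
	return f;
}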
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index afe5ca8..db7858f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -641,6 +641,7 @@ static void aca_error_fini(struct aca_error *aerr) aca_bank_error_remove(aerr, bank_error); out_unlock: + mutex_unlock(&aerr->lock); mutex_destroy(&aerr->lock); }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 40c2243..4f27c75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -692,9 +692,9 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, goto err_ib_sched; } - /* Drop the initial kref_init count (see drm_sched_main as example) */ - dma_fence_put(f); ret = dma_fence_wait(f, false); + /* Drop the returned fence reference after the wait completes */ + dma_fence_put(f); err_ib_sched: amdgpu_job_free(job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 06c1913..29b400c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1439,7 +1439,10 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, *process_info = info; } - vm->process_info = *process_info; + if (cmpxchg(&vm->process_info, NULL, *process_info) != NULL) { + ret = -EINVAL; + goto already_acquired; + } /* Validate page directory and attach eviction fence */ ret = amdgpu_bo_reserve(vm->root.bo, true); @@ -1479,6 +1482,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, amdgpu_bo_unreserve(vm->root.bo); reserve_pd_fail: vm->process_info = NULL; +already_acquired: if (info) { dma_fence_put(&info->eviction_fence->base); *process_info = NULL;
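The cmpxchg() in init_kfd_vm() is a claim-once guard: it installs *process_info only if vm->process_info is still NULL, so a concurrent second acquirer observes the non-NULL old value and bails out instead of silently overwriting the first owner's pointer. Reduced to its essentials (claim() and owner are illustrative):

#include <linux/atomic.h>
#include <linux/errno.h>

static void *owner;	/* claimed at most once, cf. vm->process_info */

static int claim(void *mine)
{
	/* cmpxchg() returns the old value: NULL means this caller won
	 * the race; anything else means somebody claimed it first. */
	if (cmpxchg(&owner, NULL, mine) != NULL)
		return -EINVAL;
	return 0;
}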
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 4662bfb..43864df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -36,6 +36,7 @@ #define AMDGPU_BO_LIST_MAX_PRIORITY 32u #define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1) +#define AMDGPU_BO_LIST_MAX_ENTRIES (128 * 1024) static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu) { @@ -188,6 +189,9 @@ int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, const uint32_t bo_number = in->bo_number; struct drm_amdgpu_bo_list_entry *info; + if (bo_number > AMDGPU_BO_LIST_MAX_ENTRIES) + return -EINVAL; + /* copy the handle array from userspace to a kernel buffer */ if (likely(info_size == bo_info_size)) { info = vmemdup_array_user(uptr, bo_number, info_size);
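The new AMDGPU_BO_LIST_MAX_ENTRIES cap bounds a userspace-controlled count before it reaches the allocator; without it, a hostile bo_number drives vmemdup_array_user() into an arbitrarily large vmalloc. A sketch of the defensive shape (copy_entries() and MAX_ENTRIES are illustrative):

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/string.h>

#define MAX_ENTRIES (128 * 1024)	/* cf. AMDGPU_BO_LIST_MAX_ENTRIES */

static void *copy_entries(const void __user *uptr, u32 n, size_t elem_size)
{
	if (n > MAX_ENTRIES)			/* reject before allocating */
		return ERR_PTR(-EINVAL);

	/* the multiply is overflow-checked inside the helper */
	return vmemdup_array_user(uptr, n, elem_size);
}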
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d9789e0..6d8531f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2690,8 +2690,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) break; default: r = amdgpu_discovery_set_ip_blocks(adev); - if (r) + if (r) { + adev->num_ip_blocks = 0; return r; + } break; } @@ -3247,6 +3249,8 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev, i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; if (!adev->ip_blocks[i].status.late_initialized) continue; + if (!adev->ip_blocks[i].version) + continue; /* skip CG for GFX, SDMA on S0ix */ if (adev->in_s0ix && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || @@ -3286,6 +3290,8 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev, i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; if (!adev->ip_blocks[i].status.late_initialized) continue; + if (!adev->ip_blocks[i].version) + continue; /* skip PG for GFX, SDMA on S0ix */ if (adev->in_s0ix && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || @@ -3493,6 +3499,8 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) int i, r; for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].version) + continue; if (!adev->ip_blocks[i].version->funcs->early_fini) continue; @@ -3570,6 +3578,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (!adev->ip_blocks[i].status.sw) continue; + if (!adev->ip_blocks[i].version) + continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { amdgpu_ucode_free_bo(adev); amdgpu_free_static_csa(&adev->virt.csa_obj); @@ -3596,6 +3606,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.late_initialized) continue; + if (!adev->ip_blocks[i].version) + continue; if (adev->ip_blocks[i].version->funcs->late_fini) adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]); adev->ip_blocks[i].status.late_initialized = false; @@ -4195,7 +4207,8 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) { - char *input = amdgpu_lockup_timeout; + char buf[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH]; + char *input = buf; char *timeout_setting = NULL; int index = 0; long timeout; @@ -4205,9 +4218,17 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout = adev->video_timeout = msecs_to_jiffies(2000); - if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) + if (!strnlen(amdgpu_lockup_timeout, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) return 0; + /* + * strsep() destructively modifies its input by replacing delimiters + * with '\0'. Use a stack copy so the global module parameter buffer + * remains intact for multi-GPU systems where this function is called + * once per device. 
+ */ + strscpy(buf, amdgpu_lockup_timeout, sizeof(buf)); + while ((timeout_setting = strsep(&input, ",")) && strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { ret = kstrtol(timeout_setting, 0, &timeout); @@ -7059,6 +7080,15 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) dev_info(adev->dev, "PCI error: slot reset callback!!\n"); memset(&reset_context, 0, sizeof(reset_context)); + INIT_LIST_HEAD(&device_list); + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + mutex_lock(&hive->hive_lock); + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) + list_add_tail(&tmp_adev->reset_list, &device_list); + } else { + list_add_tail(&adev->reset_list, &device_list); + } if (adev->pcie_reset_ctx.swus) link_dev = adev->pcie_reset_ctx.swus; @@ -7099,19 +7129,13 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) reset_context.reset_req_dev = adev; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); - INIT_LIST_HEAD(&device_list); - hive = amdgpu_get_xgmi_hive(adev); if (hive) { - mutex_lock(&hive->hive_lock); reset_context.hive = hive; - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) tmp_adev->pcie_reset_ctx.in_link_reset = true; - list_add_tail(&tmp_adev->reset_list, &device_list); - } } else { set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); - list_add_tail(&adev->reset_list, &device_list); } r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
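The strsep() note deserves a concrete illustration: strsep() advances its cursor and overwrites each delimiter with '\0', so parsing the module parameter in place would leave only the first field behind for the next GPU. Parsing a private copy keeps the source intact (parse_timeouts() is a hypothetical reduction):

#include <linux/printk.h>
#include <linux/string.h>

static void parse_timeouts(const char *param)
{
	char buf[64];
	char *cur, *tok;

	strscpy(buf, param, sizeof(buf));	/* private, writable copy */
	cur = buf;
	while ((tok = strsep(&cur, ",")) != NULL) {
		if (!*tok)
			continue;		/* consecutive commas */
		pr_debug("token: %s\n", tok);	/* delimiter in buf is now '\0' */
	}
	/* param itself is untouched; the next device parses it again */
}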
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 95d26f0..c91638e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2703,8 +2703,12 @@ static int amdgpu_pmops_freeze(struct device *dev) if (r) return r; - if (amdgpu_acpi_should_gpu_reset(adev)) - return amdgpu_asic_reset(adev); + if (amdgpu_acpi_should_gpu_reset(adev)) { + amdgpu_device_lock_reset_domain(adev->reset_domain); + r = amdgpu_asic_reset(adev); + amdgpu_device_unlock_reset_domain(adev->reset_domain); + return r; + } return 0; }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index e2d32c2..bc772ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -404,6 +404,50 @@ void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa, } /** + * amdgpu_gart_map_gfx9_mqd - map mqd and ctrl_stack dma_addresses into GART entries + * + * @adev: amdgpu_device pointer + * @offset: offset into the GPU's gart aperture + * @pages: number of pages to bind + * @dma_addr: DMA addresses of pages + * @flags: page table entry flags + * + * Map the MQD and control stack addresses into GART entries with the correct + * memory types on gfxv9. The MQD occupies the first 4KB and is followed by + * the control stack. The MQD uses UC (uncached) memory, while the control stack + * uses NC (non-coherent) memory. + */ +void amdgpu_gart_map_gfx9_mqd(struct amdgpu_device *adev, uint64_t offset, + int pages, dma_addr_t *dma_addr, uint64_t flags) +{ + uint64_t page_base; + unsigned int i, j, t; + int idx; + uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC); + void *dst; + + if (!adev->gart.ptr) + return; + + if (!drm_dev_enter(adev_to_drm(adev), &idx)) + return; + + t = offset / AMDGPU_GPU_PAGE_SIZE; + dst = adev->gart.ptr; + for (i = 0; i < pages; i++) { + page_base = dma_addr[i]; + for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) { + if ((i == 0) && (j == 0)) + amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags); + else + amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, ctrl_flags); + page_base += AMDGPU_GPU_PAGE_SIZE; + } + } + drm_dev_exit(idx); +} + +/** * amdgpu_gart_bind - bind pages into the gart page table * * @adev: amdgpu_device pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index d311827..6ebd2da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -62,6 +62,8 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, int pages, dma_addr_t *dma_addr, uint64_t flags, void *dst); +void amdgpu_gart_map_gfx9_mqd(struct amdgpu_device *adev, uint64_t offset, + int pages, dma_addr_t *dma_addr, uint64_t flags); void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int pages, dma_addr_t *dma_addr, uint64_t flags); void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,
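The layout the new helper encodes, per XCC partition: the first GART entry covers the 4KB MQD and keeps the caller's (UC) flags, while every following entry belongs to the control stack and is remapped as NC. A stripped-down sketch of that first-entry-special walk (set_pte() is a stand-in for amdgpu_gmc_set_pte_pde(), and the mtype values are illustrative):

static void set_pte(unsigned int t, int mtype) { /* stub */ }

static void map_mqd_then_stack(unsigned int pages,
			       unsigned int gpu_pages_per_cpu_page)
{
	unsigned int i, j, t = 0;
	enum { MTYPE_UC = 0, MTYPE_NC = 1 };	/* illustrative values */

	for (i = 0; i < pages; i++) {
		for (j = 0; j < gpu_pages_per_cpu_page; j++, t++) {
			if (i == 0 && j == 0)
				set_pte(t, MTYPE_UC);	/* the 4KB MQD */
			else
				set_pte(t, MTYPE_NC);	/* control stack */
		}
	}
}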
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 64c519c..569c5a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -35,10 +35,13 @@ * PASIDs are global address space identifiers that can be shared * between the GPU, an IOMMU and the driver. VMs on different devices * may use the same PASID if they share the same address - * space. Therefore PASIDs are allocated using a global IDA. VMs are - * looked up from the PASID per amdgpu_device. + * space. Therefore PASIDs are allocated using an IDR cyclic allocator + * (similar to kernel PID allocation), which naturally delays reuse. + * VMs are looked up from the PASID per amdgpu_device. */ -static DEFINE_IDA(amdgpu_pasid_ida); + +static DEFINE_IDR(amdgpu_pasid_idr); +static DEFINE_SPINLOCK(amdgpu_pasid_idr_lock); /* Helper to free pasid from a fence callback */ struct amdgpu_pasid_cb { @@ -50,8 +53,8 @@ struct amdgpu_pasid_cb { * amdgpu_pasid_alloc - Allocate a PASID * @bits: Maximum width of the PASID in bits, must be at least 1 * - * Allocates a PASID of the given width while keeping smaller PASIDs - * available if possible. + * Uses the kernel's IDR cyclic allocator (same as PID allocation). + * Allocates sequentially with automatic wrap-around. * * Returns a positive integer on success. Returns %-EINVAL if bits==0. * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on @@ -59,14 +62,18 @@ struct amdgpu_pasid_cb { */ int amdgpu_pasid_alloc(unsigned int bits) { - int pasid = -EINVAL; + int pasid; - for (bits = min(bits, 31U); bits > 0; bits--) { - pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1), - (1U << bits) - 1, GFP_KERNEL); - if (pasid != -ENOSPC) - break; - } + if (bits == 0) + return -EINVAL; + + spin_lock(&amdgpu_pasid_idr_lock); + /* TODO: Need to replace the idr with an xarray, and then + * handle the internal locking with atomic-safe paths. + */ + pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1, + 1U << bits, GFP_ATOMIC); + spin_unlock(&amdgpu_pasid_idr_lock); if (pasid >= 0) trace_amdgpu_pasid_allocated(pasid); @@ -81,7 +88,10 @@ int amdgpu_pasid_alloc(unsigned int bits) void amdgpu_pasid_free(u32 pasid) { trace_amdgpu_pasid_freed(pasid); - ida_free(&amdgpu_pasid_ida, pasid); + + spin_lock(&amdgpu_pasid_idr_lock); + idr_remove(&amdgpu_pasid_idr, pasid); + spin_unlock(&amdgpu_pasid_idr_lock); } static void amdgpu_pasid_free_cb(struct dma_fence *fence, @@ -616,3 +626,15 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev) } } } + +/** + * amdgpu_pasid_mgr_cleanup - cleanup PASID manager + * + * Cleanup the IDR allocator. + */ +void amdgpu_pasid_mgr_cleanup(void) +{ + spin_lock(&amdgpu_pasid_idr_lock); + idr_destroy(&amdgpu_pasid_idr); + spin_unlock(&amdgpu_pasid_idr_lock); +}
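The practical effect of idr_alloc_cyclic() is that the search for a free id resumes after the last id handed out and only wraps when the range is exhausted, so a freed PASID is reused as late as possible. A small illustration of the allocation order (next_id() is a hypothetical wrapper; assumes bits is large enough that the range has not wrapped yet):

#include <linux/idr.h>

static DEFINE_IDR(ids);

static int next_id(unsigned int bits)
{
	return idr_alloc_cyclic(&ids, NULL, 1, 1U << bits, GFP_KERNEL);
}

/* Resulting order:
 *   next_id() -> 1, next_id() -> 2, next_id() -> 3
 *   idr_remove(&ids, 2)
 *   next_id() -> 4   (not 2: the cursor is already past it)
 * Id 2 is only handed out again after the cursor wraps around.
 */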
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index b3649cd..a579194 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -74,6 +74,7 @@ int amdgpu_pasid_alloc(unsigned int bits); void amdgpu_pasid_free(u32 pasid); void amdgpu_pasid_free_delayed(struct dma_resv *resv, u32 pasid); +void amdgpu_pasid_mgr_cleanup(void); bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 77e2133..7f19554 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -83,7 +83,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); - if (adev == NULL) + if (adev == NULL || !adev->num_ip_blocks) return; amdgpu_unregister_gpu_instance(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index dc8d2f5..e244c12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -368,15 +368,15 @@ struct amdgpu_mode_info { struct drm_property *plane_ctm_property; /** - * @shaper_lut_property: Plane property to set pre-blending shaper LUT - * that converts color content before 3D LUT. If - * plane_shaper_tf_property != Identity TF, AMD color module will + * @plane_shaper_lut_property: Plane property to set pre-blending + * shaper LUT that converts color content before 3D LUT. + * If plane_shaper_tf_property != Identity TF, AMD color module will * combine the user LUT values with pre-defined TF into the LUT * parameters to be programmed. */ struct drm_property *plane_shaper_lut_property; /** - * @shaper_lut_size_property: Plane property for the size of + * @plane_shaper_lut_size_property: Plane property for the size of * pre-blending shaper LUT as supported by the driver (read-only). */ struct drm_property *plane_shaper_lut_size_property; @@ -400,10 +400,10 @@ struct amdgpu_mode_info { */ struct drm_property *plane_lut3d_property; /** - * @plane_degamma_lut_size_property: Plane property to define the max - * size of 3D LUT as supported by the driver (read-only). The max size - * is the max size of one dimension and, therefore, the max number of - * entries for 3D LUT array is the 3D LUT size cubed; + * @plane_lut3d_size_property: Plane property to define the max size + * of 3D LUT as supported by the driver (read-only). The max size is + * the max size of one dimension and, therefore, the max number of + * entries for 3D LUT array is the 3D LUT size cubed. */ struct drm_property *plane_lut3d_size_property; /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 6e8aad9..0d3c18f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
@@ -332,13 +332,13 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size if (!context || !context->initialized) { dev_err(adev->dev, "TA is not initialized\n"); ret = -EINVAL; - goto err_free_shared_buf; + goto free_shared_buf; } if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_invoke) { dev_err(adev->dev, "Unsupported function to invoke TA\n"); ret = -EOPNOTSUPP; - goto err_free_shared_buf; + goto free_shared_buf; } context->session_id = ta_id; @@ -346,7 +346,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size mutex_lock(&psp->ras_context.mutex); ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len); if (ret) - goto err_free_shared_buf; + goto unlock; ret = psp_fn_ta_invoke(psp, cmd_id); if (ret || context->resp_status) { @@ -354,15 +354,17 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size ret, context->resp_status); if (!ret) { ret = -EINVAL; - goto err_free_shared_buf; + goto unlock; } } if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len)) ret = -EFAULT; -err_free_shared_buf: +unlock: mutex_unlock(&psp->ras_context.mutex); + +free_shared_buf: kfree(shared_buf); return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index eeaa56c..0ccb317 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -853,25 +853,15 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev, int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp); uint64_t page_idx, pages_per_xcc; int i; - uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC); pages_per_xcc = total_pages; do_div(pages_per_xcc, num_xcc); for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) { - /* MQD page: use default flags */ - amdgpu_gart_bind(adev, + amdgpu_gart_map_gfx9_mqd(adev, gtt->offset + (page_idx << PAGE_SHIFT), - 1, >t->ttm.dma_address[page_idx], flags); - /* - * Ctrl pages - modify the memory type to NC (ctrl_flags) from - * the second page of the BO onward. - */ - amdgpu_gart_bind(adev, - gtt->offset + ((page_idx + 1) << PAGE_SHIFT), - pages_per_xcc - 1, - >t->ttm.dma_address[page_idx + 1], - ctrl_flags); + pages_per_xcc, >t->ttm.dma_address[page_idx], + flags); } }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 9d67b77..0a1b932 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -446,8 +446,7 @@ static int amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue) return ret; } -static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue, - int queue_id) +static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue) { struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; @@ -461,7 +460,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue, uq_funcs->mqd_destroy(queue); amdgpu_userq_fence_driver_free(queue); /* Use interrupt-safe locking since IRQ handlers may access these XArrays */ - xa_erase_irq(&uq_mgr->userq_xa, (unsigned long)queue_id); xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index); queue->userq_mgr = NULL; list_del(&queue->userq_va_list); @@ -470,12 +468,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue, up_read(&adev->reset_domain->sem); } -static struct amdgpu_usermode_queue * -amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid) -{ - return xa_load(&uq_mgr->userq_xa, qid); -} - void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_eviction_fence_mgr *evf_mgr) @@ -608,6 +600,13 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, goto unpin_bo; } + /* Validate doorbell_offset is within the doorbell BO */ + if ((u64)db_info->doorbell_offset * db_size + db_size > + amdgpu_bo_size(db_obj->obj)) { + r = -EINVAL; + goto unpin_bo; + } + index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj, db_info->doorbell_offset, db_size); drm_dbg_driver(adev_to_drm(uq_mgr->adev), @@ -625,22 +624,13 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, } static int -amdgpu_userq_destroy(struct drm_file *filp, int queue_id) +amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { - struct amdgpu_fpriv *fpriv = filp->driver_priv; - struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; - struct amdgpu_usermode_queue *queue; int r = 0; cancel_delayed_work_sync(&uq_mgr->resume_work); mutex_lock(&uq_mgr->userq_mutex); - queue = amdgpu_userq_find(uq_mgr, queue_id); - if (!queue) { - drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n"); - mutex_unlock(&uq_mgr->userq_mutex); - return -EINVAL; - } amdgpu_userq_wait_for_last_fence(queue); /* Cancel any pending hang detection work and cleanup */ if (queue->hang_detect_fence) { @@ -672,7 +662,7 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id) drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n"); queue->state = AMDGPU_USERQ_STATE_HUNG; } - amdgpu_userq_cleanup(queue, queue_id); + amdgpu_userq_cleanup(queue); mutex_unlock(&uq_mgr->userq_mutex); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); @@ -680,6 +670,37 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id) return r; } +static void amdgpu_userq_kref_destroy(struct kref *kref) +{ + int r; + struct amdgpu_usermode_queue *queue = + container_of(kref, struct amdgpu_usermode_queue, refcount); + struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; + + r = amdgpu_userq_destroy(uq_mgr, queue); + if (r) + drm_file_err(uq_mgr->file, "Failed to destroy usermode queue %d\n", r); +} + +struct amdgpu_usermode_queue *amdgpu_userq_get(struct amdgpu_userq_mgr *uq_mgr, u32 qid) +{ + struct amdgpu_usermode_queue *queue; + + xa_lock(&uq_mgr->userq_xa); + queue = xa_load(&uq_mgr->userq_xa, qid); + if (queue) + kref_get(&queue->refcount); + 
xa_unlock(&uq_mgr->userq_xa); + + return queue; +} + +void amdgpu_userq_put(struct amdgpu_usermode_queue *queue) +{ + if (queue) + kref_put(&queue->refcount, amdgpu_userq_kref_destroy); +} + static int amdgpu_userq_priority_permit(struct drm_file *filp, int priority) { @@ -834,6 +855,9 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto unlock; } + /* drop this refcount during queue destroy */ + kref_init(&queue->refcount); + /* Wait for mode-1 reset to complete */ down_read(&adev->reset_domain->sem); r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL)); @@ -985,7 +1009,9 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { union drm_amdgpu_userq *args = data; - int r; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_usermode_queue *queue; + int r = 0; if (!amdgpu_userq_enabled(dev)) return -ENOTSUPP; @@ -1000,11 +1026,16 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, drm_file_err(filp, "Failed to create usermode queue\n"); break; - case AMDGPU_USERQ_OP_FREE: - r = amdgpu_userq_destroy(filp, args->in.queue_id); - if (r) - drm_file_err(filp, "Failed to destroy usermode queue\n"); + case AMDGPU_USERQ_OP_FREE: { + xa_lock(&fpriv->userq_mgr.userq_xa); + queue = __xa_erase(&fpriv->userq_mgr.userq_xa, args->in.queue_id); + xa_unlock(&fpriv->userq_mgr.userq_xa); + if (!queue) + return -ENOENT; + + amdgpu_userq_put(queue); break; + } default: drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op); @@ -1023,16 +1054,23 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) /* Resume all the queues for this process */ xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { + queue = amdgpu_userq_get(uq_mgr, queue_id); + if (!queue) + continue; + if (!amdgpu_userq_buffer_vas_mapped(queue)) { drm_file_err(uq_mgr->file, "trying restore queue without va mapping\n"); queue->state = AMDGPU_USERQ_STATE_INVALID_VA; + amdgpu_userq_put(queue); continue; } r = amdgpu_userq_restore_helper(queue); if (r) ret = r; + + amdgpu_userq_put(queue); } if (ret) @@ -1266,9 +1304,13 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) amdgpu_userq_detect_and_reset_queues(uq_mgr); /* Try to unmap all the queues in this process ctx */ xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { + queue = amdgpu_userq_get(uq_mgr, queue_id); + if (!queue) + continue; r = amdgpu_userq_preempt_helper(queue); if (r) ret = r; + amdgpu_userq_put(queue); } if (ret) @@ -1301,16 +1343,24 @@ amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) int ret; xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { + queue = amdgpu_userq_get(uq_mgr, queue_id); + if (!queue) + continue; + struct dma_fence *f = queue->last_fence; - if (!f || dma_fence_is_signaled(f)) + if (!f || dma_fence_is_signaled(f)) { + amdgpu_userq_put(queue); continue; + } ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); if (ret <= 0) { drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n", f->context, f->seqno); + amdgpu_userq_put(queue); return -ETIMEDOUT; } + amdgpu_userq_put(queue); } return 0; @@ -1361,20 +1411,23 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) { struct amdgpu_usermode_queue *queue; - unsigned long queue_id; + unsigned long queue_id = 0; - cancel_delayed_work_sync(&userq_mgr->resume_work); + for (;;) { + xa_lock(&userq_mgr->userq_xa); + queue = xa_find(&userq_mgr->userq_xa, &queue_id, 
ULONG_MAX, + XA_PRESENT); + if (queue) + __xa_erase(&userq_mgr->userq_xa, queue_id); + xa_unlock(&userq_mgr->userq_xa); - mutex_lock(&userq_mgr->userq_mutex); - amdgpu_userq_detect_and_reset_queues(userq_mgr); - xa_for_each(&userq_mgr->userq_xa, queue_id, queue) { - amdgpu_userq_wait_for_last_fence(queue); - amdgpu_userq_unmap_helper(queue); - amdgpu_userq_cleanup(queue, queue_id); + if (!queue) + break; + + amdgpu_userq_put(queue); } xa_destroy(&userq_mgr->userq_xa); - mutex_unlock(&userq_mgr->userq_mutex); mutex_destroy(&userq_mgr->userq_mutex); }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 5845d895..736c1d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -74,6 +74,7 @@ struct amdgpu_usermode_queue { struct dentry *debugfs_queue; struct delayed_work hang_detect_work; struct dma_fence *hang_detect_fence; + struct kref refcount; struct list_head userq_va_list; }; @@ -112,6 +113,9 @@ struct amdgpu_db_info { struct amdgpu_userq_obj *db_obj; }; +struct amdgpu_usermode_queue *amdgpu_userq_get(struct amdgpu_userq_mgr *uq_mgr, u32 qid); +void amdgpu_userq_put(struct amdgpu_usermode_queue *queue); + int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
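The amdgpu_userq_get()/amdgpu_userq_put() pair introduced above is the classic lookup-plus-refcount shape: the lookup takes a kref while holding the XArray lock so the queue cannot be freed between xa_load() and kref_get(), and AMDGPU_USERQ_OP_FREE becomes "erase from the table, then drop the table's reference", with the real teardown running from the kref release. A compact sketch with simplified, hypothetical types:

#include <linux/errno.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/xarray.h>

struct q {
	struct kref ref;	/* kref_init()'ed at creation, owned by the table */
};

static DEFINE_XARRAY(qs);

static void q_release(struct kref *k)
{
	/* the real code tears down HW state before freeing */
	kfree(container_of(k, struct q, ref));
}

static struct q *q_get(unsigned long id)
{
	struct q *x;

	xa_lock(&qs);
	x = xa_load(&qs, id);
	if (x)
		kref_get(&x->ref);	/* pinned: cannot be freed under us */
	xa_unlock(&qs);
	return x;
}

static void q_put(struct q *x)
{
	if (x)
		kref_put(&x->ref, q_release);
}

static int q_free(unsigned long id)	/* cf. AMDGPU_USERQ_OP_FREE */
{
	struct q *x;

	xa_lock(&qs);
	x = __xa_erase(&qs, id);	/* no new lookups can find it */
	xa_unlock(&qs);
	if (!x)
		return -ENOENT;
	q_put(x);			/* drop the table's reference */
	return 0;
}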
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 8013260..5239b06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -35,6 +35,8 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops; static struct kmem_cache *amdgpu_userq_fence_slab; +#define AMDGPU_USERQ_MAX_HANDLES (1U << 16) + int amdgpu_userq_fence_slab_init(void) { amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence", @@ -464,7 +466,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, struct drm_amdgpu_userq_signal *args = data; struct drm_gem_object **gobj_write = NULL; struct drm_gem_object **gobj_read = NULL; - struct amdgpu_usermode_queue *queue; + struct amdgpu_usermode_queue *queue = NULL; struct amdgpu_userq_fence *userq_fence; struct drm_syncobj **syncobj = NULL; u32 *bo_handles_write, num_write_bo_handles; @@ -478,6 +480,11 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, if (!amdgpu_userq_enabled(dev)) return -ENOTSUPP; + if (args->num_syncobj_handles > AMDGPU_USERQ_MAX_HANDLES || + args->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || + args->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES) + return -EINVAL; + num_syncobj_handles = args->num_syncobj_handles; syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles), size_mul(sizeof(u32), num_syncobj_handles)); @@ -546,7 +553,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } /* Retrieve the user queue */ - queue = xa_load(&userq_mgr->userq_xa, args->queue_id); + queue = amdgpu_userq_get(userq_mgr, args->queue_id); if (!queue) { r = -ENOENT; goto put_gobj_write; @@ -641,6 +648,9 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, free_syncobj_handles: kfree(syncobj_handles); + if (queue) + amdgpu_userq_put(queue); + return r; } @@ -653,7 +663,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, struct drm_amdgpu_userq_wait *wait_info = data; struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; - struct amdgpu_usermode_queue *waitq; + struct amdgpu_usermode_queue *waitq = NULL; struct drm_gem_object **gobj_write; struct drm_gem_object **gobj_read; struct dma_fence **fences = NULL; @@ -664,6 +674,11 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, if (!amdgpu_userq_enabled(dev)) return -ENOTSUPP; + if (wait_info->num_syncobj_handles > AMDGPU_USERQ_MAX_HANDLES || + wait_info->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || + wait_info->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES) + return -EINVAL; + num_read_bo_handles = wait_info->num_bo_read_handles; bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles), size_mul(sizeof(u32), num_read_bo_handles)); @@ -833,7 +848,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, DMA_RESV_USAGE_READ, fence) { - if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + if (num_fences >= wait_info->num_fences) { r = -EINVAL; goto free_fences; } @@ -850,7 +865,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, DMA_RESV_USAGE_WRITE, fence) { - if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + if (num_fences >= wait_info->num_fences) { r = -EINVAL; goto free_fences; } @@ -874,8 +889,9 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, goto free_fences; dma_fence_unwrap_for_each(f, &iter, fence) { - if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + if (num_fences >= wait_info->num_fences) { r = -EINVAL; + dma_fence_put(fence); goto free_fences; } @@ -898,8 
+914,9 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, if (r) goto free_fences; - if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + if (num_fences >= wait_info->num_fences) { r = -EINVAL; + dma_fence_put(fence); goto free_fences; } @@ -912,7 +929,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, */ num_fences = dma_fence_dedup_array(fences, num_fences); - waitq = xa_load(&userq_mgr->userq_xa, wait_info->waitq_id); + waitq = amdgpu_userq_get(userq_mgr, wait_info->waitq_id); if (!waitq) { r = -EINVAL; goto free_fences; @@ -969,32 +986,14 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, r = -EFAULT; goto free_fences; } - - kfree(fences); - kfree(fence_info); } - drm_exec_fini(&exec); - for (i = 0; i < num_read_bo_handles; i++) - drm_gem_object_put(gobj_read[i]); - kfree(gobj_read); - - for (i = 0; i < num_write_bo_handles; i++) - drm_gem_object_put(gobj_write[i]); - kfree(gobj_write); - - kfree(timeline_points); - kfree(timeline_handles); - kfree(syncobj_handles); - kfree(bo_handles_write); - kfree(bo_handles_read); - - return 0; - free_fences: - while (num_fences-- > 0) - dma_fence_put(fences[num_fences]); - kfree(fences); + if (fences) { + while (num_fences-- > 0) + dma_fence_put(fences[num_fences]); + kfree(fences); + } free_fence_info: kfree(fence_info); exec_fini: @@ -1018,5 +1017,8 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, free_bo_handles_read: kfree(bo_handles_read); + if (waitq) + amdgpu_userq_put(waitq); + return r; }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index f2beb98..a677e38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1069,7 +1069,10 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, } /* Prepare a TLB flush fence to be attached to PTs */ - if (!params->unlocked) { + /* The check for need_tlb_fence should be dropped once we + * sort out the issues with KIQ/MES TLB invalidation timeouts. + */ + if (!params->unlocked && vm->need_tlb_fence) { amdgpu_vm_tlb_fence_create(params->adev, vm, fence); /* Makes sure no PD/PT is freed before the flush */ @@ -2602,6 +2605,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, ttm_lru_bulk_move_init(&vm->lru_bulk_move); vm->is_compute_context = false; + vm->need_tlb_fence = amdgpu_userq_enabled(&adev->ddev); vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); @@ -2739,6 +2743,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) dma_fence_put(vm->last_update); vm->last_update = dma_fence_get_stub(); vm->is_compute_context = true; + vm->need_tlb_fence = true; unreserve_bo: amdgpu_bo_unreserve(vm->root.bo); @@ -2893,6 +2898,7 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) xa_destroy(&adev->vm_manager.pasids); amdgpu_vmid_mgr_fini(adev); + amdgpu_pasid_mgr_cleanup(); } /** @@ -2968,14 +2974,14 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, if (!root) return false; - addr /= AMDGPU_GPU_PAGE_SIZE; - if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, - node_id, addr, ts, write_fault)) { + node_id, addr >> PAGE_SHIFT, ts, write_fault)) { amdgpu_bo_unref(&root); return true; } + addr /= AMDGPU_GPU_PAGE_SIZE; + r = amdgpu_bo_reserve(root, true); if (r) goto error_unref;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 806d62e..d5b7061 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -173,7 +173,7 @@ struct amdgpu_bo_vm; #define AMDGPU_VA_RESERVED_SEQ64_SIZE (2ULL << 20) #define AMDGPU_VA_RESERVED_SEQ64_START(adev) (AMDGPU_VA_RESERVED_CSA_START(adev) \ - AMDGPU_VA_RESERVED_SEQ64_SIZE) -#define AMDGPU_VA_RESERVED_TRAP_SIZE (2ULL << 12) +#define AMDGPU_VA_RESERVED_TRAP_SIZE (1ULL << 16) #define AMDGPU_VA_RESERVED_TRAP_START(adev) (AMDGPU_VA_RESERVED_SEQ64_START(adev) \ - AMDGPU_VA_RESERVED_TRAP_SIZE) #define AMDGPU_VA_RESERVED_BOTTOM (1ULL << 16) @@ -441,6 +441,8 @@ struct amdgpu_vm { struct ttm_lru_bulk_move lru_bulk_move; /* Flag to indicate if VM is used for compute */ bool is_compute_context; + /* Flag to indicate if VM needs a TLB fence (KFD or KGD) */ + bool need_tlb_fence; /* Memory partition number, -1 means any partition */ int8_t mem_id;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index e35ed0c..8eba99a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -662,28 +662,35 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, } else { switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(9, 0, 0): - mmhub_cid = mmhub_client_ids_vega10[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_vega10) ? + mmhub_client_ids_vega10[cid][rw] : NULL; break; case IP_VERSION(9, 3, 0): - mmhub_cid = mmhub_client_ids_vega12[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_vega12) ? + mmhub_client_ids_vega12[cid][rw] : NULL; break; case IP_VERSION(9, 4, 0): - mmhub_cid = mmhub_client_ids_vega20[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_vega20) ? + mmhub_client_ids_vega20[cid][rw] : NULL; break; case IP_VERSION(9, 4, 1): - mmhub_cid = mmhub_client_ids_arcturus[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_arcturus) ? + mmhub_client_ids_arcturus[cid][rw] : NULL; break; case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 0): - mmhub_cid = mmhub_client_ids_raven[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_raven) ? + mmhub_client_ids_raven[cid][rw] : NULL; break; case IP_VERSION(1, 5, 0): case IP_VERSION(2, 4, 0): - mmhub_cid = mmhub_client_ids_renoir[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_renoir) ? + mmhub_client_ids_renoir[cid][rw] : NULL; break; case IP_VERSION(1, 8, 0): case IP_VERSION(9, 4, 2): - mmhub_cid = mmhub_client_ids_aldebaran[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_aldebaran) ? + mmhub_client_ids_aldebaran[cid][rw] : NULL; break; default: mmhub_cid = NULL;
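This decoder and the mmhub decoders below all share one shape: a hardware-supplied client id indexes a fixed string table, and the ARRAY_SIZE() guard turns an out-of-range id (possible with malfunctioning or newer hardware) into the existing NULL "unknown client" case instead of an out-of-bounds read. The pattern in isolation (the table contents are illustrative):

#include <linux/kernel.h>

static const char *const client_names[] = { "CB", "DB", "IA" };

static const char *client_name(unsigned int cid)
{
	/* unknown ids map to NULL rather than indexing past the table */
	return cid < ARRAY_SIZE(client_names) ? client_names[cid] : NULL;
}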
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c index b3590b3..485ecde 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
@@ -129,7 +129,7 @@ static int isp_genpd_add_device(struct device *dev, void *data) if (!pdev) return -EINVAL; - if (!dev->type->name) { + if (!dev->type || !dev->type->name) { drm_dbg(&adev->ddev, "Invalid device type to add\n"); goto exit; } @@ -165,7 +165,7 @@ static int isp_genpd_remove_device(struct device *dev, void *data) if (!pdev) return -EINVAL; - if (!dev->type->name) { + if (!dev->type || !dev->type->name) { drm_dbg(&adev->ddev, "Invalid device type to remove\n"); goto exit; }
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 8c74894..faac21e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -324,8 +324,10 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, r = amdgpu_userq_input_va_validate(adev, queue, compute_mqd->eop_va, 2048); - if (r) + if (r) { + kfree(compute_mqd); goto free_mqd; + } userq_props->eop_gpu_addr = compute_mqd->eop_va; userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; @@ -365,12 +367,16 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->shadow_va, shadow_info.shadow_size); - if (r) + if (r) { + kfree(mqd_gfx_v11); goto free_mqd; + } r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->csa_va, shadow_info.csa_size); - if (r) + if (r) { + kfree(mqd_gfx_v11); goto free_mqd; + } kfree(mqd_gfx_v11); } else if (queue->queue_type == AMDGPU_HW_IP_DMA) { @@ -390,8 +396,10 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, } r = amdgpu_userq_input_va_validate(adev, queue, mqd_sdma_v11->csa_va, 32); - if (r) + if (r) { + kfree(mqd_sdma_v11); goto free_mqd; + } userq_props->csa_addr = mqd_sdma_v11->csa_va; kfree(mqd_sdma_v11);
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 09ebb13..a926a33 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -720,11 +720,6 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; - if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f) - mes_set_hw_res_pkt.enable_lr_compute_wa = 1; - else - dev_info_once(mes->adev->dev, - "MES FW version must be >= 0x7f to enable LR compute workaround.\n"); if (amdgpu_mes_log_enable) { mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index b1c864d..023c734 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -731,6 +731,9 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) int i; struct amdgpu_device *adev = mes->adev; union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt; + uint32_t mes_rev = (pipe == AMDGPU_MES_SCHED_PIPE) ? + (mes->sched_version & AMDGPU_MES_VERSION_MASK) : + (mes->kiq_version & AMDGPU_MES_VERSION_MASK); memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); @@ -779,18 +782,13 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_pkt.use_different_vmid_compute = 1; mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; - if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x82) - mes_set_hw_res_pkt.enable_lr_compute_wa = 1; - else - dev_info_once(adev->dev, - "MES FW version must be >= 0x82 to enable LR compute workaround.\n"); /* * Keep oversubscribe timer for sdma . When we have unmapped doorbell * handling support, other queue will not use the oversubscribe timer. * handling mode - 0: disabled; 1: basic version; 2: basic+ version */ - mes_set_hw_res_pkt.oversubscription_timer = 50; + mes_set_hw_res_pkt.oversubscription_timer = mes_rev < 0x8b ? 0 : 50; mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; if (amdgpu_mes_log_enable) {
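The hunk above also changes which firmware revision gets consulted: the scheduler and KIQ pipes each carry their own version word, and the oversubscription timer is now gated on the revision of the pipe actually being programmed. A sketch of that selection, with the mask name and cutoff taken from the hunk:

#include <stdint.h>
#include <stdio.h>

#define AMDGPU_MES_VERSION_MASK	0xff
#define AMDGPU_MES_SCHED_PIPE	0

static uint32_t mes_rev(int pipe, uint32_t sched_ver, uint32_t kiq_ver)
{
	return (pipe == AMDGPU_MES_SCHED_PIPE ? sched_ver : kiq_ver) &
	       AMDGPU_MES_VERSION_MASK;
}

int main(void)
{
	uint32_t rev = mes_rev(AMDGPU_MES_SCHED_PIPE, 0x0400008b, 0x04000070);

	/* Per the hunk, firmware below rev 0x8b gets the timer
	 * programmed as 0; newer firmware keeps the 50 value. */
	printf("oversubscription_timer = %d\n", rev < 0x8b ? 0 : 50);
	return 0;
}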
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index a0cc8e2..534cb4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -154,14 +154,17 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(2, 0, 0): case IP_VERSION(2, 0, 2): - mmhub_cid = mmhub_client_ids_navi1x[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_navi1x) ? + mmhub_client_ids_navi1x[cid][rw] : NULL; break; case IP_VERSION(2, 1, 0): case IP_VERSION(2, 1, 1): - mmhub_cid = mmhub_client_ids_sienna_cichlid[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_sienna_cichlid) ? + mmhub_client_ids_sienna_cichlid[cid][rw] : NULL; break; case IP_VERSION(2, 1, 2): - mmhub_cid = mmhub_client_ids_beige_goby[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_beige_goby) ? + mmhub_client_ids_beige_goby[cid][rw] : NULL; break; default: mmhub_cid = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 5eb8122..ceb2f6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
@@ -94,7 +94,8 @@ mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev, case IP_VERSION(2, 3, 0): case IP_VERSION(2, 4, 0): case IP_VERSION(2, 4, 1): - mmhub_cid = mmhub_client_ids_vangogh[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_vangogh) ? + mmhub_client_ids_vangogh[cid][rw] : NULL; break; default: mmhub_cid = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c index 7d5242d..ab966e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
@@ -110,7 +110,8 @@ mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev, switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 1): - mmhub_cid = mmhub_client_ids_v3_0_0[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_0_0) ? + mmhub_client_ids_v3_0_0[cid][rw] : NULL; break; default: mmhub_cid = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index 910337d..14a742d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
@@ -117,7 +117,8 @@ mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev, switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(3, 0, 1): - mmhub_cid = mmhub_client_ids_v3_0_1[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_0_1) ? + mmhub_client_ids_v3_0_1[cid][rw] : NULL; break; default: mmhub_cid = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c index f0f182f..e1f07f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
@@ -108,7 +108,8 @@ mmhub_v3_0_2_print_l2_protection_fault_status(struct amdgpu_device *adev, "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - mmhub_cid = mmhub_client_ids_v3_0_2[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_0_2) ? + mmhub_client_ids_v3_0_2[cid][rw] : NULL; dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", mmhub_cid ? mmhub_cid : "unknown", cid); dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index 9519984..88bfe32 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
@@ -102,7 +102,8 @@ mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev, status); switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(4, 1, 0): - mmhub_cid = mmhub_client_ids_v4_1_0[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v4_1_0) ? + mmhub_client_ids_v4_1_0[cid][rw] : NULL; break; default: mmhub_cid = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c index a72770e..2532ca8 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c
@@ -688,7 +688,8 @@ mmhub_v4_2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, status); switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(4, 2, 0): - mmhub_cid = mmhub_client_ids_v4_2_0[cid][rw]; + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v4_2_0) ? + mmhub_client_ids_v4_2_0[cid][rw] : NULL; break; default: mmhub_cid = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 9aa9889..fb7aaf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -170,7 +170,8 @@ static int psp_v11_0_wait_for_bootloader(struct psp_context *psp) int retry_loop; /* For a reset done at the end of S3, only wait for TOS to be unloaded */ - if (adev->in_s3 && !(adev->flags & AMD_IS_APU) && amdgpu_in_reset(adev)) + if ((adev->in_s4 || adev->in_s3) && !(adev->flags & AMD_IS_APU) && + amdgpu_in_reset(adev)) return psp_v11_wait_for_tos_unload(psp); for (retry_loop = 0; retry_loop < 20; retry_loop++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c index 723ddae..73a7097 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c
@@ -69,12 +69,12 @@ static int psp_v15_0_0_ring_stop(struct psp_context *psp, 0x80000000, 0x80000000, false); } else { /* Write the ring destroy command*/ - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64, GFX_CTRL_CMD_ID_DESTROY_RINGS); /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64), 0x80000000, 0x80000000, false); } @@ -116,7 +116,7 @@ static int psp_v15_0_0_ring_create(struct psp_context *psp, } else { /* Wait for sOS ready for ring creation */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64), 0x80000000, 0x80000000, false); if (ret) { DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); @@ -125,23 +125,23 @@ static int psp_v15_0_0_ring_create(struct psp_context *psp, /* Write low address of the ring to C2PMSG_69 */ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_69, psp_ring_reg); + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_69, psp_ring_reg); /* Write high address of the ring to C2PMSG_70 */ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_70, psp_ring_reg); + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_70, psp_ring_reg); /* Write size of ring to C2PMSG_71 */ psp_ring_reg = ring->ring_size; - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_71, psp_ring_reg); + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_71, psp_ring_reg); /* Write the ring initialization command to C2PMSG_64 */ psp_ring_reg = ring_type; psp_ring_reg = psp_ring_reg << 16; - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, psp_ring_reg); + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64, psp_ring_reg); /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64), 0x80000000, 0x8000FFFF, false); } @@ -174,7 +174,7 @@ static uint32_t psp_v15_0_0_ring_get_wptr(struct psp_context *psp) if (amdgpu_sriov_vf(adev)) data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102); else - data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67); + data = RREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_67); return data; } @@ -188,7 +188,7 @@ static void psp_v15_0_0_ring_set_wptr(struct psp_context *psp, uint32_t value) WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); } else - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67, value); + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_67, value); } static const struct psp_funcs psp_v15_0_0_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 8122a5c..a0ad1f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -858,7 +858,9 @@ static int soc21_common_early_init(struct amdgpu_ip_block *ip_block) AMD_CG_SUPPORT_IH_CG | AMD_CG_SUPPORT_BIF_MGCG | AMD_CG_SUPPORT_BIF_LS; - adev->pg_flags = AMD_PG_SUPPORT_VCN | + adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_JPEG_DPG | AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_GFX_PG; adev->external_rev_id = adev->rev_id + 0x1;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 0202df5..6109124 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
@@ -174,6 +174,10 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); fw_shared->sq.is_enabled = 1; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG); + fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ? + AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU; + if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 09dabb3..462a32a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -3170,11 +3170,11 @@ static int kfd_ioctl_create_process(struct file *filep, struct kfd_process *p, v struct kfd_process *process; int ret; - /* Each FD owns only one kfd_process */ - if (p->context_id != KFD_CONTEXT_ID_PRIMARY) + if (!filep->private_data || !p) return -EINVAL; - if (!filep->private_data || !p) + /* Each FD owns only one kfd_process */ + if (p->context_id != KFD_CONTEXT_ID_PRIMARY) return -EINVAL; mutex_lock(&kfd_processes_mutex);
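The reorder above matters because the old code read p->context_id before the !p check, i.e. it dereferenced the pointer it was about to validate. A stand-in sketch of the corrected ordering:

#include <stdio.h>

struct process { int context_id; };

static int ioctl_create_process(void *private_data, struct process *p)
{
	/* NULL checks first ... */
	if (!private_data || !p)
		return -22;	/* -EINVAL */

	/* ... and only then is p safe to dereference. */
	if (p->context_id != 0)
		return -22;

	return 0;
}

int main(void)
{
	printf("%d\n", ioctl_create_process(NULL, NULL));	/* -22 */
	return 0;
}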
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index d5c234f..a535f15 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -42,9 +42,16 @@ static uint64_t mqd_stride_v9(struct mqd_manager *mm, struct queue_properties *q) { if (mm->dev->kfd->cwsr_enabled && - q->type == KFD_QUEUE_TYPE_COMPUTE) - return ALIGN(q->ctl_stack_size, PAGE_SIZE) + - ALIGN(sizeof(struct v9_mqd), PAGE_SIZE); + q->type == KFD_QUEUE_TYPE_COMPUTE) { + + /* On gfxv9, the MQD resides in the first 4K page, + * followed by the control stack. Align both to + * AMDGPU_GPU_PAGE_SIZE to maintain the required 4K boundary. + */ + + return ALIGN(ALIGN(q->ctl_stack_size, AMDGPU_GPU_PAGE_SIZE) + + ALIGN(sizeof(struct v9_mqd), AMDGPU_GPU_PAGE_SIZE), PAGE_SIZE); + } return mm->mqd_size; } @@ -151,8 +158,8 @@ static struct kfd_mem_obj *allocate_mqd(struct mqd_manager *mm, if (!mqd_mem_obj) return NULL; retval = amdgpu_amdkfd_alloc_kernel_mem(node->adev, - (ALIGN(q->ctl_stack_size, PAGE_SIZE) + - ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) * + (ALIGN(ALIGN(q->ctl_stack_size, AMDGPU_GPU_PAGE_SIZE) + + ALIGN(sizeof(struct v9_mqd), AMDGPU_GPU_PAGE_SIZE), PAGE_SIZE)) * NUM_XCC(node->xcc_mask), mqd_on_vram(node->adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, @@ -360,7 +367,7 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, struct kfd_context_save_area_header header; /* Control stack is located one page after MQD. */ - void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE); + void *mqd_ctl_stack = (void *)((uintptr_t)mqd + AMDGPU_GPU_PAGE_SIZE); m = get_mqd(mqd); @@ -397,7 +404,7 @@ static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, voi { struct v9_mqd *m; /* Control stack is located one page after MQD. */ - void *ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE); + void *ctl_stack = (void *)((uintptr_t)mqd + AMDGPU_GPU_PAGE_SIZE); m = get_mqd(mqd); @@ -443,7 +450,7 @@ static void restore_mqd(struct mqd_manager *mm, void **mqd, *gart_addr = addr; /* Control stack is located one page after MQD. */ - ctl_stack = (void *)((uintptr_t)*mqd + PAGE_SIZE); + ctl_stack = (void *)((uintptr_t)*mqd + AMDGPU_GPU_PAGE_SIZE); memcpy(ctl_stack, ctl_stack_src, ctl_stack_size); m->cp_hqd_pq_doorbell_control =
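The sizing change above is about two different page sizes: the GPU requires the MQD and the control stack to sit on 4K (AMDGPU_GPU_PAGE_SIZE) boundaries, while the final allocation must still be a multiple of the CPU's PAGE_SIZE, which can be 16K or 64K on non-x86. A sketch of the nested rounding, with made-up sizes:

#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))
#define GPU_PAGE	4096UL		/* AMDGPU_GPU_PAGE_SIZE */
#define CPU_PAGE	16384UL		/* e.g. 16K pages on arm64 */

int main(void)
{
	unsigned long mqd_size = 700, ctl_stack_size = 5000;

	/* Round each piece to the GPU page so the control stack starts
	 * one 4K page after the MQD, then round the sum to the CPU
	 * page so the allocation is mappable. */
	unsigned long total = ALIGN(ALIGN(ctl_stack_size, GPU_PAGE) +
				    ALIGN(mqd_size, GPU_PAGE), CPU_PAGE);

	printf("per-XCC allocation: %lu bytes\n", total);
	return 0;
}

This is also why the offsets in get_wave_state()/checkpoint_mqd() above switch from PAGE_SIZE to AMDGPU_GPU_PAGE_SIZE: the control stack lives one GPU page after the MQD, not one CPU page.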
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index e5b5641..035687a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -102,8 +102,8 @@ * The first chunk is the TBA used for the CWSR ISA code. The second * chunk is used as TMA for user-mode trap handler setup in daisy-chain mode. */ -#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2) -#define KFD_CWSR_TMA_OFFSET (PAGE_SIZE + 2048) +#define KFD_CWSR_TBA_TMA_SIZE (AMDGPU_GPU_PAGE_SIZE * 2) +#define KFD_CWSR_TMA_OFFSET (AMDGPU_GPU_PAGE_SIZE + 2048) #define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \ (KFD_MAX_NUM_OF_PROCESSES * \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 8ea3169..f5d2847e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -593,6 +593,7 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm, p->queue_size)) { pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n", p->queue_address, p->queue_size); + amdgpu_bo_unreserve(vm->root.bo); return -EFAULT; }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index bbe869c..28354a4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -249,10 +249,10 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope topo_dev->node_props.gfx_target_version < 90000) /* metadata_queue_size not supported on GFX7/GFX8 */ expected_queue_size = - properties->queue_size / 2; + PAGE_ALIGN(properties->queue_size / 2); else expected_queue_size = - properties->queue_size + properties->metadata_queue_size; + PAGE_ALIGN(properties->queue_size + properties->metadata_queue_size); vm = drm_priv_to_vm(pdd->drm_priv); err = amdgpu_bo_reserve(vm->root.bo, false); @@ -492,10 +492,11 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev) cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask); wave_num = get_num_waves(props, gfxv, cu_num); - wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE); + wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), + AMDGPU_GPU_PAGE_SIZE); ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8; ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size, - PAGE_SIZE); + AMDGPU_GPU_PAGE_SIZE); if ((gfxv / 10000 * 10000) == 100000) { /* HW design limits control stack size to 0x7000. @@ -507,7 +508,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev) props->ctl_stack_size = ctl_stack_size; props->debug_memory_size = ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN); - props->cwsr_size = ctl_stack_size + wg_data_size; + props->cwsr_size = ALIGN(ctl_stack_size + wg_data_size, PAGE_SIZE); if (gfxv == 80002) /* GFX_VERSION_TONGA */ props->eop_buffer_size = 0x8000;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b3d6f2c..2328c1a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2554,7 +2554,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) fw_meta_info_params.fw_inst_const = adev->dm.dmub_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes) + PSP_HEADER_BYTES_256; - fw_meta_info_params.fw_bss_data = region_params.bss_data_size ? adev->dm.dmub_fw->data + + fw_meta_info_params.fw_bss_data = fw_meta_info_params.bss_data_size ? adev->dm.dmub_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes) + le32_to_cpu(hdr->inst_const_bytes) : NULL; fw_meta_info_params.custom_psp_footer_size = 0; @@ -3909,8 +3909,9 @@ void amdgpu_dm_update_connector_after_detect( aconnector->dc_sink = sink; dc_sink_retain(aconnector->dc_sink); + drm_edid_free(aconnector->drm_edid); + aconnector->drm_edid = NULL; if (sink->dc_edid.length == 0) { - aconnector->drm_edid = NULL; hdmi_cec_unset_edid(aconnector); if (aconnector->dc_link->aux_mode) { drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux); @@ -5422,7 +5423,7 @@ static void setup_backlight_device(struct amdgpu_display_manager *dm, caps = &dm->backlight_caps[aconnector->bl_idx]; /* Only offer ABM property when non-OLED and user didn't turn off by module parameter */ - if (!caps->ext_caps->bits.oled && amdgpu_dm_abm_level < 0) + if (caps->ext_caps && !caps->ext_caps->bits.oled && amdgpu_dm_abm_level < 0) drm_object_attach_property(&aconnector->base.base, dm->adev->mode_info.abm_level_property, ABM_SYSFS_CONTROL); @@ -12524,6 +12525,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, if (dc_resource_is_dsc_encoding_supported(dc)) { for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + dm_new_crtc_state->mode_changed_independent_from_dsc = new_crtc_state->mode_changed; + } + + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { if (drm_atomic_crtc_needs_modeset(new_crtc_state)) { ret = add_affected_mst_dsc_crtcs(state, crtc); if (ret) { @@ -13119,7 +13125,7 @@ static void parse_edid_displayid_vrr(struct drm_connector *connector, u16 min_vfreq; u16 max_vfreq; - if (edid == NULL || edid->extensions == 0) + if (!edid || !edid->extensions) return; /* Find DisplayID extension */ @@ -13129,7 +13135,7 @@ static void parse_edid_displayid_vrr(struct drm_connector *connector, break; } - if (edid_ext == NULL) + if (i == edid->extensions) return; while (j < EDID_LENGTH) {
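One of the hunks above fixes a subtle loop-exit test in parse_edid_displayid_vrr(): whether the DisplayID extension was found is now signalled by the loop index stopping short of edid->extensions, rather than by a pointer that may hold stale data after a full pass through the loop. A sketch of the idiom:

#include <stdio.h>

#define DISPLAYID_EXT	0x70

int main(void)
{
	unsigned char ext_tags[] = { 0x02, 0x02, 0x02 };	/* no DisplayID */
	int extensions = 3, i;

	for (i = 0; i < extensions; i++) {
		if (ext_tags[i] == DISPLAYID_EXT)
			break;
	}

	/* The index, not a pointer set inside the loop, tells us
	 * whether the search succeeded. */
	if (i == extensions) {
		printf("no DisplayID extension\n");
		return 0;
	}
	printf("DisplayID extension at block %d\n", i);
	return 0;
}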
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 8008136..d15812d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -984,6 +984,7 @@ struct dm_crtc_state { bool freesync_vrr_info_changed; + bool mode_changed_independent_from_dsc; bool dsc_force_changed; bool vrr_supported; struct mod_freesync_config freesync_config;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 2ba98f3..cd1e58b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -1706,6 +1706,7 @@ __set_dm_plane_colorop_3dlut(struct drm_plane_state *plane_state, struct dc_transfer_func *tf = &dc_plane_state->in_shaper_func; struct drm_atomic_state *state = plane_state->state; const struct amdgpu_device *adev = drm_to_adev(colorop->dev); + bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut || adev->dm.dc->caps.color.mpc.preblend; const struct drm_device *dev = colorop->dev; const struct drm_color_lut32 *lut3d; uint32_t lut3d_size; @@ -1722,7 +1723,7 @@ __set_dm_plane_colorop_3dlut(struct drm_plane_state *plane_state, } if (colorop_state && !colorop_state->bypass && colorop->type == DRM_COLOROP_3D_LUT) { - if (!adev->dm.dc->caps.color.dpp.hw_3d_lut) { + if (!has_3dlut) { drm_dbg(dev, "3D LUT is not supported by hardware\n"); return -EINVAL; } @@ -1875,6 +1876,7 @@ amdgpu_dm_plane_set_colorop_properties(struct drm_plane_state *plane_state, struct drm_colorop *colorop = plane_state->color_pipeline; struct drm_device *dev = plane_state->plane->dev; struct amdgpu_device *adev = drm_to_adev(dev); + bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut || adev->dm.dc->caps.color.mpc.preblend; int ret; /* 1D Curve - DEGAM TF */ @@ -1907,7 +1909,7 @@ amdgpu_dm_plane_set_colorop_properties(struct drm_plane_state *plane_state, if (ret) return ret; - if (adev->dm.dc->caps.color.dpp.hw_3d_lut) { + if (has_3dlut) { /* 1D Curve & LUT - SHAPER TF & LUT */ colorop = colorop->next; if (!colorop) {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c index f25c0ed..aa46588 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
@@ -37,19 +37,19 @@ const u64 amdgpu_dm_supported_degam_tfs = BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) | BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF) | BIT(DRM_COLOROP_1D_CURVE_BT2020_INV_OETF) | - BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV); + BIT(DRM_COLOROP_1D_CURVE_GAMMA22); const u64 amdgpu_dm_supported_shaper_tfs = BIT(DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF) | BIT(DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF) | BIT(DRM_COLOROP_1D_CURVE_BT2020_OETF) | - BIT(DRM_COLOROP_1D_CURVE_GAMMA22); + BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV); const u64 amdgpu_dm_supported_blnd_tfs = BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) | BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF) | BIT(DRM_COLOROP_1D_CURVE_BT2020_INV_OETF) | - BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV); + BIT(DRM_COLOROP_1D_CURVE_GAMMA22); #define MAX_COLOR_PIPELINE_OPS 10 @@ -60,6 +60,7 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr struct drm_colorop *ops[MAX_COLOR_PIPELINE_OPS]; struct drm_device *dev = plane->dev; struct amdgpu_device *adev = drm_to_adev(dev); + bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut || adev->dm.dc->caps.color.mpc.preblend; int ret; int i = 0; @@ -112,7 +113,7 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr i++; - if (adev->dm.dc->caps.color.dpp.hw_3d_lut) { + if (has_3dlut) { /* 1D curve - SHAPER TF */ ops[i] = kzalloc_obj(*ops[0]); if (!ops[i]) {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 130190e..304437c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -765,15 +765,15 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, dm->adev->mode_info.crtcs[crtc_index] = acrtc; /* Don't enable DRM CRTC degamma property for - * 1. Degamma is replaced by color pipeline. - * 2. DCE since it doesn't support programmable degamma anywhere. - * 3. DCN401 since pre-blending degamma LUT doesn't apply to cursor. + * 1. DCE since it doesn't support programmable degamma anywhere. + * 2. DCN401 since pre-blending degamma LUT doesn't apply to cursor. + * Note: DEGAMMA properties are created even if the primary plane has the + * COLOR_PIPELINE property. User space can use either the DEGAMMA properties + * or the COLOR_PIPELINE property. An atomic commit which attempts to enable + * both is rejected. */ - if (plane->color_pipeline_property) - has_degamma = false; - else - has_degamma = dm->adev->dm.dc->caps.color.dpp.dcn_arch && - dm->adev->dm.dc->ctx->dce_version != DCN_VERSION_4_01; + has_degamma = dm->adev->dm.dc->caps.color.dpp.dcn_arch && + dm->adev->dm.dc->ctx->dce_version != DCN_VERSION_4_01; drm_crtc_enable_color_mgmt(&acrtc->base, has_degamma ? MAX_COLOR_LUT_ENTRIES : 0, true, MAX_COLOR_LUT_ENTRIES);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 7be50e8..5d8c4c7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -1744,9 +1744,11 @@ int pre_validate_dsc(struct drm_atomic_state *state, int ind = find_crtc_index_in_state_by_stream(state, stream); if (ind >= 0) { + struct dm_crtc_state *dm_new_crtc_state = to_dm_crtc_state(state->crtcs[ind].new_state); + DRM_INFO_ONCE("%s:%d MST_DSC no mode changed for stream 0x%p\n", __func__, __LINE__, stream); - state->crtcs[ind].new_state->mode_changed = 0; + dm_new_crtc_state->base.mode_changed = dm_new_crtc_state->mode_changed_independent_from_dsc; } } }
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 70587e5..127207e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1256,6 +1256,14 @@ static int amdgpu_dm_plane_atomic_check(struct drm_plane *plane, if (ret) return ret; + /* Reject commits that attempt to use both COLOR_PIPELINE and CRTC DEGAMMA_LUT */ + if (new_plane_state->color_pipeline && new_crtc_state->degamma_lut) { + drm_dbg_atomic(plane->dev, + "[PLANE:%d:%s] COLOR_PIPELINE and CRTC DEGAMMA_LUT cannot be enabled simultaneously\n", + plane->base.id, plane->name); + return -EINVAL; + } + ret = amdgpu_dm_plane_fill_dc_scaling_info(adev, new_plane_state, &scaling_info); if (ret) return ret;
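This check enforces the policy described in the amdgpu_dm_crtc.c comment above: user space may drive color management through either the legacy CRTC DEGAMMA_LUT or the new plane COLOR_PIPELINE, but a commit that enables both is rejected. Reduced to its essence:

#include <stdio.h>

static int plane_atomic_check(int color_pipeline_set, int degamma_lut_set)
{
	/* The two interfaces program the same hardware stage, so a
	 * commit enabling both is ambiguous and fails early. */
	if (color_pipeline_set && degamma_lut_set)
		return -22;	/* -EINVAL */
	return 0;
}

int main(void)
{
	printf("both set -> %d\n", plane_atomic_check(1, 1));
	printf("one set  -> %d\n", plane_atomic_check(1, 0));
	return 0;
}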
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 08d0e05..d237d7b 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -255,6 +255,10 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p BREAK_TO_DEBUGGER(); return NULL; } + if (ctx->dce_version == DCN_VERSION_2_01) { + dcn201_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); + return &clk_mgr->base; + } if (ASICREV_IS_SIENNA_CICHLID_P(asic_id.hw_internal_rev)) { dcn3_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); return &clk_mgr->base; @@ -267,10 +271,6 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p dcn3_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); return &clk_mgr->base; } - if (ctx->dce_version == DCN_VERSION_2_01) { - dcn201_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); - return &clk_mgr->base; - } dcn20_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); return &clk_mgr->base; }
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 246893d..baf820e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -170,11 +170,11 @@ struct dc_stream_state *dc_create_stream_for_sink( if (sink == NULL) goto fail; - stream = kzalloc_obj(struct dc_stream_state); + stream = kzalloc_obj(struct dc_stream_state, GFP_ATOMIC); if (stream == NULL) goto fail; - stream->update_scratch = kzalloc((int32_t) dc_update_scratch_space_size(), GFP_KERNEL); + stream->update_scratch = kzalloc((int32_t) dc_update_scratch_space_size(), GFP_ATOMIC); if (stream->update_scratch == NULL) goto fail;
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h index 3711d40..4c4e61b 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h
@@ -38,7 +38,11 @@ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 0),\ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 1),\ SR(DISPCLK_FREQ_CHANGE_CNTL),\ - SR(DC_MEM_GLOBAL_PWR_REQ_CNTL) + SR(DC_MEM_GLOBAL_PWR_REQ_CNTL),\ + SR(MICROSECOND_TIME_BASE_DIV),\ + SR(MILLISECOND_TIME_BASE_DIV),\ + SR(DCCG_GATE_DISABLE_CNTL),\ + SR(DCCG_GATE_DISABLE_CNTL2) #define DCCG_REG_LIST_DCN2() \ DCCG_COMMON_REG_LIST_DCN_BASE(),\
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c index 75c6934..c4d4eea 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c
@@ -96,6 +96,25 @@ static void dccg21_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppcl dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk; } +/* + * On DCN21 S0i3 resume, BIOS programs MICROSECOND_TIME_BASE_DIV to + * 0x00120464 as a marker that golden init has already been done. + * dcn21_s0i3_golden_init_wa() reads this marker later in bios_golden_init() + * to decide whether to skip golden init. + * + * dccg2_init() unconditionally overwrites MICROSECOND_TIME_BASE_DIV to + * 0x00120264, destroying the marker before it can be read. + * + * Guard the call: if the S0i3 marker is present, skip dccg2_init() so the + * WA can function correctly. bios_golden_init() will handle init in that case. + */ +static void dccg21_init(struct dccg *dccg) +{ + if (dccg2_is_s0i3_golden_init_wa_done(dccg)) + return; + + dccg2_init(dccg); +} static const struct dccg_funcs dccg21_funcs = { .update_dpp_dto = dccg21_update_dpp_dto, @@ -103,7 +122,7 @@ static const struct dccg_funcs dccg21_funcs = { .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en, .otg_add_pixel = dccg2_otg_add_pixel, .otg_drop_pixel = dccg2_otg_drop_pixel, - .dccg_init = dccg2_init, + .dccg_init = dccg21_init, .refclk_setup = dccg2_refclk_setup, /* Deprecated - for backward compatibility only */ .allow_clock_gating = dccg2_allow_clock_gating, .enable_memory_low_power = dccg2_enable_memory_low_power,
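The wrapper added above turns the long comment into code: a BIOS-written register value acts as a "golden init already done" marker, and re-running the generic init would clobber it before bios_golden_init() can read it. A sketch of the guard, using the two register values from the comment and a plain variable standing in for the register:

#include <stdint.h>
#include <stdio.h>

/* Simulated MICROSECOND_TIME_BASE_DIV register. */
static uint32_t time_base_div = 0x00120464;	/* marker left by BIOS */

static int s0i3_golden_init_wa_done(void)
{
	return time_base_div == 0x00120464;
}

static void dccg_init(void)
{
	time_base_div = 0x00120264;	/* generic init value */
}

static void dccg_init_guarded(void)
{
	/* Skip generic init so the marker survives until
	 * bios_golden_init() reads it. */
	if (s0i3_golden_init_wa_done())
		return;
	dccg_init();
}

int main(void)
{
	dccg_init_guarded();
	printf("MICROSECOND_TIME_BASE_DIV = 0x%08x\n", time_base_div);
	return 0;
}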
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.h index 067e49c..e2381ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.h
@@ -34,7 +34,13 @@ DCCG_SRII(DTO_PARAM, DPPCLK, 1),\ DCCG_SRII(DTO_PARAM, DPPCLK, 2),\ DCCG_SRII(DTO_PARAM, DPPCLK, 3),\ - SR(REFCLK_CNTL) + SR(REFCLK_CNTL),\ + SR(DISPCLK_FREQ_CHANGE_CNTL),\ + SR(DC_MEM_GLOBAL_PWR_REQ_CNTL),\ + SR(MICROSECOND_TIME_BASE_DIV),\ + SR(MILLISECOND_TIME_BASE_DIV),\ + SR(DCCG_GATE_DISABLE_CNTL),\ + SR(DCCG_GATE_DISABLE_CNTL2) #define DCCG_MASK_SH_LIST_DCN301(mask_sh) \ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_ENABLE, DPPCLK, 0, mask_sh),\
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h index bf65992..b5e3849 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h
@@ -64,9 +64,12 @@ SR(DSCCLK1_DTO_PARAM),\ SR(DSCCLK2_DTO_PARAM),\ SR(DSCCLK_DTO_CTRL),\ + SR(DCCG_GATE_DISABLE_CNTL),\ SR(DCCG_GATE_DISABLE_CNTL2),\ SR(DCCG_GATE_DISABLE_CNTL3),\ - SR(HDMISTREAMCLK0_DTO_PARAM) + SR(HDMISTREAMCLK0_DTO_PARAM),\ + SR(DC_MEM_GLOBAL_PWR_REQ_CNTL),\ + SR(MICROSECOND_TIME_BASE_DIV) #define DCCG_MASK_SH_LIST_DCN31(mask_sh) \
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h index a609635..ecbdc05 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h
@@ -70,11 +70,14 @@ SR(DSCCLK2_DTO_PARAM),\ SR(DSCCLK3_DTO_PARAM),\ SR(DSCCLK_DTO_CTRL),\ + SR(DCCG_GATE_DISABLE_CNTL),\ SR(DCCG_GATE_DISABLE_CNTL2),\ SR(DCCG_GATE_DISABLE_CNTL3),\ SR(HDMISTREAMCLK0_DTO_PARAM),\ SR(OTG_PIXEL_RATE_DIV),\ - SR(DTBCLK_P_CNTL) + SR(DTBCLK_P_CNTL),\ + SR(DC_MEM_GLOBAL_PWR_REQ_CNTL),\ + SR(MICROSECOND_TIME_BASE_DIV) #define DCCG_MASK_SH_LIST_DCN314_COMMON(mask_sh) \ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 0, mask_sh),\
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index b91517b..4dfb6c8 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
@@ -72,7 +72,11 @@ void dcn401_initialize_min_clocks(struct dc *dc) * audio corruption. Read current DISPCLK from DENTIST and request the same * freq to ensure that the timing is valid and unchanged. */ - clocks->dispclk_khz = dc->clk_mgr->funcs->get_dispclk_from_dentist(dc->clk_mgr); + if (dc->clk_mgr->funcs->get_dispclk_from_dentist) { + clocks->dispclk_khz = dc->clk_mgr->funcs->get_dispclk_from_dentist(dc->clk_mgr); + } else { + clocks->dispclk_khz = dc->clk_mgr->boot_snapshot.dispclk * 1000; + } } clocks->ref_dtbclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dtbclk_mhz * 1000; clocks->fclk_p_state_change_support = true; @@ -143,6 +147,7 @@ void dcn401_init_hw(struct dc *dc) int edp_num; uint32_t backlight = MAX_BACKLIGHT_LEVEL; uint32_t user_level = MAX_BACKLIGHT_LEVEL; + bool dchub_ref_freq_changed; int current_dchub_ref_freq = 0; if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) { @@ -356,14 +361,18 @@ void dcn401_init_hw(struct dc *dc) dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr; dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver > 0; dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; + + /* sw and fw FAMS versions must match for support */ dc->debug.fams2_config.bits.enable &= - dc->caps.dmub_caps.fams_ver == dc->debug.fams_version.ver; // sw & fw fams versions must match for support - if ((!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box) - || res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000 != current_dchub_ref_freq) { + dc->caps.dmub_caps.fams_ver == dc->debug.fams_version.ver; + dchub_ref_freq_changed = + res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000 != current_dchub_ref_freq; + if ((!dc->debug.fams2_config.bits.enable || dchub_ref_freq_changed) && + dc->res_pool->funcs->update_bw_bounding_box && + dc->clk_mgr && dc->clk_mgr->bw_params) { /* update bounding box if FAMS2 disabled, or if dchub clk has changed */ - if (dc->clk_mgr) - dc->res_pool->funcs->update_bw_bounding_box(dc, - dc->clk_mgr->bw_params); + dc->res_pool->funcs->update_bw_bounding_box(dc, + dc->clk_mgr->bw_params); } } }
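The first hunk in this file shows the optional-hook pattern: get_dispclk_from_dentist is not implemented by every clock manager, so the code now falls back to the boot snapshot instead of calling through a NULL function pointer. A stand-in sketch:

#include <stdio.h>

struct clk_mgr {
	int (*get_dispclk_from_dentist)(const struct clk_mgr *mgr);
	int boot_dispclk_mhz;	/* stand-in for boot_snapshot.dispclk */
};

static int current_dispclk_khz(const struct clk_mgr *mgr)
{
	if (mgr->get_dispclk_from_dentist)
		return mgr->get_dispclk_from_dentist(mgr);
	return mgr->boot_dispclk_mhz * 1000;
}

int main(void)
{
	struct clk_mgr mgr = {
		.get_dispclk_from_dentist = NULL,
		.boot_dispclk_mhz = 600,
	};

	printf("dispclk = %d kHz\n", current_dispclk_khz(&mgr));
	return 0;
}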
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c index 92c123ac..fdcf8db 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
@@ -650,9 +650,6 @@ static struct link_encoder *dce100_link_encoder_create( return &enc110->base; } - if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) - return NULL; - link_regs_id = map_transmitter_id_to_phy_instance(enc_init_data->transmitter); @@ -661,7 +658,8 @@ static struct link_encoder *dce100_link_encoder_create( &link_enc_feature, &link_enc_regs[link_regs_id], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; }
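This hunk and the five that follow (dce110/112/120/60/80) replace a hard failure with a degraded mode: an out-of-range hpd_source no longer aborts link encoder creation, it simply hands the constructor a NULL HPD register set. A sketch of the shape, with stand-in types:

#include <stddef.h>
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

struct hpd_registers { unsigned int reg; };

static const struct hpd_registers link_enc_hpd_regs[] = {
	{ 0x10 }, { 0x11 }, { 0x12 },
};

static void link_encoder_construct(const struct hpd_registers *hpd)
{
	if (hpd)
		printf("HPD reg 0x%x\n", hpd->reg);
	else
		printf("encoder created without an HPD source\n");
}

int main(void)
{
	unsigned int hpd_source = 5;	/* out of range */

	link_encoder_construct(hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ?
			       NULL : &link_enc_hpd_regs[hpd_source]);
	return 0;
}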
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c index 95852d27..ab71f64 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c
@@ -671,7 +671,7 @@ static struct link_encoder *dce110_link_encoder_create( kzalloc_obj(struct dce110_link_encoder); int link_regs_id; - if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) + if (!enc110) return NULL; link_regs_id = @@ -682,7 +682,8 @@ static struct link_encoder *dce110_link_encoder_create( &link_enc_feature, &link_enc_regs[link_regs_id], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; }
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c index 58c6a00..b7051bf 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c
@@ -632,7 +632,7 @@ static struct link_encoder *dce112_link_encoder_create( kzalloc_obj(struct dce110_link_encoder); int link_regs_id; - if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) + if (!enc110) return NULL; link_regs_id = @@ -643,7 +643,8 @@ static struct link_encoder *dce112_link_encoder_create( &link_enc_feature, &link_enc_regs[link_regs_id], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; }
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c index 71d76b0..7ee70f7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c
@@ -716,7 +716,7 @@ static struct link_encoder *dce120_link_encoder_create( kzalloc_obj(struct dce110_link_encoder); int link_regs_id; - if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) + if (!enc110) return NULL; link_regs_id = @@ -727,7 +727,8 @@ static struct link_encoder *dce120_link_encoder_create( &link_enc_feature, &link_enc_regs[link_regs_id], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; }
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index c276457..3d52973dd 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c
@@ -746,18 +746,16 @@ static struct link_encoder *dce60_link_encoder_create( return &enc110->base; } - if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) - return NULL; - link_regs_id = map_transmitter_id_to_phy_instance(enc_init_data->transmitter); dce60_link_encoder_construct(enc110, - enc_init_data, - &link_enc_feature, - &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data, + &link_enc_feature, + &link_enc_regs[link_regs_id], + &link_enc_aux_regs[enc_init_data->channel - 1], + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; }
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index d66d8ac..8992772 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
@@ -752,9 +752,6 @@ static struct link_encoder *dce80_link_encoder_create( return &enc110->base; } - if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) - return NULL; - link_regs_id = map_transmitter_id_to_phy_instance(enc_init_data->transmitter); @@ -763,7 +760,8 @@ static struct link_encoder *dce80_link_encoder_create( &link_enc_feature, &link_enc_regs[link_regs_id], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; }
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c index bbe185e..4663456 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
@@ -71,6 +71,7 @@ #include "dce/dce_dmcu.h" #include "dce/dce_aux.h" #include "dce/dce_i2c.h" +#include "dio/dcn10/dcn10_dio.h" #ifndef mmDP0_DP_DPHY_INTERNAL_CTRL #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f @@ -444,6 +445,33 @@ static const struct dcn_hubbub_mask hubbub_mask = { HUBBUB_MASK_SH_LIST_DCN10(_MASK) }; +static const struct dcn_dio_registers dio_regs = { + DIO_REG_LIST_DCN10() +}; + +#define DIO_MASK_SH_LIST(mask_sh)\ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh) + +static const struct dcn_dio_shift dio_shift = { + DIO_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn_dio_mask dio_mask = { + DIO_MASK_SH_LIST(_MASK) +}; + +static struct dio *dcn10_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} + static int map_transmitter_id_to_phy_instance( enum transmitter transmitter) { @@ -917,6 +945,11 @@ static void dcn10_resource_destruct(struct dcn10_resource_pool *pool) kfree(pool->base.hubbub); pool->base.hubbub = NULL; + if (pool->base.dio != NULL) { + kfree(TO_DCN10_DIO(pool->base.dio)); + pool->base.dio = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { if (pool->base.opps[i] != NULL) pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); @@ -1653,6 +1686,14 @@ static bool dcn10_resource_construct( goto fail; } + /* DIO */ + pool->base.dio = dcn10_dio_create(ctx); + if (pool->base.dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto fail; + } + if (!resource_construct(num_virtual_links, dc, &pool->base, &res_create_funcs)) goto fail;
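This hunk and the matching ones for dcn20 through dcn314 below all wire the new DIO block into the resource pool the same way: allocate in construct, bail to the pool's fail path on error, and free-and-NULL in destruct. A minimal sketch of that lifecycle, with stand-in types for dcn10_dio:

#include <stdio.h>
#include <stdlib.h>

struct dio { int base; };
struct resource_pool { struct dio *dio; };

static struct dio *dio_create(void)
{
	return calloc(1, sizeof(struct dio));	/* kzalloc_obj stand-in */
}

static int pool_construct(struct resource_pool *pool)
{
	pool->dio = dio_create();
	if (!pool->dio)
		return -1;	/* real code jumps to its fail label */
	return 0;
}

static void pool_destruct(struct resource_pool *pool)
{
	if (pool->dio) {
		free(pool->dio);
		pool->dio = NULL;	/* guard against double free */
	}
}

int main(void)
{
	struct resource_pool pool = { 0 };

	if (pool_construct(&pool) == 0)
		pool_destruct(&pool);
	printf("dio = %p\n", (void *)pool.dio);
	return 0;
}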
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 8b55518..74e8d22 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
@@ -82,6 +82,7 @@ #include "dce/dce_dmcu.h" #include "dce/dce_aux.h" #include "dce/dce_i2c.h" +#include "dio/dcn10/dcn10_dio.h" #include "vm_helper.h" #include "link_enc_cfg.h" @@ -550,6 +551,33 @@ static const struct dcn_hubbub_mask hubbub_mask = { HUBBUB_MASK_SH_LIST_DCN20(_MASK) }; +static const struct dcn_dio_registers dio_regs = { + DIO_REG_LIST_DCN10() +}; + +#define DIO_MASK_SH_LIST(mask_sh)\ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh) + +static const struct dcn_dio_shift dio_shift = { + DIO_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn_dio_mask dio_mask = { + DIO_MASK_SH_LIST(_MASK) +}; + +static struct dio *dcn20_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} + #define vmid_regs(id)\ [id] = {\ DCN20_VMID_REG_LIST(id)\ @@ -1104,6 +1132,12 @@ static void dcn20_resource_destruct(struct dcn20_resource_pool *pool) kfree(pool->base.hubbub); pool->base.hubbub = NULL; } + + if (pool->base.dio != NULL) { + kfree(TO_DCN10_DIO(pool->base.dio)); + pool->base.dio = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { if (pool->base.dpps[i] != NULL) dcn20_dpp_destroy(&pool->base.dpps[i]); @@ -2692,6 +2726,14 @@ static bool dcn20_resource_construct( goto create_fail; } + /* DIO */ + pool->base.dio = dcn20_dio_create(ctx); + if (pool->base.dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto create_fail; + } + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { pool->base.dscs[i] = dcn20_dsc_create(ctx, i); if (pool->base.dscs[i] == NULL) {
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c index 4ea76e4..e289be7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c
@@ -56,6 +56,7 @@ #include "dce/dce_aux.h" #include "dce/dce_i2c.h" #include "dcn10/dcn10_resource.h" +#include "dio/dcn10/dcn10_dio.h" #include "cyan_skillfish_ip_offset.h" @@ -755,6 +756,33 @@ static struct hubbub *dcn201_hubbub_create(struct dc_context *ctx) return &hubbub->base; } +static const struct dcn_dio_registers dio_regs = { + DIO_REG_LIST_DCN10() +}; + +#define DIO_MASK_SH_LIST(mask_sh)\ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh) + +static const struct dcn_dio_shift dio_shift = { + DIO_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn_dio_mask dio_mask = { + DIO_MASK_SH_LIST(_MASK) +}; + +static struct dio *dcn201_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} + static struct timing_generator *dcn201_timing_generator_create( struct dc_context *ctx, uint32_t instance) @@ -930,6 +958,11 @@ static void dcn201_resource_destruct(struct dcn201_resource_pool *pool) pool->base.hubbub = NULL; } + if (pool->base.dio != NULL) { + kfree(TO_DCN10_DIO(pool->base.dio)); + pool->base.dio = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { if (pool->base.dpps[i] != NULL) dcn201_dpp_destroy(&pool->base.dpps[i]); @@ -1276,6 +1309,14 @@ static bool dcn201_resource_construct( goto create_fail; } + /* DIO */ + pool->base.dio = dcn201_dio_create(ctx); + if (pool->base.dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto create_fail; + } + if (!resource_construct(num_virtual_links, dc, &pool->base, &res_create_funcs)) goto create_fail;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 0f4307f..4333baa 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
@@ -84,6 +84,7 @@ #include "dce/dce_dmcu.h" #include "dce/dce_aux.h" #include "dce/dce_i2c.h" +#include "dio/dcn10/dcn10_dio.h" #include "dcn21_resource.h" #include "vm_helper.h" #include "dcn20/dcn20_vmid.h" @@ -329,6 +330,25 @@ static const struct dcn_hubbub_mask hubbub_mask = { HUBBUB_MASK_SH_LIST_DCN21(_MASK) }; +static const struct dcn_dio_registers dio_regs = { + DIO_REG_LIST_DCN10() +}; + +static const struct dcn_dio_shift dio_shift = { 0 }; + +static const struct dcn_dio_mask dio_mask = { 0 }; + +static struct dio *dcn21_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} #define vmid_regs(id)\ [id] = {\ @@ -677,6 +697,12 @@ static void dcn21_resource_destruct(struct dcn21_resource_pool *pool) kfree(pool->base.hubbub); pool->base.hubbub = NULL; } + + if (pool->base.dio != NULL) { + kfree(TO_DCN10_DIO(pool->base.dio)); + pool->base.dio = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { if (pool->base.dpps[i] != NULL) dcn20_dpp_destroy(&pool->base.dpps[i]); @@ -1654,6 +1680,14 @@ static bool dcn21_resource_construct( goto create_fail; } + /* DIO */ + pool->base.dio = dcn21_dio_create(ctx); + if (pool->base.dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto create_fail; + } + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { pool->base.dscs[i] = dcn21_dsc_create(ctx, i); if (pool->base.dscs[i] == NULL) {
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index 2fa86b95..87b7b4e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
@@ -60,6 +60,7 @@ #include "dml/display_mode_vba.h" #include "dcn30/dcn30_dccg.h" #include "dcn10/dcn10_resource.h" +#include "dio/dcn10/dcn10_dio.h" #include "link_service.h" #include "dce/dce_panel_cntl.h" @@ -886,6 +887,33 @@ static struct hubbub *dcn30_hubbub_create(struct dc_context *ctx) return &hubbub3->base; } +static const struct dcn_dio_registers dio_regs = { + DIO_REG_LIST_DCN10() +}; + +#define DIO_MASK_SH_LIST(mask_sh)\ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh) + +static const struct dcn_dio_shift dio_shift = { + DIO_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn_dio_mask dio_mask = { + DIO_MASK_SH_LIST(_MASK) +}; + +static struct dio *dcn30_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} + static struct timing_generator *dcn30_timing_generator_create( struct dc_context *ctx, uint32_t instance) @@ -1095,6 +1123,12 @@ static void dcn30_resource_destruct(struct dcn30_resource_pool *pool) kfree(pool->base.hubbub); pool->base.hubbub = NULL; } + + if (pool->base.dio != NULL) { + kfree(TO_DCN10_DIO(pool->base.dio)); + pool->base.dio = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { if (pool->base.dpps[i] != NULL) dcn30_dpp_destroy(&pool->base.dpps[i]); @@ -2464,6 +2498,14 @@ static bool dcn30_resource_construct( goto create_fail; } + /* DIO */ + pool->base.dio = dcn30_dio_create(ctx); + if (pool->base.dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto create_fail; + } + /* HUBPs, DPPs, OPPs and TGs */ for (i = 0; i < pool->base.pipe_count; i++) { pool->base.hubps[i] = dcn30_hubp_create(ctx, i);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c index 7842bee..6bb1c62 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
@@ -59,6 +59,7 @@ #include "dml/display_mode_vba.h" #include "dcn301/dcn301_dccg.h" #include "dcn10/dcn10_resource.h" +#include "dio/dcn10/dcn10_dio.h" #include "dcn30/dcn30_dio_stream_encoder.h" #include "dcn301/dcn301_dio_link_encoder.h" #include "dcn301/dcn301_panel_cntl.h" @@ -843,6 +844,33 @@ static struct hubbub *dcn301_hubbub_create(struct dc_context *ctx) return &hubbub3->base; } +static const struct dcn_dio_registers dio_regs = { + DIO_REG_LIST_DCN10() +}; + +#define DIO_MASK_SH_LIST(mask_sh)\ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh) + +static const struct dcn_dio_shift dio_shift = { + DIO_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn_dio_mask dio_mask = { + DIO_MASK_SH_LIST(_MASK) +}; + +static struct dio *dcn301_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} + static struct timing_generator *dcn301_timing_generator_create( struct dc_context *ctx, uint32_t instance) { @@ -1066,6 +1094,12 @@ static void dcn301_destruct(struct dcn301_resource_pool *pool) kfree(pool->base.hubbub); pool->base.hubbub = NULL; } + + if (pool->base.dio != NULL) { + kfree(TO_DCN10_DIO(pool->base.dio)); + pool->base.dio = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { if (pool->base.dpps[i] != NULL) dcn301_dpp_destroy(&pool->base.dpps[i]); @@ -1582,6 +1616,14 @@ static bool dcn301_resource_construct( goto create_fail; } + /* DIO */ + pool->base.dio = dcn301_dio_create(ctx); + if (pool->base.dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto create_fail; + } + j = 0; /* HUBPs, DPPs, OPPs and TGs */ for (i = 0; i < pool->base.pipe_count; i++) {
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c index 1874d5d6..d02aafd 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
@@ -46,6 +46,7 @@ #include "dml/dcn30/dcn30_fpu.h" #include "dcn10/dcn10_resource.h" +#include "dio/dcn10/dcn10_dio.h" #include "link_service.h" @@ -253,6 +254,33 @@ static const struct dcn20_vmid_mask vmid_masks = { DCN20_VMID_MASK_SH_LIST(_MASK) }; +static const struct dcn_dio_registers dio_regs = { + DIO_REG_LIST_DCN10() +}; + +#define DIO_MASK_SH_LIST(mask_sh)\ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh) + +static const struct dcn_dio_shift dio_shift = { + DIO_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn_dio_mask dio_mask = { + DIO_MASK_SH_LIST(_MASK) +}; + +static struct dio *dcn302_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} + static struct hubbub *dcn302_hubbub_create(struct dc_context *ctx) { int i; @@ -1022,6 +1050,11 @@ static void dcn302_resource_destruct(struct resource_pool *pool) pool->hubbub = NULL; } + if (pool->dio != NULL) { + kfree(TO_DCN10_DIO(pool->dio)); + pool->dio = NULL; + } + for (i = 0; i < pool->pipe_count; i++) { if (pool->dpps[i] != NULL) { kfree(TO_DCN20_DPP(pool->dpps[i])); @@ -1372,6 +1405,14 @@ static bool dcn302_resource_construct( goto create_fail; } + /* DIO */ + pool->dio = dcn302_dio_create(ctx); + if (pool->dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto create_fail; + } + /* HUBPs, DPPs, OPPs and TGs */ for (i = 0; i < pool->pipe_count; i++) { pool->hubps[i] = dcn302_hubp_create(ctx, i);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c index d52201c..30b1403 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
@@ -46,6 +46,7 @@ #include "dml/dcn30/dcn30_fpu.h" #include "dcn10/dcn10_resource.h" +#include "dio/dcn10/dcn10_dio.h" #include "link_service.h" @@ -249,6 +250,33 @@ static const struct dcn20_vmid_mask vmid_masks = { DCN20_VMID_MASK_SH_LIST(_MASK) }; +static const struct dcn_dio_registers dio_regs = { + DIO_REG_LIST_DCN10() +}; + +#define DIO_MASK_SH_LIST(mask_sh)\ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh) + +static const struct dcn_dio_shift dio_shift = { + DIO_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn_dio_mask dio_mask = { + DIO_MASK_SH_LIST(_MASK) +}; + +static struct dio *dcn303_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} + static struct hubbub *dcn303_hubbub_create(struct dc_context *ctx) { int i; @@ -966,6 +994,11 @@ static void dcn303_resource_destruct(struct resource_pool *pool) pool->hubbub = NULL; } + if (pool->dio != NULL) { + kfree(TO_DCN10_DIO(pool->dio)); + pool->dio = NULL; + } + for (i = 0; i < pool->pipe_count; i++) { if (pool->dpps[i] != NULL) { kfree(TO_DCN20_DPP(pool->dpps[i])); @@ -1304,6 +1337,14 @@ static bool dcn303_resource_construct( goto create_fail; } + /* DIO */ + pool->dio = dcn303_dio_create(ctx); + if (pool->dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto create_fail; + } + /* HUBPs, DPPs, OPPs and TGs */ for (i = 0; i < pool->pipe_count; i++) { pool->hubps[i] = dcn303_hubp_create(ctx, i);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 2055f1f..4e9c041 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
@@ -64,6 +64,7 @@ #include "dce/dce_audio.h" #include "dce/dce_hwseq.h" #include "clk_mgr.h" +#include "dio/dcn10/dcn10_dio.h" #include "dio/virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" @@ -810,6 +811,21 @@ static const struct dcn20_vmid_mask vmid_masks = { DCN20_VMID_MASK_SH_LIST(_MASK) }; +static const struct dcn_dio_registers dio_regs = { + DIO_REG_LIST_DCN10() +}; + +#define DIO_MASK_SH_LIST(mask_sh)\ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh) + +static const struct dcn_dio_shift dio_shift = { + DIO_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn_dio_mask dio_mask = { + DIO_MASK_SH_LIST(_MASK) +}; + static const struct resource_caps res_cap_dcn31 = { .num_timing_generator = 4, .num_opp = 4, @@ -1021,6 +1037,18 @@ static struct mpc *dcn31_mpc_create( return &mpc30->base; } +static struct dio *dcn31_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} + static struct hubbub *dcn31_hubbub_create(struct dc_context *ctx) { int i; @@ -1396,6 +1424,10 @@ static void dcn31_resource_destruct(struct dcn31_resource_pool *pool) kfree(pool->base.hubbub); pool->base.hubbub = NULL; } + if (pool->base.dio != NULL) { + kfree(TO_DCN10_DIO(pool->base.dio)); + pool->base.dio = NULL; + } for (i = 0; i < pool->base.pipe_count; i++) { if (pool->base.dpps[i] != NULL) dcn31_dpp_destroy(&pool->base.dpps[i]); @@ -2063,6 +2095,14 @@ static bool dcn31_resource_construct( goto create_fail; } + /* DIO */ + pool->base.dio = dcn31_dio_create(ctx); + if (pool->base.dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto create_fail; + } + /* HUBPs, DPPs, OPPs and TGs */ for (i = 0; i < pool->base.pipe_count; i++) { pool->base.hubps[i] = dcn31_hubp_create(ctx, i);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index 1939f72..e26a642 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
@@ -66,6 +66,7 @@ #include "dce/dce_audio.h" #include "dce/dce_hwseq.h" #include "clk_mgr.h" +#include "dio/dcn10/dcn10_dio.h" #include "dio/virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" @@ -822,6 +823,21 @@ static const struct dcn20_vmid_mask vmid_masks = { DCN20_VMID_MASK_SH_LIST(_MASK) }; +static const struct dcn_dio_registers dio_regs = { + DIO_REG_LIST_DCN10() +}; + +#define DIO_MASK_SH_LIST(mask_sh)\ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh) + +static const struct dcn_dio_shift dio_shift = { + DIO_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn_dio_mask dio_mask = { + DIO_MASK_SH_LIST(_MASK) +}; + static const struct resource_caps res_cap_dcn314 = { .num_timing_generator = 4, .num_opp = 4, @@ -1079,6 +1095,18 @@ static struct mpc *dcn31_mpc_create( return &mpc30->base; } +static struct dio *dcn314_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} + static struct hubbub *dcn31_hubbub_create(struct dc_context *ctx) { int i; @@ -1455,6 +1483,10 @@ static void dcn314_resource_destruct(struct dcn314_resource_pool *pool) kfree(pool->base.hubbub); pool->base.hubbub = NULL; } + if (pool->base.dio != NULL) { + kfree(TO_DCN10_DIO(pool->base.dio)); + pool->base.dio = NULL; + } for (i = 0; i < pool->base.pipe_count; i++) { if (pool->base.dpps[i] != NULL) dcn31_dpp_destroy(&pool->base.dpps[i]); @@ -1987,6 +2019,14 @@ static bool dcn314_resource_construct( goto create_fail; } + /* DIO */ + pool->base.dio = dcn314_dio_create(ctx); + if (pool->base.dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto create_fail; + } + /* HUBPs, DPPs, OPPs and TGs */ for (i = 0; i < pool->base.pipe_count; i++) { pool->base.hubps[i] = dcn31_hubp_create(ctx, i);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index e8377c1..131a6cd 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
@@ -63,6 +63,7 @@ #include "dce/dce_audio.h" #include "dce/dce_hwseq.h" #include "clk_mgr.h" +#include "dio/dcn10/dcn10_dio.h" #include "dio/virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" @@ -809,6 +810,21 @@ static const struct dcn20_vmid_mask vmid_masks = { DCN20_VMID_MASK_SH_LIST(_MASK) }; +static const struct dcn_dio_registers dio_regs = { + DIO_REG_LIST_DCN10() +}; + +#define DIO_MASK_SH_LIST(mask_sh)\ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh) + +static const struct dcn_dio_shift dio_shift = { + DIO_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn_dio_mask dio_mask = { + DIO_MASK_SH_LIST(_MASK) +}; + static const struct resource_caps res_cap_dcn31 = { .num_timing_generator = 4, .num_opp = 4, @@ -1020,6 +1036,18 @@ static struct mpc *dcn31_mpc_create( return &mpc30->base; } +static struct dio *dcn315_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} + static struct hubbub *dcn31_hubbub_create(struct dc_context *ctx) { int i; @@ -1397,6 +1425,10 @@ static void dcn315_resource_destruct(struct dcn315_resource_pool *pool) kfree(pool->base.hubbub); pool->base.hubbub = NULL; } + if (pool->base.dio != NULL) { + kfree(TO_DCN10_DIO(pool->base.dio)); + pool->base.dio = NULL; + } for (i = 0; i < pool->base.pipe_count; i++) { if (pool->base.dpps[i] != NULL) dcn31_dpp_destroy(&pool->base.dpps[i]); @@ -2012,6 +2044,14 @@ static bool dcn315_resource_construct( goto create_fail; } + /* DIO */ + pool->base.dio = dcn315_dio_create(ctx); + if (pool->base.dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto create_fail; + } + /* HUBPs, DPPs, OPPs and TGs */ for (i = 0; i < pool->base.pipe_count; i++) { pool->base.hubps[i] = dcn31_hubp_create(ctx, i);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c index 045ce01..c8c0ce6 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
@@ -63,6 +63,7 @@ #include "dce/dce_audio.h" #include "dce/dce_hwseq.h" #include "clk_mgr.h" +#include "dio/dcn10/dcn10_dio.h" #include "dio/virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" @@ -804,6 +805,21 @@ static const struct dcn20_vmid_mask vmid_masks = { DCN20_VMID_MASK_SH_LIST(_MASK) }; +static const struct dcn_dio_registers dio_regs = { + DIO_REG_LIST_DCN10() +}; + +#define DIO_MASK_SH_LIST(mask_sh)\ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh) + +static const struct dcn_dio_shift dio_shift = { + DIO_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn_dio_mask dio_mask = { + DIO_MASK_SH_LIST(_MASK) +}; + static const struct resource_caps res_cap_dcn31 = { .num_timing_generator = 4, .num_opp = 4, @@ -1013,6 +1029,18 @@ static struct mpc *dcn31_mpc_create( return &mpc30->base; } +static struct dio *dcn316_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} + static struct hubbub *dcn31_hubbub_create(struct dc_context *ctx) { int i; @@ -1392,6 +1420,10 @@ static void dcn316_resource_destruct(struct dcn316_resource_pool *pool) kfree(pool->base.hubbub); pool->base.hubbub = NULL; } + if (pool->base.dio != NULL) { + kfree(TO_DCN10_DIO(pool->base.dio)); + pool->base.dio = NULL; + } for (i = 0; i < pool->base.pipe_count; i++) { if (pool->base.dpps[i] != NULL) dcn31_dpp_destroy(&pool->base.dpps[i]); @@ -1887,6 +1919,14 @@ static bool dcn316_resource_construct( goto create_fail; } + /* DIO */ + pool->base.dio = dcn316_dio_create(ctx); + if (pool->base.dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto create_fail; + } + /* HUBPs, DPPs, OPPs and TGs */ for (i = 0; i < pool->base.pipe_count; i++) { pool->base.hubps[i] = dcn31_hubp_create(ctx, i);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 7ebb7d1..c3a6ae14 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -66,6 +66,7 @@ #include "dce/dce_hwseq.h" #include "clk_mgr.h" #include "dio/virtual/virtual_stream_encoder.h" +#include "dio/dcn10/dcn10_dio.h" #include "dml/display_mode_vba.h" #include "dcn32/dcn32_dccg.h" #include "dcn10/dcn10_resource.h" @@ -643,6 +644,19 @@ static const struct dcn20_vmid_mask vmid_masks = { DCN20_VMID_MASK_SH_LIST(_MASK) }; +static struct dcn_dio_registers dio_regs; + +#define DIO_MASK_SH_LIST(mask_sh)\ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh) + +static const struct dcn_dio_shift dio_shift = { + DIO_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn_dio_mask dio_mask = { + DIO_MASK_SH_LIST(_MASK) +}; + static const struct resource_caps res_cap_dcn32 = { .num_timing_generator = 4, .num_opp = 4, @@ -833,6 +847,22 @@ static struct clock_source *dcn32_clock_source_create( return NULL; } +static struct dio *dcn32_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT dio_regs + DIO_REG_LIST_DCN10(); + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} + static struct hubbub *dcn32_hubbub_create(struct dc_context *ctx) { int i; @@ -1494,6 +1524,11 @@ static void dcn32_resource_destruct(struct dcn32_resource_pool *pool) if (pool->base.dccg != NULL) dcn_dccg_destroy(&pool->base.dccg); + if (pool->base.dio != NULL) { + kfree(TO_DCN10_DIO(pool->base.dio)); + pool->base.dio = NULL; + } + if (pool->base.oem_device != NULL) { struct dc *dc = pool->base.oem_device->ctx->dc; @@ -1785,7 +1820,10 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, enum dc_valid dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); + DC_FP_START(); dcn32_override_min_req_memclk(dc, context); + DC_FP_END(); + dcn32_override_min_req_dcfclk(dc, context); BW_VAL_TRACE_END_WATERMARKS(); @@ -2370,6 +2408,14 @@ static bool dcn32_resource_construct( goto create_fail; } + /* DIO */ + pool->base.dio = dcn32_dio_create(ctx); + if (pool->base.dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto create_fail; + } + /* HUBPs, DPPs, OPPs, TGs, ABMs */ for (i = 0, j = 0; i < pool->base.res_cap->num_timing_generator; i++) {
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index c1582c2..990aec7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
@@ -69,6 +69,7 @@ #include "dce/dce_hwseq.h" #include "clk_mgr.h" #include "dio/virtual/virtual_stream_encoder.h" +#include "dio/dcn10/dcn10_dio.h" #include "dml/display_mode_vba.h" #include "dcn32/dcn32_dccg.h" #include "dcn10/dcn10_resource.h" @@ -639,6 +640,19 @@ static const struct dcn20_vmid_mask vmid_masks = { DCN20_VMID_MASK_SH_LIST(_MASK) }; +static struct dcn_dio_registers dio_regs; + +#define DIO_MASK_SH_LIST(mask_sh)\ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh) + +static const struct dcn_dio_shift dio_shift = { + DIO_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn_dio_mask dio_mask = { + DIO_MASK_SH_LIST(_MASK) +}; + static const struct resource_caps res_cap_dcn321 = { .num_timing_generator = 4, .num_opp = 4, @@ -827,6 +841,22 @@ static struct clock_source *dcn321_clock_source_create( return NULL; } +static struct dio *dcn321_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT dio_regs + DIO_REG_LIST_DCN10(); + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} + static struct hubbub *dcn321_hubbub_create(struct dc_context *ctx) { int i; @@ -1474,6 +1504,11 @@ static void dcn321_resource_destruct(struct dcn321_resource_pool *pool) if (pool->base.dccg != NULL) dcn_dccg_destroy(&pool->base.dccg); + if (pool->base.dio != NULL) { + kfree(TO_DCN10_DIO(pool->base.dio)); + pool->base.dio = NULL; + } + if (pool->base.oem_device != NULL) { struct dc *dc = pool->base.oem_device->ctx->dc; @@ -1872,6 +1907,14 @@ static bool dcn321_resource_construct( goto create_fail; } + /* DIO */ + pool->base.dio = dcn321_dio_create(ctx); + if (pool->base.dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto create_fail; + } + /* HUBPs, DPPs, OPPs, TGs, ABMs */ for (i = 0, j = 0; i < pool->base.res_cap->num_timing_generator; i++) {
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 3494a40..598b2f2 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -71,6 +71,7 @@ #include "dce/dce_hwseq.h" #include "clk_mgr.h" #include "dio/virtual/virtual_stream_encoder.h" +#include "dio/dcn10/dcn10_dio.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" #include "dcn35/dcn35_dccg.h" @@ -664,6 +665,19 @@ static const struct dcn20_vmid_mask vmid_masks = { DCN20_VMID_MASK_SH_LIST(_MASK) }; +static struct dcn_dio_registers dio_regs; + +#define DIO_MASK_SH_LIST(mask_sh)\ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh) + +static const struct dcn_dio_shift dio_shift = { + DIO_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn_dio_mask dio_mask = { + DIO_MASK_SH_LIST(_MASK) +}; + static const struct resource_caps res_cap_dcn35 = { .num_timing_generator = 4, .num_opp = 4, @@ -973,6 +987,22 @@ static struct mpc *dcn35_mpc_create( return &mpc30->base; } +static struct dio *dcn35_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT dio_regs + DIO_REG_LIST_DCN10(); + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} + static struct hubbub *dcn35_hubbub_create(struct dc_context *ctx) { int i; @@ -1563,6 +1593,11 @@ static void dcn35_resource_destruct(struct dcn35_resource_pool *pool) if (pool->base.dccg != NULL) dcn_dccg_destroy(&pool->base.dccg); + + if (pool->base.dio != NULL) { + kfree(TO_DCN10_DIO(pool->base.dio)); + pool->base.dio = NULL; + } } static struct hubp *dcn35_hubp_create( @@ -2033,6 +2068,14 @@ static bool dcn35_resource_construct( goto create_fail; } + /* DIO */ + pool->base.dio = dcn35_dio_create(ctx); + if (pool->base.dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto create_fail; + } + /* HUBPs, DPPs, OPPs and TGs */ for (i = 0; i < pool->base.pipe_count; i++) { pool->base.hubps[i] = dcn35_hubp_create(ctx, i);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 080bc7f..7e15d07 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
@@ -50,6 +50,7 @@ #include "dce/dce_hwseq.h" #include "clk_mgr.h" #include "dio/virtual/virtual_stream_encoder.h" +#include "dio/dcn10/dcn10_dio.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" #include "dcn35/dcn35_dccg.h" @@ -644,6 +645,19 @@ static const struct dcn20_vmid_mask vmid_masks = { DCN20_VMID_MASK_SH_LIST(_MASK) }; +static struct dcn_dio_registers dio_regs; + +#define DIO_MASK_SH_LIST(mask_sh)\ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh) + +static const struct dcn_dio_shift dio_shift = { + DIO_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn_dio_mask dio_mask = { + DIO_MASK_SH_LIST(_MASK) +}; + static const struct resource_caps res_cap_dcn351 = { .num_timing_generator = 4, .num_opp = 4, @@ -953,6 +967,22 @@ static struct mpc *dcn35_mpc_create( return &mpc30->base; } +static struct dio *dcn351_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT dio_regs + DIO_REG_LIST_DCN10(); + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} + static struct hubbub *dcn35_hubbub_create(struct dc_context *ctx) { int i; @@ -1543,6 +1573,11 @@ static void dcn351_resource_destruct(struct dcn351_resource_pool *pool) if (pool->base.dccg != NULL) dcn_dccg_destroy(&pool->base.dccg); + + if (pool->base.dio != NULL) { + kfree(TO_DCN10_DIO(pool->base.dio)); + pool->base.dio = NULL; + } } static struct hubp *dcn35_hubp_create( @@ -2005,6 +2040,14 @@ static bool dcn351_resource_construct( goto create_fail; } + /* DIO */ + pool->base.dio = dcn351_dio_create(ctx); + if (pool->base.dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto create_fail; + } + /* HUBPs, DPPs, OPPs and TGs */ for (i = 0; i < pool->base.pipe_count; i++) { pool->base.hubps[i] = dcn35_hubp_create(ctx, i);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c index af51ac4e..83fee2c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c
@@ -50,6 +50,7 @@ #include "dce/dce_hwseq.h" #include "clk_mgr.h" #include "dio/virtual/virtual_stream_encoder.h" +#include "dio/dcn10/dcn10_dio.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" #include "dcn35/dcn35_dccg.h" @@ -651,6 +652,19 @@ static const struct dcn20_vmid_mask vmid_masks = { DCN20_VMID_MASK_SH_LIST(_MASK) }; +static struct dcn_dio_registers dio_regs; + +#define DIO_MASK_SH_LIST(mask_sh)\ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh) + +static const struct dcn_dio_shift dio_shift = { + DIO_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn_dio_mask dio_mask = { + DIO_MASK_SH_LIST(_MASK) +}; + static const struct resource_caps res_cap_dcn36 = { .num_timing_generator = 4, .num_opp = 4, @@ -960,6 +974,22 @@ static struct mpc *dcn35_mpc_create( return &mpc30->base; } +static struct dio *dcn36_dio_create(struct dc_context *ctx) +{ + struct dcn10_dio *dio10 = kzalloc_obj(struct dcn10_dio); + + if (!dio10) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT dio_regs + DIO_REG_LIST_DCN10(); + + dcn10_dio_construct(dio10, ctx, &dio_regs, &dio_shift, &dio_mask); + + return &dio10->base; +} + static struct hubbub *dcn35_hubbub_create(struct dc_context *ctx) { int i; @@ -1550,6 +1580,11 @@ static void dcn36_resource_destruct(struct dcn36_resource_pool *pool) if (pool->base.dccg != NULL) dcn_dccg_destroy(&pool->base.dccg); + + if (pool->base.dio != NULL) { + kfree(TO_DCN10_DIO(pool->base.dio)); + pool->base.dio = NULL; + } } static struct hubp *dcn35_hubp_create( @@ -2012,6 +2047,14 @@ static bool dcn36_resource_construct( goto create_fail; } + /* DIO */ + pool->base.dio = dcn36_dio_create(ctx); + if (pool->base.dio == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dio!\n"); + goto create_fail; + } + /* HUBPs, DPPs, OPPs and TGs */ for (i = 0; i < pool->base.pipe_count; i++) { pool->base.hubps[i] = dcn35_hubp_create(ctx, i);
diff --git a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_15_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_15_0_0_offset.h index 0e4c195..fe97943 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_15_0_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_15_0_0_offset.h
@@ -82,6 +82,24 @@ #define regMPASP_SMN_IH_SW_INT_CTRL 0x0142 #define regMPASP_SMN_IH_SW_INT_CTRL_BASE_IDX 0 +// addressBlock: mp_SmuMpASPPub_PcruDec +// base address: 0x3800000 +#define regMPASP_PCRU1_MPASP_C2PMSG_64 0x4280 +#define regMPASP_PCRU1_MPASP_C2PMSG_64_BASE_IDX 3 +#define regMPASP_PCRU1_MPASP_C2PMSG_65 0x4281 +#define regMPASP_PCRU1_MPASP_C2PMSG_65_BASE_IDX 3 +#define regMPASP_PCRU1_MPASP_C2PMSG_66 0x4282 +#define regMPASP_PCRU1_MPASP_C2PMSG_66_BASE_IDX 3 +#define regMPASP_PCRU1_MPASP_C2PMSG_67 0x4283 +#define regMPASP_PCRU1_MPASP_C2PMSG_67_BASE_IDX 3 +#define regMPASP_PCRU1_MPASP_C2PMSG_68 0x4284 +#define regMPASP_PCRU1_MPASP_C2PMSG_68_BASE_IDX 3 +#define regMPASP_PCRU1_MPASP_C2PMSG_69 0x4285 +#define regMPASP_PCRU1_MPASP_C2PMSG_69_BASE_IDX 3 +#define regMPASP_PCRU1_MPASP_C2PMSG_70 0x4286 +#define regMPASP_PCRU1_MPASP_C2PMSG_70_BASE_IDX 3 +#define regMPASP_PCRU1_MPASP_C2PMSG_71 0x4287 +#define regMPASP_PCRU1_MPASP_C2PMSG_71_BASE_IDX 3 // addressBlock: mp_SmuMp1_SmnDec // base address: 0x0
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index 61b1c5a..3694246 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -3454,9 +3454,11 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, if (adev->asic_type == CHIP_HAINAN) { if ((adev->pdev->revision == 0x81) || (adev->pdev->revision == 0xC3) || + (adev->pdev->device == 0x6660) || (adev->pdev->device == 0x6664) || (adev->pdev->device == 0x6665) || - (adev->pdev->device == 0x6667)) { + (adev->pdev->device == 0x6667) || + (adev->pdev->device == 0x666F)) { max_sclk = 75000; } if ((adev->pdev->revision == 0xC3) ||
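The hunk above extends Hainan's engine-clock cap to two more PCI device IDs. Stripped of driver context, the quirk reduces to an ID/revision predicate like the sketch below; the helper name is invented, and 75000 is in the driver's usual 10 kHz units, i.e. 750 MHz:

    #include <stdbool.h>
    #include <stdint.h>

    /* Mirrors the condition in si_apply_state_adjust_rules() after the hunk. */
    static bool hainan_needs_sclk_cap(uint16_t device, uint8_t revision)
    {
            switch (device) {
            case 0x6660: case 0x6664: case 0x6665:
            case 0x6667: case 0x666F:
                    return true;
            }
            return revision == 0x81 || revision == 0xC3;
    }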
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 12b052d..7ca8fdd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -262,7 +262,6 @@ int smu_v11_0_check_fw_version(struct smu_context *smu) "smu fw program = %d, version = 0x%08x (%d.%d.%d)\n", smu->smc_driver_if_version, if_version, smu_program, smu_version, smu_major, smu_minor, smu_debug); - dev_info(smu->adev->dev, "SMU driver if version not matched\n"); } return ret;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c index 2c20624..ac5e44d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c
@@ -101,7 +101,6 @@ int smu_v12_0_check_fw_version(struct smu_context *smu) "smu fw program = %d, smu fw version = 0x%08x (%d.%d.%d)\n", smu->smc_driver_if_version, if_version, smu_program, smu_version, smu_major, smu_minor, smu_debug); - dev_info(smu->adev->dev, "SMU driver if version not matched\n"); } return ret;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index e030f1e..554f616 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -59,6 +59,10 @@ #define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c)) +static void smu_v13_0_0_get_od_setting_limits(struct smu_context *smu, + int od_feature_bit, + int32_t *min, int32_t *max); + static const struct smu_feature_bits smu_v13_0_0_dpm_features = { .bits = { SMU_FEATURE_BIT_INIT(FEATURE_DPM_GFXCLK_BIT), @@ -1043,8 +1047,35 @@ static bool smu_v13_0_0_is_od_feature_supported(struct smu_context *smu, PPTable_t *pptable = smu->smu_table.driver_pptable; const OverDriveLimits_t * const overdrive_upperlimits = &pptable->SkuTable.OverDriveLimitsBasicMax; + int32_t min_value, max_value; + bool feature_enabled; - return overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit); + switch (od_feature_bit) { + case PP_OD_FEATURE_FAN_CURVE_BIT: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + if (feature_enabled) { + smu_v13_0_0_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_TEMP, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + + smu_v13_0_0_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_PWM, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + } + break; + default: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + break; + } + +out: + return feature_enabled; } static void smu_v13_0_0_get_od_setting_limits(struct smu_context *smu, @@ -2034,6 +2065,7 @@ static ssize_t smu_v13_0_0_get_gpu_metrics(struct smu_context *smu, smu, SMU_DRIVER_TABLE_GPU_METRICS); SmuMetricsExternal_t metrics_ext; SmuMetrics_t *metrics = &metrics_ext.SmuMetrics; + uint32_t mp1_ver = amdgpu_ip_version(smu->adev, MP1_HWIP, 0); int ret = 0; ret = smu_cmn_get_metrics_table(smu, @@ -2058,7 +2090,12 @@ static ssize_t smu_v13_0_0_get_gpu_metrics(struct smu_context *smu, metrics->Vcn1ActivityPercentage); gpu_metrics->average_socket_power = metrics->AverageSocketPower; - gpu_metrics->energy_accumulator = metrics->EnergyAccumulator; + + if ((mp1_ver == IP_VERSION(13, 0, 0) && smu->smc_fw_version <= 0x004e1e00) || + (mp1_ver == IP_VERSION(13, 0, 10) && smu->smc_fw_version <= 0x00500800)) + gpu_metrics->energy_accumulator = metrics->EnergyAccumulator; + else + gpu_metrics->energy_accumulator = UINT_MAX; if (metrics->AverageGfxActivity <= SMU_13_0_0_BUSY_THRESHOLD) gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs; @@ -2216,7 +2253,8 @@ static int smu_v13_0_0_restore_user_od_settings(struct smu_context *smu) user_od_table->OverDriveTable.FeatureCtrlMask = BIT(PP_OD_FEATURE_GFXCLK_BIT) | BIT(PP_OD_FEATURE_UCLK_BIT) | BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT) | - BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + BIT(PP_OD_FEATURE_FAN_CURVE_BIT) | + BIT(PP_OD_FEATURE_ZERO_FAN_BIT); res = smu_v13_0_0_upload_overdrive_table(smu, user_od_table); user_od_table->OverDriveTable.FeatureCtrlMask = 0; if (res == 0)
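The same guard is applied to smu_v13_0_7 and smu_v14_0_2 further down: a fan-curve overdrive feature now counts as supported only if the firmware also populates non-zero setting limits for it, not merely the feature bit. The core of the check, lifted out as a sketch where get_limits() stands in for the per-ASIC smu_v13_0_0_get_od_setting_limits():

    /* A feature bit whose [min, max] limits are both zero is treated as an
     * unpopulated table entry, i.e. the feature is not actually usable. */
    static bool od_limits_usable(void (*get_limits)(int id, int *min, int *max),
                                 int setting_id)
    {
            int min = 0, max = 0;

            get_limits(setting_id, &min, &max);
            return min || max;
    }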
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 896b51c..870bcc8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -1391,7 +1391,7 @@ static int smu_v13_0_6_emit_clk_levels(struct smu_context *smu, break; case SMU_OD_MCLK: if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SET_UCLK_MAX))) - return 0; + return -EOPNOTSUPP; size += sysfs_emit_at(buf, size, "%s:\n", "OD_MCLK"); size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n", @@ -2122,6 +2122,7 @@ static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu, { struct smu_dpm_context *smu_dpm = &(smu->smu_dpm); struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context; + struct smu_dpm_table *uclk_table = &dpm_context->dpm_tables.uclk_table; struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; uint32_t min_clk; uint32_t max_clk; @@ -2221,14 +2222,16 @@ static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu, if (ret) return ret; - min_clk = SMU_DPM_TABLE_MIN( - &dpm_context->dpm_tables.uclk_table); - max_clk = SMU_DPM_TABLE_MAX( - &dpm_context->dpm_tables.uclk_table); - ret = smu_v13_0_6_set_soft_freq_limited_range( - smu, SMU_UCLK, min_clk, max_clk, false); - if (ret) - return ret; + if (SMU_DPM_TABLE_MAX(uclk_table) != + pstate_table->uclk_pstate.curr.max) { + min_clk = SMU_DPM_TABLE_MIN(&dpm_context->dpm_tables.uclk_table); + max_clk = SMU_DPM_TABLE_MAX(&dpm_context->dpm_tables.uclk_table); + ret = smu_v13_0_6_set_soft_freq_limited_range(smu, + SMU_UCLK, min_clk, + max_clk, false); + if (ret) + return ret; + } smu_v13_0_reset_custom_level(smu); } break;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index af0482c..f331e87 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -59,6 +59,10 @@ #define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c)) +static void smu_v13_0_7_get_od_setting_limits(struct smu_context *smu, + int od_feature_bit, + int32_t *min, int32_t *max); + static const struct smu_feature_bits smu_v13_0_7_dpm_features = { .bits = { SMU_FEATURE_BIT_INIT(FEATURE_DPM_GFXCLK_BIT), @@ -1053,8 +1057,35 @@ static bool smu_v13_0_7_is_od_feature_supported(struct smu_context *smu, PPTable_t *pptable = smu->smu_table.driver_pptable; const OverDriveLimits_t * const overdrive_upperlimits = &pptable->SkuTable.OverDriveLimitsBasicMax; + int32_t min_value, max_value; + bool feature_enabled; - return overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit); + switch (od_feature_bit) { + case PP_OD_FEATURE_FAN_CURVE_BIT: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + if (feature_enabled) { + smu_v13_0_7_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_TEMP, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + + smu_v13_0_7_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_PWM, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + } + break; + default: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + break; + } + +out: + return feature_enabled; } static void smu_v13_0_7_get_od_setting_limits(struct smu_context *smu, @@ -2065,7 +2096,8 @@ static ssize_t smu_v13_0_7_get_gpu_metrics(struct smu_context *smu, metrics->Vcn1ActivityPercentage); gpu_metrics->average_socket_power = metrics->AverageSocketPower; - gpu_metrics->energy_accumulator = metrics->EnergyAccumulator; + gpu_metrics->energy_accumulator = smu->smc_fw_version <= 0x00521400 ? + metrics->EnergyAccumulator : UINT_MAX; if (metrics->AverageGfxActivity <= SMU_13_0_7_BUSY_THRESHOLD) gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs; @@ -2223,7 +2255,8 @@ static int smu_v13_0_7_restore_user_od_settings(struct smu_context *smu) user_od_table->OverDriveTable.FeatureCtrlMask = BIT(PP_OD_FEATURE_GFXCLK_BIT) | BIT(PP_OD_FEATURE_UCLK_BIT) | BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT) | - BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + BIT(PP_OD_FEATURE_FAN_CURVE_BIT) | + BIT(PP_OD_FEATURE_ZERO_FAN_BIT); res = smu_v13_0_7_upload_overdrive_table(smu, user_od_table); user_od_table->OverDriveTable.FeatureCtrlMask = 0; if (res == 0)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index cec2df1..e38354c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
@@ -284,7 +284,6 @@ int smu_v14_0_check_fw_version(struct smu_context *smu) "smu fw program = %d, smu fw version = 0x%08x (%d.%d.%d)\n", smu->smc_driver_if_version, if_version, smu_program, smu_version, smu_major, smu_minor, smu_debug); - dev_info(adev->dev, "SMU driver if version not matched\n"); } return ret;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 9994d43..c3ebfac 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
@@ -56,6 +56,10 @@ #define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c)) +static void smu_v14_0_2_get_od_setting_limits(struct smu_context *smu, + int od_feature_bit, + int32_t *min, int32_t *max); + static const struct smu_feature_bits smu_v14_0_2_dpm_features = { .bits = { SMU_FEATURE_BIT_INIT(FEATURE_DPM_GFXCLK_BIT), SMU_FEATURE_BIT_INIT(FEATURE_DPM_UCLK_BIT), @@ -922,8 +926,35 @@ static bool smu_v14_0_2_is_od_feature_supported(struct smu_context *smu, PPTable_t *pptable = smu->smu_table.driver_pptable; const OverDriveLimits_t * const overdrive_upperlimits = &pptable->SkuTable.OverDriveLimitsBasicMax; + int32_t min_value, max_value; + bool feature_enabled; - return overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit); + switch (od_feature_bit) { + case PP_OD_FEATURE_FAN_CURVE_BIT: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + if (feature_enabled) { + smu_v14_0_2_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_TEMP, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + + smu_v14_0_2_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_PWM, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + } + break; + default: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + break; + } + +out: + return feature_enabled; } static void smu_v14_0_2_get_od_setting_limits(struct smu_context *smu, @@ -2311,7 +2342,8 @@ static int smu_v14_0_2_restore_user_od_settings(struct smu_context *smu) user_od_table->OverDriveTable.FeatureCtrlMask = BIT(PP_OD_FEATURE_GFXCLK_BIT) | BIT(PP_OD_FEATURE_UCLK_BIT) | BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT) | - BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + BIT(PP_OD_FEATURE_FAN_CURVE_BIT) | + BIT(PP_OD_FEATURE_ZERO_FAN_BIT); res = smu_v14_0_2_upload_overdrive_table(smu, user_od_table); user_od_table->OverDriveTable.FeatureCtrlMask = 0; if (res == 0)
diff --git a/drivers/gpu/drm/ast/ast_dp501.c b/drivers/gpu/drm/ast/ast_dp501.c index 9e19d8c..677c52c 100644 --- a/drivers/gpu/drm/ast/ast_dp501.c +++ b/drivers/gpu/drm/ast/ast_dp501.c
@@ -436,7 +436,7 @@ static void ast_init_analog(struct ast_device *ast) /* Finally, clear bits [17:16] of SCU2c */ data = ast_read32(ast, 0x1202c); data &= 0xfffcffff; - ast_write32(ast, 0, data); + ast_write32(ast, 0x1202c, data); /* Disable DVO */ ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xa3, 0xcf, 0x00);
diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index 930aaa6..ec632f2 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c
@@ -1881,6 +1881,14 @@ static int samsung_dsim_register_te_irq(struct samsung_dsim *dsi, struct device return 0; } +static void samsung_dsim_unregister_te_irq(struct samsung_dsim *dsi) +{ + if (dsi->te_gpio) { + free_irq(gpiod_to_irq(dsi->te_gpio), dsi); + gpiod_put(dsi->te_gpio); + } +} + static int samsung_dsim_host_attach(struct mipi_dsi_host *host, struct mipi_dsi_device *device) { @@ -1961,7 +1969,7 @@ static int samsung_dsim_host_attach(struct mipi_dsi_host *host, if (!(device->mode_flags & MIPI_DSI_MODE_VIDEO)) { ret = samsung_dsim_register_te_irq(dsi, &device->dev); if (ret) - return ret; + goto err_remove_bridge; } // The next bridge can be used by host_ops->attach @@ -1982,15 +1990,12 @@ static int samsung_dsim_host_attach(struct mipi_dsi_host *host, err_release_next_bridge: drm_bridge_put(dsi->bridge.next_bridge); dsi->bridge.next_bridge = NULL; - return ret; -} -static void samsung_dsim_unregister_te_irq(struct samsung_dsim *dsi) -{ - if (dsi->te_gpio) { - free_irq(gpiod_to_irq(dsi->te_gpio), dsi); - gpiod_put(dsi->te_gpio); - } + if (!(device->mode_flags & MIPI_DSI_MODE_VIDEO)) + samsung_dsim_unregister_te_irq(dsi); +err_remove_bridge: + drm_bridge_remove(&dsi->bridge); + return ret; } static int samsung_dsim_host_detach(struct mipi_dsi_host *host,
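Two fixes ride together here: host_attach now unwinds the TE IRQ and removes the bridge on failure instead of leaking them, and the unregister helper moves above its first user. The unwind shape, reduced to a compilable sketch with hypothetical helpers:

    struct ctx;
    int add_bridge(struct ctx *c), register_te_irq(struct ctx *c), attach_next_bridge(struct ctx *c);
    void unregister_te_irq(struct ctx *c), remove_bridge(struct ctx *c);

    /* Layered goto unwind: each failure releases exactly what was set up. */
    static int attach_sketch(struct ctx *c)
    {
            int ret;

            ret = add_bridge(c);
            if (ret)
                    return ret;

            ret = register_te_irq(c);
            if (ret)
                    goto err_remove_bridge;

            ret = attach_next_bridge(c);
            if (ret)
                    goto err_unregister_te_irq;

            return 0;

    err_unregister_te_irq:
            unregister_te_irq(c);
    err_remove_bridge:
            remove_bridge(c);
            return ret;
    }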
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-dp.c b/drivers/gpu/drm/bridge/synopsys/dw-dp.c index 4ab6922..fd23ca2 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-dp.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-dp.c
@@ -2049,7 +2049,9 @@ struct dw_dp *dw_dp_bind(struct device *dev, struct drm_encoder *encoder, bridge->type = DRM_MODE_CONNECTOR_DisplayPort; bridge->ycbcr_420_allowed = true; - devm_drm_bridge_add(dev, bridge); + ret = devm_drm_bridge_add(dev, bridge); + if (ret) + return ERR_PTR(ret); dp->aux.dev = dev; dp->aux.drm_dev = encoder->dev;
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c index ab7fed6..facfb75 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c
@@ -848,7 +848,7 @@ static int dw_hdmi_qp_config_audio_infoframe(struct dw_hdmi_qp *hdmi, regmap_bulk_write(hdmi->regm, PKT_AUDI_CONTENTS0, &header_bytes, 1); regmap_bulk_write(hdmi->regm, PKT_AUDI_CONTENTS1, &buffer[3], 1); - regmap_bulk_write(hdmi->regm, PKT_AUDI_CONTENTS2, &buffer[4], 1); + regmap_bulk_write(hdmi->regm, PKT_AUDI_CONTENTS2, &buffer[7], 1); /* Enable ACR, AUDI, AMD */ dw_hdmi_qp_mod(hdmi,
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c index f6736b4..17a8852 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
@@ -351,9 +351,9 @@ static u8 sn65dsi83_get_dsi_range(struct sn65dsi83 *ctx, * DSI_CLK = mode clock * bpp / dsi_data_lanes / 2 * the 2 is there because the bus is DDR. */ - return DIV_ROUND_UP(clamp((unsigned int)mode->clock * - mipi_dsi_pixel_format_to_bpp(ctx->dsi->format) / - ctx->dsi->lanes / 2, 40000U, 500000U), 5000U); + return clamp((unsigned int)mode->clock * + mipi_dsi_pixel_format_to_bpp(ctx->dsi->format) / + ctx->dsi->lanes / 2, 40000U, 500000U) / 5000U; } static u8 sn65dsi83_get_dsi_div(struct sn65dsi83 *ctx) @@ -517,6 +517,7 @@ static void sn65dsi83_atomic_pre_enable(struct drm_bridge *bridge, struct drm_atomic_state *state) { struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge); + const unsigned int dual_factor = ctx->lvds_dual_link ? 2 : 1; const struct drm_bridge_state *bridge_state; const struct drm_crtc_state *crtc_state; const struct drm_display_mode *mode; @@ -653,18 +654,18 @@ static void sn65dsi83_atomic_pre_enable(struct drm_bridge *bridge, /* 32 + 1 pixel clock to ensure proper operation */ le16val = cpu_to_le16(32 + 1); regmap_bulk_write(ctx->regmap, REG_VID_CHA_SYNC_DELAY_LOW, &le16val, 2); - le16val = cpu_to_le16(mode->hsync_end - mode->hsync_start); + le16val = cpu_to_le16((mode->hsync_end - mode->hsync_start) / dual_factor); regmap_bulk_write(ctx->regmap, REG_VID_CHA_HSYNC_PULSE_WIDTH_LOW, &le16val, 2); le16val = cpu_to_le16(mode->vsync_end - mode->vsync_start); regmap_bulk_write(ctx->regmap, REG_VID_CHA_VSYNC_PULSE_WIDTH_LOW, &le16val, 2); regmap_write(ctx->regmap, REG_VID_CHA_HORIZONTAL_BACK_PORCH, - mode->htotal - mode->hsync_end); + (mode->htotal - mode->hsync_end) / dual_factor); regmap_write(ctx->regmap, REG_VID_CHA_VERTICAL_BACK_PORCH, mode->vtotal - mode->vsync_end); regmap_write(ctx->regmap, REG_VID_CHA_HORIZONTAL_FRONT_PORCH, - mode->hsync_start - mode->hdisplay); + (mode->hsync_start - mode->hdisplay) / dual_factor); regmap_write(ctx->regmap, REG_VID_CHA_VERTICAL_FRONT_PORCH, mode->vsync_start - mode->vdisplay); regmap_write(ctx->regmap, REG_VID_CHA_TEST_PATTERN, 0x00);
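Worked example of the rounding change in sn65dsi83_get_dsi_range(): with a 446250 kHz DSI clock, the old DIV_ROUND_UP(446250, 5000) returns 90, selecting the 450-455 MHz range code, one step above the actual clock, while the new truncating clamp(...)/5000 returns 89, the 445-450 MHz range that 446.25 MHz actually falls in. The dual_factor changes in pre_enable are independent: on dual-link LVDS each channel carries half the horizontal timing, so the hsync pulse width and the horizontal porches are halved before being programmed, while the vertical values stay untouched.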
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 276d05d..98d64ad 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -1415,6 +1415,7 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, { struct ti_sn65dsi86 *pdata = dev_get_drvdata(adev->dev.parent); struct device_node *np = pdata->dev->of_node; + const struct i2c_client *client = to_i2c_client(pdata->dev); int ret; pdata->next_bridge = devm_drm_of_get_bridge(&adev->dev, np, 1, 0); @@ -1433,8 +1434,9 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, ? DRM_MODE_CONNECTOR_DisplayPort : DRM_MODE_CONNECTOR_eDP; if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort) { - pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT | - DRM_BRIDGE_OP_HPD; + pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT; + if (client->irq) + pdata->bridge.ops |= DRM_BRIDGE_OP_HPD; /* * If comms were already enabled they would have been enabled * with the wrong value of HPD_DISABLE. Update it now. Comms
diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index d6f11c6..1987789 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c
@@ -1569,11 +1569,17 @@ EXPORT_SYMBOL(devm_drm_put_bridge); static void drm_bridge_debugfs_show_bridge(struct drm_printer *p, struct drm_bridge *bridge, unsigned int idx, - bool lingering) + bool lingering, + bool scoped) { + unsigned int refcount = kref_read(&bridge->refcount); + + if (scoped) + refcount--; + drm_printf(p, "bridge[%u]: %ps\n", idx, bridge->funcs); - drm_printf(p, "\trefcount: %u%s\n", kref_read(&bridge->refcount), + drm_printf(p, "\trefcount: %u%s\n", refcount, lingering ? " [lingering]" : ""); drm_printf(p, "\ttype: [%d] %s\n", @@ -1607,10 +1613,10 @@ static int allbridges_show(struct seq_file *m, void *data) mutex_lock(&bridge_lock); list_for_each_entry(bridge, &bridge_list, list) - drm_bridge_debugfs_show_bridge(&p, bridge, idx++, false); + drm_bridge_debugfs_show_bridge(&p, bridge, idx++, false, false); list_for_each_entry(bridge, &bridge_lingering_list, list) - drm_bridge_debugfs_show_bridge(&p, bridge, idx++, true); + drm_bridge_debugfs_show_bridge(&p, bridge, idx++, true, false); mutex_unlock(&bridge_lock); @@ -1625,7 +1631,7 @@ static int encoder_bridges_show(struct seq_file *m, void *data) unsigned int idx = 0; drm_for_each_bridge_in_chain_scoped(encoder, bridge) - drm_bridge_debugfs_show_bridge(&p, bridge, idx++, false); + drm_bridge_debugfs_show_bridge(&p, bridge, idx++, false, true); return 0; }
diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 262b1b8..bb49b83 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -930,7 +930,8 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, mutex_unlock(&client->modeset_mutex); out: kfree(crtcs); - modes_destroy(dev, modes, connector_count); + if (modes) + modes_destroy(dev, modes, connector_count); kfree(modes); kfree(offsets); kfree(enabled);
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 7b5a499..c549293 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -550,27 +550,27 @@ int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, } EXPORT_SYMBOL_GPL(drm_gem_shmem_dumb_create); -static bool drm_gem_shmem_try_map_pmd(struct vm_fault *vmf, unsigned long addr, - struct page *page) +static vm_fault_t try_insert_pfn(struct vm_fault *vmf, unsigned int order, + unsigned long pfn) { + if (!order) { + return vmf_insert_pfn(vmf->vma, vmf->address, pfn); #ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP - unsigned long pfn = page_to_pfn(page); - unsigned long paddr = pfn << PAGE_SHIFT; - bool aligned = (addr & ~PMD_MASK) == (paddr & ~PMD_MASK); + } else if (order == PMD_ORDER) { + unsigned long paddr = pfn << PAGE_SHIFT; + bool aligned = (vmf->address & ~PMD_MASK) == (paddr & ~PMD_MASK); - if (aligned && - pmd_none(*vmf->pmd) && - folio_test_pmd_mappable(page_folio(page))) { - pfn &= PMD_MASK >> PAGE_SHIFT; - if (vmf_insert_pfn_pmd(vmf, pfn, false) == VM_FAULT_NOPAGE) - return true; - } + if (aligned && + folio_test_pmd_mappable(page_folio(pfn_to_page(pfn)))) { + pfn &= PMD_MASK >> PAGE_SHIFT; + return vmf_insert_pfn_pmd(vmf, pfn, false); + } #endif - - return false; + } + return VM_FAULT_FALLBACK; } -static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) +static vm_fault_t drm_gem_shmem_any_fault(struct vm_fault *vmf, unsigned int order) { struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj = vma->vm_private_data; @@ -581,6 +581,9 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) pgoff_t page_offset; unsigned long pfn; + if (order && order != PMD_ORDER) + return VM_FAULT_FALLBACK; + /* Offset to faulty address in the VMA. */ page_offset = vmf->pgoff - vma->vm_pgoff; @@ -593,13 +596,8 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) goto out; } - if (drm_gem_shmem_try_map_pmd(vmf, vmf->address, pages[page_offset])) { - ret = VM_FAULT_NOPAGE; - goto out; - } - pfn = page_to_pfn(pages[page_offset]); - ret = vmf_insert_pfn(vma, vmf->address, pfn); + ret = try_insert_pfn(vmf, order, pfn); out: dma_resv_unlock(shmem->base.resv); @@ -607,6 +605,11 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) return ret; } +static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) +{ + return drm_gem_shmem_any_fault(vmf, 0); +} + static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; @@ -643,6 +646,9 @@ static void drm_gem_shmem_vm_close(struct vm_area_struct *vma) const struct vm_operations_struct drm_gem_shmem_vm_ops = { .fault = drm_gem_shmem_fault, +#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP + .huge_fault = drm_gem_shmem_any_fault, +#endif .open = drm_gem_shmem_vm_open, .close = drm_gem_shmem_vm_close, };
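The refactor above funnels base-page and PMD faults through one handler keyed by order: .fault passes order 0, .huge_fault passes the order the MM core wants, and anything the helper cannot satisfy returns VM_FAULT_FALLBACK so the core retries with smaller pages. The dispatch skeleton, abstracted from the driver specifics (lookup_pfn() and pmd_addr_aligned() are stand-ins, not kernel APIs):

    static vm_fault_t any_fault_sketch(struct vm_fault *vmf, unsigned int order)
    {
            unsigned long pfn = lookup_pfn(vmf);

            if (!order)
                    return vmf_insert_pfn(vmf->vma, vmf->address, pfn);

            /* Only PMD-order huge faults are serviced; others fall back. */
            if (order == PMD_ORDER && pmd_addr_aligned(vmf->address, pfn))
                    return vmf_insert_pfn_pmd(vmf, pfn & (PMD_MASK >> PAGE_SHIFT), false);

            return VM_FAULT_FALLBACK;
    }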
diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 24180bfd..9ef9e52 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c
@@ -1338,14 +1338,14 @@ bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm, EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid); /** - * drm_gpusvm_range_pages_valid_unlocked() - GPU SVM range pages valid unlocked + * drm_gpusvm_pages_valid_unlocked() - GPU SVM pages valid unlocked * @gpusvm: Pointer to the GPU SVM structure - * @range: Pointer to the GPU SVM range structure + * @svm_pages: Pointer to the GPU SVM pages structure * - * This function determines if a GPU SVM range pages are valid. Expected be - * called without holding gpusvm->notifier_lock. + * This function determines if GPU SVM pages are valid. Expected to be called + * without holding gpusvm->notifier_lock. * - * Return: True if GPU SVM range has valid pages, False otherwise + * Return: True if GPU SVM pages are valid, False otherwise */ static bool drm_gpusvm_pages_valid_unlocked(struct drm_gpusvm *gpusvm, struct drm_gpusvm_pages *svm_pages)

diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index e6b5b06..f3e40d1 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c
@@ -28,6 +28,7 @@ * IN THE SOFTWARE. */ #include <linux/compat.h> +#include <linux/nospec.h> #include <linux/ratelimit.h> #include <linux/export.h> @@ -374,6 +375,7 @@ long drm_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (nr >= ARRAY_SIZE(drm_compat_ioctls)) return drm_ioctl(filp, cmd, arg); + nr = array_index_nospec(nr, ARRAY_SIZE(drm_compat_ioctls)); fn = drm_compat_ioctls[nr].fn; if (!fn) return drm_ioctl(filp, cmd, arg);
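This is the standard Spectre-v1 hardening for a user-controlled table index: the bounds check alone can be speculated past, so array_index_nospec() clamps the index with a data dependency before it reaches the dependent load. The pattern in isolation; the handler table and dispatch function are placeholders:

    #include <linux/kernel.h>
    #include <linux/nospec.h>

    static long (*const handlers[64])(unsigned long arg);

    static long dispatch(unsigned int nr, unsigned long arg)
    {
            if (nr >= ARRAY_SIZE(handlers))
                    return -EINVAL;
            /* Clamp nr right after the check so a mispredicted branch cannot
             * feed an out-of-bounds index into the table load below. */
            nr = array_index_nospec(nr, ARRAY_SIZE(handlers));
            if (!handlers[nr])
                    return -EINVAL;
            return handlers[nr](arg);
    }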
diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c index bdc7914..862675a 100644 --- a/drivers/gpu/drm/drm_pagemap.c +++ b/drivers/gpu/drm/drm_pagemap.c
@@ -480,18 +480,8 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, .start = start, .end = end, .pgmap_owner = pagemap->owner, - /* - * FIXME: MIGRATE_VMA_SELECT_DEVICE_PRIVATE intermittently - * causes 'xe_exec_system_allocator --r *race*no*' to trigger aa - * engine reset and a hard hang due to getting stuck on a folio - * lock. This should work and needs to be root-caused. The only - * downside of not selecting MIGRATE_VMA_SELECT_DEVICE_PRIVATE - * is that device-to-device migrations won’t work; instead, - * memory will bounce through system memory. This path should be - * rare and only occur when the madvise attributes of memory are - * changed or atomics are being used. - */ - .flags = MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT, + .flags = MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT | + MIGRATE_VMA_SELECT_DEVICE_PRIVATE, }; unsigned long i, npages = npages_in_range(start, end); unsigned long own_pages = 0, migrated_pages = 0;
diff --git a/drivers/gpu/drm/drm_pagemap_util.c b/drivers/gpu/drm/drm_pagemap_util.c index 14ddb94..6111d90 100644 --- a/drivers/gpu/drm/drm_pagemap_util.c +++ b/drivers/gpu/drm/drm_pagemap_util.c
@@ -65,18 +65,14 @@ static void drm_pagemap_cache_fini(void *arg) drm_dbg(cache->shrinker->drm, "Destroying dpagemap cache.\n"); spin_lock(&cache->lock); dpagemap = cache->dpagemap; - if (!dpagemap) { - spin_unlock(&cache->lock); - goto out; - } + cache->dpagemap = NULL; + if (dpagemap && !drm_pagemap_shrinker_cancel(dpagemap)) + dpagemap = NULL; + spin_unlock(&cache->lock); - if (drm_pagemap_shrinker_cancel(dpagemap)) { - cache->dpagemap = NULL; - spin_unlock(&cache->lock); + if (dpagemap) drm_pagemap_destroy(dpagemap, false); - } -out: mutex_destroy(&cache->lookup_mutex); kfree(cache); }
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 250734d..8d9fd19 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c
@@ -602,7 +602,7 @@ int drm_syncobj_get_handle(struct drm_file *file_private, drm_syncobj_get(syncobj); ret = xa_alloc(&file_private->syncobj_xa, handle, syncobj, xa_limit_32b, - GFP_NOWAIT); + GFP_KERNEL); if (ret) drm_syncobj_put(syncobj); @@ -716,7 +716,7 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private, drm_syncobj_get(syncobj); ret = xa_alloc(&file_private->syncobj_xa, handle, syncobj, xa_limit_32b, - GFP_NOWAIT); + GFP_KERNEL); if (ret) drm_syncobj_put(syncobj);
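Context for the GFP change: xa_alloc() may allocate internal xarray nodes, and with GFP_NOWAIT that allocation simply fails under memory pressure, turning these sleepable ioctl paths into spurious -ENOMEM. GFP_KERNEL lets the allocator block and reclaim instead. Reduced to its essentials (publish_handle() is a made-up wrapper):

    /* In process/ioctl context there is no reason to forbid sleeping here. */
    static int publish_handle(struct xarray *xa, void *obj, u32 *handle)
    {
            return xa_alloc(xa, handle, obj, xa_limit_32b, GFP_KERNEL);
    }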
diff --git a/drivers/gpu/drm/gud/gud_drv.c b/drivers/gpu/drm/gud/gud_drv.c index d0122d4..17c2dea 100644 --- a/drivers/gpu/drm/gud/gud_drv.c +++ b/drivers/gpu/drm/gud/gud_drv.c
@@ -339,7 +339,9 @@ static int gud_stats_debugfs(struct seq_file *m, void *data) } static const struct drm_crtc_helper_funcs gud_crtc_helper_funcs = { - .atomic_check = drm_crtc_helper_atomic_check + .atomic_check = drm_crtc_helper_atomic_check, + .atomic_enable = gud_crtc_atomic_enable, + .atomic_disable = gud_crtc_atomic_disable, }; static const struct drm_crtc_funcs gud_crtc_funcs = { @@ -364,6 +366,10 @@ static const struct drm_plane_funcs gud_plane_funcs = { DRM_GEM_SHADOW_PLANE_FUNCS, }; +static const struct drm_mode_config_helper_funcs gud_mode_config_helpers = { + .atomic_commit_tail = drm_atomic_helper_commit_tail_rpm, +}; + static const struct drm_mode_config_funcs gud_mode_config_funcs = { .fb_create = drm_gem_fb_create_with_dirty, .atomic_check = drm_atomic_helper_check, @@ -499,6 +505,7 @@ static int gud_probe(struct usb_interface *intf, const struct usb_device_id *id) drm->mode_config.min_height = le32_to_cpu(desc.min_height); drm->mode_config.max_height = le32_to_cpu(desc.max_height); drm->mode_config.funcs = &gud_mode_config_funcs; + drm->mode_config.helper_private = &gud_mode_config_helpers; /* Format init */ formats_dev = devm_kmalloc(dev, GUD_FORMATS_MAX_NUM, GFP_KERNEL);
diff --git a/drivers/gpu/drm/gud/gud_internal.h b/drivers/gpu/drm/gud/gud_internal.h index d27c316..8eec833 100644 --- a/drivers/gpu/drm/gud/gud_internal.h +++ b/drivers/gpu/drm/gud/gud_internal.h
@@ -62,6 +62,10 @@ int gud_usb_set_u8(struct gud_device *gdrm, u8 request, u8 val); void gud_clear_damage(struct gud_device *gdrm); void gud_flush_work(struct work_struct *work); +void gud_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_atomic_state *state); +void gud_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_atomic_state *state); int gud_plane_atomic_check(struct drm_plane *plane, struct drm_atomic_state *state); void gud_plane_atomic_update(struct drm_plane *plane,
diff --git a/drivers/gpu/drm/gud/gud_pipe.c b/drivers/gpu/drm/gud/gud_pipe.c index 4b77be9..b355bf4 100644 --- a/drivers/gpu/drm/gud/gud_pipe.c +++ b/drivers/gpu/drm/gud/gud_pipe.c
@@ -580,6 +580,39 @@ int gud_plane_atomic_check(struct drm_plane *plane, return ret; } +void gud_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct drm_device *drm = crtc->dev; + struct gud_device *gdrm = to_gud_device(drm); + int idx; + + if (!drm_dev_enter(drm, &idx)) + return; + + gud_usb_set_u8(gdrm, GUD_REQ_SET_CONTROLLER_ENABLE, 1); + gud_usb_set(gdrm, GUD_REQ_SET_STATE_COMMIT, 0, NULL, 0); + gud_usb_set_u8(gdrm, GUD_REQ_SET_DISPLAY_ENABLE, 1); + + drm_dev_exit(idx); +} + +void gud_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct drm_device *drm = crtc->dev; + struct gud_device *gdrm = to_gud_device(drm); + int idx; + + if (!drm_dev_enter(drm, &idx)) + return; + + gud_usb_set_u8(gdrm, GUD_REQ_SET_DISPLAY_ENABLE, 0); + gud_usb_set_u8(gdrm, GUD_REQ_SET_CONTROLLER_ENABLE, 0); + + drm_dev_exit(idx); +} + void gud_plane_atomic_update(struct drm_plane *plane, struct drm_atomic_state *atomic_state) { @@ -607,24 +640,12 @@ void gud_plane_atomic_update(struct drm_plane *plane, mutex_unlock(&gdrm->damage_lock); } - if (!drm_dev_enter(drm, &idx)) + if (!crtc || !drm_dev_enter(drm, &idx)) return; - if (!old_state->fb) - gud_usb_set_u8(gdrm, GUD_REQ_SET_CONTROLLER_ENABLE, 1); - - if (fb && (crtc->state->mode_changed || crtc->state->connectors_changed)) - gud_usb_set(gdrm, GUD_REQ_SET_STATE_COMMIT, 0, NULL, 0); - - if (crtc->state->active_changed) - gud_usb_set_u8(gdrm, GUD_REQ_SET_DISPLAY_ENABLE, crtc->state->active); - - if (!fb) - goto ctrl_disable; - ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); if (ret) - goto ctrl_disable; + goto out; drm_atomic_helper_damage_iter_init(&iter, old_state, new_state); drm_atomic_for_each_plane_damage(&iter, &damage) @@ -632,9 +653,6 @@ void gud_plane_atomic_update(struct drm_plane *plane, drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); -ctrl_disable: - if (!crtc->state->enable) - gud_usb_set_u8(gdrm, GUD_REQ_SET_CONTROLLER_ENABLE, 0); - +out: drm_dev_exit(idx); }
diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index 4cb7531..c7fa014 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c
@@ -137,7 +137,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder, intel_dp->DP |= DP_SYNC_VS_HIGH; intel_dp->DP |= DP_LINK_TRAIN_OFF_CPT; - if (drm_dp_enhanced_frame_cap(intel_dp->dpcd)) + if (pipe_config->enhanced_framing) intel_dp->DP |= DP_ENHANCED_FRAMING; intel_dp->DP |= DP_PIPE_SEL_IVB(crtc->pipe);
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index fc265f7..298b3a4 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -889,7 +889,7 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, * non-compressed link speeds, and simplifies down to the ratio between * compressed and non-compressed bpp. */ - if (crtc_state->dsc.compression_enable) { + if (is_vid_mode(intel_dsi) && crtc_state->dsc.compression_enable) { mul = fxp_q4_to_int(crtc_state->dsc.compressed_bpp_x16); div = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); } @@ -1503,7 +1503,7 @@ static void gen11_dsi_get_timings(struct intel_encoder *encoder, struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; - if (pipe_config->dsc.compressed_bpp_x16) { + if (is_vid_mode(intel_dsi) && pipe_config->dsc.compressed_bpp_x16) { int div = fxp_q4_to_int(pipe_config->dsc.compressed_bpp_x16); int mul = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format);
diff --git a/drivers/gpu/drm/i915/display/intel_alpm.c b/drivers/gpu/drm/i915/display/intel_alpm.c index 7ce8c67..f4f1b68 100644 --- a/drivers/gpu/drm/i915/display/intel_alpm.c +++ b/drivers/gpu/drm/i915/display/intel_alpm.c
@@ -43,12 +43,6 @@ bool intel_alpm_is_alpm_aux_less(struct intel_dp *intel_dp, void intel_alpm_init(struct intel_dp *intel_dp) { - u8 dpcd; - - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP, &dpcd) < 0) - return; - - intel_dp->alpm_dpcd = dpcd; mutex_init(&intel_dp->alpm.lock); } @@ -562,12 +556,7 @@ void intel_alpm_disable(struct intel_dp *intel_dp) mutex_lock(&intel_dp->alpm.lock); intel_de_rmw(display, ALPM_CTL(display, cpu_transcoder), - ALPM_CTL_ALPM_ENABLE | ALPM_CTL_LOBF_ENABLE | - ALPM_CTL_ALPM_AUX_LESS_ENABLE, 0); - - intel_de_rmw(display, - PORT_ALPM_CTL(cpu_transcoder), - PORT_ALPM_CTL_ALPM_AUX_LESS_ENABLE, 0); + ALPM_CTL_ALPM_ENABLE | ALPM_CTL_LOBF_ENABLE, 0); drm_dbg_kms(display->drm, "Disabling ALPM\n"); mutex_unlock(&intel_dp->alpm.lock);
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index f5946e6..3d7b4b0 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -2971,6 +2971,53 @@ static int intel_cdclk_update_crtc_min_cdclk(struct intel_atomic_state *state, return 0; } +static int intel_cdclk_update_crtc_min_voltage_level(struct intel_atomic_state *state, + struct intel_crtc *crtc, + u8 old_min_voltage_level, + u8 new_min_voltage_level, + bool *need_cdclk_calc) +{ + struct intel_display *display = to_intel_display(state); + struct intel_cdclk_state *cdclk_state; + bool allow_voltage_level_decrease = intel_any_crtc_needs_modeset(state); + int ret; + + if (new_min_voltage_level == old_min_voltage_level) + return 0; + + if (!allow_voltage_level_decrease && + new_min_voltage_level < old_min_voltage_level) + return 0; + + cdclk_state = intel_atomic_get_cdclk_state(state); + if (IS_ERR(cdclk_state)) + return PTR_ERR(cdclk_state); + + old_min_voltage_level = cdclk_state->min_voltage_level[crtc->pipe]; + + if (new_min_voltage_level == old_min_voltage_level) + return 0; + + if (!allow_voltage_level_decrease && + new_min_voltage_level < old_min_voltage_level) + return 0; + + cdclk_state->min_voltage_level[crtc->pipe] = new_min_voltage_level; + + ret = intel_atomic_lock_global_state(&cdclk_state->base); + if (ret) + return ret; + + *need_cdclk_calc = true; + + drm_dbg_kms(display->drm, + "[CRTC:%d:%s] min voltage level: %d -> %d\n", + crtc->base.base.id, crtc->base.name, + old_min_voltage_level, new_min_voltage_level); + + return 0; +} + int intel_cdclk_update_dbuf_bw_min_cdclk(struct intel_atomic_state *state, int old_min_cdclk, int new_min_cdclk, bool *need_cdclk_calc) @@ -3386,6 +3433,13 @@ static int intel_crtcs_calc_min_cdclk(struct intel_atomic_state *state, need_cdclk_calc); if (ret) return ret; + + ret = intel_cdclk_update_crtc_min_voltage_level(state, crtc, + old_crtc_state->min_voltage_level, + new_crtc_state->min_voltage_level, + need_cdclk_calc); + if (ret) + return ret; } return 0;
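In intel_cdclk_update_crtc_min_voltage_level() above, the no-change and no-decrease tests are intentionally run twice: the first pass uses the value carried in the old crtc state so the common case returns before intel_atomic_get_cdclk_state() pulls the global cdclk state (and its serialization) into the commit, and once that state is acquired the authoritative per-pipe value is re-read and the same tests repeated before anything is written. A minimal sketch of this check/acquire/re-check shape, with hypothetical names standing in for the cdclk state:

/* Illustrative only: struct global_state and acquire_global() stand in
 * for the cdclk state and intel_atomic_get_cdclk_state().
 */
struct global_state { u8 level; };

static struct global_state *acquire_global(void);	/* may be costly */

static int update_level(u8 old_level, u8 new_level)
{
	struct global_state *g;

	if (new_level == old_level)
		return 0;			/* cheap early-out */

	g = acquire_global();			/* serializes the commit */
	if (IS_ERR(g))
		return PTR_ERR(g);

	old_level = g->level;			/* authoritative value */
	if (new_level == old_level)
		return 0;			/* nothing to change */

	g->level = new_level;
	return 0;
}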
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 3b8ba8a..0f82bf7 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -1614,7 +1614,6 @@ static void hsw_configure_cpu_transcoder(const struct intel_crtc_state *crtc_sta } intel_set_transcoder_timings(crtc_state); - intel_vrr_set_transcoder_timings(crtc_state); if (cpu_transcoder != TRANSCODER_EDP) intel_de_write(display, TRANS_MULT(display, cpu_transcoder), @@ -4603,6 +4602,7 @@ intel_crtc_prepare_cleared_state(struct intel_atomic_state *state, struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); struct intel_crtc_state *saved_state; + int err; saved_state = intel_crtc_state_alloc(crtc); if (!saved_state) @@ -4611,7 +4611,12 @@ intel_crtc_prepare_cleared_state(struct intel_atomic_state *state, /* free the old crtc_state->hw members */ intel_crtc_free_hw_state(crtc_state); - intel_dp_tunnel_atomic_clear_stream_bw(state, crtc_state); + err = intel_dp_tunnel_atomic_clear_stream_bw(state, crtc_state); + if (err) { + kfree(saved_state); + + return err; + } /* FIXME: before the switch to atomic started, a new pipe_config was * kzalloc'd. Code that depends on any field being zero should be
diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c index db185a8..fba9fa4 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_well.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c
@@ -806,7 +806,7 @@ void gen9_set_dc_state(struct intel_display *display, u32 state) power_domains->dc_state, val & mask); enable_dc6 = state & DC_STATE_EN_UPTO_DC6; - dc6_was_enabled = val & DC_STATE_EN_UPTO_DC6; + dc6_was_enabled = power_domains->dc_state & DC_STATE_EN_UPTO_DC6; if (!dc6_was_enabled && enable_dc6) intel_dmc_update_dc6_allowed_count(display, true);
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 6b92f33..ced0e5a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -1186,6 +1186,7 @@ struct intel_crtc_state { u32 dc3co_exitline; u16 su_y_granularity; u8 active_non_psr_pipes; + u8 entry_setup_frames; const char *no_psr_reason; /*
diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 1006b06..0b15cb7 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c
@@ -1599,8 +1599,7 @@ static bool intel_dmc_get_dc6_allowed_count(struct intel_display *display, u32 * return false; mutex_lock(&power_domains->lock); - dc6_enabled = intel_de_read(display, DC_STATE_EN) & - DC_STATE_EN_UPTO_DC6; + dc6_enabled = power_domains->dc_state & DC_STATE_EN_UPTO_DC6; if (dc6_enabled) intel_dmc_update_dc6_allowed_count(display, false);
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 559cf3b..696edf4 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -4577,6 +4577,7 @@ static bool intel_edp_init_dpcd(struct intel_dp *intel_dp, struct intel_connector *connector) { struct intel_display *display = to_intel_display(intel_dp); + int ret; /* this function is meant to be called only once */ drm_WARN_ON(display->drm, intel_dp->dpcd[DP_DPCD_REV] != 0); @@ -4616,6 +4617,12 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp, struct intel_connector *connector */ intel_dp_init_source_oui(intel_dp); + /* Read the ALPM DPCD caps */ + ret = drm_dp_dpcd_read_byte(&intel_dp->aux, DP_RECEIVER_ALPM_CAP, + &intel_dp->alpm_dpcd); + if (ret < 0) + return false; + /* * This has to be called after intel_dp->edp_dpcd is filled, PSR checks * for SET_POWER_CAPABLE bit in intel_dp->edp_dpcd[1]
diff --git a/drivers/gpu/drm/i915/display/intel_dp_tunnel.c b/drivers/gpu/drm/i915/display/intel_dp_tunnel.c index 83865c0..55b423fd 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_tunnel.c +++ b/drivers/gpu/drm/i915/display/intel_dp_tunnel.c
@@ -621,19 +621,27 @@ int intel_dp_tunnel_atomic_compute_stream_bw(struct intel_atomic_state *state, * * Clear any DP tunnel stream BW requirement set by * intel_dp_tunnel_atomic_compute_stream_bw(). + * + * Returns 0 in case of success, a negative error code otherwise. */ -void intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state, - struct intel_crtc_state *crtc_state) +int intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + int err; if (!crtc_state->dp_tunnel_ref.tunnel) - return; + return 0; - drm_dp_tunnel_atomic_set_stream_bw(&state->base, - crtc_state->dp_tunnel_ref.tunnel, - crtc->pipe, 0); + err = drm_dp_tunnel_atomic_set_stream_bw(&state->base, + crtc_state->dp_tunnel_ref.tunnel, + crtc->pipe, 0); + if (err) + return err; + drm_dp_tunnel_ref_put(&crtc_state->dp_tunnel_ref); + + return 0; } /**
diff --git a/drivers/gpu/drm/i915/display/intel_dp_tunnel.h b/drivers/gpu/drm/i915/display/intel_dp_tunnel.h index 7f0f720..10ab9ee 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_tunnel.h +++ b/drivers/gpu/drm/i915/display/intel_dp_tunnel.h
@@ -40,8 +40,8 @@ int intel_dp_tunnel_atomic_compute_stream_bw(struct intel_atomic_state *state, struct intel_dp *intel_dp, const struct intel_connector *connector, struct intel_crtc_state *crtc_state); -void intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state, - struct intel_crtc_state *crtc_state); +int intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state); int intel_dp_tunnel_atomic_add_state_for_crtc(struct intel_atomic_state *state, struct intel_crtc *crtc); @@ -88,9 +88,12 @@ intel_dp_tunnel_atomic_compute_stream_bw(struct intel_atomic_state *state, return 0; } -static inline void +static inline int intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state, - struct intel_crtc_state *crtc_state) {} + struct intel_crtc_state *crtc_state) +{ + return 0; +} static inline int intel_dp_tunnel_atomic_add_state_for_crtc(struct intel_atomic_state *state,
diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index a7bce0c..264e684 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c
@@ -496,8 +496,10 @@ gmbus_xfer_read_chunk(struct intel_display *display, val = intel_de_read_fw(display, GMBUS3(display)); do { - if (extra_byte_added && len == 1) + if (extra_byte_added && len == 1) { + len--; break; + } *buf++ = val & 0xff; val >>= 8;
diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c index e06a061..076b9b3 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.c +++ b/drivers/gpu/drm/i915/display/intel_plane.c
@@ -436,11 +436,16 @@ void intel_plane_copy_hw_state(struct intel_plane_state *plane_state, drm_framebuffer_get(plane_state->hw.fb); } +static void unlink_nv12_plane(struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state); + void intel_plane_set_invisible(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + unlink_nv12_plane(crtc_state, plane_state); + crtc_state->active_planes &= ~BIT(plane->id); crtc_state->scaled_planes &= ~BIT(plane->id); crtc_state->nv12_planes &= ~BIT(plane->id); @@ -1513,6 +1518,9 @@ static void unlink_nv12_plane(struct intel_crtc_state *crtc_state, struct intel_display *display = to_intel_display(plane_state); struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + if (!plane_state->planar_linked_plane) + return; + plane_state->planar_linked_plane = NULL; if (!plane_state->is_y_plane) @@ -1550,8 +1558,7 @@ static int icl_check_nv12_planes(struct intel_atomic_state *state, if (plane->pipe != crtc->pipe) continue; - if (plane_state->planar_linked_plane) - unlink_nv12_plane(crtc_state, plane_state); + unlink_nv12_plane(crtc_state, plane_state); } if (!crtc_state->nv12_planes)
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 62208ff..3791944 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -1307,9 +1307,14 @@ static bool psr2_granularity_check(struct intel_crtc_state *crtc_state, u16 sink_y_granularity = crtc_state->has_panel_replay ? connector->dp.panel_replay_caps.su_y_granularity : connector->dp.psr_caps.su_y_granularity; - u16 sink_w_granularity = crtc_state->has_panel_replay ? - connector->dp.panel_replay_caps.su_w_granularity : - connector->dp.psr_caps.su_w_granularity; + u16 sink_w_granularity; + + if (crtc_state->has_panel_replay) + sink_w_granularity = connector->dp.panel_replay_caps.su_w_granularity == + DP_PANEL_REPLAY_FULL_LINE_GRANULARITY ? + crtc_hdisplay : connector->dp.panel_replay_caps.su_w_granularity; + else + sink_w_granularity = connector->dp.psr_caps.su_w_granularity; /* PSR2 HW only send full lines so we only need to validate the width */ if (crtc_hdisplay % sink_w_granularity) @@ -1712,7 +1717,7 @@ static bool _psr_compute_config(struct intel_dp *intel_dp, entry_setup_frames = intel_psr_entry_setup_frames(intel_dp, conn_state, adjusted_mode); if (entry_setup_frames >= 0) { - intel_dp->psr.entry_setup_frames = entry_setup_frames; + crtc_state->entry_setup_frames = entry_setup_frames; } else { crtc_state->no_psr_reason = "PSR setup timing not met"; drm_dbg_kms(display->drm, @@ -1810,7 +1815,7 @@ static bool intel_psr_needs_wa_18037818876(struct intel_dp *intel_dp, { struct intel_display *display = to_intel_display(intel_dp); - return (DISPLAY_VER(display) == 20 && intel_dp->psr.entry_setup_frames > 0 && + return (DISPLAY_VER(display) == 20 && crtc_state->entry_setup_frames > 0 && !crtc_state->has_sel_update); } @@ -2184,6 +2189,7 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp, intel_dp->psr.pkg_c_latency_used = crtc_state->pkg_c_latency_used; intel_dp->psr.io_wake_lines = crtc_state->alpm_state.io_wake_lines; intel_dp->psr.fast_wake_lines = crtc_state->alpm_state.fast_wake_lines; + intel_dp->psr.entry_setup_frames = crtc_state->entry_setup_frames; if (!psr_interrupt_error_check(intel_dp)) return; @@ -2614,6 +2620,12 @@ void intel_psr2_program_trans_man_trk_ctl(struct intel_dsb *dsb, intel_de_write_dsb(display, dsb, PIPE_SRCSZ_ERLY_TPT(crtc->pipe), crtc_state->pipe_srcsz_early_tpt); + + if (!crtc_state->dsc.compression_enable) + return; + + intel_dsc_su_et_parameters_configure(dsb, encoder, crtc_state, + drm_rect_height(&crtc_state->psr2_su_area)); } static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state, @@ -2684,11 +2696,12 @@ static void clip_area_update(struct drm_rect *overlap_damage_area, overlap_damage_area->y2 = damage_area->y2; } -static void intel_psr2_sel_fetch_pipe_alignment(struct intel_crtc_state *crtc_state) +static bool intel_psr2_sel_fetch_pipe_alignment(struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; u16 y_alignment; + bool su_area_changed = false; /* ADLP aligns the SU region to vdsc slice height in case dsc is enabled */ if (crtc_state->dsc.compression_enable && @@ -2697,10 +2710,18 @@ static void intel_psr2_sel_fetch_pipe_alignment(struct intel_crtc_st else y_alignment = crtc_state->su_y_granularity; - crtc_state->psr2_su_area.y1 -= crtc_state->psr2_su_area.y1 % y_alignment; - if (crtc_state->psr2_su_area.y2 % y_alignment) + if (crtc_state->psr2_su_area.y1 % y_alignment) { + crtc_state->psr2_su_area.y1 -= crtc_state->psr2_su_area.y1 % y_alignment; + su_area_changed = true; + } + + if (crtc_state->psr2_su_area.y2 % y_alignment) { + crtc_state->psr2_su_area.y2 = ((crtc_state->psr2_su_area.y2 / y_alignment) + 1) * y_alignment; + su_area_changed = true; + } + + return su_area_changed; } /* @@ -2834,7 +2855,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); struct intel_plane_state *new_plane_state, *old_plane_state; struct intel_plane *plane; - bool full_update = false, cursor_in_su_area = false; + bool full_update = false, su_area_changed; int i, ret; if (!crtc_state->enable_psr2_sel_fetch) @@ -2941,15 +2962,32 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, if (ret) return ret; - /* - * Adjust su area to cover cursor fully as necessary (early - * transport). This needs to be done after - * drm_atomic_add_affected_planes to ensure visible cursor is added into - * affected planes even when cursor is not updated by itself. - */ - intel_psr2_sel_fetch_et_alignment(state, crtc, &cursor_in_su_area); + do { + bool cursor_in_su_area; - intel_psr2_sel_fetch_pipe_alignment(crtc_state); + /* + * Adjust su area to cover cursor fully as necessary + * (early transport). This needs to be done after + * drm_atomic_add_affected_planes to ensure visible + * cursor is added into affected planes even when + * cursor is not updated by itself. + */ + intel_psr2_sel_fetch_et_alignment(state, crtc, &cursor_in_su_area); + + su_area_changed = intel_psr2_sel_fetch_pipe_alignment(crtc_state); + + /* + * If the cursor was outside the SU area before + * alignment, the alignment step (which only expands + * SU) may pull the cursor partially inside, so we + * must run ET alignment again to fully cover it. But + * if the cursor was already fully inside before + * alignment, expanding the SU area won't change that, + * so no further work is needed. + */ + if (cursor_in_su_area) + break; + } while (su_area_changed); /* * Now that we have the pipe damaged area check if it intersect with @@ -3009,6 +3047,10 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, } skip_sel_fetch_set_loop: + if (full_update) + clip_area_update(&crtc_state->psr2_su_area, &crtc_state->pipe_src, + &crtc_state->pipe_src); + psr2_man_trk_ctl_calc(crtc_state, full_update); crtc_state->pipe_srcsz_early_tpt = psr2_pipe_srcsz_early_tpt_calc(crtc_state, full_update); @@ -3068,6 +3110,8 @@ void intel_psr_pre_plane_update(struct intel_atomic_state *state, * - Display WA #1136: skl, bxt */ if (intel_crtc_needs_modeset(new_crtc_state) || + new_crtc_state->update_m_n || + new_crtc_state->update_lrr || !new_crtc_state->has_psr || !new_crtc_state->active_planes || new_crtc_state->has_sel_update != psr->sel_update_enabled ||
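The alignment helper above rounds psr2_su_area.y1 down and y2 up to the y_alignment granularity and now reports whether either edge actually moved, so the caller's new do/while loop knows when to re-run the early-transport cursor pass. The rounding arithmetic as a standalone sketch (align_su() is an illustrative helper, not a kernel function):

struct su_area { int y1, y2; };

static bool align_su(struct su_area *su, int y_alignment)
{
	bool changed = false;

	if (su->y1 % y_alignment) {
		su->y1 -= su->y1 % y_alignment;		/* round down */
		changed = true;
	}
	if (su->y2 % y_alignment) {
		su->y2 = (su->y2 / y_alignment + 1) * y_alignment; /* round up */
		changed = true;
	}
	return changed;	/* e.g. y_alignment = 4: {5, 10} becomes {4, 12} */
}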
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index 5493082..2065dac 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c
@@ -767,6 +767,29 @@ void intel_dsc_dp_pps_write(struct intel_encoder *encoder, sizeof(dp_dsc_pps_sdp)); } +void intel_dsc_su_et_parameters_configure(struct intel_dsb *dsb, struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, int su_lines) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; + enum pipe pipe = crtc->pipe; + int vdsc_instances_per_pipe = intel_dsc_get_vdsc_per_pipe(crtc_state); + int slice_row_per_frame = su_lines / vdsc_cfg->slice_height; + u32 val; + + drm_WARN_ON_ONCE(display->drm, su_lines % vdsc_cfg->slice_height); + drm_WARN_ON_ONCE(display->drm, vdsc_instances_per_pipe > 2); + + val = DSC_SUPS0_SU_SLICE_ROW_PER_FRAME(slice_row_per_frame); + val |= DSC_SUPS0_SU_PIC_HEIGHT(su_lines); + + intel_de_write_dsb(display, dsb, LNL_DSC0_SU_PARAMETER_SET_0(pipe), val); + + if (vdsc_instances_per_pipe == 2) + intel_de_write_dsb(display, dsb, LNL_DSC1_SU_PARAMETER_SET_0(pipe), val); +} + static i915_reg_t dss_ctl1_reg(struct intel_crtc *crtc, enum transcoder cpu_transcoder) { return is_pipe_dsc(crtc, cpu_transcoder) ?
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.h b/drivers/gpu/drm/i915/display/intel_vdsc.h index 99f64ac..99bb904 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.h +++ b/drivers/gpu/drm/i915/display/intel_vdsc.h
@@ -13,6 +13,7 @@ struct drm_printer; enum transcoder; struct intel_crtc; struct intel_crtc_state; +struct intel_dsb; struct intel_encoder; bool intel_dsc_source_support(const struct intel_crtc_state *crtc_state); @@ -31,6 +32,8 @@ void intel_dsc_dsi_pps_write(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); void intel_dsc_dp_pps_write(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); +void intel_dsc_su_et_parameters_configure(struct intel_dsb *dsb, struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, int su_lines); void intel_vdsc_state_dump(struct drm_printer *p, int indent, const struct intel_crtc_state *crtc_state); int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h index 2d478a84..2b2e3c1 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h +++ b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h
@@ -196,6 +196,18 @@ #define DSC_PPS18_NSL_BPG_OFFSET(offset) REG_FIELD_PREP(DSC_PPS18_NSL_BPG_OFFSET_MASK, offset) #define DSC_PPS18_SL_OFFSET_ADJ(offset) REG_FIELD_PREP(DSC_PPS18_SL_OFFSET_ADJ_MASK, offset) +#define _LNL_DSC0_SU_PARAMETER_SET_0_PA 0x78064 +#define _LNL_DSC1_SU_PARAMETER_SET_0_PA 0x78164 +#define _LNL_DSC0_SU_PARAMETER_SET_0_PB 0x78264 +#define _LNL_DSC1_SU_PARAMETER_SET_0_PB 0x78364 +#define LNL_DSC0_SU_PARAMETER_SET_0(pipe) _MMIO_PIPE((pipe), _LNL_DSC0_SU_PARAMETER_SET_0_PA, _LNL_DSC0_SU_PARAMETER_SET_0_PB) +#define LNL_DSC1_SU_PARAMETER_SET_0(pipe) _MMIO_PIPE((pipe), _LNL_DSC1_SU_PARAMETER_SET_0_PA, _LNL_DSC1_SU_PARAMETER_SET_0_PB) + +#define DSC_SUPS0_SU_SLICE_ROW_PER_FRAME_MASK REG_GENMASK(31, 20) +#define DSC_SUPS0_SU_SLICE_ROW_PER_FRAME(rows) REG_FIELD_PREP(DSC_SUPS0_SU_SLICE_ROW_PER_FRAME_MASK, (rows)) +#define DSC_SUPS0_SU_PIC_HEIGHT_MASK REG_GENMASK(15, 0) +#define DSC_SUPS0_SU_PIC_HEIGHT(h) REG_FIELD_PREP(DSC_SUPS0_SU_PIC_HEIGHT_MASK, (h)) + /* Icelake Rate Control Buffer Threshold Registers */ #define DSCA_RC_BUF_THRESH_0 _MMIO(0x6B230) #define DSCA_RC_BUF_THRESH_0_UDW _MMIO(0x6B230 + 4)
diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index db74744..bea0057 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c
@@ -598,6 +598,18 @@ void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state) return; /* + * Bspec says: + * "(note: VRR needs to be programmed after + * TRANS_DDI_FUNC_CTL and before TRANS_CONF)." + * + * In practice it turns out that ICL can hang if + * TRANS_VRR_VMAX/FLIPLINE are written before + * enabling TRANS_DDI_FUNC_CTL. + */ + drm_WARN_ON(display->drm, + !(intel_de_read(display, TRANS_DDI_FUNC_CTL(display, cpu_transcoder)) & TRANS_DDI_FUNC_ENABLE)); + + /* * This bit seems to have two meanings depending on the platform: * TGL: generate VRR "safe window" for DSB vblank waits * ADL/DG2: make TRANS_SET_CONTEXT_LATENCY effective with VRR @@ -939,6 +951,8 @@ void intel_vrr_transcoder_enable(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); + intel_vrr_set_transcoder_timings(crtc_state); + if (!intel_vrr_possible(crtc_state)) return;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index e7918f8..942f4ee 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -898,6 +898,8 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle) vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle); if (likely(vma && vma->vm == vm)) vma = i915_vma_tryget(vma); + else + vma = NULL; rcu_read_unlock(); if (likely(vma)) return vma;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index c6c64ba..720a9ad 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -153,8 +153,12 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, } } while (1); - nr_pages = min_t(unsigned long, - folio_nr_pages(folio), page_count - i); + nr_pages = min_array(((unsigned long[]) { + folio_nr_pages(folio), + page_count - i, + max_segment / PAGE_SIZE, + }), 3); + if (!i || sg->length >= max_segment || folio_pfn(folio) != next_pfn) { @@ -164,7 +168,9 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, st->nents++; sg_set_folio(sg, folio, nr_pages * PAGE_SIZE, 0); } else { - /* XXX: could overflow? */ + nr_pages = min_t(unsigned long, nr_pages, + (max_segment - sg->length) / PAGE_SIZE); + sg->length += nr_pages * PAGE_SIZE; } next_pfn = folio_pfn(folio) + nr_pages;
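min_array() from include/linux/minmax.h returns the smallest element of an array, so the compound-literal call above clamps the folio run to three limits at once: the folio size, the pages still to be filled, and the pages that fit in one sg segment. A rough open-coded equivalent of that call (illustrative only, hypothetical helper name):

/* Spelled-out equivalent of the min_array() call above. */
static unsigned long clamp_nr_pages(unsigned long folio_pages,
				    unsigned long pages_left,
				    unsigned long seg_pages)
{
	unsigned long v[] = { folio_pages, pages_left, seg_pages };
	unsigned long min = v[0];
	int i;

	for (i = 1; i < ARRAY_SIZE(v); i++)
		min = v[i] < min ? v[i] : min;

	return min;	/* never build an sg entry longer than one segment */
}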
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index d37966e..54c9571 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1967,7 +1967,8 @@ void intel_engines_reset_default_submission(struct intel_gt *gt) if (engine->sanitize) engine->sanitize(engine); - engine->set_default_submission(engine); + if (engine->set_default_submission) + engine->set_default_submission(engine); } }
diff --git a/drivers/gpu/drm/i915/i915_wait_util.h b/drivers/gpu/drm/i915/i915_wait_util.h index 7376898..e1ed792 100644 --- a/drivers/gpu/drm/i915/i915_wait_util.h +++ b/drivers/gpu/drm/i915/i915_wait_util.h
@@ -25,9 +25,9 @@ might_sleep(); \ for (;;) { \ const bool expired__ = ktime_after(ktime_get_raw(), end__); \ - OP; \ /* Guarantee COND check prior to timeout */ \ barrier(); \ + OP; \ if (COND) { \ ret__ = 0; \ break; \
diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c index f58bb66..dbb6f5a8 100644 --- a/drivers/gpu/drm/imagination/pvr_device.c +++ b/drivers/gpu/drm/imagination/pvr_device.c
@@ -225,29 +225,12 @@ static irqreturn_t pvr_device_irq_thread_handler(int irq, void *data) } if (pvr_dev->has_safety_events) { - int err; - - /* - * Ensure the GPU is powered on since some safety events (such - * as ECC faults) can happen outside of job submissions, which - * are otherwise the only time a power reference is held. - */ - err = pvr_power_get(pvr_dev); - if (err) { - drm_err_ratelimited(drm_dev, - "%s: could not take power reference (%d)\n", - __func__, err); - return ret; - } - while (pvr_device_safety_irq_pending(pvr_dev)) { pvr_device_safety_irq_clear(pvr_dev); pvr_device_handle_safety_events(pvr_dev); ret = IRQ_HANDLED; } - - pvr_power_put(pvr_dev); } return ret;
diff --git a/drivers/gpu/drm/imagination/pvr_power.c b/drivers/gpu/drm/imagination/pvr_power.c index 0cf7393..3ec4ec4 100644 --- a/drivers/gpu/drm/imagination/pvr_power.c +++ b/drivers/gpu/drm/imagination/pvr_power.c
@@ -90,11 +90,11 @@ pvr_power_request_pwr_off(struct pvr_device *pvr_dev) } static int -pvr_power_fw_disable(struct pvr_device *pvr_dev, bool hard_reset) +pvr_power_fw_disable(struct pvr_device *pvr_dev, bool hard_reset, bool rpm_suspend) { - if (!hard_reset) { - int err; + int err; + if (!hard_reset) { cancel_delayed_work_sync(&pvr_dev->watchdog.work); err = pvr_power_request_idle(pvr_dev); @@ -106,29 +106,47 @@ pvr_power_fw_disable(struct pvr_device *pvr_dev, bool hard_reset) return err; } - return pvr_fw_stop(pvr_dev); + if (rpm_suspend) { + /* This also waits for late processing of GPU or firmware IRQs in other cores */ + disable_irq(pvr_dev->irq); + } + + err = pvr_fw_stop(pvr_dev); + if (err && rpm_suspend) + enable_irq(pvr_dev->irq); + + return err; } static int -pvr_power_fw_enable(struct pvr_device *pvr_dev) +pvr_power_fw_enable(struct pvr_device *pvr_dev, bool rpm_resume) { int err; + if (rpm_resume) + enable_irq(pvr_dev->irq); + err = pvr_fw_start(pvr_dev); if (err) - return err; + goto out; err = pvr_wait_for_fw_boot(pvr_dev); if (err) { drm_err(from_pvr_device(pvr_dev), "Firmware failed to boot\n"); pvr_fw_stop(pvr_dev); - return err; + goto out; } queue_delayed_work(pvr_dev->sched_wq, &pvr_dev->watchdog.work, msecs_to_jiffies(WATCHDOG_TIME_MS)); return 0; + +out: + if (rpm_resume) + disable_irq(pvr_dev->irq); + + return err; } bool @@ -361,7 +379,7 @@ pvr_power_device_suspend(struct device *dev) return -EIO; if (pvr_dev->fw_dev.booted) { - err = pvr_power_fw_disable(pvr_dev, false); + err = pvr_power_fw_disable(pvr_dev, false, true); if (err) goto err_drm_dev_exit; } @@ -391,7 +409,7 @@ pvr_power_device_resume(struct device *dev) goto err_drm_dev_exit; if (pvr_dev->fw_dev.booted) { - err = pvr_power_fw_enable(pvr_dev); + err = pvr_power_fw_enable(pvr_dev, true); if (err) goto err_power_off; } @@ -510,7 +528,16 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset) } /* Disable IRQs for the duration of the reset. */ - disable_irq(pvr_dev->irq); + if (hard_reset) { + disable_irq(pvr_dev->irq); + } else { + /* + * Soft reset is triggered as a response to a FW command to the Host and is + * processed from the threaded IRQ handler. This code cannot (nor needs to) + * wait for any IRQ processing to complete. + */ + disable_irq_nosync(pvr_dev->irq); + } do { if (hard_reset) { @@ -518,7 +545,7 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset) queues_disabled = true; } - err = pvr_power_fw_disable(pvr_dev, hard_reset); + err = pvr_power_fw_disable(pvr_dev, hard_reset, false); if (!err) { if (hard_reset) { pvr_dev->fw_dev.booted = false; @@ -541,7 +568,7 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset) pvr_fw_irq_clear(pvr_dev); - err = pvr_power_fw_enable(pvr_dev); + err = pvr_power_fw_enable(pvr_dev, false); } if (err && hard_reset)
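The new rpm_suspend/rpm_resume arguments above hinge on the difference between the two ways of masking an interrupt line: disable_irq() masks and then synchronizes, waiting for any handler still running (including the threaded one) to finish, while disable_irq_nosync() masks and returns immediately. The soft-reset path is reached from the threaded IRQ handler itself, so the synchronous form would end up waiting on its own context; the runtime-suspend path has no such constraint and wants the wait. A minimal sketch of the distinction (pvr_mask_irq() is a hypothetical helper):

static void pvr_mask_irq(struct pvr_device *pvr_dev, bool wait_for_handlers)
{
	if (wait_for_handlers)
		disable_irq(pvr_dev->irq);	  /* mask + synchronize_irq() */
	else
		disable_irq_nosync(pvr_dev->irq); /* mask only, never blocks */
}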
diff --git a/drivers/gpu/drm/imx/ipuv3/parallel-display.c b/drivers/gpu/drm/imx/ipuv3/parallel-display.c index dea8557..4ce772bc 100644 --- a/drivers/gpu/drm/imx/ipuv3/parallel-display.c +++ b/drivers/gpu/drm/imx/ipuv3/parallel-display.c
@@ -256,7 +256,9 @@ static int imx_pd_probe(struct platform_device *pdev) platform_set_drvdata(pdev, imxpd); - devm_drm_bridge_add(dev, &imxpd->bridge); + ret = devm_drm_bridge_add(dev, &imxpd->bridge); + if (ret) + return ret; return component_add(dev, &imx_pd_ops); }
diff --git a/drivers/gpu/drm/logicvc/logicvc_drm.c b/drivers/gpu/drm/logicvc/logicvc_drm.c index 204b0fe..bbebf4f 100644 --- a/drivers/gpu/drm/logicvc/logicvc_drm.c +++ b/drivers/gpu/drm/logicvc/logicvc_drm.c
@@ -92,7 +92,6 @@ static int logicvc_drm_config_parse(struct logicvc_drm *logicvc) struct device *dev = drm_dev->dev; struct device_node *of_node = dev->of_node; struct logicvc_drm_config *config = &logicvc->config; - struct device_node *layers_node; int ret; logicvc_of_property_parse_bool(of_node, LOGICVC_OF_PROPERTY_DITHERING, @@ -128,7 +127,8 @@ static int logicvc_drm_config_parse(struct logicvc_drm *logicvc) if (ret) return ret; - layers_node = of_get_child_by_name(of_node, "layers"); + struct device_node *layers_node __free(device_node) = + of_get_child_by_name(of_node, "layers"); if (!layers_node) { drm_err(drm_dev, "Missing non-optional layers node\n"); return -EINVAL;
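The logicvc change above adopts the kernel's scope-based cleanup helpers (linux/cleanup.h): declaring the node with __free(device_node) arranges for of_node_put() to run automatically when the pointer goes out of scope, so every return path is covered without explicit puts. The pattern in isolation (example_parse() is illustrative):

#include <linux/cleanup.h>
#include <linux/of.h>

static int example_parse(struct device_node *parent)
{
	struct device_node *np __free(device_node) =
		of_get_child_by_name(parent, "layers");

	if (!np)
		return -EINVAL;	/* no of_node_put() needed on this path */

	/* ... use np ... */

	return 0;		/* np is put automatically here too */
}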
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 17c67f0..aaf6c9e 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -1236,6 +1236,11 @@ static int mtk_dsi_probe(struct platform_device *pdev) dsi->host.ops = &mtk_dsi_ops; dsi->host.dev = dev; + + init_waitqueue_head(&dsi->irq_wait_queue); + + platform_set_drvdata(pdev, dsi); + ret = mipi_dsi_host_register(&dsi->host); if (ret < 0) return dev_err_probe(dev, ret, "Failed to register DSI host\n"); @@ -1247,10 +1252,6 @@ static int mtk_dsi_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, ret, "Failed to request DSI irq\n"); } - init_waitqueue_head(&dsi->irq_wait_queue); - - platform_set_drvdata(pdev, dsi); - dsi->bridge.of_node = dev->of_node; dsi->bridge.type = DRM_MODE_CONNECTOR_DSI;
diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c index d77b477..e2225c5 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c
@@ -78,7 +78,7 @@ static void a2xx_gpummu_destroy(struct msm_mmu *mmu) { struct a2xx_gpummu *gpummu = to_a2xx_gpummu(mmu); - dma_free_attrs(mmu->dev, TABLE_SIZE, gpummu->table, gpummu->pt_base, + dma_free_attrs(mmu->dev, TABLE_SIZE + 32, gpummu->table, gpummu->pt_base, DMA_ATTR_FORCE_CONTIGUOUS); kfree(gpummu);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c index 550a53a..38561f2 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c
@@ -1759,7 +1759,7 @@ static const u32 x285_protect_regs[] = { A6XX_PROTECT_NORDWR(0x27c06, 0x0000), }; -DECLARE_ADRENO_PROTECT(x285_protect, 64); +DECLARE_ADRENO_PROTECT(x285_protect, 15); static const struct adreno_reglist_pipe a840_nonctxt_regs[] = { { REG_A8XX_CP_SMMU_STREAM_ID_LPAC, 0x00000101, BIT(PIPE_NONE) }, @@ -1966,5 +1966,4 @@ static inline __always_unused void __build_asserts(void) BUILD_BUG_ON(a660_protect.count > a660_protect.count_max); BUILD_BUG_ON(a690_protect.count > a690_protect.count_max); BUILD_BUG_ON(a730_protect.count > a730_protect.count_max); - BUILD_BUG_ON(a840_protect.count > a840_protect.count_max); }
diff --git a/drivers/gpu/drm/msm/adreno/a8xx_gpu.c b/drivers/gpu/drm/msm/adreno/a8xx_gpu.c index 5a320f5..b1887e0 100644 --- a/drivers/gpu/drm/msm/adreno/a8xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a8xx_gpu.c
@@ -310,11 +310,21 @@ static void a8xx_set_ubwc_config(struct msm_gpu *gpu) hbb = cfg->highest_bank_bit - 13; hbb_hi = hbb >> 2; hbb_lo = hbb & 3; - a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL, hbb << 5); - a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL, hbb << 5); + + a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL, + hbb << 5 | + level3_swizzling_dis << 4 | + level2_swizzling_dis << 3); + + a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL, + hbb << 5 | + level3_swizzling_dis << 4 | + level2_swizzling_dis << 3); a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CCU_NC_MODE_CNTL, yuvnotcomptofc << 6 | + level3_swizzling_dis << 5 | + level2_swizzling_dis << 4 | hbb_hi << 3 | hbb_lo << 1);
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 554d746..4edfe80 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -302,6 +302,7 @@ static const struct of_device_id dt_match[] = { { .compatible = "qcom,kgsl-3d0" }, {} }; +MODULE_DEVICE_TABLE(of, dt_match); static int adreno_runtime_resume(struct device *dev) {
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h index 303d33d..9f2bcec 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
@@ -133,7 +133,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { static const struct dpu_lm_cfg sc8280xp_lm[] = { { .name = "lm_0", .id = LM_0, - .base = 0x44000, .len = 0x320, + .base = 0x44000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_1, @@ -141,7 +141,7 @@ static const struct dpu_lm_cfg sc8280xp_lm[] = { .dspp = DSPP_0, }, { .name = "lm_1", .id = LM_1, - .base = 0x45000, .len = 0x320, + .base = 0x45000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_0, @@ -149,7 +149,7 @@ static const struct dpu_lm_cfg sc8280xp_lm[] = { .dspp = DSPP_1, }, { .name = "lm_2", .id = LM_2, - .base = 0x46000, .len = 0x320, + .base = 0x46000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, @@ -157,7 +157,7 @@ static const struct dpu_lm_cfg sc8280xp_lm[] = { .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, - .base = 0x47000, .len = 0x320, + .base = 0x47000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, @@ -165,14 +165,14 @@ static const struct dpu_lm_cfg sc8280xp_lm[] = { .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, - .base = 0x48000, .len = 0x320, + .base = 0x48000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_5, .pingpong = PINGPONG_4, }, { .name = "lm_5", .id = LM_5, - .base = 0x49000, .len = 0x320, + .base = 0x49000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_4,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h index b09a6af..04b2216 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
@@ -134,7 +134,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { static const struct dpu_lm_cfg sm8450_lm[] = { { .name = "lm_0", .id = LM_0, - .base = 0x44000, .len = 0x320, + .base = 0x44000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_1, @@ -142,7 +142,7 @@ static const struct dpu_lm_cfg sm8450_lm[] = { .dspp = DSPP_0, }, { .name = "lm_1", .id = LM_1, - .base = 0x45000, .len = 0x320, + .base = 0x45000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_0, @@ -150,7 +150,7 @@ static const struct dpu_lm_cfg sm8450_lm[] = { .dspp = DSPP_1, }, { .name = "lm_2", .id = LM_2, - .base = 0x46000, .len = 0x320, + .base = 0x46000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, @@ -158,7 +158,7 @@ static const struct dpu_lm_cfg sm8450_lm[] = { .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, - .base = 0x47000, .len = 0x320, + .base = 0x47000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, @@ -166,14 +166,14 @@ static const struct dpu_lm_cfg sm8450_lm[] = { .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, - .base = 0x48000, .len = 0x320, + .base = 0x48000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_5, .pingpong = PINGPONG_4, }, { .name = "lm_5", .id = LM_5, - .base = 0x49000, .len = 0x320, + .base = 0x49000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_4,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h index 0f7b4a2..42cf3bd 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h
@@ -366,8 +366,8 @@ static const struct dpu_intf_cfg sa8775p_intf[] = { .type = INTF_NONE, .controller_id = MSM_DP_CONTROLLER_0, /* pair with intf_0 for DP MST */ .prog_fetch_lines_worst_case = 24, - .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17), - .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16), + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17), }, { .name = "intf_7", .id = INTF_7, .base = 0x3b000, .len = 0x280,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h index 465b646..4c7eb55 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
@@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sm8550_sspp[] = { static const struct dpu_lm_cfg sm8550_lm[] = { { .name = "lm_0", .id = LM_0, - .base = 0x44000, .len = 0x320, + .base = 0x44000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_1, @@ -139,7 +139,7 @@ static const struct dpu_lm_cfg sm8550_lm[] = { .dspp = DSPP_0, }, { .name = "lm_1", .id = LM_1, - .base = 0x45000, .len = 0x320, + .base = 0x45000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_0, @@ -147,7 +147,7 @@ static const struct dpu_lm_cfg sm8550_lm[] = { .dspp = DSPP_1, }, { .name = "lm_2", .id = LM_2, - .base = 0x46000, .len = 0x320, + .base = 0x46000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, @@ -155,7 +155,7 @@ static const struct dpu_lm_cfg sm8550_lm[] = { .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, - .base = 0x47000, .len = 0x320, + .base = 0x47000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, @@ -163,14 +163,14 @@ static const struct dpu_lm_cfg sm8550_lm[] = { .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, - .base = 0x48000, .len = 0x320, + .base = 0x48000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_5, .pingpong = PINGPONG_4, }, { .name = "lm_5", .id = LM_5, - .base = 0x49000, .len = 0x320, + .base = 0x49000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_4,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_1_sar2130p.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_1_sar2130p.h index 6caa7d4..dec83ea 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_1_sar2130p.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_1_sar2130p.h
@@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sar2130p_sspp[] = { static const struct dpu_lm_cfg sar2130p_lm[] = { { .name = "lm_0", .id = LM_0, - .base = 0x44000, .len = 0x320, + .base = 0x44000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_1, @@ -139,7 +139,7 @@ static const struct dpu_lm_cfg sar2130p_lm[] = { .dspp = DSPP_0, }, { .name = "lm_1", .id = LM_1, - .base = 0x45000, .len = 0x320, + .base = 0x45000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_0, @@ -147,7 +147,7 @@ static const struct dpu_lm_cfg sar2130p_lm[] = { .dspp = DSPP_1, }, { .name = "lm_2", .id = LM_2, - .base = 0x46000, .len = 0x320, + .base = 0x46000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, @@ -155,7 +155,7 @@ static const struct dpu_lm_cfg sar2130p_lm[] = { .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, - .base = 0x47000, .len = 0x320, + .base = 0x47000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, @@ -163,14 +163,14 @@ static const struct dpu_lm_cfg sar2130p_lm[] = { .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, - .base = 0x48000, .len = 0x320, + .base = 0x48000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_5, .pingpong = PINGPONG_4, }, { .name = "lm_5", .id = LM_5, - .base = 0x49000, .len = 0x320, + .base = 0x49000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_4,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h index 7243eeb..52ff4ba 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h
@@ -130,7 +130,7 @@ static const struct dpu_sspp_cfg x1e80100_sspp[] = { static const struct dpu_lm_cfg x1e80100_lm[] = { { .name = "lm_0", .id = LM_0, - .base = 0x44000, .len = 0x320, + .base = 0x44000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_1, @@ -138,7 +138,7 @@ static const struct dpu_lm_cfg x1e80100_lm[] = { .dspp = DSPP_0, }, { .name = "lm_1", .id = LM_1, - .base = 0x45000, .len = 0x320, + .base = 0x45000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_0, @@ -146,7 +146,7 @@ static const struct dpu_lm_cfg x1e80100_lm[] = { .dspp = DSPP_1, }, { .name = "lm_2", .id = LM_2, - .base = 0x46000, .len = 0x320, + .base = 0x46000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_3, @@ -154,7 +154,7 @@ static const struct dpu_lm_cfg x1e80100_lm[] = { .dspp = DSPP_2, }, { .name = "lm_3", .id = LM_3, - .base = 0x47000, .len = 0x320, + .base = 0x47000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_2, @@ -162,14 +162,14 @@ static const struct dpu_lm_cfg x1e80100_lm[] = { .dspp = DSPP_3, }, { .name = "lm_4", .id = LM_4, - .base = 0x48000, .len = 0x320, + .base = 0x48000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_5, .pingpong = PINGPONG_4, }, { .name = "lm_5", .id = LM_5, - .base = 0x49000, .len = 0x320, + .base = 0x49000, .len = 0x400, .features = MIXER_MSM8998_MASK, .sblk = &sdm845_lm_sblk, .lm_pair = LM_4,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c index 188ee0af..23dcbe1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c
@@ -89,7 +89,7 @@ static void dpu_setup_dspp_gc(struct dpu_hw_dspp *ctx, base = ctx->cap->sblk->gc.base; if (!base) { - DRM_ERROR("invalid ctx %pK gc base\n", ctx); + DRM_ERROR("invalid ctx %p gc base\n", ctx); return; }
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp_v13.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp_v13.c index e65f1fc..f8f96ad 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp_v13.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp_v13.c
@@ -156,11 +156,13 @@ static void dpu_hw_sspp_setup_pe_config_v13(struct dpu_hw_sspp *ctx, u8 color; u32 lr_pe[4], tb_pe[4]; const u32 bytemask = 0xff; - u32 offset = ctx->cap->sblk->sspp_rec0_blk.base; + u32 offset; if (!ctx || !pe_ext) return; + offset = ctx->cap->sblk->sspp_rec0_blk.base; + c = &ctx->hw; /* program SW pixel extension override for all pipes*/ for (color = 0; color < DPU_MAX_PLANES; color++) {
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index 451a4fc..7e77d88 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
@@ -350,26 +350,28 @@ static bool _dpu_rm_check_lm_and_get_connected_blks(struct dpu_rm *rm, return true; } -static bool dpu_rm_find_lms(struct dpu_rm *rm, - struct dpu_global_state *global_state, - uint32_t crtc_id, bool skip_dspp, - struct msm_display_topology *topology, - int *lm_idx, int *pp_idx, int *dspp_idx) +static int _dpu_rm_reserve_lms(struct dpu_rm *rm, + struct dpu_global_state *global_state, + uint32_t crtc_id, + struct msm_display_topology *topology) { + int lm_idx[MAX_BLOCKS]; + int pp_idx[MAX_BLOCKS]; + int dspp_idx[MAX_BLOCKS] = {0}; int i, lm_count = 0; + if (!topology->num_lm) { + DPU_ERROR("zero LMs in topology\n"); + return -EINVAL; + } + /* Find a primary mixer */ for (i = 0; i < ARRAY_SIZE(rm->mixer_blks) && lm_count < topology->num_lm; i++) { if (!rm->mixer_blks[i]) continue; - if (skip_dspp && to_dpu_hw_mixer(rm->mixer_blks[i])->cap->dspp) { - DPU_DEBUG("Skipping LM_%d, skipping LMs with DSPPs\n", i); - continue; - } - /* * Reset lm_count to an even index. This will drop the previous * primary mixer if failed to find its peer. @@ -408,38 +410,12 @@ static bool dpu_rm_find_lms(struct dpu_rm *rm, } } - return lm_count == topology->num_lm; -} - -static int _dpu_rm_reserve_lms(struct dpu_rm *rm, - struct dpu_global_state *global_state, - uint32_t crtc_id, - struct msm_display_topology *topology) - -{ - int lm_idx[MAX_BLOCKS]; - int pp_idx[MAX_BLOCKS]; - int dspp_idx[MAX_BLOCKS] = {0}; - int i; - bool found; - - if (!topology->num_lm) { - DPU_ERROR("zero LMs in topology\n"); - return -EINVAL; - } - - /* Try using non-DSPP LM blocks first */ - found = dpu_rm_find_lms(rm, global_state, crtc_id, !topology->num_dspp, - topology, lm_idx, pp_idx, dspp_idx); - if (!found && !topology->num_dspp) - found = dpu_rm_find_lms(rm, global_state, crtc_id, false, - topology, lm_idx, pp_idx, dspp_idx); - if (!found) { + if (lm_count != topology->num_lm) { DPU_DEBUG("unable to find appropriate mixers\n"); return -ENAVAIL; } - for (i = 0; i < topology->num_lm; i++) { + for (i = 0; i < lm_count; i++) { global_state->mixer_to_crtc_id[lm_idx[i]] = crtc_id; global_state->pingpong_to_crtc_id[pp_idx[i]] = crtc_id; global_state->dspp_to_crtc_id[dspp_idx[i]] =
diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index e0de545..db6da99 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -584,13 +584,30 @@ void dsi_link_clk_disable_v2(struct msm_dsi_host *msm_host) * FIXME: Reconsider this if/when CMD mode handling is rewritten to use * transfer time and data overhead as a starting point of the calculations. */ -static unsigned long dsi_adjust_pclk_for_compression(const struct drm_display_mode *mode, - const struct drm_dsc_config *dsc) +static unsigned long +dsi_adjust_pclk_for_compression(const struct drm_display_mode *mode, + const struct drm_dsc_config *dsc, + bool is_bonded_dsi) { - int new_hdisplay = DIV_ROUND_UP(mode->hdisplay * drm_dsc_get_bpp_int(dsc), - dsc->bits_per_component * 3); + int hdisplay, new_hdisplay, new_htotal; - int new_htotal = mode->htotal - mode->hdisplay + new_hdisplay; + /* + * For bonded DSI, split hdisplay across two links and round up each + * half separately, passing the full hdisplay would only round up once. + * This also aligns with the hdisplay we program later in + * dsi_timing_setup() + */ + hdisplay = mode->hdisplay; + if (is_bonded_dsi) + hdisplay /= 2; + + new_hdisplay = DIV_ROUND_UP(hdisplay * drm_dsc_get_bpp_int(dsc), + dsc->bits_per_component * 3); + + if (is_bonded_dsi) + new_hdisplay *= 2; + + new_htotal = mode->htotal - mode->hdisplay + new_hdisplay; return mult_frac(mode->clock * 1000u, new_htotal, mode->htotal); } @@ -603,7 +620,7 @@ static unsigned long dsi_get_pclk_rate(const struct drm_display_mode *mode, pclk_rate = mode->clock * 1000u; if (dsc) - pclk_rate = dsi_adjust_pclk_for_compression(mode, dsc); + pclk_rate = dsi_adjust_pclk_for_compression(mode, dsc, is_bonded_dsi); /* * For bonded DSI mode, the current DRM mode has the complete width of the @@ -993,7 +1010,7 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi) if (msm_host->dsc) { struct drm_dsc_config *dsc = msm_host->dsc; - u32 bytes_per_pclk; + u32 bits_per_pclk; /* update dsc params with timing params */ if (!dsc || !mode->hdisplay || !mode->vdisplay) { @@ -1015,7 +1032,9 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi) /* * DPU sends 3 bytes per pclk cycle to DSI. If widebus is - * enabled, bus width is extended to 6 bytes. + * enabled, MDP always sends out 48-bit compressed data per + * pclk and on average, DSI consumes an amount of compressed + * data equivalent to the uncompressed pixel depth per pclk. * * Calculate the number of pclks needed to transmit one line of * the compressed data. @@ -1027,12 +1046,12 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi) * unused anyway. */ h_total -= hdisplay; - if (wide_bus_enabled && !(msm_host->mode_flags & MIPI_DSI_MODE_VIDEO)) - bytes_per_pclk = 6; + if (wide_bus_enabled) + bits_per_pclk = mipi_dsi_pixel_format_to_bpp(msm_host->format); else - bytes_per_pclk = 3; + bits_per_pclk = 24; - hdisplay = DIV_ROUND_UP(msm_dsc_get_bytes_per_line(msm_host->dsc), bytes_per_pclk); + hdisplay = DIV_ROUND_UP(msm_dsc_get_bytes_per_line(msm_host->dsc) * 8, bits_per_pclk); h_total += hdisplay; ha_end = ha_start + hdisplay;
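The rounding asymmetry described in the comment above is easiest to see with numbers. A worked example with assumed values (not taken from the patch):

/*
 * Assume mode->hdisplay = 1082, compressed bpp = 8, bits_per_component = 8.
 *
 * Rounding the full width once:
 *   DIV_ROUND_UP(1082 * 8, 8 * 3)           = 361 compressed pclks
 * Bonded DSI, rounding each half and doubling:
 *   2 * DIV_ROUND_UP((1082 / 2) * 8, 8 * 3) = 2 * 181 = 362 compressed pclks
 *
 * The per-link result is what dsi_timing_setup() actually programs, which
 * is why the helper splits hdisplay before rounding.
 */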
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 8cb0db3..0118244 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
@@ -51,8 +51,8 @@ #define DSI_PHY_7NM_QUIRK_V4_3 BIT(3) /* Hardware is V5.2 */ #define DSI_PHY_7NM_QUIRK_V5_2 BIT(4) -/* Hardware is V7.0 */ -#define DSI_PHY_7NM_QUIRK_V7_0 BIT(5) +/* Hardware is V7.2 */ +#define DSI_PHY_7NM_QUIRK_V7_2 BIT(5) struct dsi_pll_config { bool enable_ssc; @@ -143,7 +143,7 @@ static void dsi_pll_calc_dec_frac(struct dsi_pll_7nm *pll, struct dsi_pll_config if (pll->phy->cfg->quirks & DSI_PHY_7NM_QUIRK_PRE_V4_1) { config->pll_clock_inverters = 0x28; - } else if ((pll->phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_0)) { + } else if ((pll->phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_2)) { if (pll_freq < 163000000ULL) config->pll_clock_inverters = 0xa0; else if (pll_freq < 175000000ULL) @@ -284,7 +284,7 @@ static void dsi_pll_config_hzindep_reg(struct dsi_pll_7nm *pll) } if ((pll->phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V5_2) || - (pll->phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_0)) { + (pll->phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_2)) { if (pll->vco_current_rate < 1557000000ULL) vco_config_1 = 0x08; else @@ -699,7 +699,7 @@ static int dsi_7nm_set_usecase(struct msm_dsi_phy *phy) case MSM_DSI_PHY_MASTER: pll_7nm->slave = pll_7nm_list[(pll_7nm->phy->id + 1) % DSI_MAX]; /* v7.0: Enable ATB_EN0 and alternate clock output to external phy */ - if (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_0) + if (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_2) writel(0x07, base + REG_DSI_7nm_PHY_CMN_CTRL_5); break; case MSM_DSI_PHY_SLAVE: @@ -987,7 +987,7 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, /* Request for REFGEN READY */ if ((phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V4_3) || (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V5_2) || - (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_0)) { + (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_2)) { writel(0x1, phy->base + REG_DSI_7nm_PHY_CMN_GLBL_DIGTOP_SPARE10); udelay(500); } @@ -1021,7 +1021,7 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, lane_ctrl0 = 0x1f; } - if ((phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_0)) { + if ((phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_2)) { if (phy->cphy_mode) { /* TODO: different for second phy */ vreg_ctrl_0 = 0x57; @@ -1097,7 +1097,7 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, /* program CMN_CTRL_4 for minor_ver 2 chipsets*/ if ((phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V5_2) || - (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_0) || + (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_2) || (readl(base + REG_DSI_7nm_PHY_CMN_REVISION_ID0) & (0xf0)) == 0x20) writel(0x04, base + REG_DSI_7nm_PHY_CMN_CTRL_4); @@ -1213,7 +1213,7 @@ static void dsi_7nm_phy_disable(struct msm_dsi_phy *phy) /* Turn off REFGEN Vote */ if ((phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V4_3) || (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V5_2) || - (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_0)) { + (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V7_2)) { writel(0x0, base + REG_DSI_7nm_PHY_CMN_GLBL_DIGTOP_SPARE10); wmb(); /* Delay to ensure HW removes vote before PHY shut down */ @@ -1502,7 +1502,7 @@ const struct msm_dsi_phy_cfg dsi_phy_3nm_8750_cfgs = { #endif .io_start = { 0xae95000, 0xae97000 }, .num_dsi_phy = 2, - .quirks = DSI_PHY_7NM_QUIRK_V7_0, + .quirks = DSI_PHY_7NM_QUIRK_V7_2, }; const struct msm_dsi_phy_cfg dsi_phy_3nm_kaanapali_cfgs = { @@ -1525,5 +1525,5 @@ const struct msm_dsi_phy_cfg dsi_phy_3nm_kaanapali_cfgs = { #endif .io_start = { 0x9ac1000, 0x9ac4000 }, .num_dsi_phy = 2, - .quirks = DSI_PHY_7NM_QUIRK_V7_0, + .quirks = DSI_PHY_7NM_QUIRK_V7_2, };
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 00d4530..cc23949 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -1230,6 +1230,9 @@ nouveau_connector_aux_xfer(struct drm_dp_aux *obj, struct drm_dp_aux_msg *msg) u8 size = msg->size; int ret; + if (pm_runtime_suspended(nv_connector->base.dev->dev)) + return -EBUSY; + nv_encoder = find_encoder(&nv_connector->base, DCB_OUTPUT_DP); if (!nv_encoder) return -ENODEV;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c index e10192f..0121d56 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c
@@ -737,8 +737,8 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps) if (!obj) goto done; - if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || - WARN_ON(obj->buffer.length != 4)) + if (obj->type != ACPI_TYPE_BUFFER || + obj->buffer.length != 4) goto done; caps->status = 0; @@ -773,8 +773,8 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt) if (!obj) goto done; - if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || - WARN_ON(obj->buffer.length != 4)) + if (obj->type != ACPI_TYPE_BUFFER || + obj->buffer.length != 4) goto done; jt->status = 0; @@ -861,8 +861,8 @@ r535_gsp_acpi_dod(acpi_handle handle, DOD_METHOD_DATA *dod) _DOD = output.pointer; - if (WARN_ON(_DOD->type != ACPI_TYPE_PACKAGE) || - WARN_ON(_DOD->package.count > ARRAY_SIZE(dod->acpiIdList))) + if (_DOD->type != ACPI_TYPE_PACKAGE || + _DOD->package.count > ARRAY_SIZE(dod->acpiIdList)) return; for (int i = 0; i < _DOD->package.count; i++) {
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index bd703a2..a70f1db 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -893,14 +893,15 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue out_sync: /* Make sure the CPU caches are invalidated before the seqno is read. - * drm_gem_shmem_sync() is a NOP if map_wc=true, so no need to check + * panthor_gem_sync() is a NOP if map_wc=true, so no need to check * it here. */ - panthor_gem_sync(&bo->base.base, queue->syncwait.offset, + panthor_gem_sync(&bo->base.base, + DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE, + queue->syncwait.offset, queue->syncwait.sync64 ? sizeof(struct panthor_syncobj_64b) : - sizeof(struct panthor_syncobj_32b), - DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE); + sizeof(struct panthor_syncobj_32b)); return queue->syncwait.kmap + queue->syncwait.offset;
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index b4aa49b..4b10715 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2915,9 +2915,11 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, if (rdev->family == CHIP_HAINAN) { if ((rdev->pdev->revision == 0x81) || (rdev->pdev->revision == 0xC3) || + (rdev->pdev->device == 0x6660) || (rdev->pdev->device == 0x6664) || (rdev->pdev->device == 0x6665) || - (rdev->pdev->device == 0x6667)) { + (rdev->pdev->device == 0x6667) || + (rdev->pdev->device == 0x666F)) { max_sclk = 75000; } if ((rdev->pdev->revision == 0xC3) ||
diff --git a/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c b/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c index f74a0aa..29f2b7d 100644 --- a/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c +++ b/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c
@@ -1122,6 +1122,7 @@ static int rzg2l_mipi_dsi_host_attach(struct mipi_dsi_host *host, struct mipi_dsi_device *device) { struct rzg2l_mipi_dsi *dsi = host_to_rzg2l_mipi_dsi(host); + int bpp; int ret; if (device->lanes > dsi->num_data_lanes) { @@ -1131,7 +1132,8 @@ static int rzg2l_mipi_dsi_host_attach(struct mipi_dsi_host *host, return -EINVAL; } - switch (mipi_dsi_pixel_format_to_bpp(device->format)) { + bpp = mipi_dsi_pixel_format_to_bpp(device->format); + switch (bpp) { case 24: break; case 18: @@ -1162,6 +1164,18 @@ static int rzg2l_mipi_dsi_host_attach(struct mipi_dsi_host *host, drm_bridge_add(&dsi->bridge); + /* + * Report the required division ratio setting for the MIPI clock dividers. + * + * vclk * bpp = hsclk * 8 * num_lanes + * + * vclk * DSI_AB_divider = hsclk * 16 + * + * which simplifies to... + * DSI_AB_divider = bpp * 2 / num_lanes + */ + rzg2l_cpg_dsi_div_set_divider(bpp * 2 / dsi->lanes, PLL5_TARGET_DSI); + return 0; }
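The divider relation in the comment above is easy to sanity-check with plain integer arithmetic. A minimal standalone sketch, using made-up bpp/lane combinations rather than values from any particular panel:

#include <stdio.h>

int main(void)
{
	/* DSI_AB_divider = bpp * 2 / num_lanes, per the derivation above */
	static const struct { int bpp, lanes; } cfg[] = {
		{ 24, 4 }, { 24, 2 }, { 18, 4 }, { 16, 2 },
	};

	for (unsigned int i = 0; i < sizeof(cfg) / sizeof(cfg[0]); i++)
		printf("bpp=%d lanes=%d -> divider=%d\n",
		       cfg[i].bpp, cfg[i].lanes,
		       cfg[i].bpp * 2 / cfg[i].lanes);
	return 0;
}

For the common 24 bpp, 4-lane case this yields a divider of 12.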
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index e6ee354..2d5cb21 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -361,6 +361,7 @@ static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) /** * drm_sched_job_done - complete a job * @s_job: pointer to the job which is done + * @result: 0 on success, -ERRNO on error * * Finish the job's fence and resubmit the work items. */
diff --git a/drivers/gpu/drm/sitronix/st7586.c b/drivers/gpu/drm/sitronix/st7586.c index b57ebf3..16b6b4e 100644 --- a/drivers/gpu/drm/sitronix/st7586.c +++ b/drivers/gpu/drm/sitronix/st7586.c
@@ -347,6 +347,12 @@ static int st7586_probe(struct spi_device *spi) if (ret) return ret; + /* + * Override value set by mipi_dbi_spi_init(). This driver is a bit + * non-standard, so best to set it explicitly here. + */ + dbi->write_memory_bpw = 8; + /* Cannot read from this controller via SPI */ dbi->read_commands = NULL; @@ -356,15 +362,6 @@ static int st7586_probe(struct spi_device *spi) if (ret) return ret; - /* - * we are using 8-bit data, so we are not actually swapping anything, - * but setting mipi->swap_bytes makes mipi_dbi_typec3_command() do the - * right thing and not use 16-bit transfers (which results in swapped - * bytes on little-endian systems and causes out of order data to be - * sent to the display). - */ - dbi->swap_bytes = true; - drm_mode_config_reset(drm); ret = drm_dev_register(drm, 0);
diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c index 6ecf9e2..c77455b 100644 --- a/drivers/gpu/drm/solomon/ssd130x.c +++ b/drivers/gpu/drm/solomon/ssd130x.c
@@ -737,6 +737,7 @@ static int ssd130x_update_rect(struct ssd130x_device *ssd130x, unsigned int height = drm_rect_height(rect); unsigned int line_length = DIV_ROUND_UP(width, 8); unsigned int page_height = SSD130X_PAGE_HEIGHT; + u8 page_start = ssd130x->page_offset + y / page_height; unsigned int pages = DIV_ROUND_UP(height, page_height); struct drm_device *drm = &ssd130x->drm; u32 array_idx = 0; @@ -774,14 +775,11 @@ static int ssd130x_update_rect(struct ssd130x_device *ssd130x, */ if (!ssd130x->page_address_mode) { - u8 page_start; - /* Set address range for horizontal addressing mode */ ret = ssd130x_set_col_range(ssd130x, ssd130x->col_offset + x, width); if (ret < 0) return ret; - page_start = ssd130x->page_offset + y / page_height; ret = ssd130x_set_page_range(ssd130x, page_start, pages); if (ret < 0) return ret; @@ -813,7 +811,7 @@ static int ssd130x_update_rect(struct ssd130x_device *ssd130x, */ if (ssd130x->page_address_mode) { ret = ssd130x_set_page_pos(ssd130x, - ssd130x->page_offset + i, + page_start + i, ssd130x->col_offset + x); if (ret < 0) return ret;
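With page_start hoisted, both addressing modes now derive the first page from the damage rectangle's y offset; previously the page-addressing path always started at the panel's page_offset regardless of y. A standalone check of the arithmetic, assuming the usual 8-pixel page height (the rect and offset values here are invented):

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int page_height = 8;		/* SSD130X_PAGE_HEIGHT */
	unsigned int page_offset = 1;		/* example panel offset */
	unsigned int y = 10, height = 20;	/* example damage rect */

	unsigned int page_start = page_offset + y / page_height;
	unsigned int pages = DIV_ROUND_UP(height, page_height);

	/* the update now touches pages page_start..page_start+pages-1 */
	printf("page_start=%u pages=%u\n", page_start, pages);	/* 2 and 3 */
	return 0;
}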
diff --git a/drivers/gpu/drm/sysfb/efidrm.c b/drivers/gpu/drm/sysfb/efidrm.c index 50e0aee..9d84caf 100644 --- a/drivers/gpu/drm/sysfb/efidrm.c +++ b/drivers/gpu/drm/sysfb/efidrm.c
@@ -151,7 +151,6 @@ static struct efidrm_device *efidrm_device_create(struct drm_driver *drv, struct drm_sysfb_device *sysfb; struct drm_device *dev; struct resource *mem = NULL; - void __iomem *screen_base = NULL; struct drm_plane *primary_plane; struct drm_crtc *crtc; struct drm_encoder *encoder; @@ -238,21 +237,38 @@ static struct efidrm_device *efidrm_device_create(struct drm_driver *drv, mem_flags = efidrm_get_mem_flags(dev, res->start, vsize); - if (mem_flags & EFI_MEMORY_WC) - screen_base = devm_ioremap_wc(&pdev->dev, mem->start, resource_size(mem)); - else if (mem_flags & EFI_MEMORY_UC) - screen_base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem)); - else if (mem_flags & EFI_MEMORY_WT) - screen_base = devm_memremap(&pdev->dev, mem->start, resource_size(mem), - MEMREMAP_WT); - else if (mem_flags & EFI_MEMORY_WB) - screen_base = devm_memremap(&pdev->dev, mem->start, resource_size(mem), - MEMREMAP_WB); - else + if (mem_flags & EFI_MEMORY_WC) { + void __iomem *screen_base = devm_ioremap_wc(&pdev->dev, mem->start, + resource_size(mem)); + + if (!screen_base) + return ERR_PTR(-ENXIO); + iosys_map_set_vaddr_iomem(&sysfb->fb_addr, screen_base); + } else if (mem_flags & EFI_MEMORY_UC) { + void __iomem *screen_base = devm_ioremap(&pdev->dev, mem->start, + resource_size(mem)); + + if (!screen_base) + return ERR_PTR(-ENXIO); + iosys_map_set_vaddr_iomem(&sysfb->fb_addr, screen_base); + } else if (mem_flags & EFI_MEMORY_WT) { + void *screen_base = devm_memremap(&pdev->dev, mem->start, + resource_size(mem), MEMREMAP_WT); + + if (IS_ERR(screen_base)) + return ERR_CAST(screen_base); + iosys_map_set_vaddr(&sysfb->fb_addr, screen_base); + } else if (mem_flags & EFI_MEMORY_WB) { + void *screen_base = devm_memremap(&pdev->dev, mem->start, + resource_size(mem), MEMREMAP_WB); + + if (IS_ERR(screen_base)) + return ERR_CAST(screen_base); + iosys_map_set_vaddr(&sysfb->fb_addr, screen_base); + } else { drm_err(dev, "invalid mem_flags: 0x%llx\n", mem_flags); - if (!screen_base) - return ERR_PTR(-ENOMEM); - iosys_map_set_vaddr_iomem(&sysfb->fb_addr, screen_base); + return ERR_PTR(-EINVAL); + } /* * Modesetting
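The rewrite keys each mapping call off the EFI cache attributes and checks it with the convention that call actually uses: devm_ioremap()/devm_ioremap_wc() report failure as NULL, while devm_memremap() reports ERR_PTR(), so the old shared !screen_base test was wrong for the memremap cases. A condensed kernel-style sketch of the dispatch, with only two of the four attribute cases shown (not the driver's exact code):

#include <linux/efi.h>
#include <linux/err.h>
#include <linux/io.h>
#include <linux/iosys-map.h>

static int map_sysfb(struct device *dev, struct resource *mem,
		     u64 mem_flags, struct iosys_map *fb_addr)
{
	resource_size_t size = resource_size(mem);

	if (mem_flags & EFI_MEMORY_WC) {
		void __iomem *base = devm_ioremap_wc(dev, mem->start, size);

		if (!base)		/* ioremap family: NULL on failure */
			return -ENXIO;
		iosys_map_set_vaddr_iomem(fb_addr, base);
	} else if (mem_flags & EFI_MEMORY_WB) {
		void *base = devm_memremap(dev, mem->start, size, MEMREMAP_WB);

		if (IS_ERR(base))	/* memremap family: ERR_PTR on failure */
			return PTR_ERR(base);
		iosys_map_set_vaddr(fb_addr, base);
	} else {
		return -EINVAL;
	}

	return 0;
}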
diff --git a/drivers/gpu/drm/tiny/sharp-memory.c b/drivers/gpu/drm/tiny/sharp-memory.c index 64272cd..cbf6946 100644 --- a/drivers/gpu/drm/tiny/sharp-memory.c +++ b/drivers/gpu/drm/tiny/sharp-memory.c
@@ -541,8 +541,8 @@ static int sharp_memory_probe(struct spi_device *spi) smd = devm_drm_dev_alloc(dev, &sharp_memory_drm_driver, struct sharp_memory_device, drm); - if (!smd) - return -ENOMEM; + if (IS_ERR(smd)) + return PTR_ERR(smd); spi_set_drvdata(spi, smd);
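devm_drm_dev_alloc() signals failure with ERR_PTR(), never NULL, so the old !smd test treated every failure as success and handed an error pointer to the rest of probe. The kernel encodes error pointers in the top 4095 addresses; a userspace demo with the macros re-implemented:

#include <stdio.h>
#include <stdint.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long err) { return (void *)err; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p)
{
	return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

int main(void)
{
	void *p = ERR_PTR(-12);	/* -ENOMEM, as the alloc would return */

	if (!p)
		printf("NULL check: caught the failure\n");
	else
		printf("NULL check: 'success' -- error pointer slipped through\n");

	if (IS_ERR(p))
		printf("IS_ERR check: failure, err=%ld\n", PTR_ERR(p));
	return 0;
}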
diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c index d468f83..f310330 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
@@ -222,13 +222,13 @@ static void ttm_bo_reserve_interrupted(struct kunit *test) KUNIT_FAIL(test, "Couldn't create ttm bo reserve task\n"); /* Take a lock so the threaded reserve has to wait */ - mutex_lock(&bo->base.resv->lock.base); + dma_resv_lock(bo->base.resv, NULL); wake_up_process(task); msleep(20); err = kthread_stop(task); - mutex_unlock(&bo->base.resv->lock.base); + dma_resv_unlock(bo->base.resv); KUNIT_ASSERT_EQ(test, err, -ERESTARTSYS); }
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index acb9197..0765d69 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1107,8 +1107,7 @@ struct ttm_bo_swapout_walk { static s64 ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) { - struct ttm_resource *res = bo->resource; - struct ttm_place place = { .mem_type = res->mem_type }; + struct ttm_place place = { .mem_type = bo->resource->mem_type }; struct ttm_bo_swapout_walk *swapout_walk = container_of(walk, typeof(*swapout_walk), walk); struct ttm_operation_ctx *ctx = walk->arg.ctx; @@ -1148,7 +1147,7 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) /* * Move to system cached */ - if (res->mem_type != TTM_PL_SYSTEM) { + if (bo->resource->mem_type != TTM_PL_SYSTEM) { struct ttm_resource *evict_mem; struct ttm_place hop; @@ -1180,15 +1179,15 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) if (ttm_tt_is_populated(tt)) { spin_lock(&bdev->lru_lock); - ttm_resource_del_bulk_move(res, bo); + ttm_resource_del_bulk_move(bo->resource, bo); spin_unlock(&bdev->lru_lock); ret = ttm_tt_swapout(bdev, tt, swapout_walk->gfp_flags); spin_lock(&bdev->lru_lock); if (ret) - ttm_resource_add_bulk_move(res, bo); - ttm_resource_move_to_lru_tail(res); + ttm_resource_add_bulk_move(bo->resource, bo); + ttm_resource_move_to_lru_tail(bo->resource); spin_unlock(&bdev->lru_lock); }
diff --git a/drivers/gpu/drm/ttm/ttm_pool_internal.h b/drivers/gpu/drm/ttm/ttm_pool_internal.h index 82c4b7e..24c179f 100644 --- a/drivers/gpu/drm/ttm/ttm_pool_internal.h +++ b/drivers/gpu/drm/ttm/ttm_pool_internal.h
@@ -17,7 +17,7 @@ static inline bool ttm_pool_uses_dma32(struct ttm_pool *pool) return pool->alloc_flags & TTM_ALLOCATION_POOL_USE_DMA32; } -static inline bool ttm_pool_beneficial_order(struct ttm_pool *pool) +static inline unsigned int ttm_pool_beneficial_order(struct ttm_pool *pool) { return pool->alloc_flags & 0xff; }
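The helper extracts an allocation order from the low byte of the flags, so declaring it bool silently collapsed every nonzero order to 1. The truncation is visible in a few lines of plain C:

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	unsigned int alloc_flags = 0x09;	/* example: order 9 in the low byte */
	bool as_bool = alloc_flags & 0xff;	/* any nonzero value becomes 1 */
	unsigned int as_uint = alloc_flags & 0xff;

	printf("bool: %d, unsigned int: %u\n", as_bool, as_uint);	/* 1 vs 9 */
	return 0;
}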
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c index 2170b45..f45d93e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
@@ -105,6 +105,7 @@ struct vmw_cmdbuf_context { * @handle: DMA address handle for the command buffer space if @using_mob is * false. Immutable. * @size: The size of the command buffer space. Immutable. + * @id: Monotonically increasing ID of the last cmdbuf submitted. * @num_contexts: Number of contexts actually enabled. */ struct vmw_cmdbuf_man { @@ -132,6 +133,7 @@ struct vmw_cmdbuf_man { bool has_pool; dma_addr_t handle; size_t size; + u64 id; u32 num_contexts; }; @@ -303,6 +305,8 @@ static int vmw_cmdbuf_header_submit(struct vmw_cmdbuf_header *header) struct vmw_cmdbuf_man *man = header->man; u32 val; + header->cb_header->id = man->id++; + val = upper_32_bits(header->handle); vmw_write(man->dev_priv, SVGA_REG_COMMAND_HIGH, val);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index f2abaf1b..57465f6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -96,12 +96,17 @@ struct vmwgfx_hash_item { struct vmw_res_func; +struct vmw_bo; +struct vmw_bo; +struct vmw_resource_dirty; + /** - * struct vmw-resource - base class for hardware resources + * struct vmw_resource - base class for hardware resources * * @kref: For refcounting. * @dev_priv: Pointer to the device private for this resource. Immutable. * @id: Device id. Protected by @dev_priv::resource_lock. + * @used_prio: Priority for this resource. * @guest_memory_size: Guest memory buffer size. Immutable. * @res_dirty: Resource contains data not yet in the guest memory buffer. * Protected by resource reserved. @@ -117,18 +122,16 @@ struct vmw_res_func; * pin-count greater than zero. It is not on the resource LRU lists and its * guest memory buffer is pinned. Hence it can't be evicted. * @func: Method vtable for this resource. Immutable. - * @mob_node; Node for the MOB guest memory rbtree. Protected by + * @mob_node: Node for the MOB guest memory rbtree. Protected by * @guest_memory_bo reserved. * @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock. * @binding_head: List head for the context binding list. Protected by * the @dev_priv::binding_mutex + * @dirty: resource's dirty tracker * @res_free: The resource destructor. * @hw_destroy: Callback to destroy the resource on the device, as part of * resource destruction. */ -struct vmw_bo; -struct vmw_bo; -struct vmw_resource_dirty; struct vmw_resource { struct kref kref; struct vmw_private *dev_priv; @@ -196,8 +199,8 @@ struct vmw_surface_offset; * @quality_level: Quality level. * @autogen_filter: Filter for automatically generated mipmaps. * @array_size: Number of array elements for a 1D/2D texture. For cubemap - texture number of faces * array_size. This should be 0 for pre - SM4 device. + * texture number of faces * array_size. This should be 0 for pre + * SM4 device. * @buffer_byte_stride: Buffer byte stride. * @num_sizes: Size of @sizes. For GB surface this should always be 1. * @base_size: Surface dimension. @@ -265,18 +268,24 @@ struct vmw_fifo_state { struct vmw_res_cache_entry { uint32_t handle; struct vmw_resource *res; + /* private: */ void *private; + /* public: */ unsigned short valid_handle; unsigned short valid; }; /** * enum vmw_dma_map_mode - indicate how to perform TTM page dma mappings. + * @vmw_dma_alloc_coherent: Use TTM coherent pages + * @vmw_dma_map_populate: Unmap from DMA just after unpopulate + * @vmw_dma_map_bind: Unmap from DMA just before unbind */ enum vmw_dma_map_mode { - vmw_dma_alloc_coherent, /* Use TTM coherent pages */ - vmw_dma_map_populate, /* Unmap from DMA just after unpopulate */ - vmw_dma_map_bind, /* Unmap from DMA just before unbind */ + vmw_dma_alloc_coherent, + vmw_dma_map_populate, + vmw_dma_map_bind, + /* private: */ vmw_dma_map_max }; @@ -284,8 +293,11 @@ enum vmw_dma_map_mode { * struct vmw_sg_table - Scatter/gather table for binding, with additional * device-specific information. * + * @mode: which page mapping mode to use + * @pages: Array of page pointers to the pages. + * @addrs: DMA addresses to the pages if coherent pages are used. * @sgt: Pointer to a struct sg_table with binding information - * @num_regions: Number of regions with device-address contiguous pages + * @num_pages: Number of @pages */ struct vmw_sg_table { enum vmw_dma_map_mode mode; @@ -353,6 +365,7 @@ struct vmw_ctx_validation_info; * than from user-space * @fp: If @kernel is false, points to the file of the client. 
Otherwise * NULL + * @filp: DRM state for this file * @cmd_bounce: Command bounce buffer used for command validation before * copying to fifo space * @cmd_bounce_size: Current command bounce buffer size @@ -729,7 +742,7 @@ extern void vmw_svga_disable(struct vmw_private *dev_priv); bool vmwgfx_supported(struct vmw_private *vmw); -/** +/* * GMR utilities - vmwgfx_gmr.c */ @@ -739,7 +752,7 @@ extern int vmw_gmr_bind(struct vmw_private *dev_priv, int gmr_id); extern void vmw_gmr_unbind(struct vmw_private *dev_priv, int gmr_id); -/** +/* * User handles */ struct vmw_user_object { @@ -759,7 +772,7 @@ void *vmw_user_object_map_size(struct vmw_user_object *uo, size_t size); void vmw_user_object_unmap(struct vmw_user_object *uo); bool vmw_user_object_is_mapped(struct vmw_user_object *uo); -/** +/* * Resource utilities - vmwgfx_resource.c */ struct vmw_user_resource_conv; @@ -819,7 +832,7 @@ static inline bool vmw_resource_mob_attached(const struct vmw_resource *res) return !RB_EMPTY_NODE(&res->mob_node); } -/** +/* * GEM related functionality - vmwgfx_gem.c */ struct vmw_bo_params; @@ -833,7 +846,7 @@ extern int vmw_gem_object_create_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); extern void vmw_debugfs_gem_init(struct vmw_private *vdev); -/** +/* * Misc Ioctl functionality - vmwgfx_ioctl.c */ @@ -846,7 +859,7 @@ extern int vmw_present_ioctl(struct drm_device *dev, void *data, extern int vmw_present_readback_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -/** +/* * Fifo utilities - vmwgfx_fifo.c */ @@ -880,9 +893,11 @@ extern int vmw_cmd_flush(struct vmw_private *dev_priv, /** - * vmw_fifo_caps - Returns the capabilities of the FIFO command + * vmw_fifo_caps - Get the capabilities of the FIFO command * queue or 0 if fifo memory isn't present. * @dev_priv: The device private context + * + * Returns: capabilities of the FIFO command or %0 if fifo memory not present */ static inline uint32_t vmw_fifo_caps(const struct vmw_private *dev_priv) { @@ -893,9 +908,11 @@ static inline uint32_t vmw_fifo_caps(const struct vmw_private *dev_priv) /** - * vmw_is_cursor_bypass3_enabled - Returns TRUE iff Cursor Bypass 3 - * is enabled in the FIFO. + * vmw_is_cursor_bypass3_enabled - check Cursor Bypass 3 enabled setting + * in the FIFO. * @dev_priv: The device private context + * + * Returns: %true iff Cursor Bypass 3 is enabled in the FIFO */ static inline bool vmw_is_cursor_bypass3_enabled(const struct vmw_private *dev_priv) @@ -903,7 +920,7 @@ vmw_is_cursor_bypass3_enabled(const struct vmw_private *dev_priv) return (vmw_fifo_caps(dev_priv) & SVGA_FIFO_CAP_CURSOR_BYPASS_3) != 0; } -/** +/* * TTM buffer object driver - vmwgfx_ttm_buffer.c */ @@ -927,7 +944,7 @@ extern void vmw_piter_start(struct vmw_piter *viter, * * @viter: Pointer to the iterator to advance. * - * Returns false if past the list of pages, true otherwise. + * Returns: false if past the list of pages, true otherwise. */ static inline bool vmw_piter_next(struct vmw_piter *viter) { @@ -939,7 +956,7 @@ static inline bool vmw_piter_next(struct vmw_piter *viter) * * @viter: Pointer to the iterator * - * Returns the DMA address of the page pointed to by @viter. + * Returns: the DMA address of the page pointed to by @viter. */ static inline dma_addr_t vmw_piter_dma_addr(struct vmw_piter *viter) { @@ -951,14 +968,14 @@ static inline dma_addr_t vmw_piter_dma_addr(struct vmw_piter *viter) * * @viter: Pointer to the iterator * - * Returns the DMA address of the page pointed to by @viter. 
+ * Returns: the DMA address of the page pointed to by @viter. */ static inline struct page *vmw_piter_page(struct vmw_piter *viter) { return viter->pages[viter->i]; } -/** +/* * Command submission - vmwgfx_execbuf.c */ @@ -993,7 +1010,7 @@ extern int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, int32_t out_fence_fd); bool vmw_cmd_describe(const void *buf, u32 *size, char const **cmd); -/** +/* * IRQs and wating - vmwgfx_irq.c */ @@ -1016,7 +1033,7 @@ bool vmw_generic_waiter_add(struct vmw_private *dev_priv, u32 flag, bool vmw_generic_waiter_remove(struct vmw_private *dev_priv, u32 flag, int *waiter_count); -/** +/* * Kernel modesetting - vmwgfx_kms.c */ @@ -1048,7 +1065,7 @@ extern int vmw_resource_pin(struct vmw_resource *res, bool interruptible); extern void vmw_resource_unpin(struct vmw_resource *res); extern enum vmw_res_type vmw_res_type(const struct vmw_resource *res); -/** +/* * Overlay control - vmwgfx_overlay.c */ @@ -1063,20 +1080,20 @@ int vmw_overlay_unref(struct vmw_private *dev_priv, uint32_t stream_id); int vmw_overlay_num_overlays(struct vmw_private *dev_priv); int vmw_overlay_num_free_overlays(struct vmw_private *dev_priv); -/** +/* * GMR Id manager */ int vmw_gmrid_man_init(struct vmw_private *dev_priv, int type); void vmw_gmrid_man_fini(struct vmw_private *dev_priv, int type); -/** +/* * System memory manager */ int vmw_sys_man_init(struct vmw_private *dev_priv); void vmw_sys_man_fini(struct vmw_private *dev_priv); -/** +/* * Prime - vmwgfx_prime.c */ @@ -1292,7 +1309,7 @@ extern void vmw_cmdbuf_irqthread(struct vmw_cmdbuf_man *man); * @line: The current line of the blit. * @line_offset: Offset of the current line segment. * @cpp: Bytes per pixel (granularity information). - * @memcpy: Which memcpy function to use. + * @do_cpy: Which memcpy function to use. */ struct vmw_diff_cpy { struct drm_rect rect; @@ -1380,13 +1397,14 @@ vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); /** * VMW_DEBUG_KMS - Debug output for kernel mode-setting + * @fmt: format string for the args * * This macro is for debugging vmwgfx mode-setting code. */ #define VMW_DEBUG_KMS(fmt, ...) \ DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) -/** +/* * Inline helper functions */ @@ -1417,11 +1435,13 @@ static inline void vmw_fifo_resource_dec(struct vmw_private *dev_priv) /** * vmw_fifo_mem_read - Perform a MMIO read from the fifo memory - * + * @vmw: The device private structure * @fifo_reg: The fifo register to read from * * This function is intended to be equivalent to ioread32() on * memremap'd memory, but without byteswapping. + * + * Returns: the value read */ static inline u32 vmw_fifo_mem_read(struct vmw_private *vmw, uint32 fifo_reg) { @@ -1431,8 +1451,9 @@ static inline u32 vmw_fifo_mem_read(struct vmw_private *vmw, uint32 fifo_reg) /** * vmw_fifo_mem_write - Perform a MMIO write to volatile memory - * - * @addr: The fifo register to write to + * @vmw: The device private structure + * @fifo_reg: The fifo register to write to + * @value: The value to write * * This function is intended to be equivalent to iowrite32 on * memremap'd memory, but without byteswapping.
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 3057f8b..e1f1802 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -1143,7 +1143,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo); if (ret != 0) { drm_dbg(&dev_priv->drm, "Could not find or use MOB buffer.\n"); - return PTR_ERR(vmw_bo); + return ret; } vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_MOB, VMW_BO_DOMAIN_MOB); ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo); @@ -1199,7 +1199,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo); if (ret != 0) { drm_dbg(&dev_priv->drm, "Could not find or use GMR region.\n"); - return PTR_ERR(vmw_bo); + return ret; } vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 55730e2..e7bddf8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -771,7 +771,8 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, ret = vmw_bo_dirty_add(bo); if (!ret && surface && surface->res.func->dirty_alloc) { surface->res.coherent = true; - ret = surface->res.func->dirty_alloc(&surface->res); + if (surface->res.dirty == NULL) + ret = surface->res.func->dirty_alloc(&surface->res); } ttm_bo_unreserve(&bo->tbo); }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c index fd4e764..45561bc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -260,6 +260,13 @@ int vmw_bo_dirty_add(struct vmw_bo *vbo) return ret; } +static void vmw_bo_dirty_free(struct kref *kref) +{ + struct vmw_bo_dirty *dirty = container_of(kref, struct vmw_bo_dirty, ref_count); + + kvfree(dirty); +} + /** * vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object * @vbo: The buffer object @@ -274,7 +281,7 @@ void vmw_bo_dirty_release(struct vmw_bo *vbo) { struct vmw_bo_dirty *dirty = vbo->dirty; - if (dirty && kref_put(&dirty->ref_count, (void *)kvfree)) + if (dirty && kref_put(&dirty->ref_count, vmw_bo_dirty_free)) vbo->dirty = NULL; }
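Casting kvfree() to a kref release callback invokes it through an incompatible function-pointer type, which is undefined behavior and can trip kernel control-flow-integrity checking. The fix is the standard kref pattern: a release function with the right prototype that recovers the containing object via container_of(). In generic form (struct tracker and the helper names are illustrative, not vmwgfx API):

#include <linux/kref.h>
#include <linux/slab.h>

struct tracker {
	struct kref ref_count;
	/* ... dirty-tracking payload ... */
};

/* Correct prototype; container_of() walks back from the embedded kref. */
static void tracker_free(struct kref *kref)
{
	struct tracker *t = container_of(kref, struct tracker, ref_count);

	kvfree(t);
}

static void tracker_put(struct tracker *t)
{
	kref_put(&t->ref_count, tracker_free);
}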
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 68172b0..dc5a4fa 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -96,6 +96,12 @@ #define ENABLE_SEMAPHORE_POLL_BIT REG_BIT(13) #define RING_CMD_CCTL(base) XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED) + +#define CS_MMIO_GROUP_INSTANCE_SELECT(base) XE_REG((base) + 0xcc) +#define SELECTIVE_READ_ADDRESSING REG_BIT(30) +#define SELECTIVE_READ_GROUP REG_GENMASK(29, 23) +#define SELECTIVE_READ_INSTANCE REG_GENMASK(22, 16) + /* * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. * The lsb of each can be considered a separate enabling bit for encryption.
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 24fc64f..9d66f16 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -553,6 +553,7 @@ #define ENABLE_SMP_LD_RENDER_SURFACE_CONTROL REG_BIT(44 - 32) #define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) #define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) +#define L3_128B_256B_WRT_DIS REG_BIT(40 - 32) #define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32) #define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32)
diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index c59b141..7fd07d1 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c
@@ -830,6 +830,7 @@ static void xe_config_device_release(struct config_item *item) mutex_destroy(&dev->lock); + kfree(dev->config.ctx_restore_mid_bb[0].cs); kfree(dev->config.ctx_restore_post_bb[0].cs); kfree(dev); }
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 52ee24e..3eb06b2 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c
@@ -837,6 +837,14 @@ static void detect_preproduction_hw(struct xe_device *xe) } } +static void xe_device_wedged_fini(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + + if (atomic_read(&xe->wedged.flag)) + xe_pm_runtime_put(xe); +} + int xe_device_probe(struct xe_device *xe) { struct xe_tile *tile; @@ -1013,6 +1021,10 @@ int xe_device_probe(struct xe_device *xe) detect_preproduction_hw(xe); + err = drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe); + if (err) + goto err_unregister_display; + return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe); err_unregister_display: @@ -1216,13 +1228,6 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address) return address & GENMASK_ULL(xe->info.va_bits - 1, 0); } -static void xe_device_wedged_fini(struct drm_device *drm, void *arg) -{ - struct xe_device *xe = arg; - - xe_pm_runtime_put(xe); -} - /** * DOC: Xe Device Wedging * @@ -1300,15 +1305,9 @@ void xe_device_declare_wedged(struct xe_device *xe) return; } - xe_pm_runtime_get_noresume(xe); - - if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) { - drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n"); - return; - } - if (!atomic_xchg(&xe->wedged.flag, 1)) { xe->needs_flr_on_fini = true; + xe_pm_runtime_get_noresume(xe); drm_err(&xe->drm, "CRITICAL: Xe has declared device %s as wedged.\n" "IOCTLs and executions are blocked. Only a rebind may clear the failure\n"
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 0ddae7f..8ecdf94 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -266,6 +266,16 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, return q; } +static void __xe_exec_queue_fini(struct xe_exec_queue *q) +{ + int i; + + q->ops->fini(q); + + for (i = 0; i < q->width; ++i) + xe_lrc_put(q->lrc[i]); +} + static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) { int i, err; @@ -320,21 +330,10 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) return 0; err_lrc: - for (i = i - 1; i >= 0; --i) - xe_lrc_put(q->lrc[i]); + __xe_exec_queue_fini(q); return err; } -static void __xe_exec_queue_fini(struct xe_exec_queue *q) -{ - int i; - - q->ops->fini(q); - - for (i = 0; i < q->width; ++i) - xe_lrc_put(q->lrc[i]); -} - struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, struct xe_hw_engine *hwe, u32 flags,
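Defining __xe_exec_queue_fini() ahead of the init function lets the error path fall into the same teardown helper instead of open-coding a reverse loop over the LRCs. The general shape of that refactor, reduced to a self-contained sketch (every name here is hypothetical):

#include <errno.h>
#include <stdlib.h>

struct widget {
	int nr;
	void *res[8];
};

static void *resource_get(void) { return malloc(16); }
static void resource_put(void *r) { free(r); }

/* Releases exactly what init acquired, however far it got. */
static void widget_fini(struct widget *w)
{
	for (int i = 0; i < w->nr; i++)
		resource_put(w->res[i]);
	w->nr = 0;
}

static int widget_init(struct widget *w, int want)
{
	for (w->nr = 0; w->nr < want; w->nr++) {
		w->res[w->nr] = resource_get();
		if (!w->res[w->nr]) {
			widget_fini(w);	/* frees indices 0..nr-1 only */
			return -ENOMEM;
		}
	}
	return 0;
}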
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 2bda426..d1561eb 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -313,6 +313,8 @@ static void dev_fini_ggtt(void *arg) { struct xe_ggtt *ggtt = arg; + scoped_guard(mutex, &ggtt->lock) + ggtt->flags &= ~XE_GGTT_FLAGS_ONLINE; drain_workqueue(ggtt->wq); } @@ -377,6 +379,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (err) return err; + ggtt->flags |= XE_GGTT_FLAGS_ONLINE; err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); if (err) return err; @@ -410,13 +413,10 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) static void ggtt_node_remove(struct xe_ggtt_node *node) { struct xe_ggtt *ggtt = node->ggtt; - struct xe_device *xe = tile_to_xe(ggtt->tile); bool bound; - int idx; - - bound = drm_dev_enter(&xe->drm, &idx); mutex_lock(&ggtt->lock); + bound = ggtt->flags & XE_GGTT_FLAGS_ONLINE; if (bound) xe_ggtt_clear(ggtt, node->base.start, node->base.size); drm_mm_remove_node(&node->base); @@ -429,8 +429,6 @@ static void ggtt_node_remove(struct xe_ggtt_node *node) if (node->invalidate_on_remove) xe_ggtt_invalidate(ggtt); - drm_dev_exit(idx); - free_node: xe_ggtt_node_fini(node); }
diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index d82b71a..c002857 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
@@ -28,11 +28,14 @@ struct xe_ggtt { /** @size: Total usable size of this GGTT */ u64 size; -#define XE_GGTT_FLAGS_64K BIT(0) +#define XE_GGTT_FLAGS_64K BIT(0) +#define XE_GGTT_FLAGS_ONLINE BIT(1) /** * @flags: Flags for this GGTT * Acceptable flags: * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K. + * - %XE_GGTT_FLAGS_ONLINE - is GGTT online, protected by ggtt->lock + * after init */ unsigned int flags; /** @scratch: Internal object allocation used as a scratch page */
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 42438b21..707db65 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -435,15 +435,11 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) return 0; } -static void xe_gsc_proxy_remove(void *arg) +static void xe_gsc_proxy_stop(struct xe_gsc *gsc) { - struct xe_gsc *gsc = arg; struct xe_gt *gt = gsc_to_gt(gsc); struct xe_device *xe = gt_to_xe(gt); - if (!gsc->proxy.component_added) - return; - /* disable HECI2 IRQs */ scoped_guard(xe_pm_runtime, xe) { CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GSC); @@ -455,6 +451,30 @@ static void xe_gsc_proxy_remove(void *arg) } xe_gsc_wait_for_worker_completion(gsc); + gsc->proxy.started = false; +} + +static void xe_gsc_proxy_remove(void *arg) +{ + struct xe_gsc *gsc = arg; + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); + + if (!gsc->proxy.component_added) + return; + + /* + * GSC proxy start is an async process that can be ongoing during + * Xe module load/unload. Using devm managed action to register + * xe_gsc_proxy_stop could cause issues if Xe module unload has + * already started when the action is registered, potentially leading + * to the cleanup being called at the wrong time. Therefore, instead + * of registering a separate devm action to undo what is done in + * proxy start, we call it from here, but only if the start has + * completed successfully (tracked with the 'started' flag). + */ + if (gsc->proxy.started) + xe_gsc_proxy_stop(gsc); component_del(xe->drm.dev, &xe_gsc_proxy_component_ops); gsc->proxy.component_added = false; @@ -510,6 +530,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc) */ int xe_gsc_proxy_start(struct xe_gsc *gsc) { + struct xe_gt *gt = gsc_to_gt(gsc); int err; /* enable the proxy interrupt in the GSC shim layer */ @@ -521,12 +542,18 @@ int xe_gsc_proxy_start(struct xe_gsc *gsc) */ err = xe_gsc_proxy_request_handler(gsc); if (err) - return err; + goto err_irq_disable; if (!xe_gsc_proxy_init_done(gsc)) { - xe_gt_err(gsc_to_gt(gsc), "GSC FW reports proxy init not completed\n"); - return -EIO; + xe_gt_err(gt, "GSC FW reports proxy init not completed\n"); + err = -EIO; + goto err_irq_disable; } + gsc->proxy.started = true; return 0; + +err_irq_disable: + gsc_proxy_irq_toggle(gsc, false); + return err; }
diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h index 97c0566..5aaa2a7 100644 --- a/drivers/gpu/drm/xe/xe_gsc_types.h +++ b/drivers/gpu/drm/xe/xe_gsc_types.h
@@ -58,6 +58,8 @@ struct xe_gsc { struct mutex mutex; /** @proxy.component_added: whether the component has been added */ bool component_added; + /** @proxy.started: whether the proxy has been started */ + bool started; /** @proxy.bo: object to store message to and from the GSC */ struct xe_bo *bo; /** @proxy.to_gsc: map of the memory used to send messages to the GSC */
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 9d090d0f..df6d047 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -210,11 +210,15 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) return ret; } +/* Dwords required to emit a RMW of a register */ +#define EMIT_RMW_DW 20 + static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) { - struct xe_reg_sr *sr = &q->hwe->reg_lrc; + struct xe_hw_engine *hwe = q->hwe; + struct xe_reg_sr *sr = &hwe->reg_lrc; struct xe_reg_sr_entry *entry; - int count_rmw = 0, count = 0, ret; + int count_rmw = 0, count_rmw_mcr = 0, count = 0, ret; unsigned long idx; struct xe_bb *bb; size_t bb_len = 0; @@ -224,6 +228,8 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) xa_for_each(&sr->xa, idx, entry) { if (entry->reg.masked || entry->clr_bits == ~0) ++count; + else if (entry->reg.mcr) + ++count_rmw_mcr; else ++count_rmw; } @@ -231,17 +237,35 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) if (count) bb_len += count * 2 + 1; - if (count_rmw) - bb_len += count_rmw * 20 + 7; + /* + * RMW of MCR registers is the same as a normal RMW, except an + * additional LRI (3 dwords) is required per register to steer the read + * to a non-terminated instance. + * + * We could probably shorten the batch slightly by eliding the + * steering for consecutive MCR registers that have the same + * group/instance target, but it's not worth the extra complexity to do + * so. + */ + bb_len += count_rmw * EMIT_RMW_DW; + bb_len += count_rmw_mcr * (EMIT_RMW_DW + 3); - if (q->hwe->class == XE_ENGINE_CLASS_RENDER) + /* + * After doing all RMW, we need 7 trailing dwords to clean up, + * plus an additional 3 dwords to reset steering if any of the + * registers were MCR. + */ + if (count_rmw || count_rmw_mcr) + bb_len += 7 + (count_rmw_mcr ? 3 : 0); + + if (hwe->class == XE_ENGINE_CLASS_RENDER) /* * Big enough to emit all of the context's 3DSTATE via * xe_lrc_emit_hwe_state_instructions() */ - bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32); + bb_len += xe_gt_lrc_size(gt, hwe->class) / sizeof(u32); - xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len); + xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", hwe->name, bb_len); bb = xe_bb_new(gt, bb_len, false); if (IS_ERR(bb)) @@ -276,13 +300,23 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) } } - if (count_rmw) { - /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */ - + if (count_rmw || count_rmw_mcr) { xa_for_each(&sr->xa, idx, entry) { if (entry->reg.masked || entry->clr_bits == ~0) continue; + if (entry->reg.mcr) { + struct xe_reg_mcr reg = { .__reg.raw = entry->reg.raw }; + u8 group, instance; + + xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance); + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); + *cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(hwe->mmio_base).addr; + *cs++ = SELECTIVE_READ_ADDRESSING | + REG_FIELD_PREP(SELECTIVE_READ_GROUP, group) | + REG_FIELD_PREP(SELECTIVE_READ_INSTANCE, instance); + } + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; *cs++ = entry->reg.addr; *cs++ = CS_GPR_REG(0, 0).addr; @@ -308,8 +342,9 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) *cs++ = CS_GPR_REG(0, 0).addr; *cs++ = entry->reg.addr; - xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n", - entry->reg.addr, entry->clr_bits, entry->set_bits); + xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x%s\n", + entry->reg.addr, entry->clr_bits, entry->set_bits, + entry->reg.mcr ? " (MCR)" : ""); } /* reset used GPR */ @@ -321,6 +356,13 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) *cs++ = 0; *cs++ = CS_GPR_REG(0, 2).addr; *cs++ = 0; + + /* reset steering */ + if (count_rmw_mcr) { + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); + *cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(q->hwe->mmio_base).addr; + *cs++ = 0; + } } cs = xe_lrc_emit_hwe_state_instructions(q, cs);
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index fe944687..03c1862 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -12,6 +12,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sysfs.h" #include "xe_mmio.h" +#include "xe_pm.h" #include "xe_sriov.h" static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) @@ -150,6 +151,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, xe_gt_info(gt, "Setting compute mode to %d\n", num_engines); gt->ccs_mode = num_engines; xe_gt_record_user_engines(gt); + guard(xe_pm_runtime)(xe); xe_gt_reset(gt); }
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 6df7c3f..4ab65ca 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -1124,14 +1124,14 @@ static int guc_wait_ucode(struct xe_guc *guc) struct xe_guc_pc *guc_pc = >->uc.guc.pc; u32 before_freq, act_freq, cur_freq; u32 status = 0, tries = 0; + int load_result, ret; ktime_t before; u64 delta_ms; - int ret; before_freq = xe_guc_pc_get_act_freq(guc_pc); before = ktime_get(); - ret = poll_timeout_us(ret = guc_load_done(gt, &status, &tries), ret, + ret = poll_timeout_us(load_result = guc_load_done(gt, &status, &tries), load_result, 10 * USEC_PER_MSEC, GUC_LOAD_TIMEOUT_SEC * USEC_PER_SEC, false); @@ -1139,7 +1139,7 @@ static int guc_wait_ucode(struct xe_guc *guc) act_freq = xe_guc_pc_get_act_freq(guc_pc); cur_freq = xe_guc_pc_get_cur_freq_fw(guc_pc); - if (ret) { + if (ret || load_result <= 0) { xe_gt_err(gt, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz (req %dMHz)\n", status, delta_ms, xe_guc_pc_get_act_freq(guc_pc), xe_guc_pc_get_cur_freq_fw(guc_pc)); @@ -1347,15 +1347,37 @@ int xe_guc_enable_communication(struct xe_guc *guc) return 0; } -int xe_guc_suspend(struct xe_guc *guc) +/** + * xe_guc_softreset() - Soft reset GuC + * @guc: The GuC object + * + * Send soft reset command to GuC through mmio send. + * + * Return: 0 if success, otherwise error code + */ +int xe_guc_softreset(struct xe_guc *guc) { - struct xe_gt *gt = guc_to_gt(guc); u32 action[] = { XE_GUC_ACTION_CLIENT_SOFT_RESET, }; int ret; + if (!xe_uc_fw_is_running(&guc->fw)) + return 0; + ret = xe_guc_mmio_send(guc, action, ARRAY_SIZE(action)); + if (ret) + return ret; + + return 0; +} + +int xe_guc_suspend(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + ret = xe_guc_softreset(guc); if (ret) { xe_gt_err(gt, "GuC suspend failed: %pe\n", ERR_PTR(ret)); return ret;
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 66e7edc..0251491 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -44,6 +44,7 @@ int xe_guc_opt_in_features_enable(struct xe_guc *guc); void xe_guc_runtime_suspend(struct xe_guc *guc); void xe_guc_runtime_resume(struct xe_guc *guc); int xe_guc_suspend(struct xe_guc *guc); +int xe_guc_softreset(struct xe_guc *guc); void xe_guc_notify(struct xe_guc *guc); int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr); int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len);
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index d045891..c80082b 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -345,6 +345,7 @@ static void guc_action_disable_ct(void *arg) { struct xe_guc_ct *ct = arg; + xe_guc_ct_stop(ct); guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED); }
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 799ef9f..fc4f99d 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -48,6 +48,8 @@ #define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6 +static int guc_submit_reset_prepare(struct xe_guc *guc); + static struct xe_guc * exec_queue_to_guc(struct xe_exec_queue *q) { @@ -239,7 +241,7 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) EXEC_QUEUE_STATE_BANNED)); } -static void guc_submit_fini(struct drm_device *drm, void *arg) +static void guc_submit_sw_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; struct xe_device *xe = guc_to_xe(guc); @@ -257,6 +259,19 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) xa_destroy(&guc->submission_state.exec_queue_lookup); } +static void guc_submit_fini(void *arg) +{ + struct xe_guc *guc = arg; + + /* Forcefully kill any remaining exec queues */ + xe_guc_ct_stop(&guc->ct); + guc_submit_reset_prepare(guc); + xe_guc_softreset(guc); + xe_guc_submit_stop(guc); + xe_uc_fw_sanitize(&guc->fw); + xe_guc_submit_pause_abort(guc); +} + static void guc_submit_wedged_fini(void *arg) { struct xe_guc *guc = arg; @@ -326,7 +341,11 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) guc->submission_state.initialized = true; - return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); + err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc); + if (err) + return err; + + return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc); } /* @@ -1252,6 +1271,7 @@ static void disable_scheduling_deregister(struct xe_guc *guc, */ void xe_guc_submit_wedge(struct xe_guc *guc) { + struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; unsigned long index; @@ -1266,20 +1286,28 @@ void xe_guc_submit_wedge(struct xe_guc *guc) if (!guc->submission_state.initialized) return; - err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, - guc_submit_wedged_fini, guc); - if (err) { - xe_gt_err(gt, "Failed to register clean-up in wedged.mode=%s; " - "Although device is wedged.\n", - xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)); - return; - } + if (xe->wedged.mode == 2) { + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, + guc_submit_wedged_fini, guc); + if (err) { + xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; " + "Although device is wedged.\n"); + return; + } - mutex_lock(&guc->submission_state.lock); - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - if (xe_exec_queue_get_unless_zero(q)) - set_exec_queue_wedged(q); - mutex_unlock(&guc->submission_state.lock); + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + if (xe_exec_queue_get_unless_zero(q)) + set_exec_queue_wedged(q); + mutex_unlock(&guc->submission_state.lock); + } else { + /* Forcefully kill any remaining exec queues, signal fences */ + guc_submit_reset_prepare(guc); + xe_guc_submit_stop(guc); + xe_guc_softreset(guc); + xe_uc_fw_sanitize(&guc->fw); + xe_guc_submit_pause_abort(guc); + } } static bool guc_submit_hint_wedged(struct xe_guc *guc) @@ -2230,6 +2258,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = { static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched = &q->guc->sched; + bool do_destroy = false; /* Stop scheduling + flush any DRM scheduler operations */ xe_sched_submission_stop(sched); @@ -2237,7 +2266,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) /* Clean up lost G2H + reset engine state */ if (exec_queue_registered(q)) { if 
(exec_queue_destroyed(q)) - __guc_exec_queue_destroy(guc, q); + do_destroy = true; } if (q->guc->suspend_pending) { set_exec_queue_suspended(q); @@ -2273,18 +2302,15 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) xe_guc_exec_queue_trigger_cleanup(q); } } + + if (do_destroy) + __guc_exec_queue_destroy(guc, q); } -int xe_guc_submit_reset_prepare(struct xe_guc *guc) +static int guc_submit_reset_prepare(struct xe_guc *guc) { int ret; - if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc))) - return 0; - - if (!guc->submission_state.initialized) - return 0; - /* * Using an atomic here rather than submission_state.lock as this * function can be called while holding the CT lock (engine reset @@ -2299,6 +2325,17 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc) return ret; } +int xe_guc_submit_reset_prepare(struct xe_guc *guc) +{ + if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc))) + return 0; + + if (!guc->submission_state.initialized) + return 0; + + return guc_submit_reset_prepare(guc); +} + void xe_guc_submit_reset_wait(struct xe_guc *guc) { wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) || @@ -2695,8 +2732,7 @@ void xe_guc_submit_pause_abort(struct xe_guc *guc) continue; xe_sched_submission_start(sched); - if (exec_queue_killed_or_banned_or_wedged(q)) - xe_guc_exec_queue_trigger_cleanup(q); + guc_exec_queue_kill(q); } mutex_unlock(&guc->submission_state.lock); }
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index b0f037b..7b70cc0 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -2413,14 +2413,14 @@ static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts) * @lrc: Pointer to the lrc. * * Return latest ctx timestamp. With support for active contexts, the - * calculation may bb slightly racy, so follow a read-again logic to ensure that + * calculation may be slightly racy, so follow a read-again logic to ensure that * the context is still active before returning the right timestamp. * * Returns: New ctx timestamp value */ u64 xe_lrc_timestamp(struct xe_lrc *lrc) { - u64 lrc_ts, reg_ts, new_ts; + u64 lrc_ts, reg_ts, new_ts = lrc->ctx_timestamp; u32 engine_id; lrc_ts = xe_lrc_ctx_timestamp(lrc);
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index c307a3f..c1c6154 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -75,7 +75,8 @@ static inline struct xe_lrc *xe_lrc_get(struct xe_lrc *lrc) */ static inline void xe_lrc_put(struct xe_lrc *lrc) { - kref_put(&lrc->refcount, xe_lrc_destroy); + if (lrc) + kref_put(&lrc->refcount, xe_lrc_destroy); } /**
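Tolerating NULL in the put helper matches the kfree()/dma_fence_put() convention and lets teardown paths release references unconditionally, even when a slot was never filled. The idiom in generic form (struct obj and obj_destroy are placeholders, not xe API):

#include <linux/kref.h>

struct obj {
	struct kref refcount;
};

static void obj_destroy(struct kref *ref);	/* release, defined elsewhere */

/* NULL-tolerant put: safe on pointers an error path never initialized. */
static inline void obj_put(struct obj *o)
{
	if (o)
		kref_put(&o->refcount, obj_destroy);
}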
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 4dd3f29..fa90441 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -543,8 +543,7 @@ static ssize_t xe_oa_read(struct file *file, char __user *buf, size_t offset = 0; int ret; - /* Can't read from disabled streams */ - if (!stream->enabled || !stream->sample) + if (!stream->sample) return -EINVAL; if (!(file->f_flags & O_NONBLOCK)) { @@ -1460,6 +1459,10 @@ static void xe_oa_stream_disable(struct xe_oa_stream *stream) if (stream->sample) hrtimer_cancel(&stream->poll_check_timer); + + /* Update stream->oa_buffer.tail to allow any final reports to be read */ + if (xe_oa_buffer_check_unlocked(stream)) + wake_up(&stream->poll_wq); } static int xe_oa_enable_preempt_timeslice(struct xe_oa_stream *stream)
diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c index 6bee53d..922a4f3 100644 --- a/drivers/gpu/drm/xe/xe_pagefault.c +++ b/drivers/gpu/drm/xe/xe_pagefault.c
@@ -187,6 +187,12 @@ static int xe_pagefault_service(struct xe_pagefault *pf) goto unlock_vm; } + if (xe_vma_read_only(vma) && + pf->consumer.access_type != XE_PAGEFAULT_ACCESS_TYPE_READ) { + err = -EPERM; + goto unlock_vm; + } + atomic = xe_pagefault_access_is_atomic(pf->consumer.access_type); if (xe_vma_is_cpu_addr_mirror(vma))
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 13b355f..713a303 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1442,9 +1442,9 @@ static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op, err = vma_check_userptr(vm, op->map.vma, pt_update); break; case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) + if (op->remap.prev && !op->remap.skip_prev) err = vma_check_userptr(vm, op->remap.prev, pt_update); - if (!err && op->remap.next) + if (!err && op->remap.next && !op->remap.skip_next) err = vma_check_userptr(vm, op->remap.next, pt_update); break; case DRM_GPUVA_OP_UNMAP: @@ -1655,15 +1655,36 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, XE_WARN_ON(!level); /* Check for leaf node */ if (xe_walk->prl && xe_page_reclaim_list_valid(xe_walk->prl) && - (!xe_child->base.children || !xe_child->base.children[first])) { + xe_child->level <= MAX_HUGEPTE_LEVEL) { struct iosys_map *leaf_map = &xe_child->bo->vmap; pgoff_t count = xe_pt_num_entries(addr, next, xe_child->level, walk); for (pgoff_t i = 0; i < count; i++) { - u64 pte = xe_map_rd(xe, leaf_map, (first + i) * sizeof(u64), u64); + u64 pte; int ret; /* + * If not a leaf pt, skip unless non-leaf pt is interleaved between + * leaf ptes which causes the page walk to skip over the child leaves + */ + if (xe_child->base.children && xe_child->base.children[first + i]) { + u64 pt_size = 1ULL << walk->shifts[xe_child->level]; + bool edge_pt = (i == 0 && !IS_ALIGNED(addr, pt_size)) || + (i == count - 1 && !IS_ALIGNED(next, pt_size)); + + if (!edge_pt) { + xe_page_reclaim_list_abort(xe_walk->tile->primary_gt, + xe_walk->prl, + "PT is skipped by walk at level=%u offset=%lu", + xe_child->level, first + i); + break; + } + continue; + } + + pte = xe_map_rd(xe, leaf_map, (first + i) * sizeof(u64), u64); + + /* * In rare scenarios, pte may not be written yet due to racy conditions. * In such cases, invalidate the PRL and fallback to full PPC invalidation. */ @@ -1674,9 +1695,8 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, } /* Ensure it is a defined page */ - xe_tile_assert(xe_walk->tile, - xe_child->level == 0 || - (pte & (XE_PTE_PS64 | XE_PDE_PS_2M | XE_PDPE_PS_1G))); + xe_tile_assert(xe_walk->tile, xe_child->level == 0 || + (pte & (XE_PDE_PS_2M | XE_PDPE_PS_1G))); /* An entry should be added for 64KB but contigious 4K have XE_PTE_PS64 */ if (pte & XE_PTE_PS64) @@ -1701,11 +1721,11 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, killed = xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); /* - * Verify PRL is active and if entry is not a leaf pte (base.children conditions), - * there is a potential need to invalidate the PRL if any PTE (num_live) are dropped. + * Verify if any PTE are potentially dropped at non-leaf levels, either from being + * killed or the page walk covers the region. 
*/ - if (xe_walk->prl && level > 1 && xe_child->num_live && - xe_child->base.children && xe_child->base.children[first]) { + if (xe_walk->prl && xe_page_reclaim_list_valid(xe_walk->prl) && + xe_child->level > MAX_HUGEPTE_LEVEL && xe_child->num_live) { bool covered = xe_pt_covers(addr, next, xe_child->level, &xe_walk->base); /* @@ -2178,12 +2198,12 @@ static int op_prepare(struct xe_vm *vm, err = unbind_op_prepare(tile, pt_update_ops, old); - if (!err && op->remap.prev) { + if (!err && op->remap.prev && !op->remap.skip_prev) { err = bind_op_prepare(vm, tile, pt_update_ops, op->remap.prev, false); pt_update_ops->wait_vm_bookkeep = true; } - if (!err && op->remap.next) { + if (!err && op->remap.next && !op->remap.skip_next) { err = bind_op_prepare(vm, tile, pt_update_ops, op->remap.next, false); pt_update_ops->wait_vm_bookkeep = true; @@ -2408,10 +2428,10 @@ static void op_commit(struct xe_vm *vm, unbind_op_commit(vm, tile, pt_update_ops, old, fence, fence2); - if (op->remap.prev) + if (op->remap.prev && !op->remap.skip_prev) bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, fence, fence2, false); - if (op->remap.next) + if (op->remap.next && !op->remap.skip_next) bind_op_commit(vm, tile, pt_update_ops, op->remap.next, fence, fence2, false); break;
diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c index d61446b..872217f 100644 --- a/drivers/gpu/drm/xe/xe_pxp.c +++ b/drivers/gpu/drm/xe/xe_pxp.c
@@ -380,6 +380,18 @@ int xe_pxp_init(struct xe_device *xe) return 0; } + /* + * On PTL, older GSC FWs have a bug that can cause them to crash during + * PXP invalidation events, which leads to a complete loss of power + * management on the media GT. Therefore, we can't use PXP on FWs that + * have this bug, which was fixed in PTL GSC build 1396. + */ + if (xe->info.platform == XE_PANTHERLAKE && + gt->uc.gsc.fw.versions.found[XE_UC_FW_VER_RELEASE].build < 1396) { + drm_info(&xe->drm, "PXP requires PTL GSC build 1396 or newer\n"); + return 0; + } + pxp = drmm_kzalloc(&xe->drm, sizeof(struct xe_pxp), GFP_KERNEL); if (!pxp) { err = -ENOMEM; @@ -512,7 +524,7 @@ static int __exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) static int pxp_start(struct xe_pxp *pxp, u8 type) { int ret = 0; - bool restart = false; + bool restart; if (!xe_pxp_is_enabled(pxp)) return -ENODEV; @@ -541,6 +553,8 @@ static int pxp_start(struct xe_pxp *pxp, u8 type) msecs_to_jiffies(PXP_ACTIVATION_TIMEOUT_MS))) return -ETIMEDOUT; + restart = false; + mutex_lock(&pxp->mutex); /* If PXP is not already active, turn it on */ @@ -583,6 +597,7 @@ static int pxp_start(struct xe_pxp *pxp, u8 type) drm_err(&pxp->xe->drm, "PXP termination failed before start\n"); mutex_lock(&pxp->mutex); pxp->status = XE_PXP_ERROR; + complete_all(&pxp->termination); goto out_unlock; } @@ -870,11 +885,6 @@ int xe_pxp_pm_suspend(struct xe_pxp *pxp) pxp->key_instance++; needs_queue_inval = true; break; - default: - drm_err(&pxp->xe->drm, "unexpected state during PXP suspend: %u", - pxp->status); - ret = -EIO; - goto out; } /* @@ -899,7 +909,6 @@ int xe_pxp_pm_suspend(struct xe_pxp *pxp) pxp->last_suspend_key_instance = pxp->key_instance; -out: return ret; }
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 2e5c7894..a07be16 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -98,10 +98,12 @@ int xe_reg_sr_add(struct xe_reg_sr *sr, *pentry = *e; ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL)); if (ret) - goto fail; + goto fail_free; return 0; +fail_free: + kfree(pentry); fail: xe_gt_err(gt, "discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s, mcr: %s): ret=%d\n",
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 248620b..53d420d 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -280,6 +280,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + /* Don't preempt fence signaling */ + dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; + if (job->user_fence.used) { i = emit_flush_dw(dw, i); i = emit_store_imm_ppgtt_posted(job->user_fence.addr, @@ -345,6 +348,9 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + /* Don't preempt fence signaling */ + dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; + if (job->user_fence.used) { i = emit_flush_dw(dw, i); i = emit_store_imm_ppgtt_posted(job->user_fence.addr, @@ -397,6 +403,9 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + /* Don't preempt fence signaling */ + dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; + i = emit_render_cache_flush(job, dw, i); if (job->user_fence.used)
diff --git a/drivers/gpu/drm/xe/xe_sriov_packet.c b/drivers/gpu/drm/xe/xe_sriov_packet.c index 968f324..2ae9eff2a 100644 --- a/drivers/gpu/drm/xe/xe_sriov_packet.c +++ b/drivers/gpu/drm/xe/xe_sriov_packet.c
@@ -341,6 +341,8 @@ ssize_t xe_sriov_packet_write_single(struct xe_device *xe, unsigned int vfid, ret = xe_sriov_pf_migration_restore_produce(xe, vfid, *data); if (ret) { xe_sriov_packet_free(*data); + *data = NULL; + return ret; }
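Freeing through an out-parameter without clearing it leaves the caller holding a dangling pointer, and the added early return stops the function from continuing to use the freed packet. A minimal userspace reproduction of the bug class:

#include <stdlib.h>

static int produce(int **out)
{
	*out = malloc(sizeof(**out));
	if (!*out)
		return -1;

	/* ... a later step fails ... */
	free(*out);
	*out = NULL;	/* without this, the caller sees a stale pointer */
	return -1;
}

int main(void)
{
	int *data = NULL;
	int ret = produce(&data);

	/* with the fix, failure implies data == NULL, never a freed pointer */
	return (ret && data) ? 1 : 0;
}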
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index ca67d0b..c6b92d4 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -903,7 +903,7 @@ int xe_svm_init(struct xe_vm *vm) void xe_svm_close(struct xe_vm *vm) { xe_assert(vm->xe, xe_vm_is_closed(vm)); - flush_work(&vm->svm.garbage_collector.work); + disable_work_sync(&vm->svm.garbage_collector.work); xe_svm_put_pagemaps(vm); drm_pagemap_release_owner(&vm->svm.peer); }
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index eb13639..24d6d9a 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -146,8 +146,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (!signal) { sync->fence = drm_syncobj_fence_get(sync->syncobj); - if (XE_IOCTL_DBG(xe, !sync->fence)) - return -EINVAL; + if (XE_IOCTL_DBG(xe, !sync->fence)) { + err = -EINVAL; + goto free_sync; + } } break; @@ -167,17 +169,21 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (signal) { sync->chain_fence = dma_fence_chain_alloc(); - if (!sync->chain_fence) - return -ENOMEM; + if (!sync->chain_fence) { + err = -ENOMEM; + goto free_sync; + } } else { sync->fence = drm_syncobj_fence_get(sync->syncobj); - if (XE_IOCTL_DBG(xe, !sync->fence)) - return -EINVAL; + if (XE_IOCTL_DBG(xe, !sync->fence)) { + err = -EINVAL; + goto free_sync; + } err = dma_fence_chain_find_seqno(&sync->fence, sync_in.timeline_value); if (err) - return err; + goto free_sync; } break; @@ -200,8 +206,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) return PTR_ERR(sync->ufence); sync->ufence_chain_fence = dma_fence_chain_alloc(); - if (!sync->ufence_chain_fence) - return -ENOMEM; + if (!sync->ufence_chain_fence) { + err = -ENOMEM; + goto free_sync; + } sync->ufence_syncobj = ufence_syncobj; } @@ -216,6 +224,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, sync->timeline_value = sync_in.timeline_value; return 0; + +free_sync: + xe_sync_entry_cleanup(sync); + return err; } ALLOW_ERROR_INJECTION(xe_sync_entry_parse, ERRNO);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a82e3a4..ffdbab10 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2554,7 +2554,6 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) if (!err && op->remap.skip_prev) { op->remap.prev->tile_present = tile_present; - op->remap.prev = NULL; } } if (op->remap.next) { @@ -2564,11 +2563,13 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) if (!err && op->remap.skip_next) { op->remap.next->tile_present = tile_present; - op->remap.next = NULL; } } - /* Adjust for partial unbind after removing VMA from VM */ + /* + * Adjust for partial unbind after removing VMA from VM. In case + * of unwind we might need to undo this later. + */ if (!err) { op->base.remap.unmap->va->va.addr = op->remap.start; op->base.remap.unmap->va->va.range = op->remap.range; @@ -2687,6 +2688,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, op->remap.start = xe_vma_start(old); op->remap.range = xe_vma_size(old); + op->remap.old_start = op->remap.start; + op->remap.old_range = op->remap.range; flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; if (op->base.remap.prev) { @@ -2835,8 +2838,19 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, xe_svm_notifier_lock(vm); vma->gpuva.flags &= ~XE_VMA_DESTROYED; xe_svm_notifier_unlock(vm); - if (post_commit) + if (post_commit) { + /* + * Restore the old va range, in case of the + * prev/next skip optimisation. Otherwise what + * we re-insert here could be smaller than the + * original range. + */ + op->base.remap.unmap->va->va.addr = + op->remap.old_start; + op->base.remap.unmap->va->va.range = + op->remap.old_range; xe_vm_insert_vma(vm, vma); + } } break; }
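The unwind fix follows from the commit step: when the prev/next skip optimisation applies, the unmap VMA's recorded address and range are shrunk in place, so the unwind path must restore the values saved at parse time (the new old_start/old_range fields declared in the xe_vm_types.h hunk below) before re-inserting, or it would re-insert a smaller VMA than the one it removed. The save/restore pairing in miniature, with hypothetical types:

    #include <linux/types.h>

    struct remap_op {
            u64 start, range;             /* may be shrunk during commit */
            u64 old_start, old_range;     /* pristine extent from parse time */
    };

    /* Parse: remember the original extent before commit can touch it. */
    static void remap_parse(struct remap_op *op, u64 start, u64 range)
    {
            op->start = op->old_start = start;
            op->range = op->old_range = range;
    }

    /* Unwind: restore the original extent before re-inserting the VMA. */
    static void remap_unwind(const struct remap_op *op, u64 *addr, u64 *range)
    {
            *addr = op->old_start;
            *range = op->old_range;
    }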
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 95bf53c..b408612 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -408,8 +408,15 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); struct drm_xe_madvise *args = data; - struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start, - .range = args->range, }; + struct xe_vmas_in_madvise_range madvise_range = { + /* + * Userspace may pass canonical (sign-extended) addresses. + * Strip the sign extension to get the internal non-canonical + * form used by the GPUVM, matching xe_vm_bind_ioctl() behavior. + */ + .addr = xe_device_uncanonicalize_addr(xe, args->start), + .range = args->range, + }; struct xe_madvise_details details; struct xe_vm *vm; struct drm_exec exec; @@ -439,7 +446,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil if (err) goto unlock_vm; - err = xe_vm_alloc_madvise_vma(vm, args->start, args->range); + err = xe_vm_alloc_madvise_vma(vm, madvise_range.addr, args->range); if (err) goto madv_fini; @@ -453,7 +460,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil madvise_range.num_vmas, args->atomic.val)) { err = -EINVAL; - goto madv_fini; + goto free_vmas; } } @@ -482,7 +489,8 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args, &details); - err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range); + err = xe_vm_invalidate_madvise_range(vm, madvise_range.addr, + madvise_range.addr + args->range); if (madvise_range.has_svm_userptr_vmas) xe_svm_notifier_unlock(vm); @@ -490,6 +498,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil err_fini: if (madvise_range.has_bo_vmas) drm_exec_fini(&exec); +free_vmas: kfree(madvise_range.vmas); madvise_range.vmas = NULL; madv_fini:
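Background for the madvise fix: on x86-64 and similar MMUs, userspace pointers are canonical, i.e. the top bits replicate the highest valid VA bit, while the GPUVM indexes VMAs by the raw low bits. xe_vm_bind_ioctl() already strips the sign extension, and this change makes the madvise ioctl do the same so lookups hit the VMAs that bind created. The strip is just a mask of the low address bits; a sketch, where the 48-bit width is an illustrative assumption (the driver derives the real width per device):

    #include <linux/bits.h>
    #include <linux/types.h>

    /* Sketch: keep only the low va_bits bits of a canonical address. */
    static u64 uncanonicalize_addr(u64 addr, unsigned int va_bits)
    {
            return addr & GENMASK_ULL(va_bits - 1, 0);
    }

    /*
     * Example, for a canonical upper-half pointer with va_bits == 48:
     *   uncanonicalize_addr(0xffff800000001000ULL, 48) == 0x800000001000ULL
     */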
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 437f642..e2946e3 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -373,6 +373,10 @@ struct xe_vma_op_remap { u64 start; /** @range: range of the VMA unmap */ u64 range; + /** @old_start: Original start of the VMA we unmap */ + u64 old_start; + /** @old_range: Original range of the VMA we unmap */ + u64 old_range; /** @skip_prev: skip prev rebind */ bool skip_prev; /** @skip_next: skip next rebind */
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index c7b1bd7..d7e309a 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -241,12 +241,14 @@ static const struct xe_rtp_entry_sr gt_was[] = { { XE_RTP_NAME("16025250150"), XE_RTP_RULES(GRAPHICS_VERSION(2001)), - XE_RTP_ACTIONS(SET(LSN_VC_REG2, - LSN_LNI_WGT(1) | - LSN_LNE_WGT(1) | - LSN_DIM_X_WGT(1) | - LSN_DIM_Y_WGT(1) | - LSN_DIM_Z_WGT(1))) + XE_RTP_ACTIONS(FIELD_SET(LSN_VC_REG2, + LSN_LNI_WGT_MASK | LSN_LNE_WGT_MASK | + LSN_DIM_X_WGT_MASK | LSN_DIM_Y_WGT_MASK | + LSN_DIM_Z_WGT_MASK, + LSN_LNI_WGT(1) | LSN_LNE_WGT(1) | + LSN_DIM_X_WGT(1) | LSN_DIM_Y_WGT(1) | + LSN_DIM_Z_WGT(1)), + SET(LSC_CHICKEN_BIT_0_UDW, L3_128B_256B_WRT_DIS)) }, /* Xe2_HPM */
diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 174feac..c69adaa 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs
@@ -47,16 +47,12 @@ unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {} impl<const NUM_PAGES: usize> PteArray<NUM_PAGES> { - /// Creates a new page table array mapping `NUM_PAGES` GSP pages starting at address `start`. - fn new(start: DmaAddress) -> Result<Self> { - let mut ptes = [0u64; NUM_PAGES]; - for (i, pte) in ptes.iter_mut().enumerate() { - *pte = start - .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) - .ok_or(EOVERFLOW)?; - } - - Ok(Self(ptes)) + /// Returns the page table entry for `index`, for a mapping starting at `start`. + // TODO: Replace with `IoView` projection once available. + fn entry(start: DmaAddress, index: usize) -> Result<u64> { + start + .checked_add(num::usize_as_u64(index) << GSP_PAGE_SHIFT) + .ok_or(EOVERFLOW) } } @@ -86,16 +82,22 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> { NUM_PAGES * GSP_PAGE_SIZE, GFP_KERNEL | __GFP_ZERO, )?); - let ptes = PteArray::<NUM_PAGES>::new(obj.0.dma_handle())?; + + let start_addr = obj.0.dma_handle(); // SAFETY: `obj` has just been created and we are its sole user. - unsafe { - // Copy the self-mapping PTE at the expected location. + let pte_region = unsafe { obj.0 - .as_slice_mut(size_of::<u64>(), size_of_val(&ptes))? - .copy_from_slice(ptes.as_bytes()) + .as_slice_mut(size_of::<u64>(), NUM_PAGES * size_of::<u64>())? }; + // Write values one by one to avoid an on-stack instance of `PteArray`. + for (i, chunk) in pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() { + let pte_value = PteArray::<0>::entry(start_addr, i)?; + + chunk.copy_from_slice(&pte_value.to_ne_bytes()); + } + Ok(obj) } } @@ -143,14 +145,14 @@ pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> impl PinInit<Self, Error // _kgspInitLibosLoggingStructures (allocates memory for buffers) // kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array) dma_write!( - libos[0] = LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0) - )?; + libos, [0]?, LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0) + ); dma_write!( - libos[1] = LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0) - )?; - dma_write!(libos[2] = LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0))?; - dma_write!(rmargs[0].inner = fw::GspArgumentsCached::new(cmdq))?; - dma_write!(libos[3] = LibosMemoryRegionInitArgument::new("RMARGS", rmargs))?; + libos, [1]?, LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0) + ); + dma_write!(libos, [2]?, LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0)); + dma_write!(rmargs, [0]?.inner, fw::GspArgumentsCached::new(cmdq)); + dma_write!(libos, [3]?, LibosMemoryRegionInitArgument::new("RMARGS", rmargs)); }, })) })
diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index be427fe..94833f7 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -157,7 +157,7 @@ pub(crate) fn boot( let wpr_meta = CoherentAllocation::<GspFwWprMeta>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; - dma_write!(wpr_meta[0] = GspFwWprMeta::new(&gsp_fw, &fb_layout))?; + dma_write!(wpr_meta, [0]?, GspFwWprMeta::new(&gsp_fw, &fb_layout)); self.cmdq .send_command(bar, commands::SetSystemInfo::new(pdev))?;
diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs index 46819a8..03a4f35 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs
@@ -2,11 +2,7 @@ use core::{ cmp, - mem, - sync::atomic::{ - fence, - Ordering, // - }, // + mem, // }; use kernel::{ @@ -146,30 +142,36 @@ struct MsgqData { #[repr(C)] // There is no struct defined for this in the open-gpu-kernel-source headers. // Instead it is defined by code in `GspMsgQueuesInit()`. -struct Msgq { +// TODO: Revert to private once `IoView` projections replace the `gsp_mem` module. +pub(super) struct Msgq { /// Header for sending messages, including the write pointer. - tx: MsgqTxHeader, + pub(super) tx: MsgqTxHeader, /// Header for receiving messages, including the read pointer. - rx: MsgqRxHeader, + pub(super) rx: MsgqRxHeader, /// The message queue proper. msgq: MsgqData, } /// Structure shared between the driver and the GSP and containing the command and message queues. #[repr(C)] -struct GspMem { +// TODO: Revert to private once `IoView` projections replace the `gsp_mem` module. +pub(super) struct GspMem { /// Self-mapping page table entries. - ptes: PteArray<{ GSP_PAGE_SIZE / size_of::<u64>() }>, + ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>, /// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the /// write and read pointers that the CPU updates. /// /// This member is read-only for the GSP. - cpuq: Msgq, + pub(super) cpuq: Msgq, /// GSP queue: the GSP writes messages here, and the driver reads them. It also contains the /// write and read pointers that the GSP updates. /// /// This member is read-only for the driver. - gspq: Msgq, + pub(super) gspq: Msgq, +} + +impl GspMem { + const PTE_ARRAY_SIZE: usize = GSP_PAGE_SIZE / size_of::<u64>(); } // SAFETY: These structs don't meet the no-padding requirements of AsBytes but @@ -201,9 +203,19 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> { let gsp_mem = CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; - dma_write!(gsp_mem[0].ptes = PteArray::new(gsp_mem.dma_handle())?)?; - dma_write!(gsp_mem[0].cpuq.tx = MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES))?; - dma_write!(gsp_mem[0].cpuq.rx = MsgqRxHeader::new())?; + + let start = gsp_mem.dma_handle(); + // Write values one by one to avoid an on-stack instance of `PteArray`. + for i in 0..GspMem::PTE_ARRAY_SIZE { + dma_write!(gsp_mem, [0]?.ptes.0[i], PteArray::<0>::entry(start, i)?); + } + + dma_write!( + gsp_mem, + [0]?.cpuq.tx, + MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES) + ); + dma_write!(gsp_mem, [0]?.cpuq.rx, MsgqRxHeader::new()); Ok(Self(gsp_mem)) } @@ -317,12 +329,7 @@ fn allocate_command(&mut self, size: usize) -> Result<GspCommand<'_>> { // // - The returned value is between `0` and `MSGQ_NUM_PAGES`. fn gsp_write_ptr(&self) -> u32 { - let gsp_mem = self.0.start_ptr(); - - // SAFETY: - // - The 'CoherentAllocation' contains at least one object. - // - By the invariants of `CoherentAllocation` the pointer is valid. - (unsafe { (*gsp_mem).gspq.tx.write_ptr() } % MSGQ_NUM_PAGES) + super::fw::gsp_mem::gsp_write_ptr(&self.0) } // Returns the index of the memory page the GSP will read the next command from. @@ -331,12 +338,7 @@ fn gsp_write_ptr(&self) -> u32 { // // - The returned value is between `0` and `MSGQ_NUM_PAGES`. fn gsp_read_ptr(&self) -> u32 { - let gsp_mem = self.0.start_ptr(); - - // SAFETY: - // - The 'CoherentAllocation' contains at least one object. - // - By the invariants of `CoherentAllocation` the pointer is valid. 
- (unsafe { (*gsp_mem).gspq.rx.read_ptr() } % MSGQ_NUM_PAGES) + super::fw::gsp_mem::gsp_read_ptr(&self.0) } // Returns the index of the memory page the CPU can read the next message from. @@ -345,27 +347,12 @@ fn gsp_read_ptr(&self) -> u32 { // // - The returned value is between `0` and `MSGQ_NUM_PAGES`. fn cpu_read_ptr(&self) -> u32 { - let gsp_mem = self.0.start_ptr(); - - // SAFETY: - // - The ['CoherentAllocation'] contains at least one object. - // - By the invariants of CoherentAllocation the pointer is valid. - (unsafe { (*gsp_mem).cpuq.rx.read_ptr() } % MSGQ_NUM_PAGES) + super::fw::gsp_mem::cpu_read_ptr(&self.0) } // Informs the GSP that it can send `elem_count` new pages into the message queue. fn advance_cpu_read_ptr(&mut self, elem_count: u32) { - let rptr = self.cpu_read_ptr().wrapping_add(elem_count) % MSGQ_NUM_PAGES; - - // Ensure read pointer is properly ordered. - fence(Ordering::SeqCst); - - let gsp_mem = self.0.start_ptr_mut(); - - // SAFETY: - // - The 'CoherentAllocation' contains at least one object. - // - By the invariants of `CoherentAllocation` the pointer is valid. - unsafe { (*gsp_mem).cpuq.rx.set_read_ptr(rptr) }; + super::fw::gsp_mem::advance_cpu_read_ptr(&self.0, elem_count) } // Returns the index of the memory page the CPU can write the next command to. @@ -374,26 +361,12 @@ fn advance_cpu_read_ptr(&mut self, elem_count: u32) { // // - The returned value is between `0` and `MSGQ_NUM_PAGES`. fn cpu_write_ptr(&self) -> u32 { - let gsp_mem = self.0.start_ptr(); - - // SAFETY: - // - The 'CoherentAllocation' contains at least one object. - // - By the invariants of `CoherentAllocation` the pointer is valid. - (unsafe { (*gsp_mem).cpuq.tx.write_ptr() } % MSGQ_NUM_PAGES) + super::fw::gsp_mem::cpu_write_ptr(&self.0) } // Informs the GSP that it can process `elem_count` new pages from the command queue. fn advance_cpu_write_ptr(&mut self, elem_count: u32) { - let wptr = self.cpu_write_ptr().wrapping_add(elem_count) & MSGQ_NUM_PAGES; - let gsp_mem = self.0.start_ptr_mut(); - - // SAFETY: - // - The 'CoherentAllocation' contains at least one object. - // - By the invariants of `CoherentAllocation` the pointer is valid. - unsafe { (*gsp_mem).cpuq.tx.set_write_ptr(wptr) }; - - // Ensure all command data is visible before triggering the GSP read. - fence(Ordering::SeqCst); + super::fw::gsp_mem::advance_cpu_write_ptr(&self.0, elem_count) } }
diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index 83ff9161..040b30e 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs
@@ -40,6 +40,75 @@ }, }; +// TODO: Replace with `IoView` projections once available; the `unwrap()` calls go away once we +// switch to the new `dma::Coherent` API. +pub(super) mod gsp_mem { + use core::sync::atomic::{ + fence, + Ordering, // + }; + + use kernel::{ + dma::CoherentAllocation, + dma_read, + dma_write, + prelude::*, // + }; + + use crate::gsp::cmdq::{ + GspMem, + MSGQ_NUM_PAGES, // + }; + + pub(in crate::gsp) fn gsp_write_ptr(qs: &CoherentAllocation<GspMem>) -> u32 { + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result<u32> { Ok(dma_read!(qs, [0]?.gspq.tx.0.writePtr) % MSGQ_NUM_PAGES) }().unwrap() + } + + pub(in crate::gsp) fn gsp_read_ptr(qs: &CoherentAllocation<GspMem>) -> u32 { + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result<u32> { Ok(dma_read!(qs, [0]?.gspq.rx.0.readPtr) % MSGQ_NUM_PAGES) }().unwrap() + } + + pub(in crate::gsp) fn cpu_read_ptr(qs: &CoherentAllocation<GspMem>) -> u32 { + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result<u32> { Ok(dma_read!(qs, [0]?.cpuq.rx.0.readPtr) % MSGQ_NUM_PAGES) }().unwrap() + } + + pub(in crate::gsp) fn advance_cpu_read_ptr(qs: &CoherentAllocation<GspMem>, count: u32) { + let rptr = cpu_read_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES; + + // Ensure read pointer is properly ordered. + fence(Ordering::SeqCst); + + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result { + dma_write!(qs, [0]?.cpuq.rx.0.readPtr, rptr); + Ok(()) + }() + .unwrap() + } + + pub(in crate::gsp) fn cpu_write_ptr(qs: &CoherentAllocation<GspMem>) -> u32 { + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result<u32> { Ok(dma_read!(qs, [0]?.cpuq.tx.0.writePtr) % MSGQ_NUM_PAGES) }().unwrap() + } + + pub(in crate::gsp) fn advance_cpu_write_ptr(qs: &CoherentAllocation<GspMem>, count: u32) { + let wptr = cpu_write_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES; + + // PANIC: A `dma::CoherentAllocation` always contains at least one element. + || -> Result { + dma_write!(qs, [0]?.cpuq.tx.0.writePtr, wptr); + Ok(()) + }() + .unwrap(); + + // Ensure all command data is visible before triggering the GSP read. + fence(Ordering::SeqCst); + } +} + /// Empty type to group methods related to heap parameters for running the GSP firmware. enum GspFwHeapParams {} @@ -708,22 +777,6 @@ pub(crate) fn new(msgq_size: u32, rx_hdr_offset: u32, msg_count: u32) -> Self { entryOff: num::usize_into_u32::<GSP_PAGE_SIZE>(), }) } - - /// Returns the value of the write pointer for this queue. - pub(crate) fn write_ptr(&self) -> u32 { - let ptr = core::ptr::from_ref(&self.0.writePtr); - - // SAFETY: `ptr` is a valid pointer to a `u32`. - unsafe { ptr.read_volatile() } - } - - /// Sets the value of the write pointer for this queue. - pub(crate) fn set_write_ptr(&mut self, val: u32) { - let ptr = core::ptr::from_mut(&mut self.0.writePtr); - - // SAFETY: `ptr` is a valid pointer to a `u32`. - unsafe { ptr.write_volatile(val) } - } } // SAFETY: Padding is explicit and does not contain uninitialized data. @@ -739,22 +792,6 @@ impl MsgqRxHeader { pub(crate) fn new() -> Self { Self(Default::default()) } - - /// Returns the value of the read pointer for this queue. - pub(crate) fn read_ptr(&self) -> u32 { - let ptr = core::ptr::from_ref(&self.0.readPtr); - - // SAFETY: `ptr` is a valid pointer to a `u32`. - unsafe { ptr.read_volatile() } - } - - /// Sets the value of the read pointer for this queue. 
- pub(crate) fn set_read_ptr(&mut self, val: u32) { - let ptr = core::ptr::from_mut(&mut self.0.readPtr); - - // SAFETY: `ptr` is a valid pointer to a `u32`. - unsafe { ptr.write_volatile(val) } - } } // SAFETY: Padding is explicit and does not contain uninitialized data.
diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index f3d1599..50c7b45 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c
@@ -444,6 +444,8 @@ hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, (u64)(long)ctx, true); /* prevent infinite recursions */ + if (ret > size) + ret = size; if (ret > 0) memcpy(buf, dma_data, ret);
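The hid_bpf fix clamps the transport's return value to the caller's buffer size before the memcpy(): ret comes back from hardware-facing code, and nothing guarantees it is no larger than the size that was asked for, so an oversized value would overflow buf. The defensive idiom in isolation:

    #include <linux/string.h>
    #include <linux/types.h>

    /* Sketch: clamp an externally produced length before copying with it. */
    static int copy_bounded(u8 *dst, size_t dst_len, const u8 *src, int ret)
    {
            if (ret > (int)dst_len)
                    ret = dst_len;
            if (ret > 0)
                    memcpy(dst, src, ret);
            return ret;
    }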
diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index b949b76..fc5897a 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c
@@ -365,6 +365,9 @@ static const struct apple_non_apple_keyboard non_apple_keyboards[] = { { "A3R" }, { "hfd.cn" }, { "WKB603" }, + { "TH87" }, /* EPOMAKER TH87 BT mode */ + { "HFD Epomaker TH87" }, /* EPOMAKER TH87 USB mode */ + { "2.4G Wireless Receiver" }, /* EPOMAKER TH87 dongle */ }; static bool apple_is_non_apple_keyboard(struct hid_device *hdev) @@ -686,9 +689,7 @@ static const __u8 *apple_report_fixup(struct hid_device *hdev, __u8 *rdesc, hid_info(hdev, "fixing up Magic Keyboard battery report descriptor\n"); *rsize = *rsize - 1; - rdesc = kmemdup(rdesc + 1, *rsize, GFP_KERNEL); - if (!rdesc) - return NULL; + rdesc = rdesc + 1; rdesc[0] = 0x05; rdesc[1] = 0x01;
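Both this hunk and the hid-magicmouse one below drop the kmemdup() in report_fixup(): the descriptor passed in is already a mutable copy that hid-core made for this purpose, so duplicating it again only leaked the duplicate, and the kmemdup failure path returned NULL, discarding the descriptor entirely. Skipping the leading byte is plain pointer arithmetic; a sketch of the in-place contract, with an illustrative patch byte:

    #include <linux/hid.h>

    static const __u8 *my_report_fixup(struct hid_device *hdev, __u8 *rdesc,
                                       unsigned int *rsize)
    {
            /* rdesc is a writable, driver-owned copy: edit it in place.
             * Dropping the first byte means advancing the pointer and
             * shrinking the size, no reallocation needed. */
            if (*rsize >= 2) {
                    *rsize -= 1;
                    rdesc += 1;
                    rdesc[0] = 0x05;    /* illustrative patched byte */
            }
            return rdesc;
    }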
diff --git a/drivers/hid/hid-appletb-kbd.c b/drivers/hid/hid-appletb-kbd.c index a1db3b3..0fdc096 100644 --- a/drivers/hid/hid-appletb-kbd.c +++ b/drivers/hid/hid-appletb-kbd.c
@@ -476,7 +476,7 @@ static int appletb_kbd_suspend(struct hid_device *hdev, pm_message_t msg) return 0; } -static int appletb_kbd_reset_resume(struct hid_device *hdev) +static int appletb_kbd_resume(struct hid_device *hdev) { struct appletb_kbd *kbd = hid_get_drvdata(hdev); @@ -500,7 +500,8 @@ static struct hid_driver appletb_kbd_hid_driver = { .event = appletb_kbd_hid_event, .input_configured = appletb_kbd_input_configured, .suspend = pm_ptr(appletb_kbd_suspend), - .reset_resume = pm_ptr(appletb_kbd_reset_resume), + .resume = pm_ptr(appletb_kbd_resume), + .reset_resume = pm_ptr(appletb_kbd_resume), .driver.dev_groups = appletb_kbd_groups, }; module_hid_driver(appletb_kbd_hid_driver);
diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 8ffcd12..bc93b27 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c
@@ -1399,14 +1399,21 @@ static const __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc, */ if (*rsize == rsize_orig && rdesc[offs] == 0x09 && rdesc[offs + 1] == 0x76) { - *rsize = rsize_orig + 1; - rdesc = kmemdup(rdesc, *rsize, GFP_KERNEL); - if (!rdesc) - return NULL; + __u8 *new_rdesc; + + new_rdesc = devm_kzalloc(&hdev->dev, rsize_orig + 1, + GFP_KERNEL); + if (!new_rdesc) + return rdesc; hid_info(hdev, "Fixing up %s keyb report descriptor\n", drvdata->quirks & QUIRK_T100CHI ? "T100CHI" : "T90CHI"); + + memcpy(new_rdesc, rdesc, rsize_orig); + *rsize = rsize_orig + 1; + rdesc = new_rdesc; + memmove(rdesc + offs + 4, rdesc + offs + 2, 12); rdesc[offs] = 0x19; rdesc[offs + 1] = 0x00; @@ -1491,6 +1498,12 @@ static const struct hid_device_id asus_devices[] = { USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X), QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD | QUIRK_ROG_ALLY_XPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, + USB_DEVICE_ID_ASUSTEK_XGM_2022), + }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, + USB_DEVICE_ID_ASUSTEK_XGM_2023), + }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD), QUIRK_ROG_CLAYMORE_II_KEYBOARD }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
diff --git a/drivers/hid/hid-cmedia.c b/drivers/hid/hid-cmedia.c index 6bc50df..7b3dd41 100644 --- a/drivers/hid/hid-cmedia.c +++ b/drivers/hid/hid-cmedia.c
@@ -99,7 +99,7 @@ static int cmhid_raw_event(struct hid_device *hid, struct hid_report *report, { struct cmhid *cm = hid_get_drvdata(hid); - if (len != CM6533_JD_RAWEV_LEN) + if (len != CM6533_JD_RAWEV_LEN || !(hid->claimed & HID_CLAIMED_INPUT)) goto out; if (memcmp(data+CM6533_JD_SFX_OFFSET, ji_sfx, sizeof(ji_sfx))) goto out;
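This one-line guard (the same check lands in the creative-sb0540 and zydacron hunks further down) stops the raw_event handler from translating events when hidinput never claimed the device: in that case the input device the handler reports through was never allocated, so dereferencing it oopses. The shape of the guard:

    #include <linux/hid.h>

    static int my_raw_event(struct hid_device *hdev, struct hid_report *report,
                            u8 *data, int len)
    {
            /* Bail out unless hidinput claimed this device; returning 0
             * lets hid-core continue its normal report processing. */
            if (len != 6 /* expected report length */ ||
                !(hdev->claimed & HID_CLAIMED_INPUT))
                    return 0;

            /* ... decode data[] and report input events ... */
            return 0;
    }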
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 840a601..833df14 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c
@@ -2057,9 +2057,10 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 * rsize = max_buffer_size; if (csize < rsize) { - dbg_hid("report %d is too short, (%d < %d)\n", report->id, - csize, rsize); - memset(cdata + csize, 0, rsize - csize); + hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %d)\n", + report->id, rsize, csize); + ret = -EINVAL; + goto out; } if ((hid->claimed & HID_CLAIMED_HIDDEV) && hid->hiddev_report_event)
diff --git a/drivers/hid/hid-creative-sb0540.c b/drivers/hid/hid-creative-sb0540.c index b4c8e7a..dfd6add 100644 --- a/drivers/hid/hid-creative-sb0540.c +++ b/drivers/hid/hid-creative-sb0540.c
@@ -153,7 +153,7 @@ static int creative_sb0540_raw_event(struct hid_device *hid, u64 code, main_code; int key; - if (len != 6) + if (len != 6 || !(hid->claimed & HID_CLAIMED_INPUT)) return 0; /* From daemons/hw_hiddev.c sb0540_rec() in lirc */
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 3e299a3..afcee13 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h
@@ -229,6 +229,8 @@ #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X 0x1b4c #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD 0x196b #define USB_DEVICE_ID_ASUSTEK_FX503VD_KEYBOARD 0x1869 +#define USB_DEVICE_ID_ASUSTEK_XGM_2022 0x1970 +#define USB_DEVICE_ID_ASUSTEK_XGM_2023 0x1a9a #define USB_VENDOR_ID_ATEN 0x0557 #define USB_DEVICE_ID_ATEN_UC100KM 0x2004 @@ -453,8 +455,6 @@ #define USB_DEVICE_ID_TOSHIBA_CLICK_L9W 0x0401 #define USB_DEVICE_ID_HP_X2 0x074d #define USB_DEVICE_ID_HP_X2_10_COVER 0x0755 -#define USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN 0x2544 -#define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706 #define I2C_DEVICE_ID_CHROMEBOOK_TROGDOR_POMPOM 0x2F81 #define USB_VENDOR_ID_ELECOM 0x056e
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index d5308ad..9475b7e 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c
@@ -354,6 +354,7 @@ static enum power_supply_property hidinput_battery_props[] = { #define HID_BATTERY_QUIRK_FEATURE (1 << 1) /* ask for feature report */ #define HID_BATTERY_QUIRK_IGNORE (1 << 2) /* completely ignore the battery */ #define HID_BATTERY_QUIRK_AVOID_QUERY (1 << 3) /* do not query the battery */ +#define HID_BATTERY_QUIRK_DYNAMIC (1 << 4) /* report present only after life signs */ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, @@ -386,10 +387,6 @@ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD), HID_BATTERY_QUIRK_IGNORE }, - { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN), - HID_BATTERY_QUIRK_IGNORE }, - { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN), - HID_BATTERY_QUIRK_IGNORE }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_L), HID_BATTERY_QUIRK_AVOID_QUERY }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_MW), @@ -402,8 +399,8 @@ static const struct hid_device_id hid_battery_quirks[] = { * Elan HID touchscreens seem to all report a non present battery, * set HID_BATTERY_QUIRK_IGNORE for all Elan I2C and USB HID devices. */ - { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, HID_ANY_ID), HID_BATTERY_QUIRK_IGNORE }, - { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, HID_ANY_ID), HID_BATTERY_QUIRK_IGNORE }, + { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, HID_ANY_ID), HID_BATTERY_QUIRK_DYNAMIC }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, HID_ANY_ID), HID_BATTERY_QUIRK_DYNAMIC }, {} }; @@ -460,11 +457,14 @@ static int hidinput_get_battery_property(struct power_supply *psy, int ret = 0; switch (prop) { - case POWER_SUPPLY_PROP_PRESENT: case POWER_SUPPLY_PROP_ONLINE: val->intval = 1; break; + case POWER_SUPPLY_PROP_PRESENT: + val->intval = dev->battery_present; + break; + case POWER_SUPPLY_PROP_CAPACITY: if (dev->battery_status != HID_BATTERY_REPORTED && !dev->battery_avoid_query) { @@ -577,6 +577,8 @@ static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type, if (quirks & HID_BATTERY_QUIRK_AVOID_QUERY) dev->battery_avoid_query = true; + dev->battery_present = (quirks & HID_BATTERY_QUIRK_DYNAMIC) ? false : true; + dev->battery = power_supply_register(&dev->dev, psy_desc, &psy_cfg); if (IS_ERR(dev->battery)) { error = PTR_ERR(dev->battery); @@ -632,6 +634,7 @@ static void hidinput_update_battery(struct hid_device *dev, unsigned int usage, return; if (hidinput_update_battery_charge_status(dev, usage, value)) { + dev->battery_present = true; power_supply_changed(dev->battery); return; } @@ -647,6 +650,7 @@ static void hidinput_update_battery(struct hid_device *dev, unsigned int usage, if (dev->battery_status != HID_BATTERY_REPORTED || capacity != dev->battery_capacity || ktime_after(ktime_get_coarse(), dev->battery_ratelimit_time)) { + dev->battery_present = true; dev->battery_capacity = capacity; dev->battery_status = HID_BATTERY_REPORTED; dev->battery_ratelimit_time =
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index d409328..d1dea72 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c
@@ -4487,10 +4487,12 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id) if (!ret) ret = hidpp_ff_init(hidpp, &data); - if (ret) + if (ret) { hid_warn(hidpp->hid_dev, "Unable to initialize force feedback support, errno %d\n", ret); + ret = 0; + } } /* @@ -4668,6 +4670,8 @@ static const struct hid_device_id hidpp_devices[] = { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb038) }, { /* Slim Solar+ K980 Keyboard over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb391) }, + { /* MX Master 4 mouse over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb042) }, {} };
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index 91f621c..9eadf32 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c
@@ -990,13 +990,11 @@ static const __u8 *magicmouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, */ if ((is_usb_magicmouse2(hdev->vendor, hdev->product) || is_usb_magictrackpad2(hdev->vendor, hdev->product)) && - *rsize == 83 && rdesc[46] == 0x84 && rdesc[58] == 0x85) { + *rsize >= 83 && rdesc[46] == 0x84 && rdesc[58] == 0x85) { hid_info(hdev, "fixing up magicmouse battery report descriptor\n"); *rsize = *rsize - 1; - rdesc = kmemdup(rdesc + 1, *rsize, GFP_KERNEL); - if (!rdesc) - return NULL; + rdesc = rdesc + 1; rdesc[0] = 0x05; rdesc[1] = 0x01;
diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c index 33603b0..ef3b5c7 100644 --- a/drivers/hid/hid-mcp2221.c +++ b/drivers/hid/hid-mcp2221.c
@@ -353,6 +353,8 @@ static int mcp_i2c_smbus_read(struct mcp2221 *mcp, usleep_range(90, 100); retries++; } else { + usleep_range(980, 1000); + mcp_cancel_last_cmd(mcp); return ret; } } else {
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 7daa8f6..e82a3c4 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c
@@ -77,6 +77,7 @@ MODULE_LICENSE("GPL"); #define MT_QUIRK_ORIENTATION_INVERT BIT(22) #define MT_QUIRK_APPLE_TOUCHBAR BIT(23) #define MT_QUIRK_YOGABOOK9I BIT(24) +#define MT_QUIRK_KEEP_LATENCY_ON_CLOSE BIT(25) #define MT_INPUTMODE_TOUCHSCREEN 0x02 #define MT_INPUTMODE_TOUCHPAD 0x03 @@ -214,6 +215,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app); #define MT_CLS_WIN_8_DISABLE_WAKEUP 0x0016 #define MT_CLS_WIN_8_NO_STICKY_FINGERS 0x0017 #define MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU 0x0018 +#define MT_CLS_WIN_8_KEEP_LATENCY_ON_CLOSE 0x0019 /* vendor specific classes */ #define MT_CLS_3M 0x0101 @@ -233,6 +235,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app); #define MT_CLS_SMART_TECH 0x0113 #define MT_CLS_APPLE_TOUCHBAR 0x0114 #define MT_CLS_YOGABOOK9I 0x0115 +#define MT_CLS_EGALAX_P80H84 0x0116 #define MT_CLS_SIS 0x0457 #define MT_DEFAULT_MAXCONTACT 10 @@ -334,6 +337,15 @@ static const struct mt_class mt_classes[] = { MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_WIN8_PTP_BUTTONS, .export_all_inputs = true }, + { .name = MT_CLS_WIN_8_KEEP_LATENCY_ON_CLOSE, + .quirks = MT_QUIRK_ALWAYS_VALID | + MT_QUIRK_IGNORE_DUPLICATES | + MT_QUIRK_HOVERING | + MT_QUIRK_CONTACT_CNT_ACCURATE | + MT_QUIRK_STICKY_FINGERS | + MT_QUIRK_WIN8_PTP_BUTTONS | + MT_QUIRK_KEEP_LATENCY_ON_CLOSE, + .export_all_inputs = true }, /* * vendor specific classes @@ -438,6 +450,11 @@ static const struct mt_class mt_classes[] = { MT_QUIRK_YOGABOOK9I, .export_all_inputs = true }, + { .name = MT_CLS_EGALAX_P80H84, + .quirks = MT_QUIRK_ALWAYS_VALID | + MT_QUIRK_IGNORE_DUPLICATES | + MT_QUIRK_CONTACT_CNT_ACCURATE, + }, { } }; @@ -509,12 +526,19 @@ static void mt_get_feature(struct hid_device *hdev, struct hid_report *report) dev_warn(&hdev->dev, "failed to fetch feature %d\n", report->id); } else { + /* The report ID in the request and the response should match */ + if (report->id != buf[0]) { + hid_err(hdev, "Returned feature report did not match the request\n"); + goto free; + } + ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, buf, size, 0); if (ret) dev_warn(&hdev->dev, "failed to report feature\n"); } +free: kfree(buf); } @@ -849,7 +873,8 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, if ((cls->name == MT_CLS_WIN_8 || cls->name == MT_CLS_WIN_8_FORCE_MULTI_INPUT || cls->name == MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU || - cls->name == MT_CLS_WIN_8_DISABLE_WAKEUP) && + cls->name == MT_CLS_WIN_8_DISABLE_WAKEUP || + cls->name == MT_CLS_WIN_8_KEEP_LATENCY_ON_CLOSE) && (field->application == HID_DG_TOUCHPAD || field->application == HID_DG_TOUCHSCREEN)) app->quirks |= MT_QUIRK_CONFIDENCE; @@ -1762,7 +1787,8 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) int ret; if (td->is_haptic_touchpad && (td->mtclass.name == MT_CLS_WIN_8 || - td->mtclass.name == MT_CLS_WIN_8_FORCE_MULTI_INPUT)) { + td->mtclass.name == MT_CLS_WIN_8_FORCE_MULTI_INPUT || + td->mtclass.name == MT_CLS_WIN_8_KEEP_LATENCY_ON_CLOSE)) { if (hid_haptic_input_configured(hdev, td->haptic, hi) == 0) td->is_haptic_touchpad = false; } else { @@ -2075,7 +2101,12 @@ static void mt_on_hid_hw_open(struct hid_device *hdev) static void mt_on_hid_hw_close(struct hid_device *hdev) { - mt_set_modes(hdev, HID_LATENCY_HIGH, TOUCHPAD_REPORT_NONE); + struct mt_device *td = hid_get_drvdata(hdev); + + if (td->mtclass.quirks & MT_QUIRK_KEEP_LATENCY_ON_CLOSE) + mt_set_modes(hdev, HID_LATENCY_NORMAL, TOUCHPAD_REPORT_NONE); + else + mt_set_modes(hdev, HID_LATENCY_HIGH, 
TOUCHPAD_REPORT_NONE); } /* @@ -2215,8 +2246,9 @@ static const struct hid_device_id mt_devices[] = { { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_C000) }, - { .driver_data = MT_CLS_EGALAX, - MT_USB_DEVICE(USB_VENDOR_ID_DWAV, + { .driver_data = MT_CLS_EGALAX_P80H84, + HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_C002) }, /* Elan devices */ @@ -2461,6 +2493,14 @@ static const struct hid_device_id mt_devices[] = { MT_USB_DEVICE(USB_VENDOR_ID_UNITEC, USB_DEVICE_ID_UNITEC_USB_TOUCH_0A19) }, + /* Uniwill touchpads */ + { .driver_data = MT_CLS_WIN_8_KEEP_LATENCY_ON_CLOSE, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_PIXART, 0x0255) }, + { .driver_data = MT_CLS_WIN_8_KEEP_LATENCY_ON_CLOSE, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_PIXART, 0x0274) }, + /* VTL panels */ { .driver_data = MT_CLS_VTL, MT_USB_DEVICE(USB_VENDOR_ID_VTL,
diff --git a/drivers/hid/hid-zydacron.c b/drivers/hid/hid-zydacron.c index 3bdb26f..1aae80f 100644 --- a/drivers/hid/hid-zydacron.c +++ b/drivers/hid/hid-zydacron.c
@@ -114,7 +114,7 @@ static int zc_raw_event(struct hid_device *hdev, struct hid_report *report, unsigned key; unsigned short index; - if (report->id == data[0]) { + if (report->id == data[0] && (hdev->claimed & HID_CLAIMED_INPUT)) { /* break keys */ for (index = 0; index < 4; index++) {
diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h index fa5d68c..2738997 100644 --- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h +++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h
@@ -39,6 +39,8 @@ #define PCI_DEVICE_ID_INTEL_ISH_PTL_H 0xE345 #define PCI_DEVICE_ID_INTEL_ISH_PTL_P 0xE445 #define PCI_DEVICE_ID_INTEL_ISH_WCL 0x4D45 +#define PCI_DEVICE_ID_INTEL_ISH_NVL_H 0xD354 +#define PCI_DEVICE_ID_INTEL_ISH_NVL_S 0x6E78 #define REVISION_ID_CHT_A0 0x6 #define REVISION_ID_CHT_Ax_SI 0x0
diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index 1612e8c..ed3405c 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
@@ -28,11 +28,15 @@ enum ishtp_driver_data_index { ISHTP_DRIVER_DATA_LNL_M, ISHTP_DRIVER_DATA_PTL, ISHTP_DRIVER_DATA_WCL, + ISHTP_DRIVER_DATA_NVL_H, + ISHTP_DRIVER_DATA_NVL_S, }; #define ISH_FW_GEN_LNL_M "lnlm" #define ISH_FW_GEN_PTL "ptl" #define ISH_FW_GEN_WCL "wcl" +#define ISH_FW_GEN_NVL_H "nvlh" +#define ISH_FW_GEN_NVL_S "nvls" #define ISH_FIRMWARE_PATH(gen) "intel/ish/ish_" gen ".bin" #define ISH_FIRMWARE_PATH_ALL "intel/ish/ish_*.bin" @@ -47,6 +51,12 @@ static struct ishtp_driver_data ishtp_driver_data[] = { [ISHTP_DRIVER_DATA_WCL] = { .fw_generation = ISH_FW_GEN_WCL, }, + [ISHTP_DRIVER_DATA_NVL_H] = { + .fw_generation = ISH_FW_GEN_NVL_H, + }, + [ISHTP_DRIVER_DATA_NVL_S] = { + .fw_generation = ISH_FW_GEN_NVL_S, + }, }; static const struct pci_device_id ish_pci_tbl[] = { @@ -76,6 +86,8 @@ static const struct pci_device_id ish_pci_tbl[] = { {PCI_DEVICE_DATA(INTEL, ISH_PTL_H, ISHTP_DRIVER_DATA_PTL)}, {PCI_DEVICE_DATA(INTEL, ISH_PTL_P, ISHTP_DRIVER_DATA_PTL)}, {PCI_DEVICE_DATA(INTEL, ISH_WCL, ISHTP_DRIVER_DATA_WCL)}, + {PCI_DEVICE_DATA(INTEL, ISH_NVL_H, ISHTP_DRIVER_DATA_NVL_H)}, + {PCI_DEVICE_DATA(INTEL, ISH_NVL_S, ISHTP_DRIVER_DATA_NVL_S)}, {} }; MODULE_DEVICE_TABLE(pci, ish_pci_tbl);
diff --git a/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-hid.c b/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-hid.c index f9fcb39..8075992 100644 --- a/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-hid.c +++ b/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-hid.c
@@ -127,6 +127,7 @@ int quicki2c_hid_probe(struct quicki2c_device *qcdev) hid->product = le16_to_cpu(qcdev->dev_desc.product_id); snprintf(hid->name, sizeof(hid->name), "%s %04X:%04X", "quicki2c-hid", hid->vendor, hid->product); + strscpy(hid->phys, dev_name(qcdev->dev), sizeof(hid->phys)); ret = hid_add_device(hid); if (ret) {
diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-hid.c b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-hid.c index 82c72bf..91d5807 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-hid.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-hid.c
@@ -118,6 +118,7 @@ int quickspi_hid_probe(struct quickspi_device *qsdev) hid->product = le16_to_cpu(qsdev->dev_desc.product_id); snprintf(hid->name, sizeof(hid->name), "%s %04X:%04X", "quickspi-hid", hid->vendor, hid->product); + strscpy(hid->phys, dev_name(qsdev->dev), sizeof(hid->phys)); ret = hid_add_device(hid); if (ret) {
diff --git a/drivers/hid/usbhid/hid-pidff.c b/drivers/hid/usbhid/hid-pidff.c index fdcf034..fbf3dbc 100644 --- a/drivers/hid/usbhid/hid-pidff.c +++ b/drivers/hid/usbhid/hid-pidff.c
@@ -1452,10 +1452,13 @@ static int pidff_init_fields(struct pidff_device *pidff, struct input_dev *dev) hid_warn(pidff->hid, "unknown ramp effect layout\n"); if (PIDFF_FIND_FIELDS(set_condition, PID_SET_CONDITION, 1)) { - if (test_and_clear_bit(FF_SPRING, dev->ffbit) || - test_and_clear_bit(FF_DAMPER, dev->ffbit) || - test_and_clear_bit(FF_FRICTION, dev->ffbit) || - test_and_clear_bit(FF_INERTIA, dev->ffbit)) + bool test = false; + + test |= test_and_clear_bit(FF_SPRING, dev->ffbit); + test |= test_and_clear_bit(FF_DAMPER, dev->ffbit); + test |= test_and_clear_bit(FF_FRICTION, dev->ffbit); + test |= test_and_clear_bit(FF_INERTIA, dev->ffbit); + if (test) hid_warn(pidff->hid, "unknown condition effect layout\n"); }
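The pidff hunk is a short-circuit fix: in a || b || c || d, evaluation stops at the first operand that is true, so at most one of the four test_and_clear_bit() calls ever ran and the remaining FF_* capability bits stayed set even though the device cannot honour them. Accumulating with |= forces all four side effects. In miniature:

    #include <linux/bitops.h>
    #include <linux/types.h>

    static bool clear_unsupported(unsigned long *ffbit)
    {
            bool any = false;

            /* |= does not short-circuit: every bit gets cleared even after
             * the first hit, unlike chaining the calls with ||. */
            any |= test_and_clear_bit(0, ffbit);
            any |= test_and_clear_bit(1, ffbit);
            any |= test_and_clear_bit(2, ffbit);
            return any;     /* true if anything had to be cleared */
    }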
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 9b2c710..da1f0ea 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c
@@ -1208,10 +1208,20 @@ static int wacom_intuos_bt_irq(struct wacom_wac *wacom, size_t len) switch (data[0]) { case 0x04: + if (len < 32) { + dev_warn(wacom->pen_input->dev.parent, + "Report 0x04 too short: %zu bytes\n", len); + break; + } wacom_intuos_bt_process_data(wacom, data + i); i += 10; fallthrough; case 0x03: + if (i == 1 && len < 22) { + dev_warn(wacom->pen_input->dev.parent, + "Report 0x03 too short: %zu bytes\n", len); + break; + } wacom_intuos_bt_process_data(wacom, data + i); i += 10; wacom_intuos_bt_process_data(wacom, data + i);
diff --git a/drivers/hv/mshv_regions.c b/drivers/hv/mshv_regions.c index c28aac0..fdffd4f 100644 --- a/drivers/hv/mshv_regions.c +++ b/drivers/hv/mshv_regions.c
@@ -314,15 +314,17 @@ int mshv_region_pin(struct mshv_mem_region *region) ret = pin_user_pages_fast(userspace_addr, nr_pages, FOLL_WRITE | FOLL_LONGTERM, pages); - if (ret < 0) + if (ret != nr_pages) goto release_pages; } return 0; release_pages: + if (ret > 0) + done_count += ret; mshv_region_invalidate_pages(region, 0, done_count); - return ret; + return ret < 0 ? ret : -ENOMEM; } static int mshv_region_chunk_unmap(struct mshv_mem_region *region,
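pin_user_pages_fast() can legitimately return a positive count smaller than the number requested; the old ret < 0 test treated such a partial pin as success, leaving the tail of the region unpinned while the caller believed it was fully pinned. The fix releases whatever was pinned and maps the short count to -ENOMEM. The pattern, sketched around the real call with hypothetical names:

    #include <linux/mm.h>

    static int pin_all_or_nothing(unsigned long uaddr, int nr_pages,
                                  struct page **pages)
    {
            int ret = pin_user_pages_fast(uaddr, nr_pages,
                                          FOLL_WRITE | FOLL_LONGTERM, pages);

            if (ret == nr_pages)
                    return 0;                       /* every page pinned */
            if (ret > 0)
                    unpin_user_pages(pages, ret);   /* undo the partial pin */
            return ret < 0 ? ret : -ENOMEM;         /* short pin == -ENOMEM */
    }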
diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h index 04c2a19..826798f 100644 --- a/drivers/hv/mshv_root.h +++ b/drivers/hv/mshv_root.h
@@ -190,7 +190,6 @@ struct hv_synic_pages { }; struct mshv_root { - struct hv_synic_pages __percpu *synic_pages; spinlock_t pt_ht_lock; DECLARE_HASHTABLE(pt_htable, MSHV_PARTITIONS_HASH_BITS); struct hv_partition_property_vmm_capabilities vmm_caps; @@ -249,8 +248,8 @@ int mshv_register_doorbell(u64 partition_id, doorbell_cb_t doorbell_cb, void mshv_unregister_doorbell(u64 partition_id, int doorbell_portid); void mshv_isr(void); -int mshv_synic_init(unsigned int cpu); -int mshv_synic_cleanup(unsigned int cpu); +int mshv_synic_init(struct device *dev); +void mshv_synic_exit(void); static inline bool mshv_partition_encrypted(struct mshv_partition *partition) {
diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c index 82ff823..c8e5523 100644 --- a/drivers/hv/mshv_root_main.c +++ b/drivers/hv/mshv_root_main.c
@@ -120,7 +120,6 @@ static u16 mshv_passthru_hvcalls[] = { HVCALL_SET_VP_REGISTERS, HVCALL_TRANSLATE_VIRTUAL_ADDRESS, HVCALL_CLEAR_VIRTUAL_INTERRUPT, - HVCALL_SCRUB_PARTITION, HVCALL_REGISTER_INTERCEPT_RESULT, HVCALL_ASSERT_VIRTUAL_INTERRUPT, HVCALL_GET_GPA_PAGES_ACCESS_STATES, @@ -631,7 +630,7 @@ static bool mshv_handle_gpa_intercept(struct mshv_vp *vp) { struct mshv_partition *p = vp->vp_partition; struct mshv_mem_region *region; - bool ret; + bool ret = false; u64 gfn; #if defined(CONFIG_X86_64) struct hv_x64_memory_intercept_message *msg = @@ -642,6 +641,8 @@ static bool mshv_handle_gpa_intercept(struct mshv_vp *vp) (struct hv_arm64_memory_intercept_message *) vp->vp_intercept_msg_page->u.payload; #endif + enum hv_intercept_access_type access_type = + msg->header.intercept_access_type; gfn = HVPFN_DOWN(msg->guest_physical_address); @@ -649,12 +650,19 @@ static bool mshv_handle_gpa_intercept(struct mshv_vp *vp) if (!region) return false; + if (access_type == HV_INTERCEPT_ACCESS_WRITE && + !(region->hv_map_flags & HV_MAP_GPA_WRITABLE)) + goto put_region; + + if (access_type == HV_INTERCEPT_ACCESS_EXECUTE && + !(region->hv_map_flags & HV_MAP_GPA_EXECUTABLE)) + goto put_region; + /* Only movable memory ranges are supported for GPA intercepts */ if (region->mreg_type == MSHV_REGION_TYPE_MEM_MOVABLE) ret = mshv_region_handle_gfn_fault(region, gfn); - else - ret = false; +put_region: mshv_region_put(region); return ret; @@ -1289,7 +1297,7 @@ static int mshv_prepare_pinned_region(struct mshv_mem_region *region) */ static long mshv_map_user_memory(struct mshv_partition *partition, - struct mshv_user_mem_region mem) + struct mshv_user_mem_region *mem) { struct mshv_mem_region *region; struct vm_area_struct *vma; @@ -1297,12 +1305,12 @@ mshv_map_user_memory(struct mshv_partition *partition, ulong mmio_pfn; long ret; - if (mem.flags & BIT(MSHV_SET_MEM_BIT_UNMAP) || - !access_ok((const void __user *)mem.userspace_addr, mem.size)) + if (mem->flags & BIT(MSHV_SET_MEM_BIT_UNMAP) || + !access_ok((const void __user *)mem->userspace_addr, mem->size)) return -EINVAL; mmap_read_lock(current->mm); - vma = vma_lookup(current->mm, mem.userspace_addr); + vma = vma_lookup(current->mm, mem->userspace_addr); is_mmio = vma ? !!(vma->vm_flags & (VM_IO | VM_PFNMAP)) : 0; mmio_pfn = is_mmio ? 
vma->vm_pgoff : 0; mmap_read_unlock(current->mm); @@ -1310,7 +1318,7 @@ mshv_map_user_memory(struct mshv_partition *partition, if (!vma) return -EINVAL; - ret = mshv_partition_create_region(partition, &mem, ®ion, + ret = mshv_partition_create_region(partition, mem, ®ion, is_mmio); if (ret) return ret; @@ -1348,32 +1356,32 @@ mshv_map_user_memory(struct mshv_partition *partition, return 0; errout: - vfree(region); + mshv_region_put(region); return ret; } /* Called for unmapping both the guest ram and the mmio space */ static long mshv_unmap_user_memory(struct mshv_partition *partition, - struct mshv_user_mem_region mem) + struct mshv_user_mem_region *mem) { struct mshv_mem_region *region; - if (!(mem.flags & BIT(MSHV_SET_MEM_BIT_UNMAP))) + if (!(mem->flags & BIT(MSHV_SET_MEM_BIT_UNMAP))) return -EINVAL; spin_lock(&partition->pt_mem_regions_lock); - region = mshv_partition_region_by_gfn(partition, mem.guest_pfn); + region = mshv_partition_region_by_gfn(partition, mem->guest_pfn); if (!region) { spin_unlock(&partition->pt_mem_regions_lock); return -ENOENT; } /* Paranoia check */ - if (region->start_uaddr != mem.userspace_addr || - region->start_gfn != mem.guest_pfn || - region->nr_pages != HVPFN_DOWN(mem.size)) { + if (region->start_uaddr != mem->userspace_addr || + region->start_gfn != mem->guest_pfn || + region->nr_pages != HVPFN_DOWN(mem->size)) { spin_unlock(&partition->pt_mem_regions_lock); return -EINVAL; } @@ -1404,9 +1412,9 @@ mshv_partition_ioctl_set_memory(struct mshv_partition *partition, return -EINVAL; if (mem.flags & BIT(MSHV_SET_MEM_BIT_UNMAP)) - return mshv_unmap_user_memory(partition, mem); + return mshv_unmap_user_memory(partition, &mem); - return mshv_map_user_memory(partition, mem); + return mshv_map_user_memory(partition, &mem); } static long @@ -2064,7 +2072,6 @@ mshv_dev_release(struct inode *inode, struct file *filp) return 0; } -static int mshv_cpuhp_online; static int mshv_root_sched_online; static const char *scheduler_type_to_string(enum hv_scheduler_type type) @@ -2249,27 +2256,6 @@ root_scheduler_deinit(void) free_percpu(root_scheduler_output); } -static int mshv_reboot_notify(struct notifier_block *nb, - unsigned long code, void *unused) -{ - cpuhp_remove_state(mshv_cpuhp_online); - return 0; -} - -struct notifier_block mshv_reboot_nb = { - .notifier_call = mshv_reboot_notify, -}; - -static void mshv_root_partition_exit(void) -{ - unregister_reboot_notifier(&mshv_reboot_nb); -} - -static int __init mshv_root_partition_init(struct device *dev) -{ - return register_reboot_notifier(&mshv_reboot_nb); -} - static int __init mshv_init_vmm_caps(struct device *dev) { int ret; @@ -2314,39 +2300,21 @@ static int __init mshv_parent_partition_init(void) MSHV_HV_MAX_VERSION); } - mshv_root.synic_pages = alloc_percpu(struct hv_synic_pages); - if (!mshv_root.synic_pages) { - dev_err(dev, "Failed to allocate percpu synic page\n"); - ret = -ENOMEM; + ret = mshv_synic_init(dev); + if (ret) goto device_deregister; - } - - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mshv_synic", - mshv_synic_init, - mshv_synic_cleanup); - if (ret < 0) { - dev_err(dev, "Failed to setup cpu hotplug state: %i\n", ret); - goto free_synic_pages; - } - - mshv_cpuhp_online = ret; ret = mshv_init_vmm_caps(dev); if (ret) - goto remove_cpu_state; + goto synic_cleanup; ret = mshv_retrieve_scheduler_type(dev); if (ret) - goto remove_cpu_state; - - if (hv_root_partition()) - ret = mshv_root_partition_init(dev); - if (ret) - goto remove_cpu_state; + goto synic_cleanup; ret = root_scheduler_init(dev); if (ret) 
- goto exit_partition; + goto synic_cleanup; ret = mshv_debugfs_init(); if (ret) @@ -2367,13 +2335,8 @@ static int __init mshv_parent_partition_init(void) mshv_debugfs_exit(); deinit_root_scheduler: root_scheduler_deinit(); -exit_partition: - if (hv_root_partition()) - mshv_root_partition_exit(); -remove_cpu_state: - cpuhp_remove_state(mshv_cpuhp_online); -free_synic_pages: - free_percpu(mshv_root.synic_pages); +synic_cleanup: + mshv_synic_exit(); device_deregister: misc_deregister(&mshv_dev); return ret; @@ -2387,10 +2350,7 @@ static void __exit mshv_parent_partition_exit(void) misc_deregister(&mshv_dev); mshv_irqfd_wq_cleanup(); root_scheduler_deinit(); - if (hv_root_partition()) - mshv_root_partition_exit(); - cpuhp_remove_state(mshv_cpuhp_online); - free_percpu(mshv_root.synic_pages); + mshv_synic_exit(); } module_init(mshv_parent_partition_init);
diff --git a/drivers/hv/mshv_synic.c b/drivers/hv/mshv_synic.c index 216065e..43f1bcb 100644 --- a/drivers/hv/mshv_synic.c +++ b/drivers/hv/mshv_synic.c
@@ -10,13 +10,22 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/mm.h> +#include <linux/interrupt.h> #include <linux/io.h> #include <linux/random.h> +#include <linux/cpuhotplug.h> +#include <linux/reboot.h> #include <asm/mshyperv.h> +#include <linux/acpi.h> #include "mshv_eventfd.h" #include "mshv.h" +static int synic_cpuhp_online; +static struct hv_synic_pages __percpu *synic_pages; +static int mshv_sint_vector = -1; /* hwirq for the SynIC SINTs */ +static int mshv_sint_irq = -1; /* Linux IRQ for mshv_sint_vector */ + static u32 synic_event_ring_get_queued_port(u32 sint_index) { struct hv_synic_event_ring_page **event_ring_page; @@ -26,7 +35,7 @@ static u32 synic_event_ring_get_queued_port(u32 sint_index) u32 message; u8 tail; - spages = this_cpu_ptr(mshv_root.synic_pages); + spages = this_cpu_ptr(synic_pages); event_ring_page = &spages->synic_event_ring_page; synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail); @@ -393,7 +402,7 @@ mshv_intercept_isr(struct hv_message *msg) void mshv_isr(void) { - struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages); + struct hv_synic_pages *spages = this_cpu_ptr(synic_pages); struct hv_message_page **msg_page = &spages->hyp_synic_message_page; struct hv_message *msg; bool handled; @@ -437,25 +446,21 @@ void mshv_isr(void) if (msg->header.message_flags.msg_pending) hv_set_non_nested_msr(HV_MSR_EOM, 0); -#ifdef HYPERVISOR_CALLBACK_VECTOR - add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR); -#endif + add_interrupt_randomness(mshv_sint_vector); } else { pr_warn_once("%s: unknown message type 0x%x\n", __func__, msg->header.message_type); } } -int mshv_synic_init(unsigned int cpu) +static int mshv_synic_cpu_init(unsigned int cpu) { union hv_synic_simp simp; union hv_synic_siefp siefp; union hv_synic_sirbp sirbp; -#ifdef HYPERVISOR_CALLBACK_VECTOR union hv_synic_sint sint; -#endif union hv_synic_scontrol sctrl; - struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages); + struct hv_synic_pages *spages = this_cpu_ptr(synic_pages); struct hv_message_page **msg_page = &spages->hyp_synic_message_page; struct hv_synic_event_flags_page **event_flags_page = &spages->synic_event_flags_page; @@ -496,10 +501,12 @@ int mshv_synic_init(unsigned int cpu) hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64); -#ifdef HYPERVISOR_CALLBACK_VECTOR + if (mshv_sint_irq != -1) + enable_percpu_irq(mshv_sint_irq, 0); + /* Enable intercepts */ sint.as_uint64 = 0; - sint.vector = HYPERVISOR_CALLBACK_VECTOR; + sint.vector = mshv_sint_vector; sint.masked = false; sint.auto_eoi = hv_recommend_using_aeoi(); hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX, @@ -507,13 +514,12 @@ int mshv_synic_init(unsigned int cpu) /* Doorbell SINT */ sint.as_uint64 = 0; - sint.vector = HYPERVISOR_CALLBACK_VECTOR; + sint.vector = mshv_sint_vector; sint.masked = false; sint.as_intercept = 1; sint.auto_eoi = hv_recommend_using_aeoi(); hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX, sint.as_uint64); -#endif /* Enable global synic bit */ sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL); @@ -542,14 +548,14 @@ int mshv_synic_init(unsigned int cpu) return -EFAULT; } -int mshv_synic_cleanup(unsigned int cpu) +static int mshv_synic_cpu_exit(unsigned int cpu) { union hv_synic_sint sint; union hv_synic_simp simp; union hv_synic_siefp siefp; union hv_synic_sirbp sirbp; union hv_synic_scontrol sctrl; - struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages); + struct hv_synic_pages *spages = 
this_cpu_ptr(synic_pages); struct hv_message_page **msg_page = &spages->hyp_synic_message_page; struct hv_synic_event_flags_page **event_flags_page = &spages->synic_event_flags_page; @@ -568,6 +574,9 @@ int mshv_synic_cleanup(unsigned int cpu) hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX, sint.as_uint64); + if (mshv_sint_irq != -1) + disable_percpu_irq(mshv_sint_irq); + /* Disable Synic's event ring page */ sirbp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIRBP); sirbp.sirbp_enabled = false; @@ -663,3 +672,152 @@ mshv_unregister_doorbell(u64 partition_id, int doorbell_portid) mshv_portid_free(doorbell_portid); } + +static int mshv_synic_reboot_notify(struct notifier_block *nb, + unsigned long code, void *unused) +{ + if (!hv_root_partition()) + return 0; + + cpuhp_remove_state(synic_cpuhp_online); + return 0; +} + +static struct notifier_block mshv_synic_reboot_nb = { + .notifier_call = mshv_synic_reboot_notify, +}; + +#ifndef HYPERVISOR_CALLBACK_VECTOR +static DEFINE_PER_CPU(long, mshv_evt); + +static irqreturn_t mshv_percpu_isr(int irq, void *dev_id) +{ + mshv_isr(); + return IRQ_HANDLED; +} + +#ifdef CONFIG_ACPI +static int __init mshv_acpi_setup_sint_irq(void) +{ + return acpi_register_gsi(NULL, mshv_sint_vector, ACPI_EDGE_SENSITIVE, + ACPI_ACTIVE_HIGH); +} + +static void mshv_acpi_cleanup_sint_irq(void) +{ + acpi_unregister_gsi(mshv_sint_vector); +} +#else +static int __init mshv_acpi_setup_sint_irq(void) +{ + return -ENODEV; +} + +static void mshv_acpi_cleanup_sint_irq(void) +{ +} +#endif + +static int __init mshv_sint_vector_setup(void) +{ + int ret; + struct hv_register_assoc reg = { + .name = HV_ARM64_REGISTER_SINT_RESERVED_INTERRUPT_ID, + }; + union hv_input_vtl input_vtl = { 0 }; + + if (acpi_disabled) + return -ENODEV; + + ret = hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF, + 1, input_vtl, ®); + if (ret || !reg.value.reg64) + return -ENODEV; + + mshv_sint_vector = reg.value.reg64; + ret = mshv_acpi_setup_sint_irq(); + if (ret < 0) { + pr_err("Failed to setup IRQ for MSHV SINT vector %d: %d\n", + mshv_sint_vector, ret); + goto out_fail; + } + + mshv_sint_irq = ret; + + ret = request_percpu_irq(mshv_sint_irq, mshv_percpu_isr, "MSHV", + &mshv_evt); + if (ret) + goto out_unregister; + + return 0; + +out_unregister: + mshv_acpi_cleanup_sint_irq(); +out_fail: + return ret; +} + +static void mshv_sint_vector_cleanup(void) +{ + free_percpu_irq(mshv_sint_irq, &mshv_evt); + mshv_acpi_cleanup_sint_irq(); +} +#else /* !HYPERVISOR_CALLBACK_VECTOR */ +static int __init mshv_sint_vector_setup(void) +{ + mshv_sint_vector = HYPERVISOR_CALLBACK_VECTOR; + return 0; +} + +static void mshv_sint_vector_cleanup(void) +{ +} +#endif /* HYPERVISOR_CALLBACK_VECTOR */ + +int __init mshv_synic_init(struct device *dev) +{ + int ret = 0; + + ret = mshv_sint_vector_setup(); + if (ret) + return ret; + + synic_pages = alloc_percpu(struct hv_synic_pages); + if (!synic_pages) { + dev_err(dev, "Failed to allocate percpu synic page\n"); + ret = -ENOMEM; + goto sint_vector_cleanup; + } + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mshv_synic", + mshv_synic_cpu_init, + mshv_synic_cpu_exit); + if (ret < 0) { + dev_err(dev, "Failed to setup cpu hotplug state: %i\n", ret); + goto free_synic_pages; + } + + synic_cpuhp_online = ret; + + ret = register_reboot_notifier(&mshv_synic_reboot_nb); + if (ret) + goto remove_cpuhp_state; + + return 0; + +remove_cpuhp_state: + cpuhp_remove_state(synic_cpuhp_online); +free_synic_pages: + free_percpu(synic_pages); +sint_vector_cleanup: + 
mshv_sint_vector_cleanup(); + return ret; +} + +void mshv_synic_exit(void) +{ + unregister_reboot_notifier(&mshv_synic_reboot_nb); + cpuhp_remove_state(synic_cpuhp_online); + free_percpu(synic_pages); + mshv_sint_vector_cleanup(); +}
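The larger synic rework exists because not every architecture has HYPERVISOR_CALLBACK_VECTOR: on those that lack it, the SINT vector is read from the hypervisor, registered as an ACPI GSI, and wired up as a per-CPU Linux IRQ that each CPU enables or disables from the cpuhp callbacks. The request_percpu_irq() lifecycle in isolation, with hypothetical names:

    #include <linux/interrupt.h>
    #include <linux/percpu.h>

    static DEFINE_PER_CPU(long, evt);   /* per-CPU cookie for the handler */

    static irqreturn_t my_percpu_isr(int irq, void *dev_id)
    {
            /* Runs on whichever CPU received the interrupt. */
            return IRQ_HANDLED;
    }

    static int my_irq_setup(int irq)
    {
            /* Registered once for all CPUs ... */
            int ret = request_percpu_irq(irq, my_percpu_isr, "example", &evt);

            if (ret)
                    return ret;
            /* ... but each CPU must enable its own copy, typically from a
             * CPU-hotplug online callback running on that CPU. */
            enable_percpu_irq(irq, 0);
            return 0;
    }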
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 41c3817..3288672 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig
@@ -1493,8 +1493,7 @@ config SENSORS_LM75 tristate "National Semiconductor LM75 and compatibles" - depends on I2C - depends on I3C || !I3C + depends on I3C_OR_I2C select REGMAP_I2C select REGMAP_I3C if I3C help @@ -1927,16 +1926,6 @@ This driver can also be built as a module. If so, the module will be called raspberrypi-hwmon. -config SENSORS_SA67MCU - tristate "Kontron sa67mcu hardware monitoring driver" - depends on MFD_SL28CPLD || COMPILE_TEST - help - If you say yes here you get support for the voltage and temperature - monitor of the sa67 board management controller. - - This driver can also be built as a module. If so, the module - will be called sa67mcu-hwmon. - config SENSORS_SL28CPLD tristate "Kontron sl28cpld hardware monitoring driver" depends on MFD_SL28CPLD || COMPILE_TEST @@ -2392,8 +2381,7 @@ config SENSORS_TMP108 tristate "Texas Instruments TMP108" - depends on I2C - depends on I3C || !I3C + depends on I3C_OR_I2C select REGMAP_I2C select REGMAP_I3C if I3C help
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index eade8e3..5833c80 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile
@@ -199,7 +199,6 @@ obj-$(CONFIG_SENSORS_PWM_FAN) += pwm-fan.o obj-$(CONFIG_SENSORS_QNAP_MCU_HWMON) += qnap-mcu-hwmon.o obj-$(CONFIG_SENSORS_RASPBERRYPI_HWMON) += raspberrypi-hwmon.o -obj-$(CONFIG_SENSORS_SA67MCU) += sa67mcu-hwmon.o obj-$(CONFIG_SENSORS_SBTSI) += sbtsi_temp.o obj-$(CONFIG_SENSORS_SBRMI) += sbrmi.o obj-$(CONFIG_SENSORS_SCH56XX_COMMON)+= sch56xx-common.o
diff --git a/drivers/hwmon/adm1177.c b/drivers/hwmon/adm1177.c index 8b2c965..7888afe8 100644 --- a/drivers/hwmon/adm1177.c +++ b/drivers/hwmon/adm1177.c
@@ -10,6 +10,8 @@ #include <linux/hwmon.h> #include <linux/i2c.h> #include <linux/init.h> +#include <linux/math64.h> +#include <linux/minmax.h> #include <linux/module.h> #include <linux/regulator/consumer.h> @@ -33,7 +35,7 @@ struct adm1177_state { struct i2c_client *client; u32 r_sense_uohm; - u32 alert_threshold_ua; + u64 alert_threshold_ua; bool vrange_high; }; @@ -48,7 +50,7 @@ static int adm1177_write_cmd(struct adm1177_state *st, u8 cmd) } static int adm1177_write_alert_thr(struct adm1177_state *st, - u32 alert_threshold_ua) + u64 alert_threshold_ua) { u64 val; int ret; @@ -91,8 +93,8 @@ static int adm1177_read(struct device *dev, enum hwmon_sensor_types type, *val = div_u64((105840000ull * dummy), 4096 * st->r_sense_uohm); return 0; - case hwmon_curr_max_alarm: - *val = st->alert_threshold_ua; + case hwmon_curr_max: + *val = div_u64(st->alert_threshold_ua, 1000); return 0; default: return -EOPNOTSUPP; @@ -126,9 +128,10 @@ static int adm1177_write(struct device *dev, enum hwmon_sensor_types type, switch (type) { case hwmon_curr: switch (attr) { - case hwmon_curr_max_alarm: - adm1177_write_alert_thr(st, val); - return 0; + case hwmon_curr_max: + val = clamp_val(val, 0, + div_u64(105840000ULL, st->r_sense_uohm)); + return adm1177_write_alert_thr(st, (u64)val * 1000); default: return -EOPNOTSUPP; } @@ -156,7 +159,7 @@ static umode_t adm1177_is_visible(const void *data, if (st->r_sense_uohm) return 0444; return 0; - case hwmon_curr_max_alarm: + case hwmon_curr_max: if (st->r_sense_uohm) return 0644; return 0; @@ -170,7 +173,7 @@ static umode_t adm1177_is_visible(const void *data, static const struct hwmon_channel_info * const adm1177_info[] = { HWMON_CHANNEL_INFO(curr, - HWMON_C_INPUT | HWMON_C_MAX_ALARM), + HWMON_C_INPUT | HWMON_C_MAX), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT), NULL @@ -192,7 +195,8 @@ static int adm1177_probe(struct i2c_client *client) struct device *dev = &client->dev; struct device *hwmon_dev; struct adm1177_state *st; - u32 alert_threshold_ua; + u64 alert_threshold_ua; + u32 prop; int ret; st = devm_kzalloc(dev, sizeof(*st), GFP_KERNEL); @@ -208,22 +212,26 @@ static int adm1177_probe(struct i2c_client *client) if (device_property_read_u32(dev, "shunt-resistor-micro-ohms", &st->r_sense_uohm)) st->r_sense_uohm = 0; - if (device_property_read_u32(dev, "adi,shutdown-threshold-microamp", - &alert_threshold_ua)) { - if (st->r_sense_uohm) - /* - * set maximum default value from datasheet based on - * shunt-resistor - */ - alert_threshold_ua = div_u64(105840000000, - st->r_sense_uohm); - else - alert_threshold_ua = 0; + if (!device_property_read_u32(dev, "adi,shutdown-threshold-microamp", + &prop)) { + alert_threshold_ua = prop; + } else if (st->r_sense_uohm) { + /* + * set maximum default value from datasheet based on + * shunt-resistor + */ + alert_threshold_ua = div_u64(105840000000ULL, + st->r_sense_uohm); + } else { + alert_threshold_ua = 0; } st->vrange_high = device_property_read_bool(dev, "adi,vrange-high-enable"); - if (alert_threshold_ua && st->r_sense_uohm) - adm1177_write_alert_thr(st, alert_threshold_ua); + if (alert_threshold_ua && st->r_sense_uohm) { + ret = adm1177_write_alert_thr(st, alert_threshold_ua); + if (ret) + return ret; + } ret = adm1177_write_cmd(st, ADM1177_CMD_V_CONT | ADM1177_CMD_I_CONT |
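Note on the adm1177 hunk above: the alert threshold is now kept internally in microamps as a u64, while the hwmon interface speaks milliamps, so writes are clamped to the datasheet full-scale bound before scaling up by 1000. A minimal userspace sketch of that arithmetic, assuming a hypothetical 2 mOhm shunt (105840000 is the full-scale constant visible in the diff):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t r_sense_uohm = 2000;	/* hypothetical 2 mOhm shunt */

	/* largest writable limit in mA: full-scale constant / shunt */
	uint64_t max_ma = 105840000ULL / r_sense_uohm;
	/* stored internally in uA, matching the new u64 field */
	uint64_t thr_ua = max_ma * 1000;

	printf("max limit: %llu mA (%llu uA)\n",
	       (unsigned long long)max_ma, (unsigned long long)thr_ua);
	/* prints: max limit: 52920 mA (52920000 uA), matching the
	 * probe-time default div_u64(105840000000ULL, r_sense_uohm) */
	return 0;
}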
diff --git a/drivers/hwmon/aht10.c b/drivers/hwmon/aht10.c index 007befd..4ce019d 100644 --- a/drivers/hwmon/aht10.c +++ b/drivers/hwmon/aht10.c
@@ -37,7 +37,9 @@ #define AHT10_CMD_MEAS 0b10101100 #define AHT10_CMD_RST 0b10111010 -#define DHT20_CMD_INIT 0x71 +#define AHT20_CMD_INIT 0b10111110 + +#define DHT20_CMD_INIT 0b01110001 /* * Flags in the answer byte/command @@ -341,7 +343,7 @@ static int aht10_probe(struct i2c_client *client) data->meas_size = AHT20_MEAS_SIZE; data->crc8 = true; crc8_populate_msb(crc8_table, AHT20_CRC8_POLY); - data->init_cmd = AHT10_CMD_INIT; + data->init_cmd = AHT20_CMD_INIT; break; case dht20: data->meas_size = AHT20_MEAS_SIZE;
diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index 86f4444..adedaf0 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c
@@ -111,6 +111,8 @@ enum ec_sensors { ec_sensor_temp_mb, /* "T_Sensor" temperature sensor reading [℃] */ ec_sensor_temp_t_sensor, + /* like ec_sensor_temp_t_sensor, but at an alternate address [℃] */ + ec_sensor_temp_t_sensor_alt1, /* VRM temperature [℃] */ ec_sensor_temp_vrm, /* VRM east (right) temperature [℃] */ @@ -160,6 +162,7 @@ enum ec_sensors { #define SENSOR_TEMP_CPU_PACKAGE BIT(ec_sensor_temp_cpu_package) #define SENSOR_TEMP_MB BIT(ec_sensor_temp_mb) #define SENSOR_TEMP_T_SENSOR BIT(ec_sensor_temp_t_sensor) +#define SENSOR_TEMP_T_SENSOR_ALT1 BIT(ec_sensor_temp_t_sensor_alt1) #define SENSOR_TEMP_VRM BIT(ec_sensor_temp_vrm) #define SENSOR_TEMP_VRME BIT(ec_sensor_temp_vrme) #define SENSOR_TEMP_VRMW BIT(ec_sensor_temp_vrmw) @@ -279,6 +282,8 @@ static const struct ec_sensor_info sensors_family_amd_600[] = { EC_SENSOR("VRM", hwmon_temp, 1, 0x00, 0x33), [ec_sensor_temp_t_sensor] = EC_SENSOR("T_Sensor", hwmon_temp, 1, 0x00, 0x36), + [ec_sensor_temp_t_sensor_alt1] = + EC_SENSOR("T_Sensor", hwmon_temp, 1, 0x00, 0x37), [ec_sensor_fan_cpu_opt] = EC_SENSOR("CPU_Opt", hwmon_fan, 2, 0x00, 0xb0), [ec_sensor_temp_water_in] = @@ -519,7 +524,7 @@ static const struct ec_board_info board_info_prime_x570_pro = { static const struct ec_board_info board_info_prime_x670e_pro_wifi = { .sensors = SENSOR_TEMP_CPU | SENSOR_TEMP_CPU_PACKAGE | SENSOR_TEMP_MB | SENSOR_TEMP_VRM | - SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CPU_OPT, + SENSOR_TEMP_T_SENSOR_ALT1 | SENSOR_FAN_CPU_OPT, .mutex_path = ACPI_GLOBAL_LOCK_PSEUDO_PATH, .family = family_amd_600_series, };
diff --git a/drivers/hwmon/axi-fan-control.c b/drivers/hwmon/axi-fan-control.c index b7bb325..01590df 100644 --- a/drivers/hwmon/axi-fan-control.c +++ b/drivers/hwmon/axi-fan-control.c
@@ -507,7 +507,7 @@ static int axi_fan_control_probe(struct platform_device *pdev) ret = devm_request_threaded_irq(&pdev->dev, ctl->irq, NULL, axi_fan_control_irq_handler, IRQF_ONESHOT | IRQF_TRIGGER_HIGH, - pdev->driver_override, ctl); + NULL, ctl); if (ret) return dev_err_probe(&pdev->dev, ret, "failed to request an irq\n");
diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index e233aaf..5cfb98a 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c
@@ -3590,10 +3590,13 @@ static int it87_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct it87_data *data = dev_get_drvdata(dev); + int err; it87_resume_sio(pdev); - it87_lock(data); + err = it87_lock(data); + if (err) + return err; it87_check_pwm(dev); it87_check_limit_regs(data);
diff --git a/drivers/hwmon/macsmc-hwmon.c b/drivers/hwmon/macsmc-hwmon.c index 1c0bbec..1500ec2 100644 --- a/drivers/hwmon/macsmc-hwmon.c +++ b/drivers/hwmon/macsmc-hwmon.c
@@ -22,6 +22,7 @@ #include <linux/bitfield.h> #include <linux/hwmon.h> +#include <linux/math64.h> #include <linux/mfd/macsmc.h> #include <linux/module.h> #include <linux/of.h> @@ -130,7 +131,7 @@ static int macsmc_hwmon_read_ioft_scaled(struct apple_smc *smc, smc_key key, if (ret < 0) return ret; - *p = mult_frac(val, scale, 65536); + *p = mul_u64_u32_div(val, scale, 65536); return 0; } @@ -140,7 +141,7 @@ static int macsmc_hwmon_read_ioft_scaled(struct apple_smc *smc, smc_key key, * them. */ static int macsmc_hwmon_read_f32_scaled(struct apple_smc *smc, smc_key key, - int *p, int scale) + long *p, int scale) { u32 fval; u64 val; @@ -162,21 +163,21 @@ static int macsmc_hwmon_read_f32_scaled(struct apple_smc *smc, smc_key key, val = 0; else if (exp < 0) val >>= -exp; - else if (exp != 0 && (val & ~((1UL << (64 - exp)) - 1))) /* overflow */ + else if (exp != 0 && (val & ~((1ULL << (64 - exp)) - 1))) /* overflow */ val = U64_MAX; else val <<= exp; if (fval & FLT_SIGN_MASK) { - if (val > (-(s64)INT_MIN)) - *p = INT_MIN; + if (val > (u64)LONG_MAX + 1) + *p = LONG_MIN; else - *p = -val; + *p = -(long)val; } else { - if (val > INT_MAX) - *p = INT_MAX; + if (val > (u64)LONG_MAX) + *p = LONG_MAX; else - *p = val; + *p = (long)val; } return 0; @@ -195,7 +196,7 @@ static int macsmc_hwmon_read_key(struct apple_smc *smc, switch (sensor->info.type_code) { /* 32-bit IEEE 754 float */ case __SMC_KEY('f', 'l', 't', ' '): { - u32 flt_ = 0; + long flt_ = 0; ret = macsmc_hwmon_read_f32_scaled(smc, sensor->macsmc_key, &flt_, scale); @@ -214,7 +215,10 @@ static int macsmc_hwmon_read_key(struct apple_smc *smc, if (ret) return ret; - *val = (long)ioft; + if (ioft > LONG_MAX) + *val = LONG_MAX; + else + *val = (long)ioft; break; } default: @@ -224,29 +228,26 @@ static int macsmc_hwmon_read_key(struct apple_smc *smc, return 0; } -static int macsmc_hwmon_write_f32(struct apple_smc *smc, smc_key key, int value) +static int macsmc_hwmon_write_f32(struct apple_smc *smc, smc_key key, long value) { u64 val; u32 fval = 0; - int exp = 0, neg; + int exp, neg; + neg = value < 0; val = abs(value); - neg = val != value; if (val) { - int msb = __fls(val) - exp; + exp = __fls(val); - if (msb > 23) { - val >>= msb - FLT_MANT_BIAS; - exp -= msb - FLT_MANT_BIAS; - } else if (msb < 23) { - val <<= FLT_MANT_BIAS - msb; - exp += msb; - } + if (exp > 23) + val >>= exp - 23; + else + val <<= 23 - exp; fval = FIELD_PREP(FLT_SIGN_MASK, neg) | FIELD_PREP(FLT_EXP_MASK, exp + FLT_EXP_BIAS) | - FIELD_PREP(FLT_MANT_MASK, val); + FIELD_PREP(FLT_MANT_MASK, val & FLT_MANT_MASK); } return apple_smc_write_u32(smc, key, fval); @@ -663,8 +664,8 @@ static int macsmc_hwmon_populate_sensors(struct macsmc_hwmon *hwmon, if (!hwmon->volt.sensors) return -ENOMEM; - for_each_child_of_node_with_prefix(hwmon_node, key_node, "volt-") { - sensor = &hwmon->temp.sensors[hwmon->temp.count]; + for_each_child_of_node_with_prefix(hwmon_node, key_node, "voltage-") { + sensor = &hwmon->volt.sensors[hwmon->volt.count]; if (!macsmc_hwmon_create_sensor(hwmon->dev, hwmon->smc, key_node, sensor)) { sensor->attrs = HWMON_I_INPUT;
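The rewritten macsmc_hwmon_write_f32() above packs a plain integer into IEEE 754 binary32 bits using shifts only: find the most significant bit, normalize the mantissa to bit 23 (truncating, not rounding, anything below it), and bias the exponent. A standalone sketch of that packing, assuming the driver's FLT_* masks follow the standard binary32 layout (sign bit 31, exponent bits 30:23, bias 127); __builtin_clzll stands in for the kernel's __fls():

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint32_t pack_f32(long value)
{
	int neg = value < 0;
	uint64_t val = neg ? -(uint64_t)value : (uint64_t)value;
	uint32_t fval = 0;

	if (val) {
		int exp = 63 - __builtin_clzll(val);	/* index of the MSB */

		if (exp > 23)
			val >>= exp - 23;	/* truncate excess precision */
		else
			val <<= 23 - exp;	/* normalize mantissa to bit 23 */

		fval = (uint32_t)neg << 31 |		/* FLT_SIGN_MASK */
		       (uint32_t)(exp + 127) << 23 |	/* exp + FLT_EXP_BIAS */
		       ((uint32_t)val & 0x7fffff);	/* FLT_MANT_MASK */
	}
	return fval;
}

int main(void)
{
	uint32_t bits = pack_f32(1500);
	float f;

	memcpy(&f, &bits, sizeof(f));
	printf("1500 -> 0x%08x -> %g\n", bits, f);	/* 0x44bb8000 -> 1500 */
	return 0;
}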
diff --git a/drivers/hwmon/max6639.c b/drivers/hwmon/max6639.c index a0a1dbb..163d31f 100644 --- a/drivers/hwmon/max6639.c +++ b/drivers/hwmon/max6639.c
@@ -232,7 +232,7 @@ static int max6639_read_fan(struct device *dev, u32 attr, int channel, static int max6639_set_ppr(struct max6639_data *data, int channel, u8 ppr) { /* Decrement the PPR value and shift left by 6 to match the register format */ - return regmap_write(data->regmap, MAX6639_REG_FAN_PPR(channel), ppr-- << 6); + return regmap_write(data->regmap, MAX6639_REG_FAN_PPR(channel), --ppr << 6); } static int max6639_write_fan(struct device *dev, u32 attr, int channel, @@ -524,8 +524,8 @@ static int max6639_probe_child_from_dt(struct i2c_client *client, { struct device *dev = &client->dev; - u32 i; - int err, val; + u32 i, val; + int err; err = of_property_read_u32(child, "reg", &i); if (err) { @@ -540,8 +540,8 @@ static int max6639_probe_child_from_dt(struct i2c_client *client, err = of_property_read_u32(child, "pulses-per-revolution", &val); if (!err) { - if (val < 1 || val > 5) { - dev_err(dev, "invalid pulses-per-revolution %d of %pOFn\n", val, child); + if (val < 1 || val > 4) { + dev_err(dev, "invalid pulses-per-revolution %u of %pOFn\n", val, child); return -EINVAL; } data->ppr[i] = val; @@ -607,7 +607,7 @@ static int max6639_init_client(struct i2c_client *client, return err; /* Fans PWM polarity high by default */ - err = regmap_write(data->regmap, MAX6639_REG_FAN_CONFIG2a(i), 0x00); + err = regmap_write(data->regmap, MAX6639_REG_FAN_CONFIG2a(i), 0x02); if (err) return err;
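The one-character max6639_set_ppr() fix above is the classic post- versus pre-decrement bug: ppr-- evaluates to the old value, so the register received the undecremented PPR. A tiny demonstration:

#include <stdio.h>

int main(void)
{
	unsigned char ppr;

	ppr = 2;
	/* post-decrement yields the original value: writes 2 << 6 */
	printf("ppr-- << 6 = 0x%02x\n", (unsigned int)(ppr-- << 6));

	ppr = 2;
	/* pre-decrement yields the decremented value: writes 1 << 6 */
	printf("--ppr << 6 = 0x%02x\n", (unsigned int)(--ppr << 6));
	return 0;
}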
diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index 89928d3..42cc606 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c
@@ -420,6 +420,12 @@ static ssize_t occ_show_freq_2(struct device *dev, return sysfs_emit(buf, "%u\n", val); } +static u64 occ_get_powr_avg(u64 accum, u32 samples) +{ + return (samples == 0) ? 0 : + mul_u64_u32_div(accum, 1000000UL, samples); +} + static ssize_t occ_show_power_1(struct device *dev, struct device_attribute *attr, char *buf) { @@ -441,9 +447,8 @@ static ssize_t occ_show_power_1(struct device *dev, val = get_unaligned_be16(&power->sensor_id); break; case 1: - val = get_unaligned_be32(&power->accumulator) / - get_unaligned_be32(&power->update_tag); - val *= 1000000ULL; + val = occ_get_powr_avg(get_unaligned_be32(&power->accumulator), + get_unaligned_be32(&power->update_tag)); break; case 2: val = (u64)get_unaligned_be32(&power->update_tag) * @@ -459,12 +464,6 @@ static ssize_t occ_show_power_1(struct device *dev, return sysfs_emit(buf, "%llu\n", val); } -static u64 occ_get_powr_avg(u64 accum, u32 samples) -{ - return (samples == 0) ? 0 : - mul_u64_u32_div(accum, 1000000UL, samples); -} - static ssize_t occ_show_power_2(struct device *dev, struct device_attribute *attr, char *buf) { @@ -725,7 +724,7 @@ static ssize_t occ_show_extended(struct device *dev, switch (sattr->nr) { case 0: if (extn->flags & EXTN_FLAG_SENSOR_ID) { - rc = sysfs_emit(buf, "%u", + rc = sysfs_emit(buf, "%u\n", get_unaligned_be32(&extn->sensor_id)); } else { rc = sysfs_emit(buf, "%4phN\n", extn->name);
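Hoisting occ_get_powr_avg() above occ_show_power_1() lets both power attributes share the same guarded average: multiply before dividing so sub-sample precision survives, use a 128-bit intermediate so a 64-bit accumulator cannot overflow, and map samples == 0 to 0 instead of dividing by zero. A userspace sketch with a stand-in for the kernel's mul_u64_u32_div():

#include <stdio.h>
#include <stdint.h>

/* stand-in for the kernel helper: (a * mul) / div with 128-bit math */
static uint64_t mul_u64_u32_div(uint64_t a, uint32_t mul, uint32_t div)
{
	return (uint64_t)(((unsigned __int128)a * mul) / div);
}

static uint64_t powr_avg(uint64_t accum, uint32_t samples)
{
	return samples == 0 ? 0 : mul_u64_u32_div(accum, 1000000, samples);
}

int main(void)
{
	/* divide-first truncates 999/1000 to 0 before scaling ... */
	printf("old: %llu\n", (unsigned long long)(999ULL / 1000 * 1000000));
	/* ... multiply-first keeps it: 999000 */
	printf("new: %llu\n", (unsigned long long)powr_avg(999, 1000));
	return 0;
}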
diff --git a/drivers/hwmon/peci/cputemp.c b/drivers/hwmon/peci/cputemp.c index b2fc936..457089c 100644 --- a/drivers/hwmon/peci/cputemp.c +++ b/drivers/hwmon/peci/cputemp.c
@@ -131,7 +131,7 @@ static int get_temp_target(struct peci_cputemp *priv, enum peci_temp_target_type *val = priv->temp.target.tjmax; break; case crit_hyst_type: - *val = priv->temp.target.tjmax - priv->temp.target.tcontrol; + *val = priv->temp.target.tcontrol; break; default: ret = -EOPNOTSUPP; @@ -319,7 +319,7 @@ static umode_t cputemp_is_visible(const void *data, enum hwmon_sensor_types type { const struct peci_cputemp *priv = data; - if (channel > CPUTEMP_CHANNEL_NUMS) + if (channel >= CPUTEMP_CHANNEL_NUMS) return 0; if (channel < channel_core)
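The cputemp_is_visible() fix above is an off-by-one in a bounds check: with CPUTEMP_CHANNEL_NUMS channels, valid indices run 0 .. NUMS-1, so 'channel > NUMS' still admits channel == NUMS, one past the end. A sketch, with a placeholder channel count:

#include <stdio.h>

#define CPUTEMP_CHANNEL_NUMS 4	/* placeholder; actual count is per-driver */

int main(void)
{
	int channel = CPUTEMP_CHANNEL_NUMS;	/* first invalid index */

	if (!(channel > CPUTEMP_CHANNEL_NUMS))
		printf("old check admits out-of-range channel %d\n", channel);
	if (channel >= CPUTEMP_CHANNEL_NUMS)
		printf("new check rejects channel %d\n", channel);
	return 0;
}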
diff --git a/drivers/hwmon/pmbus/hac300s.c b/drivers/hwmon/pmbus/hac300s.c index 0a1d52c..a073db1 100644 --- a/drivers/hwmon/pmbus/hac300s.c +++ b/drivers/hwmon/pmbus/hac300s.c
@@ -58,6 +58,8 @@ static int hac300s_read_word_data(struct i2c_client *client, int page, case PMBUS_MFR_VOUT_MIN: case PMBUS_READ_VOUT: rv = pmbus_read_word_data(client, page, phase, reg); + if (rv < 0) + return rv; return FIELD_GET(LINEAR11_MANTISSA_MASK, rv); default: return -ENODATA;
diff --git a/drivers/hwmon/pmbus/ina233.c b/drivers/hwmon/pmbus/ina233.c index dde1e16..7aebd85 100644 --- a/drivers/hwmon/pmbus/ina233.c +++ b/drivers/hwmon/pmbus/ina233.c
@@ -67,10 +67,13 @@ static int ina233_read_word_data(struct i2c_client *client, int page, switch (reg) { case PMBUS_VIRT_READ_VMON: ret = pmbus_read_word_data(client, 0, 0xff, MFR_READ_VSHUNT); + if (ret < 0) + return ret; /* Adjust returned value to match VIN coefficients */ /* VIN: 1.25 mV VSHUNT: 2.5 uV LSB */ - ret = DIV_ROUND_CLOSEST(ret * 25, 12500); + ret = clamp_val(DIV_ROUND_CLOSEST((s16)ret * 25, 12500), + S16_MIN, S16_MAX) & 0xffff; break; default: ret = -ENODATA;
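The ina233 VSHUNT-through-VIN path above rescales units: VSHUNT has a 2.5 uV LSB while the VIN coefficients assume a 1.25 mV LSB, a ratio of 2.5/1250 = 25/12500, and the result is then clamped to the signed 16-bit range a PMBus word can carry. A worked example (DIV_ROUND_CLOSEST sketched for non-negative dividends only; the kernel macro also handles negative values):

#include <stdio.h>
#include <stdint.h>

#define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

int main(void)
{
	int16_t raw = 20000;	/* 20000 * 2.5 uV = 50 mV across the shunt */
	int scaled = DIV_ROUND_CLOSEST(raw * 25, 12500);

	/* 50 mV / 1.25 mV-per-LSB = 40 */
	printf("raw %d -> VIN units %d\n", raw, scaled);
	return 0;
}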
diff --git a/drivers/hwmon/pmbus/isl68137.c b/drivers/hwmon/pmbus/isl68137.c index 97b6183..3e3a887 100644 --- a/drivers/hwmon/pmbus/isl68137.c +++ b/drivers/hwmon/pmbus/isl68137.c
@@ -96,10 +96,21 @@ static ssize_t isl68137_avs_enable_show_page(struct i2c_client *client, int page, char *buf) { - int val = pmbus_read_byte_data(client, page, PMBUS_OPERATION); + int val; - return sprintf(buf, "%d\n", - (val & ISL68137_VOUT_AVS) == ISL68137_VOUT_AVS ? 1 : 0); + val = pmbus_lock_interruptible(client); + if (val) + return val; + + val = pmbus_read_byte_data(client, page, PMBUS_OPERATION); + + pmbus_unlock(client); + + if (val < 0) + return val; + + return sysfs_emit(buf, "%d\n", + (val & ISL68137_VOUT_AVS) == ISL68137_VOUT_AVS); } static ssize_t isl68137_avs_enable_store_page(struct i2c_client *client, @@ -115,6 +126,10 @@ static ssize_t isl68137_avs_enable_store_page(struct i2c_client *client, op_val = result ? ISL68137_VOUT_AVS : 0; + rc = pmbus_lock_interruptible(client); + if (rc) + return rc; + /* * Writes to VOUT setpoint over AVSBus will persist after the VRM is * switched to PMBus control. Switching back to AVSBus control @@ -126,17 +141,20 @@ static ssize_t isl68137_avs_enable_store_page(struct i2c_client *client, rc = pmbus_read_word_data(client, page, 0xff, PMBUS_VOUT_COMMAND); if (rc < 0) - return rc; + goto unlock; rc = pmbus_write_word_data(client, page, PMBUS_VOUT_COMMAND, rc); if (rc < 0) - return rc; + goto unlock; } rc = pmbus_update_byte_data(client, page, PMBUS_OPERATION, ISL68137_VOUT_AVS, op_val); +unlock: + pmbus_unlock(client); + return (rc < 0) ? rc : count; }
diff --git a/drivers/hwmon/pmbus/ltc4286.c b/drivers/hwmon/pmbus/ltc4286.c index aabd0bc..8715d38 100644 --- a/drivers/hwmon/pmbus/ltc4286.c +++ b/drivers/hwmon/pmbus/ltc4286.c
@@ -173,3 +173,4 @@ module_i2c_driver(ltc4286_driver); MODULE_AUTHOR("Delphine CC Chiu <Delphine_CC_Chiu@wiwynn.com>"); MODULE_DESCRIPTION("PMBUS driver for LTC4286 and compatibles"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS("PMBUS");
diff --git a/drivers/hwmon/pmbus/mp2869.c b/drivers/hwmon/pmbus/mp2869.c index cc69a1e..4647892 100644 --- a/drivers/hwmon/pmbus/mp2869.c +++ b/drivers/hwmon/pmbus/mp2869.c
@@ -165,7 +165,7 @@ static int mp2869_read_byte_data(struct i2c_client *client, int page, int reg) { const struct pmbus_driver_info *info = pmbus_get_driver_info(client); struct mp2869_data *data = to_mp2869_data(info); - int ret; + int ret, mfr; switch (reg) { case PMBUS_VOUT_MODE: @@ -188,11 +188,14 @@ static int mp2869_read_byte_data(struct i2c_client *client, int page, int reg) if (ret < 0) return ret; + mfr = pmbus_read_byte_data(client, page, + PMBUS_STATUS_MFR_SPECIFIC); + if (mfr < 0) + return mfr; + ret = (ret & ~GENMASK(2, 2)) | FIELD_PREP(GENMASK(2, 2), - FIELD_GET(GENMASK(1, 1), - pmbus_read_byte_data(client, page, - PMBUS_STATUS_MFR_SPECIFIC))); + FIELD_GET(GENMASK(1, 1), mfr)); break; case PMBUS_STATUS_TEMPERATURE: /* @@ -207,15 +210,16 @@ static int mp2869_read_byte_data(struct i2c_client *client, int page, int reg) if (ret < 0) return ret; + mfr = pmbus_read_byte_data(client, page, + PMBUS_STATUS_MFR_SPECIFIC); + if (mfr < 0) + return mfr; + ret = (ret & ~GENMASK(7, 6)) | FIELD_PREP(GENMASK(6, 6), - FIELD_GET(GENMASK(1, 1), - pmbus_read_byte_data(client, page, - PMBUS_STATUS_MFR_SPECIFIC))) | + FIELD_GET(GENMASK(1, 1), mfr)) | FIELD_PREP(GENMASK(7, 7), - FIELD_GET(GENMASK(1, 1), - pmbus_read_byte_data(client, page, - PMBUS_STATUS_MFR_SPECIFIC))); + FIELD_GET(GENMASK(1, 1), mfr)); break; default: ret = -ENODATA; @@ -230,7 +234,7 @@ static int mp2869_read_word_data(struct i2c_client *client, int page, int phase, { const struct pmbus_driver_info *info = pmbus_get_driver_info(client); struct mp2869_data *data = to_mp2869_data(info); - int ret; + int ret, mfr; switch (reg) { case PMBUS_STATUS_WORD: @@ -246,11 +250,14 @@ static int mp2869_read_word_data(struct i2c_client *client, int page, int phase, if (ret < 0) return ret; + mfr = pmbus_read_byte_data(client, page, + PMBUS_STATUS_MFR_SPECIFIC); + if (mfr < 0) + return mfr; + ret = (ret & ~GENMASK(2, 2)) | FIELD_PREP(GENMASK(2, 2), - FIELD_GET(GENMASK(1, 1), - pmbus_read_byte_data(client, page, - PMBUS_STATUS_MFR_SPECIFIC))); + FIELD_GET(GENMASK(1, 1), mfr)); break; case PMBUS_READ_VIN: /*
diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c index c31982d..d0bc47b 100644 --- a/drivers/hwmon/pmbus/mp2975.c +++ b/drivers/hwmon/pmbus/mp2975.c
@@ -313,6 +313,8 @@ static int mp2973_read_word_data(struct i2c_client *client, int page, case PMBUS_STATUS_WORD: /* MP2973 & MP2971 return PGOOD instead of PB_STATUS_POWER_GOOD_N. */ ret = pmbus_read_word_data(client, page, phase, reg); + if (ret < 0) + return ret; ret ^= PB_STATUS_POWER_GOOD_N; break; case PMBUS_OT_FAULT_LIMIT:
diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index be6d05d..572be3e 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -6,6 +6,7 @@ * Copyright (c) 2012 Guenter Roeck */ +#include <linux/atomic.h> #include <linux/debugfs.h> #include <linux/delay.h> #include <linux/dcache.h> @@ -21,8 +22,8 @@ #include <linux/pmbus.h> #include <linux/regulator/driver.h> #include <linux/regulator/machine.h> -#include <linux/of.h> #include <linux/thermal.h> +#include <linux/workqueue.h> #include "pmbus.h" /* @@ -112,6 +113,11 @@ struct pmbus_data { struct mutex update_lock; +#if IS_ENABLED(CONFIG_REGULATOR) + atomic_t regulator_events[PMBUS_PAGES]; + struct work_struct regulator_notify_work; +#endif + bool has_status_word; /* device uses STATUS_WORD register */ int (*read_status)(struct i2c_client *client, int page); @@ -1209,6 +1215,12 @@ static ssize_t pmbus_show_boolean(struct device *dev, return sysfs_emit(buf, "%d\n", val); } +static ssize_t pmbus_show_zero(struct device *dev, + struct device_attribute *devattr, char *buf) +{ + return sysfs_emit(buf, "0\n"); +} + static ssize_t pmbus_show_sensor(struct device *dev, struct device_attribute *devattr, char *buf) { @@ -1407,7 +1419,7 @@ static struct pmbus_sensor *pmbus_add_sensor(struct pmbus_data *data, int reg, enum pmbus_sensor_classes class, bool update, bool readonly, - bool convert) + bool writeonly, bool convert) { struct pmbus_sensor *sensor; struct device_attribute *a; @@ -1436,7 +1448,8 @@ static struct pmbus_sensor *pmbus_add_sensor(struct pmbus_data *data, sensor->data = -ENODATA; pmbus_dev_attr_init(a, sensor->name, readonly ? 0444 : 0644, - pmbus_show_sensor, pmbus_set_sensor); + writeonly ? pmbus_show_zero : pmbus_show_sensor, + pmbus_set_sensor); if (pmbus_add_attribute(data, &a->attr)) return NULL; @@ -1495,8 +1508,10 @@ static int pmbus_add_label(struct pmbus_data *data, struct pmbus_limit_attr { u16 reg; /* Limit register */ u16 sbit; /* Alarm attribute status bit */ - bool update; /* True if register needs updates */ - bool low; /* True if low limit; for limits with compare functions only */ + bool readonly:1; /* True if the attribute is read-only */ + bool writeonly:1; /* True if the attribute is write-only */ + bool update:1; /* True if register needs updates */ + bool low:1; /* True if low limit; for limits with compare functions only */ const char *attr; /* Attribute name */ const char *alarm; /* Alarm attribute name */ }; @@ -1511,9 +1526,9 @@ struct pmbus_sensor_attr { u8 nlimit; /* # of limit registers */ enum pmbus_sensor_classes class;/* sensor class */ const char *label; /* sensor label */ - bool paged; /* true if paged sensor */ - bool update; /* true if update needed */ - bool compare; /* true if compare function needed */ + bool paged:1; /* true if paged sensor */ + bool update:1; /* true if update needed */ + bool compare:1; /* true if compare function needed */ u32 func; /* sensor mask */ u32 sfunc; /* sensor status mask */ int sreg; /* status register */ @@ -1544,7 +1559,7 @@ static int pmbus_add_limit_attrs(struct i2c_client *client, curr = pmbus_add_sensor(data, name, l->attr, index, page, 0xff, l->reg, attr->class, attr->update || l->update, - false, true); + l->readonly, l->writeonly, true); if (!curr) return -ENOMEM; if (l->sbit && (info->func[page] & attr->sfunc)) { @@ -1584,7 +1599,7 @@ static int pmbus_add_sensor_attrs_one(struct i2c_client *client, return ret; } base = pmbus_add_sensor(data, name, "input", index, page, phase, - attr->reg, attr->class, true, true, true); + attr->reg, attr->class, true, true, false, true); if (!base) return -ENOMEM; /* No limit and alarm attributes for phase specific sensors */ @@ -1707,23 
+1722,29 @@ static const struct pmbus_limit_attr vin_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_VIN_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_VIN_MIN, .update = true, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_VIN_MAX, .update = true, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_VIN_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_VIN_MIN, + .readonly = true, .attr = "rated_min", }, { .reg = PMBUS_MFR_VIN_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -1776,23 +1797,29 @@ static const struct pmbus_limit_attr vout_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_VOUT_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_VOUT_MIN, .update = true, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_VOUT_MAX, .update = true, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_VOUT_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_VOUT_MIN, + .readonly = true, .attr = "rated_min", }, { .reg = PMBUS_MFR_VOUT_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -1852,20 +1879,25 @@ static const struct pmbus_limit_attr iin_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_IIN_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_IIN_MIN, .update = true, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_IIN_MAX, .update = true, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_IIN_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_IIN_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -1889,20 +1921,25 @@ static const struct pmbus_limit_attr iout_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_IOUT_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_IOUT_MIN, .update = true, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_IOUT_MAX, .update = true, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_IOUT_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_IOUT_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -1943,20 +1980,25 @@ static const struct pmbus_limit_attr pin_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_PIN_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_PIN_MIN, .update = true, + .readonly = true, .attr = "input_lowest", }, { .reg = PMBUS_VIRT_READ_PIN_MAX, .update = true, + .readonly = true, .attr = "input_highest", }, { .reg = PMBUS_VIRT_RESET_PIN_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_PIN_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -1980,20 +2022,25 @@ static const struct pmbus_limit_attr pout_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_POUT_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_POUT_MIN, .update = true, + .readonly = true, .attr = "input_lowest", }, { .reg = PMBUS_VIRT_READ_POUT_MAX, .update = true, + .readonly = true, .attr = "input_highest", }, { .reg = PMBUS_VIRT_RESET_POUT_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_POUT_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -2049,18 +2096,23 @@ static const struct pmbus_limit_attr temp_limit_attrs[] = { .sbit = PB_TEMP_OT_FAULT, }, { .reg = PMBUS_VIRT_READ_TEMP_MIN, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_TEMP_AVG, + .readonly 
= true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_TEMP_MAX, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_TEMP_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_MAX_TEMP_1, + .readonly = true, .attr = "rated_max", }, }; @@ -2090,18 +2142,23 @@ static const struct pmbus_limit_attr temp_limit_attrs2[] = { .sbit = PB_TEMP_OT_FAULT, }, { .reg = PMBUS_VIRT_READ_TEMP2_MIN, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_TEMP2_AVG, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_TEMP2_MAX, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_TEMP2_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_MAX_TEMP_2, + .readonly = true, .attr = "rated_max", }, }; @@ -2131,6 +2188,7 @@ static const struct pmbus_limit_attr temp_limit_attrs3[] = { .sbit = PB_TEMP_OT_FAULT, }, { .reg = PMBUS_MFR_MAX_TEMP_3, + .readonly = true, .attr = "rated_max", }, }; @@ -2214,7 +2272,7 @@ static int pmbus_add_fan_ctrl(struct i2c_client *client, sensor = pmbus_add_sensor(data, "fan", "target", index, page, 0xff, PMBUS_VIRT_FAN_TARGET_1 + id, PSC_FAN, - false, false, true); + false, false, false, true); if (!sensor) return -ENOMEM; @@ -2225,14 +2283,14 @@ static int pmbus_add_fan_ctrl(struct i2c_client *client, sensor = pmbus_add_sensor(data, "pwm", NULL, index, page, 0xff, PMBUS_VIRT_PWM_1 + id, PSC_PWM, - false, false, true); + false, false, false, true); if (!sensor) return -ENOMEM; sensor = pmbus_add_sensor(data, "pwm", "enable", index, page, 0xff, PMBUS_VIRT_PWM_ENABLE_1 + id, PSC_PWM, - true, false, false); + true, false, false, false); if (!sensor) return -ENOMEM; @@ -2274,7 +2332,7 @@ static int pmbus_add_fan_attributes(struct i2c_client *client, if (pmbus_add_sensor(data, "fan", "input", index, page, 0xff, pmbus_fan_registers[f], - PSC_FAN, true, true, true) == NULL) + PSC_FAN, true, true, false, true) == NULL) return -ENOMEM; /* Fan control */ @@ -3176,12 +3234,19 @@ static int pmbus_regulator_get_voltage(struct regulator_dev *rdev) .class = PSC_VOLTAGE_OUT, .convert = true, }; + int ret; + mutex_lock(&data->update_lock); s.data = _pmbus_read_word_data(client, s.page, 0xff, PMBUS_READ_VOUT); - if (s.data < 0) - return s.data; + if (s.data < 0) { + ret = s.data; + goto unlock; + } - return (int)pmbus_reg2data(data, &s) * 1000; /* unit is uV */ + ret = (int)pmbus_reg2data(data, &s) * 1000; /* unit is uV */ +unlock: + mutex_unlock(&data->update_lock); + return ret; } static int pmbus_regulator_set_voltage(struct regulator_dev *rdev, int min_uv, @@ -3198,16 +3263,22 @@ static int pmbus_regulator_set_voltage(struct regulator_dev *rdev, int min_uv, }; int val = DIV_ROUND_CLOSEST(min_uv, 1000); /* convert to mV */ int low, high; + int ret; *selector = 0; + mutex_lock(&data->update_lock); low = pmbus_regulator_get_low_margin(client, s.page); - if (low < 0) - return low; + if (low < 0) { + ret = low; + goto unlock; + } high = pmbus_regulator_get_high_margin(client, s.page); - if (high < 0) - return high; + if (high < 0) { + ret = high; + goto unlock; + } /* Make sure we are within margins */ if (low > val) @@ -3217,7 +3288,10 @@ static int pmbus_regulator_set_voltage(struct regulator_dev *rdev, int min_uv, val = pmbus_data2reg(data, &s, val); - return _pmbus_write_word_data(client, s.page, PMBUS_VOUT_COMMAND, (u16)val); + ret = _pmbus_write_word_data(client, s.page, PMBUS_VOUT_COMMAND, (u16)val); +unlock: + mutex_unlock(&data->update_lock); + return ret; } static int 
pmbus_regulator_list_voltage(struct regulator_dev *rdev, @@ -3227,6 +3301,7 @@ static int pmbus_regulator_list_voltage(struct regulator_dev *rdev, struct i2c_client *client = to_i2c_client(dev->parent); struct pmbus_data *data = i2c_get_clientdata(client); int val, low, high; + int ret; if (data->flags & PMBUS_VOUT_PROTECTED) return 0; @@ -3239,18 +3314,29 @@ static int pmbus_regulator_list_voltage(struct regulator_dev *rdev, val = DIV_ROUND_CLOSEST(rdev->desc->min_uV + (rdev->desc->uV_step * selector), 1000); /* convert to mV */ + mutex_lock(&data->update_lock); + low = pmbus_regulator_get_low_margin(client, rdev_get_id(rdev)); - if (low < 0) - return low; + if (low < 0) { + ret = low; + goto unlock; + } high = pmbus_regulator_get_high_margin(client, rdev_get_id(rdev)); - if (high < 0) - return high; + if (high < 0) { + ret = high; + goto unlock; + } - if (val >= low && val <= high) - return val * 1000; /* unit is uV */ + if (val >= low && val <= high) { + ret = val * 1000; /* unit is uV */ + goto unlock; + } - return 0; + ret = 0; +unlock: + mutex_unlock(&data->update_lock); + return ret; } const struct regulator_ops pmbus_regulator_ops = { @@ -3281,12 +3367,42 @@ int pmbus_regulator_init_cb(struct regulator_dev *rdev, } EXPORT_SYMBOL_NS_GPL(pmbus_regulator_init_cb, "PMBUS"); +static void pmbus_regulator_notify_work_cancel(void *data) +{ + struct pmbus_data *pdata = data; + + cancel_work_sync(&pdata->regulator_notify_work); +} + +static void pmbus_regulator_notify_worker(struct work_struct *work) +{ + struct pmbus_data *data = + container_of(work, struct pmbus_data, regulator_notify_work); + int i, j; + + for (i = 0; i < data->info->pages; i++) { + int event; + + event = atomic_xchg(&data->regulator_events[i], 0); + if (!event) + continue; + + for (j = 0; j < data->info->num_regulators; j++) { + if (i == rdev_get_id(data->rdevs[j])) { + regulator_notifier_call_chain(data->rdevs[j], + event, NULL); + break; + } + } + } +} + static int pmbus_regulator_register(struct pmbus_data *data) { struct device *dev = data->dev; const struct pmbus_driver_info *info = data->info; const struct pmbus_platform_data *pdata = dev_get_platdata(dev); - int i; + int i, ret; data->rdevs = devm_kzalloc(dev, sizeof(struct regulator_dev *) * info->num_regulators, GFP_KERNEL); @@ -3310,19 +3426,19 @@ static int pmbus_regulator_register(struct pmbus_data *data) info->reg_desc[i].name); } + INIT_WORK(&data->regulator_notify_work, pmbus_regulator_notify_worker); + + ret = devm_add_action_or_reset(dev, pmbus_regulator_notify_work_cancel, data); + if (ret) + return ret; + return 0; } static void pmbus_regulator_notify(struct pmbus_data *data, int page, int event) { - int j; - - for (j = 0; j < data->info->num_regulators; j++) { - if (page == rdev_get_id(data->rdevs[j])) { - regulator_notifier_call_chain(data->rdevs[j], event, NULL); - break; - } - } + atomic_or(event, &data->regulator_events[page]); + schedule_work(&data->regulator_notify_work); } #else static int pmbus_regulator_register(struct pmbus_data *data)
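The pmbus_core regulator-notification rework above moves regulator_notifier_call_chain(), which may sleep, out of the caller's context: events are latched per page with atomic_or() and drained in a workqueue via atomic_xchg(), which fetches and clears in one step so bits are neither lost nor reported twice. A minimal C11 sketch of that latch-and-drain pattern (consume() is a hypothetical stand-in for the notifier call):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int pending;	/* per-page event bits in the driver */

static void consume(int events)	/* stand-in for the notifier chain */
{
	printf("notify events 0x%x\n", events);
}

static void producer(int event)	/* cheap, callable from atomic context */
{
	atomic_fetch_or(&pending, event);
	/* the driver then does schedule_work(&regulator_notify_work) */
}

static void worker(void)	/* workqueue context, may sleep */
{
	int events = atomic_exchange(&pending, 0);	/* fetch and clear */

	if (events)
		consume(events);
}

int main(void)
{
	producer(0x08);
	producer(0x80);
	worker();	/* notify events 0x88 */
	return 0;
}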
diff --git a/drivers/hwmon/pmbus/pxe1610.c b/drivers/hwmon/pmbus/pxe1610.c index 6a4a978..24c1f96 100644 --- a/drivers/hwmon/pmbus/pxe1610.c +++ b/drivers/hwmon/pmbus/pxe1610.c
@@ -104,7 +104,10 @@ static int pxe1610_probe(struct i2c_client *client) * By default this device doesn't boot to page 0, so set page 0 * to access all pmbus registers. */ - i2c_smbus_write_byte_data(client, PMBUS_PAGE, 0); + ret = i2c_smbus_write_byte_data(client, PMBUS_PAGE, 0); + if (ret < 0) + return dev_err_probe(&client->dev, ret, + "Failed to set page 0\n"); /* Read Manufacturer id */ ret = i2c_smbus_read_block_data(client, PMBUS_MFR_ID, buf);
diff --git a/drivers/hwmon/pmbus/q54sj108a2.c b/drivers/hwmon/pmbus/q54sj108a2.c index fc030ca..d5d60a9 100644 --- a/drivers/hwmon/pmbus/q54sj108a2.c +++ b/drivers/hwmon/pmbus/q54sj108a2.c
@@ -79,7 +79,8 @@ static ssize_t q54sj108a2_debugfs_read(struct file *file, char __user *buf, int idx = *idxp; struct q54sj108a2_data *psu = to_psu(idxp, idx); char data[I2C_SMBUS_BLOCK_MAX + 2] = { 0 }; - char data_char[I2C_SMBUS_BLOCK_MAX + 2] = { 0 }; + char data_char[I2C_SMBUS_BLOCK_MAX * 2 + 2] = { 0 }; + char *out = data; char *res; switch (idx) { @@ -150,27 +151,27 @@ static ssize_t q54sj108a2_debugfs_read(struct file *file, char __user *buf, if (rc < 0) return rc; - res = bin2hex(data, data_char, 32); - rc = res - data; - + res = bin2hex(data_char, data, rc); + rc = res - data_char; + out = data_char; break; case Q54SJ108A2_DEBUGFS_FLASH_KEY: rc = i2c_smbus_read_block_data(psu->client, PMBUS_FLASH_KEY_WRITE, data); if (rc < 0) return rc; - res = bin2hex(data, data_char, 4); - rc = res - data; - + res = bin2hex(data_char, data, rc); + rc = res - data_char; + out = data_char; break; default: return -EINVAL; } - data[rc] = '\n'; + out[rc] = '\n'; rc += 2; - return simple_read_from_buffer(buf, count, ppos, data, rc); + return simple_read_from_buffer(buf, count, ppos, out, rc); } static ssize_t q54sj108a2_debugfs_write(struct file *file, const char __user *buf,
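The q54sj108a2 debugfs fix above corrects both the direction and the sizing of the hex dump: bin2hex(dst, src, n) writes two ASCII characters per input byte, so dumping a full SMBus block of I2C_SMBUS_BLOCK_MAX bytes needs a 2x-sized destination plus room for the trailing newline; the old code also had dst and src swapped. A userspace sketch with a minimal re-implementation of bin2hex():

#include <stdio.h>
#include <stddef.h>

#define I2C_SMBUS_BLOCK_MAX 32

static char *bin2hex(char *dst, const void *src, size_t count)
{
	static const char hex[] = "0123456789abcdef";
	const unsigned char *s = src;

	while (count--) {
		*dst++ = hex[*s >> 4];
		*dst++ = hex[*s & 0xf];
		s++;
	}
	return dst;	/* one past the last character written */
}

int main(void)
{
	unsigned char data[I2C_SMBUS_BLOCK_MAX] = { 0xde, 0xad, 0xbe, 0xef };
	char out[I2C_SMBUS_BLOCK_MAX * 2 + 2];	/* 2 chars/byte + '\n' */
	char *end = bin2hex(out, data, 4);

	*end++ = '\n';
	fwrite(out, 1, end - out, stdout);	/* prints "deadbeef" */
	return 0;
}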
diff --git a/drivers/hwmon/pmbus/tps53679.c b/drivers/hwmon/pmbus/tps53679.c index ca2bfa2..249974c 100644 --- a/drivers/hwmon/pmbus/tps53679.c +++ b/drivers/hwmon/pmbus/tps53679.c
@@ -103,10 +103,10 @@ static int tps53679_identify_chip(struct i2c_client *client, } ret = i2c_smbus_read_block_data(client, PMBUS_IC_DEVICE_ID, buf); - if (ret < 0) - return ret; + if (ret <= 0) + return ret < 0 ? ret : -EIO; - /* Adjust length if null terminator if present */ + /* Adjust length if null terminator is present */ buf_len = (buf[ret - 1] != '\x00' ? ret : ret - 1); id_len = strlen(id); @@ -175,8 +175,8 @@ static int tps53676_identify(struct i2c_client *client, ret = i2c_smbus_read_block_data(client, PMBUS_IC_DEVICE_ID, buf); if (ret < 0) return ret; - if (strncmp("TI\x53\x67\x60", buf, 5)) { - dev_err(&client->dev, "Unexpected device ID: %s\n", buf); + if (ret != 6 || memcmp(buf, "TI\x53\x67\x60\x00", 6)) { + dev_err(&client->dev, "Unexpected device ID: %*ph\n", ret, buf); return -ENODEV; }
diff --git a/drivers/hwmon/sa67mcu-hwmon.c b/drivers/hwmon/sa67mcu-hwmon.c deleted file mode 100644 index 22f703b..0000000 --- a/drivers/hwmon/sa67mcu-hwmon.c +++ /dev/null
@@ -1,161 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * sl67mcu hardware monitoring driver - * - * Copyright 2025 Kontron Europe GmbH - */ - -#include <linux/bitfield.h> -#include <linux/hwmon.h> -#include <linux/kernel.h> -#include <linux/mod_devicetable.h> -#include <linux/module.h> -#include <linux/platform_device.h> -#include <linux/property.h> -#include <linux/regmap.h> - -#define SA67MCU_VOLTAGE(n) (0x00 + ((n) * 2)) -#define SA67MCU_TEMP(n) (0x04 + ((n) * 2)) - -struct sa67mcu_hwmon { - struct regmap *regmap; - u32 offset; -}; - -static int sa67mcu_hwmon_read(struct device *dev, - enum hwmon_sensor_types type, u32 attr, - int channel, long *input) -{ - struct sa67mcu_hwmon *hwmon = dev_get_drvdata(dev); - unsigned int offset; - u8 reg[2]; - int ret; - - switch (type) { - case hwmon_in: - switch (attr) { - case hwmon_in_input: - offset = hwmon->offset + SA67MCU_VOLTAGE(channel); - break; - default: - return -EOPNOTSUPP; - } - break; - case hwmon_temp: - switch (attr) { - case hwmon_temp_input: - offset = hwmon->offset + SA67MCU_TEMP(channel); - break; - default: - return -EOPNOTSUPP; - } - break; - default: - return -EOPNOTSUPP; - } - - /* Reading the low byte will capture the value */ - ret = regmap_bulk_read(hwmon->regmap, offset, reg, ARRAY_SIZE(reg)); - if (ret) - return ret; - - *input = reg[1] << 8 | reg[0]; - - /* Temperatures are s16 and in 0.1degC steps. */ - if (type == hwmon_temp) - *input = sign_extend32(*input, 15) * 100; - - return 0; -} - -static const struct hwmon_channel_info * const sa67mcu_hwmon_info[] = { - HWMON_CHANNEL_INFO(in, - HWMON_I_INPUT | HWMON_I_LABEL, - HWMON_I_INPUT | HWMON_I_LABEL), - HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT), - NULL -}; - -static const char *const sa67mcu_hwmon_in_labels[] = { - "VDDIN", - "VDD_RTC", -}; - -static int sa67mcu_hwmon_read_string(struct device *dev, - enum hwmon_sensor_types type, u32 attr, - int channel, const char **str) -{ - switch (type) { - case hwmon_in: - switch (attr) { - case hwmon_in_label: - *str = sa67mcu_hwmon_in_labels[channel]; - return 0; - default: - return -EOPNOTSUPP; - } - default: - return -EOPNOTSUPP; - } -} - -static const struct hwmon_ops sa67mcu_hwmon_ops = { - .visible = 0444, - .read = sa67mcu_hwmon_read, - .read_string = sa67mcu_hwmon_read_string, -}; - -static const struct hwmon_chip_info sa67mcu_hwmon_chip_info = { - .ops = &sa67mcu_hwmon_ops, - .info = sa67mcu_hwmon_info, -}; - -static int sa67mcu_hwmon_probe(struct platform_device *pdev) -{ - struct sa67mcu_hwmon *hwmon; - struct device *hwmon_dev; - int ret; - - if (!pdev->dev.parent) - return -ENODEV; - - hwmon = devm_kzalloc(&pdev->dev, sizeof(*hwmon), GFP_KERNEL); - if (!hwmon) - return -ENOMEM; - - hwmon->regmap = dev_get_regmap(pdev->dev.parent, NULL); - if (!hwmon->regmap) - return -ENODEV; - - ret = device_property_read_u32(&pdev->dev, "reg", &hwmon->offset); - if (ret) - return -EINVAL; - - hwmon_dev = devm_hwmon_device_register_with_info(&pdev->dev, - "sa67mcu_hwmon", hwmon, - &sa67mcu_hwmon_chip_info, - NULL); - if (IS_ERR(hwmon_dev)) - dev_err(&pdev->dev, "failed to register as hwmon device"); - - return PTR_ERR_OR_ZERO(hwmon_dev); -} - -static const struct of_device_id sa67mcu_hwmon_of_match[] = { - { .compatible = "kontron,sa67mcu-hwmon", }, - {} -}; -MODULE_DEVICE_TABLE(of, sa67mcu_hwmon_of_match); - -static struct platform_driver sa67mcu_hwmon_driver = { - .probe = sa67mcu_hwmon_probe, - .driver = { - .name = "sa67mcu-hwmon", - .of_match_table = sa67mcu_hwmon_of_match, - }, -}; 
-module_platform_driver(sa67mcu_hwmon_driver); - -MODULE_DESCRIPTION("sa67mcu Hardware Monitoring Driver"); -MODULE_AUTHOR("Michael Walle <mwalle@kernel.org>"); -MODULE_LICENSE("GPL");
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index e11d507..7cb6b9b 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig
@@ -1213,6 +1213,8 @@ tristate "NVIDIA Tegra internal I2C controller" depends on ARCH_TEGRA || (COMPILE_TEST && (ARC || ARM || ARM64 || M68K || RISCV || SUPERH || SPARC)) # COMPILE_TEST needs architectures with readsX()/writesX() primitives + depends on PINCTRL + # ARCH_TEGRA implies PINCTRL, but the COMPILE_TEST side doesn't. help If you say yes to this option, support will be included for the I2C controller embedded in NVIDIA Tegra SOCs
diff --git a/drivers/i2c/busses/i2c-cp2615.c b/drivers/i2c/busses/i2c-cp2615.c index e2d7cd2..8212875 100644 --- a/drivers/i2c/busses/i2c-cp2615.c +++ b/drivers/i2c/busses/i2c-cp2615.c
@@ -298,6 +298,9 @@ cp2615_i2c_probe(struct usb_interface *usbif, const struct usb_device_id *id) if (!adap) return -ENOMEM; + if (!usbdev->serial) + return -EINVAL; + strscpy(adap->name, usbdev->serial, sizeof(adap->name)); adap->owner = THIS_MODULE; adap->dev.parent = &usbif->dev;
diff --git a/drivers/i2c/busses/i2c-designware-amdisp.c b/drivers/i2c/busses/i2c-designware-amdisp.c index c48728a..9f0ec0f 100644 --- a/drivers/i2c/busses/i2c-designware-amdisp.c +++ b/drivers/i2c/busses/i2c-designware-amdisp.c
@@ -7,6 +7,7 @@ #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/pm_domain.h> #include <linux/pm_runtime.h> #include <linux/soc/amd/isp4_misc.h> @@ -76,22 +77,20 @@ static int amd_isp_dw_i2c_plat_probe(struct platform_device *pdev) device_enable_async_suspend(&pdev->dev); - pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); - + dev_pm_genpd_resume(&pdev->dev); ret = i2c_dw_probe(isp_i2c_dev); if (ret) { dev_err_probe(&pdev->dev, ret, "i2c_dw_probe failed\n"); goto error_release_rpm; } - - pm_runtime_put_sync(&pdev->dev); + dev_pm_genpd_suspend(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + pm_runtime_enable(&pdev->dev); return 0; error_release_rpm: amd_isp_dw_i2c_plat_pm_cleanup(isp_i2c_dev); - pm_runtime_put_sync(&pdev->dev); return ret; }
diff --git a/drivers/i2c/busses/i2c-fsi.c b/drivers/i2c/busses/i2c-fsi.c index 82c87e0..b2dc5ae 100644 --- a/drivers/i2c/busses/i2c-fsi.c +++ b/drivers/i2c/busses/i2c-fsi.c
@@ -729,6 +729,7 @@ static int fsi_i2c_probe(struct fsi_device *fsi_dev) rc = i2c_add_adapter(&port->adapter); if (rc < 0) { dev_err(dev, "Failed to register adapter: %d\n", rc); + of_node_put(np); kfree(port); continue; }
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 9e17897..32a3cef 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c
@@ -310,9 +310,10 @@ struct i801_priv { /* * If set to true the host controller registers are reserved for - * ACPI AML use. + * ACPI AML use. Needs extra protection by acpi_lock. */ bool acpi_reserved; + struct mutex acpi_lock; }; #define FEATURE_SMBUS_PEC BIT(0) @@ -894,8 +895,11 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr, int hwpec, ret; struct i801_priv *priv = i2c_get_adapdata(adap); - if (priv->acpi_reserved) + mutex_lock(&priv->acpi_lock); + if (priv->acpi_reserved) { + mutex_unlock(&priv->acpi_lock); return -EBUSY; + } pm_runtime_get_sync(&priv->pci_dev->dev); @@ -935,6 +939,7 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr, iowrite8(SMBHSTSTS_INUSE_STS | STATUS_FLAGS, SMBHSTSTS(priv)); pm_runtime_put_autosuspend(&priv->pci_dev->dev); + mutex_unlock(&priv->acpi_lock); return ret; } @@ -1465,7 +1470,7 @@ i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits, * further access from the driver itself. This device is now owned * by the system firmware. */ - i2c_lock_bus(&priv->adapter, I2C_LOCK_SEGMENT); + mutex_lock(&priv->acpi_lock); if (!priv->acpi_reserved && i801_acpi_is_smbus_ioport(priv, address)) { priv->acpi_reserved = true; @@ -1485,7 +1490,7 @@ i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits, else status = acpi_os_write_port(address, (u32)*value, bits); - i2c_unlock_bus(&priv->adapter, I2C_LOCK_SEGMENT); + mutex_unlock(&priv->acpi_lock); return status; } @@ -1545,6 +1550,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) priv->adapter.dev.parent = &dev->dev; acpi_use_parent_companion(&priv->adapter.dev); priv->adapter.retries = 3; + mutex_init(&priv->acpi_lock); priv->pci_dev = dev; priv->features = id->driver_data;
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 85f5540..452d120 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c
@@ -1018,8 +1018,9 @@ static inline int i2c_imx_isr_read(struct imx_i2c_struct *i2c_imx) return 0; } -static inline void i2c_imx_isr_read_continue(struct imx_i2c_struct *i2c_imx) +static inline enum imx_i2c_state i2c_imx_isr_read_continue(struct imx_i2c_struct *i2c_imx) { + enum imx_i2c_state next_state = IMX_I2C_STATE_READ_CONTINUE; unsigned int temp; if ((i2c_imx->msg->len - 1) == i2c_imx->msg_buf_idx) { @@ -1033,18 +1034,20 @@ static inline void i2c_imx_isr_read_continue(struct imx_i2c_struct *i2c_imx) i2c_imx->stopped = 1; temp &= ~(I2CR_MSTA | I2CR_MTX); imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); - } else { - /* - * For i2c master receiver repeat restart operation like: - * read -> repeat MSTA -> read/write - * The controller must set MTX before read the last byte in - * the first read operation, otherwise the first read cost - * one extra clock cycle. - */ - temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); - temp |= I2CR_MTX; - imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); + + return IMX_I2C_STATE_DONE; } + /* + * For i2c master receiver repeat restart operation like: + * read -> repeat MSTA -> read/write + * The controller must set MTX before read the last byte in + * the first read operation, otherwise the first read cost + * one extra clock cycle. + */ + temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); + temp |= I2CR_MTX; + imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); + next_state = IMX_I2C_STATE_DONE; } else if (i2c_imx->msg_buf_idx == (i2c_imx->msg->len - 2)) { temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); temp |= I2CR_TXAK; @@ -1052,6 +1055,7 @@ static inline void i2c_imx_isr_read_continue(struct imx_i2c_struct *i2c_imx) } i2c_imx->msg->buf[i2c_imx->msg_buf_idx++] = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR); + return next_state; } static inline void i2c_imx_isr_read_block_data_len(struct imx_i2c_struct *i2c_imx) @@ -1088,11 +1092,9 @@ static irqreturn_t i2c_imx_master_isr(struct imx_i2c_struct *i2c_imx, unsigned i break; case IMX_I2C_STATE_READ_CONTINUE: - i2c_imx_isr_read_continue(i2c_imx); - if (i2c_imx->msg_buf_idx == i2c_imx->msg->len) { - i2c_imx->state = IMX_I2C_STATE_DONE; + i2c_imx->state = i2c_imx_isr_read_continue(i2c_imx); + if (i2c_imx->state == IMX_I2C_STATE_DONE) wake_up(&i2c_imx->queue); - } break; case IMX_I2C_STATE_READ_BLOCK_DATA: @@ -1490,6 +1492,7 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bool is_lastmsg) { int block_data = msgs->flags & I2C_M_RECV_LEN; + int ret = 0; dev_dbg(&i2c_imx->adapter.dev, "<%s> write slave address: addr=0x%x\n", @@ -1522,10 +1525,20 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, dev_err(&i2c_imx->adapter.dev, "<%s> read timedout\n", __func__); return -ETIMEDOUT; } - if (!i2c_imx->stopped) - return i2c_imx_bus_busy(i2c_imx, 0, false); + if (i2c_imx->is_lastmsg) { + if (!i2c_imx->stopped) + ret = i2c_imx_bus_busy(i2c_imx, 0, false); + /* + * Only read the last byte of the last message after the bus is + * not busy. Else the controller generates another clock which + * might confuse devices. + */ + if (!ret) + i2c_imx->msg->buf[i2c_imx->msg_buf_idx++] = imx_i2c_read_reg(i2c_imx, + IMX_I2C_I2DR); + } - return 0; + return ret; } static int i2c_imx_xfer_common(struct i2c_adapter *adapter,
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c index 09af3b3..f55840b 100644 --- a/drivers/i2c/busses/i2c-pxa.c +++ b/drivers/i2c/busses/i2c-pxa.c
@@ -268,6 +268,7 @@ struct pxa_i2c { struct pinctrl *pinctrl; struct pinctrl_state *pinctrl_default; struct pinctrl_state *pinctrl_recovery; + bool reset_before_xfer; }; #define _IBMR(i2c) ((i2c)->reg_ibmr) @@ -1144,6 +1145,11 @@ static int i2c_pxa_xfer(struct i2c_adapter *adap, { struct pxa_i2c *i2c = adap->algo_data; + if (i2c->reset_before_xfer) { + i2c_pxa_reset(i2c); + i2c->reset_before_xfer = false; + } + return i2c_pxa_internal_xfer(i2c, msgs, num, i2c_pxa_do_xfer); } @@ -1521,7 +1527,16 @@ static int i2c_pxa_probe(struct platform_device *dev) } } - i2c_pxa_reset(i2c); + /* + * Skip reset on Armada 3700 when recovery is used to avoid + * controller hang due to the pinctrl state changes done by + * the generic recovery initialization code. The reset will + * be performed later, prior to the first transfer. + */ + if (i2c_type == REGS_A3700 && i2c->adap.bus_recovery_info) + i2c->reset_before_xfer = true; + else + i2c_pxa_reset(i2c); ret = i2c_add_numbered_adapter(&i2c->adap); if (ret < 0)
diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index bec619b..4eaeb39 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c
@@ -2047,8 +2047,11 @@ static int tegra_i2c_probe(struct platform_device *pdev) * * VI I2C device shouldn't be marked as IRQ-safe because VI I2C won't * be used for atomic transfers. ACPI device is not IRQ safe also. + * + * Devices with pinctrl states cannot be marked IRQ-safe as the pinctrl + * state transitions during runtime PM require mutexes. */ - if (!IS_VI(i2c_dev) && !has_acpi_companion(i2c_dev->dev)) + if (!IS_VI(i2c_dev) && !has_acpi_companion(i2c_dev->dev) && !i2c_dev->dev->pins) pm_runtime_irq_safe(i2c_dev->dev); pm_runtime_enable(i2c_dev->dev);
diff --git a/drivers/i3c/Kconfig b/drivers/i3c/Kconfig index 30a4415..626c54b 100644 --- a/drivers/i3c/Kconfig +++ b/drivers/i3c/Kconfig
@@ -22,3 +22,15 @@ if I3C source "drivers/i3c/master/Kconfig" endif # I3C + +config I3C_OR_I2C + tristate + default m if I3C=m + default I2C + help + Device drivers using module_i3c_i2c_driver() can use either + i2c or i3c hosts, but cannot be built-in for the kernel when + CONFIG_I3C=m. + + Add 'depends on I3C_OR_I2C' in Kconfig for those drivers to + get the correct dependencies.
diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index d87bde3..d6bdb32 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c
@@ -1024,7 +1024,7 @@ static int dw_i3c_master_reattach_i3c_dev(struct i3c_dev_desc *dev, master->free_pos &= ~BIT(pos); } - writel(DEV_ADDR_TABLE_DYNAMIC_ADDR(dev->info.dyn_addr), + writel(DEV_ADDR_TABLE_DYNAMIC_ADDR(dev->info.dyn_addr) | DEV_ADDR_TABLE_SIR_REJECT, master->regs + DEV_ADDR_TABLE_LOC(master->datstartaddr, data->index)); @@ -1053,7 +1053,7 @@ static int dw_i3c_master_attach_i3c_dev(struct i3c_dev_desc *dev) master->free_pos &= ~BIT(pos); i3c_dev_set_master_data(dev, data); - writel(DEV_ADDR_TABLE_DYNAMIC_ADDR(master->devs[pos].addr), + writel(DEV_ADDR_TABLE_DYNAMIC_ADDR(master->devs[pos].addr) | DEV_ADDR_TABLE_SIR_REJECT, master->regs + DEV_ADDR_TABLE_LOC(master->datstartaddr, data->index)); @@ -1659,6 +1659,8 @@ int dw_i3c_common_probe(struct dw_i3c_master *master, pm_runtime_get_noresume(&pdev->dev); INIT_WORK(&master->hj_work, dw_i3c_hj_work); + + device_set_of_node_from_dev(&master->base.i2c.dev, &pdev->dev); ret = i3c_master_register(&master->base, &pdev->dev, &dw_mipi_i3c_ops, false); if (ret)
diff --git a/drivers/i3c/master/mipi-i3c-hci/cmd.h b/drivers/i3c/master/mipi-i3c-hci/cmd.h index 1d6dd2c..b1bf87d 100644 --- a/drivers/i3c/master/mipi-i3c-hci/cmd.h +++ b/drivers/i3c/master/mipi-i3c-hci/cmd.h
@@ -17,6 +17,7 @@ #define CMD_0_TOC W0_BIT_(31) #define CMD_0_ROC W0_BIT_(30) #define CMD_0_ATTR W0_MASK(2, 0) +#define CMD_0_TID W0_MASK(6, 3) /* * Response Descriptor Structure
diff --git a/drivers/i3c/master/mipi-i3c-hci/cmd_v1.c b/drivers/i3c/master/mipi-i3c-hci/cmd_v1.c index fe26046..75d452d 100644 --- a/drivers/i3c/master/mipi-i3c-hci/cmd_v1.c +++ b/drivers/i3c/master/mipi-i3c-hci/cmd_v1.c
@@ -331,12 +331,10 @@ static int hci_cmd_v1_daa(struct i3c_hci *hci) CMD_A0_ROC | CMD_A0_TOC; xfer->cmd_desc[1] = 0; xfer->completion = &done; - hci->io->queue_xfer(hci, xfer, 1); - if (!wait_for_completion_timeout(&done, HZ) && - hci->io->dequeue_xfer(hci, xfer, 1)) { - ret = -ETIME; + xfer->timeout = HZ; + ret = i3c_hci_process_xfer(hci, xfer, 1); + if (ret) break; - } if ((RESP_STATUS(xfer->response) == RESP_ERR_ADDR_HEADER || RESP_STATUS(xfer->response) == RESP_ERR_NACK) && RESP_DATA_LENGTH(xfer->response) == 1) {
diff --git a/drivers/i3c/master/mipi-i3c-hci/cmd_v2.c b/drivers/i3c/master/mipi-i3c-hci/cmd_v2.c index 3729e64..39eec26 100644 --- a/drivers/i3c/master/mipi-i3c-hci/cmd_v2.c +++ b/drivers/i3c/master/mipi-i3c-hci/cmd_v2.c
@@ -253,6 +253,7 @@ static int hci_cmd_v2_daa(struct i3c_hci *hci) xfer[0].rnw = true; xfer[0].cmd_desc[1] = CMD_A1_DATA_LENGTH(8); xfer[1].completion = &done; + xfer[1].timeout = HZ; for (;;) { ret = i3c_master_get_free_addr(&hci->master, next_addr); @@ -272,12 +273,9 @@ static int hci_cmd_v2_daa(struct i3c_hci *hci) CMD_A0_ASSIGN_ADDRESS(next_addr) | CMD_A0_ROC | CMD_A0_TOC; - hci->io->queue_xfer(hci, xfer, 2); - if (!wait_for_completion_timeout(&done, HZ) && - hci->io->dequeue_xfer(hci, xfer, 2)) { - ret = -ETIME; + ret = i3c_hci_process_xfer(hci, xfer, 2); + if (ret) break; - } if (RESP_STATUS(xfer[0].response) != RESP_SUCCESS) { ret = 0; /* no more devices to be assigned */ break;
diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c index 5879bba..284f3ed 100644 --- a/drivers/i3c/master/mipi-i3c-hci/core.c +++ b/drivers/i3c/master/mipi-i3c-hci/core.c
@@ -152,7 +152,11 @@ static int i3c_hci_bus_init(struct i3c_master_controller *m) if (hci->quirks & HCI_QUIRK_RESP_BUF_THLD) amd_set_resp_buf_thld(hci); - reg_set(HC_CONTROL, HC_CONTROL_BUS_ENABLE); + scoped_guard(spinlock_irqsave, &hci->lock) + hci->irq_inactive = false; + + /* Enable bus with Hot-Join disabled */ + reg_set(HC_CONTROL, HC_CONTROL_BUS_ENABLE | HC_CONTROL_HOT_JOIN_CTRL); dev_dbg(&hci->master.dev, "HC_CONTROL = %#x", reg_read(HC_CONTROL)); return 0; @@ -177,21 +181,51 @@ static int i3c_hci_bus_disable(struct i3c_hci *hci) return ret; } +static int i3c_hci_software_reset(struct i3c_hci *hci) +{ + u32 regval; + int ret; + + /* + * SOFT_RST must be clear before we write to it. + * Then we must wait until it clears again. + */ + ret = readx_poll_timeout(reg_read, RESET_CONTROL, regval, + !(regval & SOFT_RST), 0, 10 * USEC_PER_MSEC); + if (ret) { + dev_err(&hci->master.dev, "%s: Software reset stuck\n", __func__); + return ret; + } + + reg_write(RESET_CONTROL, SOFT_RST); + + ret = readx_poll_timeout(reg_read, RESET_CONTROL, regval, + !(regval & SOFT_RST), 0, 10 * USEC_PER_MSEC); + if (ret) { + dev_err(&hci->master.dev, "%s: Software reset failed\n", __func__); + return ret; + } + + return 0; +} + void i3c_hci_sync_irq_inactive(struct i3c_hci *hci) { struct platform_device *pdev = to_platform_device(hci->master.dev.parent); int irq = platform_get_irq(pdev, 0); reg_write(INTR_SIGNAL_ENABLE, 0x0); - hci->irq_inactive = true; synchronize_irq(irq); + scoped_guard(spinlock_irqsave, &hci->lock) + hci->irq_inactive = true; } static void i3c_hci_bus_cleanup(struct i3c_master_controller *m) { struct i3c_hci *hci = to_i3c_hci(m); - i3c_hci_bus_disable(hci); + if (i3c_hci_bus_disable(hci)) + i3c_hci_software_reset(hci); hci->io->cleanup(hci); } @@ -212,6 +246,36 @@ void mipi_i3c_hci_dct_index_reset(struct i3c_hci *hci) reg_write(DCT_SECTION, FIELD_PREP(DCT_TABLE_INDEX, 0)); } +int i3c_hci_process_xfer(struct i3c_hci *hci, struct hci_xfer *xfer, int n) +{ + struct completion *done = xfer[n - 1].completion; + unsigned long timeout = xfer[n - 1].timeout; + int ret; + + ret = hci->io->queue_xfer(hci, xfer, n); + if (ret) + return ret; + + if (!wait_for_completion_timeout(done, timeout)) { + if (hci->io->dequeue_xfer(hci, xfer, n)) { + dev_err(&hci->master.dev, "%s: timeout error\n", __func__); + return -ETIMEDOUT; + } + return 0; + } + + if (hci->io->handle_error) { + bool error = false; + + for (int i = 0; i < n && !error; i++) + error = RESP_STATUS(xfer[i].response); + if (error) + return hci->io->handle_error(hci, xfer, n); + } + + return 0; +} + static int i3c_hci_send_ccc_cmd(struct i3c_master_controller *m, struct i3c_ccc_cmd *ccc) { @@ -252,18 +316,14 @@ static int i3c_hci_send_ccc_cmd(struct i3c_master_controller *m, last = i - 1; xfer[last].cmd_desc[0] |= CMD_0_TOC; xfer[last].completion = &done; + xfer[last].timeout = HZ; if (prefixed) xfer--; - ret = hci->io->queue_xfer(hci, xfer, nxfers); + ret = i3c_hci_process_xfer(hci, xfer, nxfers); if (ret) goto out; - if (!wait_for_completion_timeout(&done, HZ) && - hci->io->dequeue_xfer(hci, xfer, nxfers)) { - ret = -ETIME; - goto out; - } for (i = prefixed; i < nxfers; i++) { if (ccc->rnw) ccc->dests[i - prefixed].payload.len = @@ -334,15 +394,11 @@ static int i3c_hci_i3c_xfers(struct i3c_dev_desc *dev, last = i - 1; xfer[last].cmd_desc[0] |= CMD_0_TOC; xfer[last].completion = &done; + xfer[last].timeout = HZ; - ret = hci->io->queue_xfer(hci, xfer, nxfers); + ret = i3c_hci_process_xfer(hci, xfer, nxfers); if (ret) goto out; - if 
(!wait_for_completion_timeout(&done, HZ) && - hci->io->dequeue_xfer(hci, xfer, nxfers)) { - ret = -ETIME; - goto out; - } for (i = 0; i < nxfers; i++) { if (i3c_xfers[i].rnw) i3c_xfers[i].len = RESP_DATA_LENGTH(xfer[i].response); @@ -382,15 +438,11 @@ static int i3c_hci_i2c_xfers(struct i2c_dev_desc *dev, last = i - 1; xfer[last].cmd_desc[0] |= CMD_0_TOC; xfer[last].completion = &done; + xfer[last].timeout = m->i2c.timeout; - ret = hci->io->queue_xfer(hci, xfer, nxfers); + ret = i3c_hci_process_xfer(hci, xfer, nxfers); if (ret) goto out; - if (!wait_for_completion_timeout(&done, m->i2c.timeout) && - hci->io->dequeue_xfer(hci, xfer, nxfers)) { - ret = -ETIME; - goto out; - } for (i = 0; i < nxfers; i++) { if (RESP_STATUS(xfer[i].response) != RESP_SUCCESS) { ret = -EIO; @@ -566,6 +618,8 @@ static irqreturn_t i3c_hci_irq_handler(int irq, void *dev_id) irqreturn_t result = IRQ_NONE; u32 val; + guard(spinlock)(&hci->lock); + /* * The IRQ can be shared, so the handler may be called when the IRQ is * due to a different device. That could happen when runtime suspended, @@ -601,34 +655,6 @@ static irqreturn_t i3c_hci_irq_handler(int irq, void *dev_id) return result; } -static int i3c_hci_software_reset(struct i3c_hci *hci) -{ - u32 regval; - int ret; - - /* - * SOFT_RST must be clear before we write to it. - * Then we must wait until it clears again. - */ - ret = readx_poll_timeout(reg_read, RESET_CONTROL, regval, - !(regval & SOFT_RST), 0, 10 * USEC_PER_MSEC); - if (ret) { - dev_err(&hci->master.dev, "%s: Software reset stuck\n", __func__); - return ret; - } - - reg_write(RESET_CONTROL, SOFT_RST); - - ret = readx_poll_timeout(reg_read, RESET_CONTROL, regval, - !(regval & SOFT_RST), 0, 10 * USEC_PER_MSEC); - if (ret) { - dev_err(&hci->master.dev, "%s: Software reset failed\n", __func__); - return ret; - } - - return 0; -} - static inline bool is_version_1_1_or_newer(struct i3c_hci *hci) { return hci->version_major > 1 || (hci->version_major == 1 && hci->version_minor > 0); @@ -739,8 +765,12 @@ static int i3c_hci_runtime_suspend(struct device *dev) int ret; ret = i3c_hci_bus_disable(hci); - if (ret) + if (ret) { + /* Fall back to software reset to disable the bus */ + ret = i3c_hci_software_reset(hci); + i3c_hci_sync_irq_inactive(hci); return ret; + } hci->io->suspend(hci); @@ -760,11 +790,13 @@ static int i3c_hci_runtime_resume(struct device *dev) mipi_i3c_hci_dat_v1.restore(hci); - hci->irq_inactive = false; - hci->io->resume(hci); - reg_set(HC_CONTROL, HC_CONTROL_BUS_ENABLE); + scoped_guard(spinlock_irqsave, &hci->lock) + hci->irq_inactive = false; + + /* Enable bus with Hot-Join disabled */ + reg_set(HC_CONTROL, HC_CONTROL_BUS_ENABLE | HC_CONTROL_HOT_JOIN_CTRL); return 0; } @@ -924,6 +956,9 @@ static int i3c_hci_probe(struct platform_device *pdev) if (!hci) return -ENOMEM; + spin_lock_init(&hci->lock); + mutex_init(&hci->control_mutex); + /* * Multi-bus instances share the same MMIO address range, but not * necessarily in separate contiguous sub-ranges. To avoid overlapping @@ -950,6 +985,8 @@ static int i3c_hci_probe(struct platform_device *pdev) if (ret) return ret; + hci->irq_inactive = true; + irq = platform_get_irq(pdev, 0); ret = devm_request_irq(&pdev->dev, irq, i3c_hci_irq_handler, IRQF_SHARED, NULL, hci);
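The core.c hunks above replace open-coded lock/unlock pairs with the scope-based guards from <linux/cleanup.h>. As a minimal sketch of the two idioms used (reusing the driver's struct i3c_hci; both functions are illustrative, not the driver's actual code):

#include <linux/cleanup.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>

static void example_mark_irq_inactive(struct i3c_hci *hci)
{
        /* Lock held only for the single guarded statement. */
        scoped_guard(spinlock_irqsave, &hci->lock)
                hci->irq_inactive = true;
}

static irqreturn_t example_irq_handler(int irq, void *dev_id)
{
        struct i3c_hci *hci = dev_id;

        /* Held until any return below; no unlock paths to forget. */
        guard(spinlock)(&hci->lock);
        if (hci->irq_inactive)
                return IRQ_NONE;
        return IRQ_HANDLED;
}

scoped_guard() bounds the critical section to one statement or block, while guard() pins the lock for the rest of the function, which is why the reworked IRQ handler can return from any branch without an explicit unlock.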
diff --git a/drivers/i3c/master/mipi-i3c-hci/dma.c b/drivers/i3c/master/mipi-i3c-hci/dma.c index b903a2d..e487ef5 100644 --- a/drivers/i3c/master/mipi-i3c-hci/dma.c +++ b/drivers/i3c/master/mipi-i3c-hci/dma.c
@@ -129,9 +129,8 @@ struct hci_rh_data { dma_addr_t xfer_dma, resp_dma, ibi_status_dma, ibi_data_dma; unsigned int xfer_entries, ibi_status_entries, ibi_chunks_total; unsigned int xfer_struct_sz, resp_struct_sz, ibi_status_sz, ibi_chunk_sz; - unsigned int done_ptr, ibi_chunk_ptr; + unsigned int done_ptr, ibi_chunk_ptr, xfer_space; struct hci_xfer **src_xfers; - spinlock_t lock; struct completion op_done; }; @@ -261,6 +260,7 @@ static void hci_dma_init_rh(struct i3c_hci *hci, struct hci_rh_data *rh, int i) rh->done_ptr = 0; rh->ibi_chunk_ptr = 0; + rh->xfer_space = rh->xfer_entries; } static void hci_dma_init_rings(struct i3c_hci *hci) @@ -344,7 +344,6 @@ static int hci_dma_init(struct i3c_hci *hci) goto err_out; rh = &rings->headers[i]; rh->regs = hci->base_regs + offset; - spin_lock_init(&rh->lock); init_completion(&rh->op_done); rh->xfer_entries = XFER_RING_ENTRIES; @@ -439,26 +438,63 @@ static void hci_dma_unmap_xfer(struct i3c_hci *hci, } } +static struct i3c_dma *hci_dma_map_xfer(struct device *dev, struct hci_xfer *xfer) +{ + enum dma_data_direction dir = xfer->rnw ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + bool need_bounce = device_iommu_mapped(dev) && xfer->rnw && (xfer->data_len & 3); + + return i3c_master_dma_map_single(dev, xfer->data, xfer->data_len, need_bounce, dir); +} + +static int hci_dma_map_xfer_list(struct i3c_hci *hci, struct device *dev, + struct hci_xfer *xfer_list, int n) +{ + for (int i = 0; i < n; i++) { + struct hci_xfer *xfer = xfer_list + i; + + if (!xfer->data) + continue; + + xfer->dma = hci_dma_map_xfer(dev, xfer); + if (!xfer->dma) { + hci_dma_unmap_xfer(hci, xfer_list, i); + return -ENOMEM; + } + } + + return 0; +} + static int hci_dma_queue_xfer(struct i3c_hci *hci, struct hci_xfer *xfer_list, int n) { struct hci_rings_data *rings = hci->io_data; struct hci_rh_data *rh; unsigned int i, ring, enqueue_ptr; - u32 op1_val, op2_val; + u32 op1_val; + int ret; + + ret = hci_dma_map_xfer_list(hci, rings->sysdev, xfer_list, n); + if (ret) + return ret; /* For now we only use ring 0 */ ring = 0; rh = &rings->headers[ring]; + spin_lock_irq(&hci->lock); + + if (n > rh->xfer_space) { + spin_unlock_irq(&hci->lock); + hci_dma_unmap_xfer(hci, xfer_list, n); + return -EBUSY; + } + op1_val = rh_reg_read(RING_OPERATION1); enqueue_ptr = FIELD_GET(RING_OP1_CR_ENQ_PTR, op1_val); for (i = 0; i < n; i++) { struct hci_xfer *xfer = xfer_list + i; u32 *ring_data = rh->xfer + rh->xfer_struct_sz * enqueue_ptr; - enum dma_data_direction dir = xfer->rnw ? DMA_FROM_DEVICE : - DMA_TO_DEVICE; - bool need_bounce; /* store cmd descriptor */ *ring_data++ = xfer->cmd_desc[0]; @@ -477,18 +513,6 @@ static int hci_dma_queue_xfer(struct i3c_hci *hci, /* 2nd and 3rd words of Data Buffer Descriptor Structure */ if (xfer->data) { - need_bounce = device_iommu_mapped(rings->sysdev) && - xfer->rnw && - xfer->data_len != ALIGN(xfer->data_len, 4); - xfer->dma = i3c_master_dma_map_single(rings->sysdev, - xfer->data, - xfer->data_len, - need_bounce, - dir); - if (!xfer->dma) { - hci_dma_unmap_xfer(hci, xfer_list, i); - return -ENOMEM; - } *ring_data++ = lower_32_bits(xfer->dma->addr); *ring_data++ = upper_32_bits(xfer->dma->addr); } else { @@ -503,26 +527,14 @@ static int hci_dma_queue_xfer(struct i3c_hci *hci, xfer->ring_entry = enqueue_ptr; enqueue_ptr = (enqueue_ptr + 1) % rh->xfer_entries; - - /* - * We may update the hardware view of the enqueue pointer - * only if we didn't reach its dequeue pointer. 
- */ - op2_val = rh_reg_read(RING_OPERATION2); - if (enqueue_ptr == FIELD_GET(RING_OP2_CR_DEQ_PTR, op2_val)) { - /* the ring is full */ - hci_dma_unmap_xfer(hci, xfer_list, i + 1); - return -EBUSY; - } } - /* take care to update the hardware enqueue pointer atomically */ - spin_lock_irq(&rh->lock); - op1_val = rh_reg_read(RING_OPERATION1); + rh->xfer_space -= n; + op1_val &= ~RING_OP1_CR_ENQ_PTR; op1_val |= FIELD_PREP(RING_OP1_CR_ENQ_PTR, enqueue_ptr); rh_reg_write(RING_OPERATION1, op1_val); - spin_unlock_irq(&rh->lock); + spin_unlock_irq(&hci->lock); return 0; } @@ -534,18 +546,29 @@ static bool hci_dma_dequeue_xfer(struct i3c_hci *hci, struct hci_rh_data *rh = &rings->headers[xfer_list[0].ring_number]; unsigned int i; bool did_unqueue = false; + u32 ring_status; - /* stop the ring */ - rh_reg_write(RING_CONTROL, RING_CTRL_ABORT); - if (wait_for_completion_timeout(&rh->op_done, HZ) == 0) { - /* - * We're deep in it if ever this condition is ever met. - * Hardware might still be writing to memory, etc. - */ - dev_crit(&hci->master.dev, "unable to abort the ring\n"); - WARN_ON(1); + guard(mutex)(&hci->control_mutex); + + ring_status = rh_reg_read(RING_STATUS); + if (ring_status & RING_STATUS_RUNNING) { + /* stop the ring */ + reinit_completion(&rh->op_done); + rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE | RING_CTRL_ABORT); + wait_for_completion_timeout(&rh->op_done, HZ); + ring_status = rh_reg_read(RING_STATUS); + if (ring_status & RING_STATUS_RUNNING) { + /* + * We're deep in it if ever this condition is ever met. + * Hardware might still be writing to memory, etc. + */ + dev_crit(&hci->master.dev, "unable to abort the ring\n"); + WARN_ON(1); + } } + spin_lock_irq(&hci->lock); + for (i = 0; i < n; i++) { struct hci_xfer *xfer = xfer_list + i; int idx = xfer->ring_entry; @@ -559,7 +582,7 @@ static bool hci_dma_dequeue_xfer(struct i3c_hci *hci, u32 *ring_data = rh->xfer + rh->xfer_struct_sz * idx; /* store no-op cmd descriptor */ - *ring_data++ = FIELD_PREP(CMD_0_ATTR, 0x7); + *ring_data++ = FIELD_PREP(CMD_0_ATTR, 0x7) | FIELD_PREP(CMD_0_TID, xfer->cmd_tid); *ring_data++ = 0; if (hci->cmd == &mipi_i3c_hci_cmd_v2) { *ring_data++ = 0; @@ -577,15 +600,25 @@ static bool hci_dma_dequeue_xfer(struct i3c_hci *hci, } /* restart the ring */ + mipi_i3c_hci_resume(hci); rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE); + rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE | RING_CTRL_RUN_STOP); + + spin_unlock_irq(&hci->lock); return did_unqueue; } +static int hci_dma_handle_error(struct i3c_hci *hci, struct hci_xfer *xfer_list, int n) +{ + return hci_dma_dequeue_xfer(hci, xfer_list, n) ? 
-EIO : 0; +} + static void hci_dma_xfer_done(struct i3c_hci *hci, struct hci_rh_data *rh) { u32 op1_val, op2_val, resp, *ring_resp; unsigned int tid, done_ptr = rh->done_ptr; + unsigned int done_cnt = 0; struct hci_xfer *xfer; for (;;) { @@ -603,6 +636,7 @@ static void hci_dma_xfer_done(struct i3c_hci *hci, struct hci_rh_data *rh) dev_dbg(&hci->master.dev, "orphaned ring entry"); } else { hci_dma_unmap_xfer(hci, xfer, 1); + rh->src_xfers[done_ptr] = NULL; xfer->ring_entry = -1; xfer->response = resp; if (tid != xfer->cmd_tid) { @@ -617,15 +651,14 @@ static void hci_dma_xfer_done(struct i3c_hci *hci, struct hci_rh_data *rh) done_ptr = (done_ptr + 1) % rh->xfer_entries; rh->done_ptr = done_ptr; + done_cnt += 1; } - /* take care to update the software dequeue pointer atomically */ - spin_lock(&rh->lock); + rh->xfer_space += done_cnt; op1_val = rh_reg_read(RING_OPERATION1); op1_val &= ~RING_OP1_CR_SW_DEQ_PTR; op1_val |= FIELD_PREP(RING_OP1_CR_SW_DEQ_PTR, done_ptr); rh_reg_write(RING_OPERATION1, op1_val); - spin_unlock(&rh->lock); } static int hci_dma_request_ibi(struct i3c_hci *hci, struct i3c_dev_desc *dev, @@ -805,13 +838,10 @@ static void hci_dma_process_ibi(struct i3c_hci *hci, struct hci_rh_data *rh) i3c_master_queue_ibi(dev, slot); done: - /* take care to update the ibi dequeue pointer atomically */ - spin_lock(&rh->lock); op1_val = rh_reg_read(RING_OPERATION1); op1_val &= ~RING_OP1_IBI_DEQ_PTR; op1_val |= FIELD_PREP(RING_OP1_IBI_DEQ_PTR, deq_ptr); rh_reg_write(RING_OPERATION1, op1_val); - spin_unlock(&rh->lock); /* update the chunk pointer */ rh->ibi_chunk_ptr += ibi_chunks; @@ -845,29 +875,8 @@ static bool hci_dma_irq_handler(struct i3c_hci *hci) hci_dma_xfer_done(hci, rh); if (status & INTR_RING_OP) complete(&rh->op_done); - - if (status & INTR_TRANSFER_ABORT) { - u32 ring_status; - - dev_notice_ratelimited(&hci->master.dev, - "Ring %d: Transfer Aborted\n", i); - mipi_i3c_hci_resume(hci); - ring_status = rh_reg_read(RING_STATUS); - if (!(ring_status & RING_STATUS_RUNNING) && - status & INTR_TRANSFER_COMPLETION && - status & INTR_TRANSFER_ERR) { - /* - * Ring stop followed by run is an Intel - * specific required quirk after resuming the - * halted controller. Do it only when the ring - * is not in running state after a transfer - * error. - */ - rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE); - rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE | - RING_CTRL_RUN_STOP); - } - } + if (status & INTR_TRANSFER_ABORT) + dev_dbg(&hci->master.dev, "Ring %d: Transfer Aborted\n", i); if (status & INTR_IBI_RING_FULL) dev_err_ratelimited(&hci->master.dev, "Ring %d: IBI Ring Full Condition\n", i); @@ -883,6 +892,7 @@ const struct hci_io_ops mipi_i3c_hci_dma = { .cleanup = hci_dma_cleanup, .queue_xfer = hci_dma_queue_xfer, .dequeue_xfer = hci_dma_dequeue_xfer, + .handle_error = hci_dma_handle_error, .irq_handler = hci_dma_irq_handler, .request_ibi = hci_dma_request_ibi, .free_ibi = hci_dma_free_ibi,
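The dma.c rework above drops the per-ring spinlock and the hardware dequeue-pointer probe in favor of a software free-slot counter (xfer_space) kept under the shared hci->lock: enqueue reserves all n slots up front or fails with -EBUSY, and the completion path returns them. A stripped-down sketch of that accounting, with a generic ring type and illustrative names:

#include <linux/cleanup.h>
#include <linux/errno.h>
#include <linux/spinlock.h>

struct example_ring {
        spinlock_t lock;
        unsigned int entries;   /* total slots */
        unsigned int space;     /* free slots, software view */
};

static int example_ring_reserve(struct example_ring *r, unsigned int n)
{
        guard(spinlock_irq)(&r->lock);
        if (n > r->space)
                return -EBUSY;  /* never enqueue a partial batch */
        r->space -= n;
        return 0;
}

/* Called from the completion path once 'n' descriptors are retired. */
static void example_ring_complete(struct example_ring *r, unsigned int n)
{
        guard(spinlock)(&r->lock);
        r->space += n;
}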
diff --git a/drivers/i3c/master/mipi-i3c-hci/hci.h b/drivers/i3c/master/mipi-i3c-hci/hci.h index 337b7ab..9ac9d0e 100644 --- a/drivers/i3c/master/mipi-i3c-hci/hci.h +++ b/drivers/i3c/master/mipi-i3c-hci/hci.h
@@ -50,6 +50,8 @@ struct i3c_hci { const struct hci_io_ops *io; void *io_data; const struct hci_cmd_ops *cmd; + spinlock_t lock; + struct mutex control_mutex; atomic_t next_cmd_tid; bool irq_inactive; u32 caps; @@ -87,6 +89,7 @@ struct hci_xfer { unsigned int data_len; unsigned int cmd_tid; struct completion *completion; + unsigned long timeout; union { struct { /* PIO specific */ @@ -120,6 +123,7 @@ struct hci_io_ops { bool (*irq_handler)(struct i3c_hci *hci); int (*queue_xfer)(struct i3c_hci *hci, struct hci_xfer *xfer, int n); bool (*dequeue_xfer)(struct i3c_hci *hci, struct hci_xfer *xfer, int n); + int (*handle_error)(struct i3c_hci *hci, struct hci_xfer *xfer, int n); int (*request_ibi)(struct i3c_hci *hci, struct i3c_dev_desc *dev, const struct i3c_ibi_setup *req); void (*free_ibi)(struct i3c_hci *hci, struct i3c_dev_desc *dev); @@ -154,5 +158,6 @@ void mipi_i3c_hci_dct_index_reset(struct i3c_hci *hci); void amd_set_od_pp_timing(struct i3c_hci *hci); void amd_set_resp_buf_thld(struct i3c_hci *hci); void i3c_hci_sync_irq_inactive(struct i3c_hci *hci); +int i3c_hci_process_xfer(struct i3c_hci *hci, struct hci_xfer *xfer, int n); #endif
diff --git a/drivers/i3c/master/mipi-i3c-hci/pio.c b/drivers/i3c/master/mipi-i3c-hci/pio.c index f8825ac..8f48a81 100644 --- a/drivers/i3c/master/mipi-i3c-hci/pio.c +++ b/drivers/i3c/master/mipi-i3c-hci/pio.c
@@ -123,7 +123,6 @@ struct hci_pio_ibi_data { }; struct hci_pio_data { - spinlock_t lock; struct hci_xfer *curr_xfer, *xfer_queue; struct hci_xfer *curr_rx, *rx_queue; struct hci_xfer *curr_tx, *tx_queue; @@ -212,7 +211,6 @@ static int hci_pio_init(struct i3c_hci *hci) return -ENOMEM; hci->io_data = pio; - spin_lock_init(&pio->lock); __hci_pio_init(hci, &size_val); @@ -631,7 +629,7 @@ static int hci_pio_queue_xfer(struct i3c_hci *hci, struct hci_xfer *xfer, int n) xfer[i].data_left = xfer[i].data_len; } - spin_lock_irq(&pio->lock); + spin_lock_irq(&hci->lock); prev_queue_tail = pio->xfer_queue; pio->xfer_queue = &xfer[n - 1]; if (pio->curr_xfer) { @@ -645,7 +643,7 @@ static int hci_pio_queue_xfer(struct i3c_hci *hci, struct hci_xfer *xfer, int n) pio_reg_read(INTR_STATUS), pio_reg_read(INTR_SIGNAL_ENABLE)); } - spin_unlock_irq(&pio->lock); + spin_unlock_irq(&hci->lock); return 0; } @@ -716,14 +714,14 @@ static bool hci_pio_dequeue_xfer(struct i3c_hci *hci, struct hci_xfer *xfer, int struct hci_pio_data *pio = hci->io_data; int ret; - spin_lock_irq(&pio->lock); + spin_lock_irq(&hci->lock); dev_dbg(&hci->master.dev, "n=%d status=%#x/%#x", n, pio_reg_read(INTR_STATUS), pio_reg_read(INTR_SIGNAL_ENABLE)); dev_dbg(&hci->master.dev, "main_status = %#x/%#x", readl(hci->base_regs + 0x20), readl(hci->base_regs + 0x28)); ret = hci_pio_dequeue_xfer_common(hci, pio, xfer, n); - spin_unlock_irq(&pio->lock); + spin_unlock_irq(&hci->lock); return ret; } @@ -1016,15 +1014,12 @@ static bool hci_pio_irq_handler(struct i3c_hci *hci) struct hci_pio_data *pio = hci->io_data; u32 status; - spin_lock(&pio->lock); status = pio_reg_read(INTR_STATUS); dev_dbg(&hci->master.dev, "PIO_INTR_STATUS %#x/%#x", status, pio->enabled_irqs); status &= pio->enabled_irqs | STAT_LATENCY_WARNINGS; - if (!status) { - spin_unlock(&pio->lock); + if (!status) return false; - } if (status & STAT_IBI_STATUS_THLD) hci_pio_process_ibi(hci, pio); @@ -1058,7 +1053,6 @@ static bool hci_pio_irq_handler(struct i3c_hci *hci) pio_reg_write(INTR_SIGNAL_ENABLE, pio->enabled_irqs); dev_dbg(&hci->master.dev, "PIO_INTR_STATUS %#x/%#x", pio_reg_read(INTR_STATUS), pio_reg_read(INTR_SIGNAL_ENABLE)); - spin_unlock(&pio->lock); return true; }
diff --git a/drivers/iio/accel/adxl313_core.c b/drivers/iio/accel/adxl313_core.c index 9f5d4d2..83dcac1 100644 --- a/drivers/iio/accel/adxl313_core.c +++ b/drivers/iio/accel/adxl313_core.c
@@ -998,6 +998,8 @@ static int adxl313_buffer_predisable(struct iio_dev *indio_dev) ret = regmap_write(data->regmap, ADXL313_REG_FIFO_CTL, FIELD_PREP(ADXL313_REG_FIFO_CTL_MODE_MSK, ADXL313_FIFO_BYPASS)); + if (ret) + return ret; ret = regmap_write(data->regmap, ADXL313_REG_INT_ENABLE, 0); if (ret)
diff --git a/drivers/iio/accel/adxl355_core.c b/drivers/iio/accel/adxl355_core.c index 1c1d64d..8f90c58 100644 --- a/drivers/iio/accel/adxl355_core.c +++ b/drivers/iio/accel/adxl355_core.c
@@ -745,7 +745,7 @@ static const struct iio_chan_spec adxl355_channels[] = { BIT(IIO_CHAN_INFO_OFFSET), .scan_index = 3, .scan_type = { - .sign = 's', + .sign = 'u', .realbits = 12, .storagebits = 16, .endianness = IIO_BE,
diff --git a/drivers/iio/accel/adxl380.c b/drivers/iio/accel/adxl380.c index 8fab2fd..a51d1d6 100644 --- a/drivers/iio/accel/adxl380.c +++ b/drivers/iio/accel/adxl380.c
@@ -877,7 +877,7 @@ static int adxl380_set_fifo_samples(struct adxl380_state *st) ret = regmap_update_bits(st->regmap, ADXL380_FIFO_CONFIG_0_REG, ADXL380_FIFO_SAMPLES_8_MSK, FIELD_PREP(ADXL380_FIFO_SAMPLES_8_MSK, - (fifo_samples & BIT(8)))); + !!(fifo_samples & BIT(8)))); if (ret) return ret;
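The adxl380 one-liner above is a single-bit FIELD_PREP() pitfall: for a runtime value, FIELD_PREP() masks the shifted value to the field width, so passing (fifo_samples & BIT(8)), i.e. 0x100, into a one-bit field silently yields 0. Collapsing to 0/1 with !! first is the usual fix. A minimal illustration with a hypothetical mask name:

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define EXAMPLE_SAMPLES_8_MSK   BIT(0)  /* hypothetical one-bit field */

static u8 example_cfg(unsigned int fifo_samples)
{
        /*
         * FIELD_PREP(BIT(0), 0x100) == 0: the value is masked to the
         * one-bit field, so bit 8 of fifo_samples is silently dropped.
         * !! collapses the test to 0 or 1 before field placement.
         */
        return FIELD_PREP(EXAMPLE_SAMPLES_8_MSK, !!(fifo_samples & BIT(8)));
}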
diff --git a/drivers/iio/adc/ad4062.c b/drivers/iio/adc/ad4062.c index dd4ad32..8b03736 100644 --- a/drivers/iio/adc/ad4062.c +++ b/drivers/iio/adc/ad4062.c
@@ -719,10 +719,8 @@ static int ad4062_request_irq(struct iio_dev *indio_dev) } st->gpo_irq[1] = true; - return devm_request_threaded_irq(dev, ret, - ad4062_irq_handler_drdy, - NULL, IRQF_ONESHOT, indio_dev->name, - indio_dev); + return devm_request_irq(dev, ret, ad4062_irq_handler_drdy, + IRQF_NO_THREAD, indio_dev->name, indio_dev); } static const struct iio_trigger_ops ad4062_trigger_ops = { @@ -955,7 +953,7 @@ static int ad4062_write_raw_dispatch(struct ad4062_state *st, int val, int val2, default: return -EINVAL; } -}; +} static int ad4062_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int val,
diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index fcd8aea..e16dede 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c
@@ -531,7 +531,7 @@ static int ad7768_reg_access(struct iio_dev *indio_dev, return ret; } -static void ad7768_fill_scale_tbl(struct iio_dev *dev) +static int ad7768_fill_scale_tbl(struct iio_dev *dev) { struct ad7768_state *st = iio_priv(dev); const struct iio_scan_type *scan_type; @@ -541,6 +541,11 @@ static void ad7768_fill_scale_tbl(struct iio_dev *dev) u64 tmp2; scan_type = iio_get_current_scan_type(dev, &dev->channels[0]); + if (IS_ERR(scan_type)) { + dev_err(&st->spi->dev, "Failed to get scan type.\n"); + return PTR_ERR(scan_type); + } + if (scan_type->sign == 's') val2 = scan_type->realbits - 1; else @@ -565,6 +570,8 @@ static void ad7768_fill_scale_tbl(struct iio_dev *dev) st->scale_tbl[i][0] = tmp0; /* Integer part */ st->scale_tbl[i][1] = abs(tmp1); /* Fractional part */ } + + return 0; } static int ad7768_set_sinc3_dec_rate(struct ad7768_state *st, @@ -669,7 +676,9 @@ static int ad7768_configure_dig_fil(struct iio_dev *dev, } /* Update scale table: scale values vary according to the precision */ - ad7768_fill_scale_tbl(dev); + ret = ad7768_fill_scale_tbl(dev); + if (ret) + return ret; ad7768_fill_samp_freq_tbl(st);
diff --git a/drivers/iio/adc/ade9000.c b/drivers/iio/adc/ade9000.c index db085dc..1abbfdf 100644 --- a/drivers/iio/adc/ade9000.c +++ b/drivers/iio/adc/ade9000.c
@@ -787,7 +787,7 @@ static int ade9000_iio_push_streaming(struct iio_dev *indio_dev) ADE9000_MIDDLE_PAGE_BIT); if (ret) { dev_err_ratelimited(dev, "IRQ0 WFB write fail"); - return IRQ_HANDLED; + return ret; } ade9000_configure_scan(indio_dev, ADE9000_REG_WF_BUFF); @@ -1123,7 +1123,7 @@ static int ade9000_write_raw(struct iio_dev *indio_dev, tmp &= ~ADE9000_PHASE_C_POS_BIT; switch (tmp) { - case ADE9000_REG_AWATTOS: + case ADE9000_REG_AWATT: return regmap_write(st->regmap, ADE9000_ADDR_ADJUST(ADE9000_REG_AWATTOS, chan->channel), val); @@ -1706,6 +1706,10 @@ static int ade9000_probe(struct spi_device *spi) init_completion(&st->reset_completion); + ret = devm_mutex_init(dev, &st->lock); + if (ret) + return ret; + ret = ade9000_request_irq(dev, "irq0", ade9000_irq0_thread, indio_dev); if (ret) return ret; @@ -1718,10 +1722,6 @@ static int ade9000_probe(struct spi_device *spi) if (ret) return ret; - ret = devm_mutex_init(dev, &st->lock); - if (ret) - return ret; - /* External CMOS clock input (optional - crystal can be used instead) */ st->clkin = devm_clk_get_optional_enabled(dev, NULL); if (IS_ERR(st->clkin))
diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c index 4be44c5..83a9885 100644 --- a/drivers/iio/adc/aspeed_adc.c +++ b/drivers/iio/adc/aspeed_adc.c
@@ -415,6 +415,7 @@ static int aspeed_adc_vref_config(struct iio_dev *indio_dev) } adc_engine_control_reg_val = readl(data->base + ASPEED_REG_ENGINE_CONTROL); + adc_engine_control_reg_val &= ~ASPEED_ADC_REF_VOLTAGE; ret = devm_regulator_get_enable_read_voltage(data->dev, "vref"); if (ret < 0 && ret != -ENODEV)
diff --git a/drivers/iio/adc/nxp-sar-adc.c b/drivers/iio/adc/nxp-sar-adc.c index 9efa883..58103bf 100644 --- a/drivers/iio/adc/nxp-sar-adc.c +++ b/drivers/iio/adc/nxp-sar-adc.c
@@ -718,6 +718,10 @@ static int nxp_sar_adc_buffer_software_do_postenable(struct iio_dev *indio_dev) struct nxp_sar_adc *info = iio_priv(indio_dev); int ret; + info->dma_chan = dma_request_chan(indio_dev->dev.parent, "rx"); + if (IS_ERR(info->dma_chan)) + return PTR_ERR(info->dma_chan); + nxp_sar_adc_dma_channels_enable(info, *indio_dev->active_scan_mask); nxp_sar_adc_dma_cfg(info, true); @@ -738,6 +742,7 @@ static int nxp_sar_adc_buffer_software_do_postenable(struct iio_dev *indio_dev) out_dma_channels_disable: nxp_sar_adc_dma_cfg(info, false); nxp_sar_adc_dma_channels_disable(info, *indio_dev->active_scan_mask); + dma_release_channel(info->dma_chan); return ret; } @@ -765,10 +770,6 @@ static int nxp_sar_adc_buffer_postenable(struct iio_dev *indio_dev) unsigned long channel; int ret; - info->dma_chan = dma_request_chan(indio_dev->dev.parent, "rx"); - if (IS_ERR(info->dma_chan)) - return PTR_ERR(info->dma_chan); - info->channels_used = 0; /*
diff --git a/drivers/iio/adc/ti-adc161s626.c b/drivers/iio/adc/ti-adc161s626.c index 28aa6b8..be1cc2e 100644 --- a/drivers/iio/adc/ti-adc161s626.c +++ b/drivers/iio/adc/ti-adc161s626.c
@@ -15,6 +15,7 @@ #include <linux/init.h> #include <linux/err.h> #include <linux/spi/spi.h> +#include <linux/unaligned.h> #include <linux/iio/iio.h> #include <linux/iio/trigger.h> #include <linux/iio/buffer.h> @@ -70,8 +71,7 @@ struct ti_adc_data { u8 read_size; u8 shift; - - u8 buffer[16] __aligned(IIO_DMA_MINALIGN); + u8 buf[3] __aligned(IIO_DMA_MINALIGN); }; static int ti_adc_read_measurement(struct ti_adc_data *data, @@ -80,26 +80,20 @@ static int ti_adc_read_measurement(struct ti_adc_data *data, int ret; switch (data->read_size) { - case 2: { - __be16 buf; - - ret = spi_read(data->spi, (void *) &buf, 2); + case 2: + ret = spi_read(data->spi, data->buf, 2); if (ret) return ret; - *val = be16_to_cpu(buf); + *val = get_unaligned_be16(data->buf); break; - } - case 3: { - __be32 buf; - - ret = spi_read(data->spi, (void *) &buf, 3); + case 3: + ret = spi_read(data->spi, data->buf, 3); if (ret) return ret; - *val = be32_to_cpu(buf) >> 8; + *val = get_unaligned_be24(data->buf); break; - } default: return -EINVAL; } @@ -114,15 +108,20 @@ static irqreturn_t ti_adc_trigger_handler(int irq, void *private) struct iio_poll_func *pf = private; struct iio_dev *indio_dev = pf->indio_dev; struct ti_adc_data *data = iio_priv(indio_dev); - int ret; + struct { + s16 data; + aligned_s64 timestamp; + } scan = { }; + int ret, val; - ret = ti_adc_read_measurement(data, &indio_dev->channels[0], - (int *) &data->buffer); - if (!ret) - iio_push_to_buffers_with_timestamp(indio_dev, - data->buffer, - iio_get_time_ns(indio_dev)); + ret = ti_adc_read_measurement(data, &indio_dev->channels[0], &val); + if (ret) + goto exit_notify_done; + scan.data = val; + iio_push_to_buffers_with_timestamp(indio_dev, &scan, iio_get_time_ns(indio_dev)); + + exit_notify_done: iio_trigger_notify_done(indio_dev->trig); return IRQ_HANDLED;
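The ti-adc161s626 conversion above follows the usual IIO recipe for triggered-buffer pushes: an on-stack scan record whose timestamp slot is forced to 8-byte alignment with aligned_s64, filled and handed to iio_push_to_buffers_with_timestamp(). A generic sketch of the shape (field and function names illustrative):

#include <linux/iio/buffer.h>
#include <linux/iio/iio.h>
#include <linux/interrupt.h>
#include <linux/types.h>

static irqreturn_t example_trigger_handler_body(struct iio_dev *indio_dev,
                                                int raw_sample)
{
        struct {
                s16 data;               /* one 16-bit channel */
                aligned_s64 timestamp;  /* naturally aligned slot */
        } scan = { };

        scan.data = raw_sample;
        /* Copies the scan bytes and fills the timestamp slot for us. */
        iio_push_to_buffers_with_timestamp(indio_dev, &scan,
                                           iio_get_time_ns(indio_dev));
        return IRQ_HANDLED;
}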
diff --git a/drivers/iio/adc/ti-ads1018.c b/drivers/iio/adc/ti-ads1018.c index 6246b3c..0780abd 100644 --- a/drivers/iio/adc/ti-ads1018.c +++ b/drivers/iio/adc/ti-ads1018.c
@@ -249,7 +249,7 @@ static int ads1018_single_shot(struct ads1018 *ads1018, struct iio_chan_spec const *chan, u16 *cnv) { u8 max_drate_mode = ads1018->chip_info->num_data_rate_mode_to_hz - 1; - u8 drate = ads1018->chip_info->data_rate_mode_to_hz[max_drate_mode]; + u32 drate = ads1018->chip_info->data_rate_mode_to_hz[max_drate_mode]; u8 pga_mode = ads1018->chan_data[chan->scan_index].pga_mode; struct spi_transfer xfer[2] = { {
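The ads1018 fix above is plain integer truncation: sample rates are in Hz and can exceed 255, so a u8 wraps them, and any delay derived from the rate is then off by more than an order of magnitude. Assuming a 3300 Hz top rate (the rate table itself is not shown in this hunk):

#include <linux/printk.h>
#include <linux/types.h>

static void example_truncation(u32 rate_hz)
{
        u8 bad = rate_hz;       /* 3300 wraps to 3300 % 256 == 228 */
        u32 good = rate_hz;     /* stays 3300 */

        /*
         * A per-sample wait of USEC_PER_SEC / rate then becomes
         * 1000000 / 228 = 4385 us instead of 1000000 / 3300 = 303 us.
         */
        pr_debug("bad=%u good=%u\n", bad, good);
}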
diff --git a/drivers/iio/adc/ti-ads1119.c b/drivers/iio/adc/ti-ads1119.c index c9cedc5..79be71b 100644 --- a/drivers/iio/adc/ti-ads1119.c +++ b/drivers/iio/adc/ti-ads1119.c
@@ -274,12 +274,15 @@ static int ads1119_single_conversion(struct ads1119_state *st, ret = pm_runtime_resume_and_get(dev); if (ret) - goto pdown; + return ret; ret = ads1119_configure_channel(st, mux, gain, datarate); if (ret) goto pdown; + if (st->client->irq) + reinit_completion(&st->completion); + ret = i2c_smbus_write_byte(st->client, ADS1119_CMD_START_SYNC); if (ret) goto pdown; @@ -735,10 +738,8 @@ static int ads1119_probe(struct i2c_client *client) return dev_err_probe(dev, ret, "Failed to setup IIO buffer\n"); if (client->irq > 0) { - ret = devm_request_threaded_irq(dev, client->irq, - ads1119_irq_handler, - NULL, IRQF_ONESHOT, - "ads1119", indio_dev); + ret = devm_request_irq(dev, client->irq, ads1119_irq_handler, + IRQF_NO_THREAD, "ads1119", indio_dev); if (ret) return dev_err_probe(dev, ret, "Failed to allocate irq\n");
diff --git a/drivers/iio/adc/ti-ads7950.c b/drivers/iio/adc/ti-ads7950.c index bbe1ce5..cdc6248 100644 --- a/drivers/iio/adc/ti-ads7950.c +++ b/drivers/iio/adc/ti-ads7950.c
@@ -427,13 +427,15 @@ static int ti_ads7950_set(struct gpio_chip *chip, unsigned int offset, static int ti_ads7950_get(struct gpio_chip *chip, unsigned int offset) { struct ti_ads7950_state *st = gpiochip_get_data(chip); + bool state; int ret; mutex_lock(&st->slock); /* If set as output, return the output */ if (st->gpio_cmd_settings_bitmask & BIT(offset)) { - ret = st->cmd_settings_bitmask & BIT(offset); + state = st->cmd_settings_bitmask & BIT(offset); + ret = 0; goto out; } @@ -444,7 +446,7 @@ static int ti_ads7950_get(struct gpio_chip *chip, unsigned int offset) if (ret) goto out; - ret = ((st->single_rx >> 12) & BIT(offset)) ? 1 : 0; + state = (st->single_rx >> 12) & BIT(offset); /* Revert back to original settings */ st->cmd_settings_bitmask &= ~TI_ADS7950_CR_GPIO_DATA; @@ -456,7 +458,7 @@ static int ti_ads7950_get(struct gpio_chip *chip, unsigned int offset) out: mutex_unlock(&st->slock); - return ret; + return ret ?: state; } static int ti_ads7950_get_direction(struct gpio_chip *chip,
diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c index 70f81c4..24e0b59 100644 --- a/drivers/iio/chemical/bme680_core.c +++ b/drivers/iio/chemical/bme680_core.c
@@ -613,7 +613,7 @@ static int bme680_wait_for_eoc(struct bme680_data *data) * + heater duration */ int wait_eoc_us = ((data->oversampling_temp + data->oversampling_press + - data->oversampling_humid) * 1936) + (477 * 4) + + data->oversampling_humid) * 1963) + (477 * 4) + (477 * 5) + 1000 + (data->heater_dur * 1000); fsleep(wait_eoc_us);
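The bme680 change corrects the per-sample conversion time used in the end-of-conversion wait to 1963 us (the old 1936 waited slightly too little per oversampled T/P/H sample). Worked through for an assumed 2x/2x/1x oversampling and a 100 ms heater pulse, the formula gives 115108 us:

/* Shape of the driver's wait: oversampled samples at 1963 us each. */
static unsigned int example_wait_eoc_us(unsigned int osrs_t,
                                        unsigned int osrs_p,
                                        unsigned int osrs_h,
                                        unsigned int heater_dur_ms)
{
        /* e.g. (2 + 2 + 1) * 1963 + 1908 + 2385 + 1000 + 100000 = 115108 */
        return (osrs_t + osrs_p + osrs_h) * 1963 + (477 * 4) + (477 * 5) +
               1000 + heater_dur_ms * 1000;
}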
diff --git a/drivers/iio/chemical/sps30_i2c.c b/drivers/iio/chemical/sps30_i2c.c index f692c08..c92f049 100644 --- a/drivers/iio/chemical/sps30_i2c.c +++ b/drivers/iio/chemical/sps30_i2c.c
@@ -171,7 +171,7 @@ static int sps30_i2c_read_meas(struct sps30_state *state, __be32 *meas, size_t n if (!sps30_i2c_meas_ready(state)) return -ETIMEDOUT; - return sps30_i2c_command(state, SPS30_I2C_READ_MEAS, NULL, 0, meas, sizeof(num) * num); + return sps30_i2c_command(state, SPS30_I2C_READ_MEAS, NULL, 0, meas, sizeof(*meas) * num); } static int sps30_i2c_clean_fan(struct sps30_state *state)
diff --git a/drivers/iio/chemical/sps30_serial.c b/drivers/iio/chemical/sps30_serial.c index 008bc88..a5e6bc0 100644 --- a/drivers/iio/chemical/sps30_serial.c +++ b/drivers/iio/chemical/sps30_serial.c
@@ -303,7 +303,7 @@ static int sps30_serial_read_meas(struct sps30_state *state, __be32 *meas, size_ if (msleep_interruptible(1000)) return -EINTR; - ret = sps30_serial_command(state, SPS30_SERIAL_READ_MEAS, NULL, 0, meas, num * sizeof(num)); + ret = sps30_serial_command(state, SPS30_SERIAL_READ_MEAS, NULL, 0, meas, num * sizeof(*meas)); if (ret < 0) return ret; /* if measurements aren't ready sensor returns empty frame */
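Both sps30 fixes (i2c and serial) above are the same sizeof slip: sizeof(num) measures the size_t count variable, 8 bytes on 64-bit, while the buffer holds num big-endian 32-bit values, sizeof(*meas) == 4 apiece. Reduced to its essence:

#include <linux/types.h>

static size_t example_meas_bytes(const __be32 *meas, size_t num)
{
        size_t wrong = sizeof(num) * num;   /* sizeof(size_t): 8 * num on 64-bit */
        size_t right = sizeof(*meas) * num; /* sizeof(__be32): 4 * num */

        (void)wrong;
        return right;
}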
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c index 5540e2d2..417c4ab 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
@@ -14,6 +14,7 @@ #include <linux/iio/triggered_buffer.h> #include <linux/iio/trigger_consumer.h> #include <linux/iio/sysfs.h> +#include <linux/iio/kfifo_buf.h> #include "hid-sensor-trigger.h" static ssize_t _hid_sensor_set_report_latency(struct device *dev, @@ -202,12 +203,21 @@ static void hid_sensor_set_power_work(struct work_struct *work) _hid_sensor_power_state(attrb, true); } -static int hid_sensor_data_rdy_trigger_set_state(struct iio_trigger *trig, - bool state) +static int buffer_postenable(struct iio_dev *indio_dev) { - return hid_sensor_power_state(iio_trigger_get_drvdata(trig), state); + return hid_sensor_power_state(iio_device_get_drvdata(indio_dev), 1); } +static int buffer_predisable(struct iio_dev *indio_dev) +{ + return hid_sensor_power_state(iio_device_get_drvdata(indio_dev), 0); +} + +static const struct iio_buffer_setup_ops hid_sensor_buffer_ops = { + .postenable = buffer_postenable, + .predisable = buffer_predisable, +}; + void hid_sensor_remove_trigger(struct iio_dev *indio_dev, struct hid_sensor_common *attrb) { @@ -219,14 +229,9 @@ void hid_sensor_remove_trigger(struct iio_dev *indio_dev, cancel_work_sync(&attrb->work); iio_trigger_unregister(attrb->trigger); iio_trigger_free(attrb->trigger); - iio_triggered_buffer_cleanup(indio_dev); } EXPORT_SYMBOL_NS(hid_sensor_remove_trigger, "IIO_HID"); -static const struct iio_trigger_ops hid_sensor_trigger_ops = { - .set_trigger_state = &hid_sensor_data_rdy_trigger_set_state, -}; - int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name, struct hid_sensor_common *attrb) { @@ -239,25 +244,34 @@ int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name, else fifo_attrs = NULL; - ret = iio_triggered_buffer_setup_ext(indio_dev, - &iio_pollfunc_store_time, NULL, - IIO_BUFFER_DIRECTION_IN, - NULL, fifo_attrs); + indio_dev->modes = INDIO_DIRECT_MODE | INDIO_HARDWARE_TRIGGERED; + + ret = devm_iio_kfifo_buffer_setup_ext(&indio_dev->dev, indio_dev, + &hid_sensor_buffer_ops, + fifo_attrs); if (ret) { - dev_err(&indio_dev->dev, "Triggered Buffer Setup Failed\n"); + dev_err(&indio_dev->dev, "Kfifo Buffer Setup Failed\n"); return ret; } + /* + * The current user space in distro "iio-sensor-proxy" is not working in + * triggerless mode and it expects + * /sys/bus/iio/devices/iio:device0/trigger/current_trigger. + * The change replacing iio_triggered_buffer_setup_ext() with + * devm_iio_kfifo_buffer_setup_ext() will not create the attribute without + * registering a trigger with INDIO_HARDWARE_TRIGGERED. + * So the below code fragment is still required. + */ + trig = iio_trigger_alloc(indio_dev->dev.parent, "%s-dev%d", name, iio_device_id(indio_dev)); if (trig == NULL) { dev_err(&indio_dev->dev, "Trigger Allocate Failed\n"); - ret = -ENOMEM; - goto error_triggered_buffer_cleanup; + return -ENOMEM; } iio_trigger_set_drvdata(trig, attrb); - trig->ops = &hid_sensor_trigger_ops; ret = iio_trigger_register(trig); if (ret) { @@ -284,8 +298,6 @@ int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name, iio_trigger_unregister(trig); error_free_trig: iio_trigger_free(trig); -error_triggered_buffer_cleanup: - iio_triggered_buffer_cleanup(indio_dev); return ret; } EXPORT_SYMBOL_NS(hid_sensor_setup_trigger, "IIO_HID");
diff --git a/drivers/iio/dac/ad5770r.c b/drivers/iio/dac/ad5770r.c index cd47cb1..6027e8d 100644 --- a/drivers/iio/dac/ad5770r.c +++ b/drivers/iio/dac/ad5770r.c
@@ -322,7 +322,7 @@ static int ad5770r_read_raw(struct iio_dev *indio_dev, chan->address, st->transf_buf, 2); if (ret) - return 0; + return ret; buf16 = get_unaligned_le16(st->transf_buf); *val = buf16 >> 2;
diff --git a/drivers/iio/dac/ds4424.c b/drivers/iio/dac/ds4424.c index 6dda891..c61868f 100644 --- a/drivers/iio/dac/ds4424.c +++ b/drivers/iio/dac/ds4424.c
@@ -140,7 +140,7 @@ static int ds4424_write_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - if (val < S8_MIN || val > S8_MAX) + if (val <= S8_MIN || val > S8_MAX) return -EINVAL; if (val > 0) {
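The likely rationale for the tightened ds4424 bound: negative codes are negated into an unsigned magnitude field, and with a 7-bit magnitude the representable range is -127..127, so -S8_MIN == 128 does not fit and -128 must now be rejected as well. A sketch of such a sign-magnitude encode (the sign-bit position here is illustrative, not the part's documented layout):

#include <linux/bits.h>
#include <linux/errno.h>
#include <linux/limits.h>
#include <linux/types.h>

static int example_encode_sign_magnitude(int val, u8 *out)
{
        if (val <= S8_MIN || val > S8_MAX)
                return -EINVAL;         /* -128 has no representation */
        if (val < 0)
                *out = BIT(7) | (u8)-val;   /* sign bit + |val| <= 127 */
        else
                *out = val;
        return 0;
}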
diff --git a/drivers/iio/dac/mcp47feb02.c b/drivers/iio/dac/mcp47feb02.c index b218f0c..faccb80 100644 --- a/drivers/iio/dac/mcp47feb02.c +++ b/drivers/iio/dac/mcp47feb02.c
@@ -65,7 +65,7 @@ #define MCP47FEB02_MAX_SCALES_CH 3 #define MCP47FEB02_DAC_WIPER_UNLOCKED 0 #define MCP47FEB02_NORMAL_OPERATION 0 -#define MCP47FEB02_INTERNAL_BAND_GAP_mV 2440 +#define MCP47FEB02_INTERNAL_BAND_GAP_uV 2440000 #define NV_DAC_ADDR_OFFSET 0x10 enum mcp47feb02_vref_mode { @@ -697,44 +697,40 @@ static const struct iio_chan_spec mcp47febxx_ch_template = { }; static void mcp47feb02_init_scale(struct mcp47feb02_data *data, enum mcp47feb02_scale scale, - int vref_mV, int scale_avail[]) + int vref_uV, int scale_avail[]) { u32 value_micro, value_int; u64 tmp; - /* vref_mV should not be negative */ - tmp = (u64)vref_mV * MICRO >> data->chip_features->resolution; + /* vref_uV should not be negative */ + tmp = (u64)vref_uV * MILLI >> data->chip_features->resolution; value_int = div_u64_rem(tmp, MICRO, &value_micro); scale_avail[scale * 2] = value_int; scale_avail[scale * 2 + 1] = value_micro; } -static int mcp47feb02_init_scales_avail(struct mcp47feb02_data *data, int vdd_mV, - int vref_mV, int vref1_mV) +static int mcp47feb02_init_scales_avail(struct mcp47feb02_data *data, int vdd_uV, + int vref_uV, int vref1_uV) { - struct device *dev = regmap_get_device(data->regmap); int tmp_vref; - mcp47feb02_init_scale(data, MCP47FEB02_SCALE_VDD, vdd_mV, data->scale); + mcp47feb02_init_scale(data, MCP47FEB02_SCALE_VDD, vdd_uV, data->scale); if (data->use_vref) - tmp_vref = vref_mV; + tmp_vref = vref_uV; else - tmp_vref = MCP47FEB02_INTERNAL_BAND_GAP_mV; + tmp_vref = MCP47FEB02_INTERNAL_BAND_GAP_uV; mcp47feb02_init_scale(data, MCP47FEB02_SCALE_GAIN_X1, tmp_vref, data->scale); mcp47feb02_init_scale(data, MCP47FEB02_SCALE_GAIN_X2, tmp_vref * 2, data->scale); if (data->phys_channels >= 4) { - mcp47feb02_init_scale(data, MCP47FEB02_SCALE_VDD, vdd_mV, data->scale_1); - - if (data->use_vref1 && vref1_mV <= 0) - return dev_err_probe(dev, vref1_mV, "Invalid voltage for Vref1\n"); + mcp47feb02_init_scale(data, MCP47FEB02_SCALE_VDD, vdd_uV, data->scale_1); if (data->use_vref1) - tmp_vref = vref1_mV; + tmp_vref = vref1_uV; else - tmp_vref = MCP47FEB02_INTERNAL_BAND_GAP_mV; + tmp_vref = MCP47FEB02_INTERNAL_BAND_GAP_uV; mcp47feb02_init_scale(data, MCP47FEB02_SCALE_GAIN_X1, tmp_vref, data->scale_1); @@ -955,8 +951,6 @@ static int mcp47feb02_parse_fw(struct iio_dev *indio_dev, u32 num_channels; u8 chan_idx = 0; - guard(mutex)(&data->lock); - num_channels = device_get_child_node_count(dev); if (num_channels > chip_features->phys_channels) return dev_err_probe(dev, -EINVAL, "More channels than the chip supports\n"); @@ -1080,8 +1074,8 @@ static int mcp47feb02_init_ctrl_regs(struct mcp47feb02_data *data) return 0; } -static int mcp47feb02_init_ch_scales(struct mcp47feb02_data *data, int vdd_mV, - int vref_mV, int vref1_mV) +static int mcp47feb02_init_ch_scales(struct mcp47feb02_data *data, int vdd_uV, + int vref_uV, int vref1_uV) { unsigned int i; @@ -1089,7 +1083,7 @@ static int mcp47feb02_init_ch_scales(struct mcp47feb02_data *data, int vdd_mV, struct device *dev = regmap_get_device(data->regmap); int ret; - ret = mcp47feb02_init_scales_avail(data, vdd_mV, vref_mV, vref1_mV); + ret = mcp47feb02_init_scales_avail(data, vdd_uV, vref_uV, vref1_uV); if (ret) return dev_err_probe(dev, ret, "failed to init scales for ch %u\n", i); } @@ -1103,10 +1097,7 @@ static int mcp47feb02_probe(struct i2c_client *client) struct device *dev = &client->dev; struct mcp47feb02_data *data; struct iio_dev *indio_dev; - int vref1_mV = 0; - int vref_mV = 0; - int vdd_mV; - int ret; + int vref1_uV, vref_uV, vdd_uV, ret; indio_dev = 
devm_iio_device_alloc(dev, sizeof(*data)); if (!indio_dev) @@ -1143,13 +1134,14 @@ static int mcp47feb02_probe(struct i2c_client *client) if (ret < 0) return ret; - vdd_mV = ret / MILLI; + vdd_uV = ret; ret = devm_regulator_get_enable_read_voltage(dev, "vref"); if (ret > 0) { - vref_mV = ret / MILLI; + vref_uV = ret; data->use_vref = true; } else { + vref_uV = 0; dev_dbg(dev, "using internal band gap as voltage reference.\n"); dev_dbg(dev, "Vref is unavailable.\n"); } @@ -1157,9 +1149,10 @@ static int mcp47feb02_probe(struct i2c_client *client) if (chip_features->have_ext_vref1) { ret = devm_regulator_get_enable_read_voltage(dev, "vref1"); if (ret > 0) { - vref1_mV = ret / MILLI; + vref1_uV = ret; data->use_vref1 = true; } else { + vref1_uV = 0; dev_dbg(dev, "using internal band gap as voltage reference 1.\n"); dev_dbg(dev, "Vref1 is unavailable.\n"); } @@ -1169,7 +1162,7 @@ static int mcp47feb02_probe(struct i2c_client *client) if (ret) return dev_err_probe(dev, ret, "Error initialising vref register\n"); - ret = mcp47feb02_init_ch_scales(data, vdd_mV, vref_mV, vref1_mV); + ret = mcp47feb02_init_ch_scales(data, vdd_uV, vref_uV, vref1_uV); if (ret) return ret;
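With mcp47feb02 now carrying reference voltages in uV end to end, the scale math multiplies by MILLI rather than MICRO before the resolution shift. Worked for the 2.44 V internal band gap at an assumed 12-bit resolution: 2440000 * 1000 >> 12 = 595703, which div_u64_rem() splits into integer part 0 and fractional part 595703, i.e. about 0.5957 mV per LSB:

#include <linux/math64.h>
#include <linux/types.h>
#include <linux/units.h>

static void example_dac_scale(u32 vref_uV, unsigned int resolution,
                              int scale_avail[2])
{
        u32 micro;
        u64 tmp;

        /* 2440000 uV * 1000 >> 12 == 595703 -> scale 0.595703 mV/LSB */
        tmp = (u64)vref_uV * MILLI >> resolution;
        scale_avail[0] = div_u64_rem(tmp, MICRO, &micro);
        scale_avail[1] = micro;
}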
diff --git a/drivers/iio/frequency/adf4377.c b/drivers/iio/frequency/adf4377.c index fa686f7..8e2da21 100644 --- a/drivers/iio/frequency/adf4377.c +++ b/drivers/iio/frequency/adf4377.c
@@ -508,7 +508,7 @@ static int adf4377_soft_reset(struct adf4377_state *st) return ret; return regmap_read_poll_timeout(st->regmap, 0x0, read_val, - !(read_val & (ADF4377_0000_SOFT_RESET_R_MSK | + !(read_val & (ADF4377_0000_SOFT_RESET_MSK | ADF4377_0000_SOFT_RESET_R_MSK)), 200, 200 * 100); }
diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c index ee2fcd2..d84e04e 100644 --- a/drivers/iio/gyro/mpu3050-core.c +++ b/drivers/iio/gyro/mpu3050-core.c
@@ -322,7 +322,9 @@ static int mpu3050_read_raw(struct iio_dev *indio_dev, } case IIO_CHAN_INFO_RAW: /* Resume device */ - pm_runtime_get_sync(mpu3050->dev); + ret = pm_runtime_resume_and_get(mpu3050->dev); + if (ret) + return ret; mutex_lock(&mpu3050->lock); ret = mpu3050_set_8khz_samplerate(mpu3050); @@ -647,14 +649,20 @@ static irqreturn_t mpu3050_trigger_handler(int irq, void *p) static int mpu3050_buffer_preenable(struct iio_dev *indio_dev) { struct mpu3050 *mpu3050 = iio_priv(indio_dev); + int ret; - pm_runtime_get_sync(mpu3050->dev); + ret = pm_runtime_resume_and_get(mpu3050->dev); + if (ret) + return ret; /* Unless we have OUR trigger active, run at full speed */ - if (!mpu3050->hw_irq_trigger) - return mpu3050_set_8khz_samplerate(mpu3050); + if (!mpu3050->hw_irq_trigger) { + ret = mpu3050_set_8khz_samplerate(mpu3050); + if (ret) + pm_runtime_put_autosuspend(mpu3050->dev); + } - return 0; + return ret; } static int mpu3050_buffer_postdisable(struct iio_dev *indio_dev) @@ -1121,11 +1129,16 @@ static int mpu3050_trigger_probe(struct iio_dev *indio_dev, int irq) ret = iio_trigger_register(mpu3050->trig); if (ret) - return ret; + goto err_iio_trigger; indio_dev->trig = iio_trigger_get(mpu3050->trig); return 0; + +err_iio_trigger: + free_irq(mpu3050->irq, mpu3050->trig); + + return ret; } int mpu3050_common_probe(struct device *dev, @@ -1213,12 +1226,6 @@ int mpu3050_common_probe(struct device *dev, goto err_power_down; } - ret = iio_device_register(indio_dev); - if (ret) { - dev_err(dev, "device register failed\n"); - goto err_cleanup_buffer; - } - dev_set_drvdata(dev, indio_dev); /* Check if we have an assigned IRQ to use as trigger */ @@ -1241,9 +1248,20 @@ int mpu3050_common_probe(struct device *dev, pm_runtime_use_autosuspend(dev); pm_runtime_put(dev); + ret = iio_device_register(indio_dev); + if (ret) { + dev_err(dev, "device register failed\n"); + goto err_iio_device_register; + } + return 0; -err_cleanup_buffer: +err_iio_device_register: + pm_runtime_get_sync(dev); + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); + if (irq) + free_irq(mpu3050->irq, mpu3050->trig); iio_triggered_buffer_cleanup(indio_dev); err_power_down: mpu3050_power_down(mpu3050); @@ -1256,13 +1274,13 @@ void mpu3050_common_remove(struct device *dev) struct iio_dev *indio_dev = dev_get_drvdata(dev); struct mpu3050 *mpu3050 = iio_priv(indio_dev); + iio_device_unregister(indio_dev); pm_runtime_get_sync(dev); pm_runtime_put_noidle(dev); pm_runtime_disable(dev); - iio_triggered_buffer_cleanup(indio_dev); if (mpu3050->irq) - free_irq(mpu3050->irq, mpu3050); - iio_device_unregister(indio_dev); + free_irq(mpu3050->irq, mpu3050->trig); + iio_triggered_buffer_cleanup(indio_dev); mpu3050_power_down(mpu3050); }
diff --git a/drivers/iio/gyro/mpu3050-i2c.c b/drivers/iio/gyro/mpu3050-i2c.c index 092878f..6549b22 100644 --- a/drivers/iio/gyro/mpu3050-i2c.c +++ b/drivers/iio/gyro/mpu3050-i2c.c
@@ -19,8 +19,7 @@ static int mpu3050_i2c_bypass_select(struct i2c_mux_core *mux, u32 chan_id) struct mpu3050 *mpu3050 = i2c_mux_priv(mux); /* Just power up the device, that is all that is needed */ - pm_runtime_get_sync(mpu3050->dev); - return 0; + return pm_runtime_resume_and_get(mpu3050->dev); } static int mpu3050_i2c_bypass_deselect(struct i2c_mux_core *mux, u32 chan_id)
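The mpu3050 hunks above (core and i2c mux) swap pm_runtime_get_sync(), which leaves a usage reference behind even when the resume fails, for pm_runtime_resume_and_get(), which drops the reference itself on failure so callers can simply propagate the error. The resulting call shape, as a sketch:

#include <linux/pm_runtime.h>

static int example_read(struct device *dev)
{
        int ret;

        ret = pm_runtime_resume_and_get(dev);
        if (ret)
                return ret;     /* no put needed: reference already dropped */

        /* ... talk to the resumed hardware ... */

        pm_runtime_put_autosuspend(dev);
        return 0;
}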
diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c index d160147c..a2bc1d1 100644 --- a/drivers/iio/imu/adis.c +++ b/drivers/iio/imu/adis.c
@@ -526,7 +526,7 @@ int adis_init(struct adis *adis, struct iio_dev *indio_dev, adis->spi = spi; adis->data = data; - if (!adis->ops->write && !adis->ops->read && !adis->ops->reset) + if (!adis->ops) adis->ops = &adis_default_ops; else if (!adis->ops->write || !adis->ops->read || !adis->ops->reset) return -EINVAL;
diff --git a/drivers/iio/imu/adis16550.c b/drivers/iio/imu/adis16550.c index 28f0dbd..1f2af50 100644 --- a/drivers/iio/imu/adis16550.c +++ b/drivers/iio/imu/adis16550.c
@@ -643,12 +643,12 @@ static int adis16550_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY: switch (chan->type) { case IIO_ANGL_VEL: - ret = adis16550_get_accl_filter_freq(st, val); + ret = adis16550_get_gyro_filter_freq(st, val); if (ret) return ret; return IIO_VAL_INT; case IIO_ACCEL: - ret = adis16550_get_gyro_filter_freq(st, val); + ret = adis16550_get_accl_filter_freq(st, val); if (ret) return ret; return IIO_VAL_INT; @@ -681,9 +681,9 @@ static int adis16550_write_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY: switch (chan->type) { case IIO_ANGL_VEL: - return adis16550_set_accl_filter_freq(st, val); - case IIO_ACCEL: return adis16550_set_gyro_filter_freq(st, val); + case IIO_ACCEL: + return adis16550_set_accl_filter_freq(st, val); default: return -EINVAL; }
diff --git a/drivers/iio/imu/bmi160/bmi160_core.c b/drivers/iio/imu/bmi160/bmi160_core.c index 5f47708..4abb83b 100644 --- a/drivers/iio/imu/bmi160/bmi160_core.c +++ b/drivers/iio/imu/bmi160/bmi160_core.c
@@ -573,12 +573,16 @@ static int bmi160_config_pin(struct regmap *regmap, enum bmi160_int_pin pin, int_out_ctrl_shift = BMI160_INT1_OUT_CTRL_SHIFT; int_latch_mask = BMI160_INT1_LATCH_MASK; int_map_mask = BMI160_INT1_MAP_DRDY_EN; + pin_name = "INT1"; break; case BMI160_PIN_INT2: int_out_ctrl_shift = BMI160_INT2_OUT_CTRL_SHIFT; int_latch_mask = BMI160_INT2_LATCH_MASK; int_map_mask = BMI160_INT2_MAP_DRDY_EN; + pin_name = "INT2"; break; + default: + return -EINVAL; } int_out_ctrl_mask = BMI160_INT_OUT_CTRL_MASK << int_out_ctrl_shift; @@ -612,17 +616,8 @@ static int bmi160_config_pin(struct regmap *regmap, enum bmi160_int_pin pin, ret = bmi160_write_conf_reg(regmap, BMI160_REG_INT_MAP, int_map_mask, int_map_mask, write_usleep); - if (ret) { - switch (pin) { - case BMI160_PIN_INT1: - pin_name = "INT1"; - break; - case BMI160_PIN_INT2: - pin_name = "INT2"; - break; - } + if (ret) dev_err(dev, "Failed to configure %s IRQ pin", pin_name); - } return ret; }
diff --git a/drivers/iio/imu/bno055/bno055.c b/drivers/iio/imu/bno055/bno055.c index 303bc308f..c96fec2 100644 --- a/drivers/iio/imu/bno055/bno055.c +++ b/drivers/iio/imu/bno055/bno055.c
@@ -64,7 +64,7 @@ #define BNO055_GRAVITY_DATA_X_LSB_REG 0x2E #define BNO055_GRAVITY_DATA_Y_LSB_REG 0x30 #define BNO055_GRAVITY_DATA_Z_LSB_REG 0x32 -#define BNO055_SCAN_CH_COUNT ((BNO055_GRAVITY_DATA_Z_LSB_REG - BNO055_ACC_DATA_X_LSB_REG) / 2) +#define BNO055_SCAN_CH_COUNT ((BNO055_GRAVITY_DATA_Z_LSB_REG - BNO055_ACC_DATA_X_LSB_REG) / 2 + 1) #define BNO055_TEMP_REG 0x34 #define BNO055_CALIB_STAT_REG 0x35 #define BNO055_CALIB_STAT_MAGN_SHIFT 0
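The bno055 macro change is a fencepost correction: the scan channels are 16-bit values spaced two registers apart, spanning from the first accelerometer LSB register up to and including the last gravity LSB register, so an inclusive count needs the +1. Assuming the accel block starts at 0x08 (that define is outside this hunk), the count becomes (0x32 - 0x08) / 2 + 1 = 22 rather than 21:

#define EXAMPLE_FIRST_REG       0x08    /* assumed BNO055_ACC_DATA_X_LSB_REG */
#define EXAMPLE_LAST_REG        0x32    /* BNO055_GRAVITY_DATA_Z_LSB_REG */
/* Inclusive count of 2-byte channels: (0x32 - 0x08) / 2 + 1 == 22 */
#define EXAMPLE_SCAN_CH_COUNT   ((EXAMPLE_LAST_REG - EXAMPLE_FIRST_REG) / 2 + 1)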
diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c index 54760d8..0ab6edd 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c
@@ -651,6 +651,8 @@ static int inv_icm42600_accel_write_odr(struct iio_dev *indio_dev, return -EINVAL; conf.odr = inv_icm42600_accel_odr_conv[idx / 2]; + if (conf.odr == st->conf.accel.odr) + return 0; pm_runtime_get_sync(dev); mutex_lock(&st->lock);
diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c index ada968b..68a3957 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c
@@ -371,6 +371,8 @@ static int inv_icm42600_buffer_predisable(struct iio_dev *indio_dev) static int inv_icm42600_buffer_postdisable(struct iio_dev *indio_dev) { struct inv_icm42600_state *st = iio_device_get_drvdata(indio_dev); + struct inv_icm42600_sensor_state *sensor_st = iio_priv(indio_dev); + struct inv_sensors_timestamp *ts = &sensor_st->ts; struct device *dev = regmap_get_device(st->map); unsigned int sensor; unsigned int *watermark; @@ -392,6 +394,8 @@ static int inv_icm42600_buffer_postdisable(struct iio_dev *indio_dev) mutex_lock(&st->lock); + inv_sensors_timestamp_apply_odr(ts, 0, 0, 0); + ret = inv_icm42600_buffer_set_fifo_en(st, st->fifo.en & ~sensor); if (ret) goto out_unlock;
diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c index 7ef0a25..11339dd 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c
@@ -358,6 +358,8 @@ static int inv_icm42600_gyro_write_odr(struct iio_dev *indio_dev, return -EINVAL; conf.odr = inv_icm42600_gyro_odr_conv[idx / 2]; + if (conf.odr == st->conf.gyro.odr) + return 0; pm_runtime_get_sync(dev); mutex_lock(&st->lock);
diff --git a/drivers/iio/imu/inv_icm45600/inv_icm45600.h b/drivers/iio/imu/inv_icm45600/inv_icm45600.h index c5b5446..1c796d4 100644 --- a/drivers/iio/imu/inv_icm45600/inv_icm45600.h +++ b/drivers/iio/imu/inv_icm45600/inv_icm45600.h
@@ -205,7 +205,7 @@ struct inv_icm45600_sensor_state { #define INV_ICM45600_SPI_SLEW_RATE_38NS 0 #define INV_ICM45600_REG_INT1_CONFIG2 0x0018 -#define INV_ICM45600_INT1_CONFIG2_PUSH_PULL BIT(2) +#define INV_ICM45600_INT1_CONFIG2_OPEN_DRAIN BIT(2) #define INV_ICM45600_INT1_CONFIG2_LATCHED BIT(1) #define INV_ICM45600_INT1_CONFIG2_ACTIVE_HIGH BIT(0) #define INV_ICM45600_INT1_CONFIG2_ACTIVE_LOW 0x00
diff --git a/drivers/iio/imu/inv_icm45600/inv_icm45600_core.c b/drivers/iio/imu/inv_icm45600/inv_icm45600_core.c index 25bd975..d490531 100644 --- a/drivers/iio/imu/inv_icm45600/inv_icm45600_core.c +++ b/drivers/iio/imu/inv_icm45600/inv_icm45600_core.c
@@ -637,8 +637,8 @@ static int inv_icm45600_irq_init(struct inv_icm45600_state *st, int irq, break; } - if (!open_drain) - val |= INV_ICM45600_INT1_CONFIG2_PUSH_PULL; + if (open_drain) + val |= INV_ICM45600_INT1_CONFIG2_OPEN_DRAIN; ret = regmap_write(st->map, INV_ICM45600_REG_INT1_CONFIG2, val); if (ret) @@ -744,6 +744,11 @@ int inv_icm45600_core_probe(struct regmap *regmap, const struct inv_icm45600_chi */ fsleep(5 * USEC_PER_MSEC); + /* Set pm_runtime active early so the vddio disable cleanup runs on an active device */ + ret = pm_runtime_set_active(dev); + if (ret) + return ret; + ret = inv_icm45600_enable_regulator_vddio(st); if (ret) return ret; @@ -776,7 +781,7 @@ int inv_icm45600_core_probe(struct regmap *regmap, const struct inv_icm45600_chi if (ret) return ret; - ret = devm_pm_runtime_set_active_enabled(dev); + ret = devm_pm_runtime_enable(dev); if (ret) return ret;
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c index b2fa1f4..5796896 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
@@ -1943,6 +1943,14 @@ int inv_mpu_core_probe(struct regmap *regmap, int irq, const char *name, irq_type); return -EINVAL; } + + /* + * Acking interrupts by the status register does not work reliably, + * but seems to work when this bit is set. + */ + if (st->chip_type == INV_MPU9150) + st->irq_mask |= INV_MPU6050_INT_RD_CLEAR; + device_set_wakeup_capable(dev, true); st->vdd_supply = devm_regulator_get(dev, "vdd");
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h index 211901f..6239b1a 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
@@ -390,6 +390,8 @@ struct inv_mpu6050_state { /* enable level triggering */ #define INV_MPU6050_LATCH_INT_EN 0x20 #define INV_MPU6050_BIT_BYPASS_EN 0x2 +/* allow acking interrupts by any register read */ +#define INV_MPU6050_INT_RD_CLEAR 0x10 /* Allowed timestamp period jitter in percent */ #define INV_MPU6050_TS_PERIOD_JITTER 4
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c index 10a4733..22c1ce6 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c
@@ -248,7 +248,6 @@ static irqreturn_t inv_mpu6050_interrupt_handle(int irq, void *p) switch (st->chip_type) { case INV_MPU6000: case INV_MPU6050: - case INV_MPU9150: /* * WoM is not supported and interrupt status read seems to be broken for * some chips. Since data ready is the only interrupt, bypass interrupt @@ -257,6 +256,10 @@ static irqreturn_t inv_mpu6050_interrupt_handle(int irq, void *p) wom_bits = 0; int_status = INV_MPU6050_BIT_RAW_DATA_RDY_INT; goto data_ready_interrupt; + case INV_MPU9150: + /* IRQ needs to be acked */ + wom_bits = 0; + break; case INV_MPU6500: case INV_MPU6515: case INV_MPU6880:
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c index 55d8777..5b28a3f 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
@@ -225,6 +225,10 @@ static int st_lsm6dsx_set_fifo_odr(struct st_lsm6dsx_sensor *sensor, const struct st_lsm6dsx_reg *batch_reg; u8 data; + /* Only internal sensors have a FIFO ODR configuration register. */ + if (sensor->id >= ARRAY_SIZE(hw->settings->batch)) + return 0; + batch_reg = &hw->settings->batch[sensor->id]; if (batch_reg->addr) { int val; @@ -858,12 +862,21 @@ int st_lsm6dsx_fifo_setup(struct st_lsm6dsx_hw *hw) int i, ret; for (i = 0; i < ST_LSM6DSX_ID_MAX; i++) { + const struct iio_dev_attr **attrs; + if (!hw->iio_devs[i]) continue; + /* + * For the accelerometer, allow setting FIFO sampling frequency + * values different from the sensor sampling frequency, which + * may be needed to keep FIFO data rate low while sampling + * acceleration data at high rates for accurate event detection. + */ + attrs = i == ST_LSM6DSX_ID_ACC ? st_lsm6dsx_buffer_attrs : NULL; ret = devm_iio_kfifo_buffer_setup_ext(hw->dev, hw->iio_devs[i], &st_lsm6dsx_buffer_ops, - st_lsm6dsx_buffer_attrs); + attrs); if (ret) return ret; }
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index f15a180..46f36a6 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c
@@ -228,8 +228,10 @@ static ssize_t iio_buffer_write(struct file *filp, const char __user *buf, written = 0; add_wait_queue(&rb->pollq, &wait); do { - if (!indio_dev->info) - return -ENODEV; + if (!indio_dev->info) { + ret = -ENODEV; + break; + } if (!iio_buffer_space_available(rb)) { if (signal_pending(current)) {
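The industrialio-buffer fix matters because the function has already called add_wait_queue() by the time the NULL info check fires; returning from inside the loop would leave the on-stack wait entry queued on rb->pollq after the frame is gone. Breaking out instead reaches the shared remove_wait_queue() on the normal exit path. The skeleton of the pattern (body elided):

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/wait.h>

static ssize_t example_write(wait_queue_head_t *pollq, bool device_alive)
{
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
        ssize_t ret = 0;

        add_wait_queue(pollq, &wait);
        do {
                if (!device_alive) {
                        ret = -ENODEV;
                        break;  /* not 'return': 'wait' is still queued */
                }
                /* ... wait for room, copy one record ... */
                break;
        } while (true);
        remove_wait_queue(pollq, &wait);

        return ret;
}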
diff --git a/drivers/iio/light/bh1780.c b/drivers/iio/light/bh1780.c index 5d3c6d5..a740d1f 100644 --- a/drivers/iio/light/bh1780.c +++ b/drivers/iio/light/bh1780.c
@@ -109,9 +109,9 @@ static int bh1780_read_raw(struct iio_dev *indio_dev, case IIO_LIGHT: pm_runtime_get_sync(&bh1780->client->dev); value = bh1780_read_word(bh1780, BH1780_REG_DLOW); + pm_runtime_put_autosuspend(&bh1780->client->dev); if (value < 0) return value; - pm_runtime_put_autosuspend(&bh1780->client->dev); *val = value; return IIO_VAL_INT;
diff --git a/drivers/iio/light/vcnl4035.c b/drivers/iio/light/vcnl4035.c index 9637479..16aeb17 100644 --- a/drivers/iio/light/vcnl4035.c +++ b/drivers/iio/light/vcnl4035.c
@@ -103,17 +103,23 @@ static irqreturn_t vcnl4035_trigger_consumer_handler(int irq, void *p) struct iio_dev *indio_dev = pf->indio_dev; struct vcnl4035_data *data = iio_priv(indio_dev); /* Ensure naturally aligned timestamp */ - u8 buffer[ALIGN(sizeof(u16), sizeof(s64)) + sizeof(s64)] __aligned(8) = { }; + struct { + u16 als_data; + aligned_s64 timestamp; + } buffer = { }; + unsigned int val; int ret; - ret = regmap_read(data->regmap, VCNL4035_ALS_DATA, (int *)buffer); + ret = regmap_read(data->regmap, VCNL4035_ALS_DATA, &val); if (ret < 0) { dev_err(&data->client->dev, "Trigger consumer can't read from sensor.\n"); goto fail_read; } - iio_push_to_buffers_with_timestamp(indio_dev, buffer, - iio_get_time_ns(indio_dev)); + + buffer.als_data = val; + iio_push_to_buffers_with_timestamp(indio_dev, &buffer, + iio_get_time_ns(indio_dev)); fail_read: iio_trigger_notify_done(indio_dev->trig); @@ -381,7 +387,7 @@ static const struct iio_chan_spec vcnl4035_channels[] = { .sign = 'u', .realbits = 16, .storagebits = 16, - .endianness = IIO_LE, + .endianness = IIO_CPU, }, }, { @@ -395,7 +401,7 @@ static const struct iio_chan_spec vcnl4035_channels[] = { .sign = 'u', .realbits = 16, .storagebits = 16, - .endianness = IIO_LE, + .endianness = IIO_CPU, }, }, };
diff --git a/drivers/iio/light/veml6070.c b/drivers/iio/light/veml6070.c index 6d4483c..74d7246 100644 --- a/drivers/iio/light/veml6070.c +++ b/drivers/iio/light/veml6070.c
@@ -134,9 +134,7 @@ static int veml6070_read(struct veml6070_data *data) if (ret < 0) return ret; - ret = (msb << 8) | lsb; - - return 0; + return (msb << 8) | lsb; } static const struct iio_chan_spec veml6070_channels[] = {
diff --git a/drivers/iio/magnetometer/Kconfig b/drivers/iio/magnetometer/Kconfig index 9345fb6..fb313e5 100644 --- a/drivers/iio/magnetometer/Kconfig +++ b/drivers/iio/magnetometer/Kconfig
@@ -143,8 +143,7 @@ tristate "MEMSIC MMC5633 3-axis magnetic sensor" select REGMAP_I2C select REGMAP_I3C if I3C - depends on I2C - depends on I3C || !I3C + depends on I3C_OR_I2C help Say yes here to build support for the MEMSIC MMC5633 3-axis magnetic sensor.
diff --git a/drivers/iio/magnetometer/tlv493d.c b/drivers/iio/magnetometer/tlv493d.c index ec53fd4..e5e050a 100644 --- a/drivers/iio/magnetometer/tlv493d.c +++ b/drivers/iio/magnetometer/tlv493d.c
@@ -171,7 +171,7 @@ static s16 tlv493d_get_channel_data(u8 *b, enum tlv493d_channels ch) switch (ch) { case TLV493D_AXIS_X: val = FIELD_GET(TLV493D_BX_MAG_X_AXIS_MSB, b[TLV493D_RD_REG_BX]) << 4 | - FIELD_GET(TLV493D_BX2_MAG_X_AXIS_LSB, b[TLV493D_RD_REG_BX2]) >> 4; + FIELD_GET(TLV493D_BX2_MAG_X_AXIS_LSB, b[TLV493D_RD_REG_BX2]); break; case TLV493D_AXIS_Y: val = FIELD_GET(TLV493D_BY_MAG_Y_AXIS_MSB, b[TLV493D_RD_REG_BY]) << 4 |
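The tlv493d fix: FIELD_GET() already shifts the extracted field down to bit 0, so the extra >> 4 on the X-axis LSB mask discarded those four bits entirely. Illustrated with an assumed high-nibble mask:

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define EXAMPLE_LSB_MSK GENMASK(7, 4)   /* assumed high-nibble field */

static u16 example_extract(u8 reg)
{
        /* For reg = 0xA0, FIELD_GET() yields 0xA; the old extra >> 4 gave 0. */
        return FIELD_GET(EXAMPLE_LSB_MSK, reg);
}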
diff --git a/drivers/iio/orientation/hid-sensor-rotation.c b/drivers/iio/orientation/hid-sensor-rotation.c index e759f91..5a5e6e4 100644 --- a/drivers/iio/orientation/hid-sensor-rotation.c +++ b/drivers/iio/orientation/hid-sensor-rotation.c
@@ -19,8 +19,13 @@ struct dev_rot_state { struct hid_sensor_common common_attributes; struct hid_sensor_hub_attribute_info quaternion; struct { - s32 sampled_vals[4]; - aligned_s64 timestamp; + IIO_DECLARE_QUATERNION(s32, sampled_vals); + /* + * ABI regression avoidance: There are two copies of the same + * timestamp in case userspace depends on the broken alignment + * from older kernels. + */ + aligned_s64 timestamp[2]; } scan; int scale_pre_decml; int scale_post_decml; @@ -154,8 +159,19 @@ static int dev_rot_proc_event(struct hid_sensor_hub_device *hsdev, if (!rot_state->timestamp) rot_state->timestamp = iio_get_time_ns(indio_dev); - iio_push_to_buffers_with_timestamp(indio_dev, &rot_state->scan, - rot_state->timestamp); + /* + * ABI regression avoidance: IIO previously had an incorrect + * implementation of iio_push_to_buffers_with_timestamp() that + * put the timestamp in the last 8 bytes of the buffer, in + * violation of the IIO ABI. To avoid breaking userspace that + * may depend on this broken behavior, we put the timestamp in + * both the correct place [0] and the old incorrect place [1]. + */ + rot_state->scan.timestamp[0] = rot_state->timestamp; + rot_state->scan.timestamp[1] = rot_state->timestamp; + + iio_push_to_buffers(indio_dev, &rot_state->scan); rot_state->timestamp = 0; }
diff --git a/drivers/iio/potentiometer/mcp4131.c b/drivers/iio/potentiometer/mcp4131.c index ad08282..56c9111 100644 --- a/drivers/iio/potentiometer/mcp4131.c +++ b/drivers/iio/potentiometer/mcp4131.c
@@ -221,7 +221,7 @@ static int mcp4131_write_raw(struct iio_dev *indio_dev, mutex_lock(&data->lock); - data->buf[0] = address << MCP4131_WIPER_SHIFT; + data->buf[0] = address; data->buf[0] |= MCP4131_WRITE | (val >> 8); data->buf[1] = val & 0xFF; /* 8 bits here */
diff --git a/drivers/iio/pressure/abp2030pa.c b/drivers/iio/pressure/abp2030pa.c index 4ca056a..b44f1bf 100644 --- a/drivers/iio/pressure/abp2030pa.c +++ b/drivers/iio/pressure/abp2030pa.c
@@ -520,7 +520,7 @@ int abp2_common_probe(struct device *dev, const struct abp2_ops *ops, int irq) data->p_offset = div_s64(odelta * data->pmin, pdelta) - data->outmin; if (data->irq > 0) { - ret = devm_request_irq(dev, irq, abp2_eoc_handler, IRQF_ONESHOT, + ret = devm_request_irq(dev, irq, abp2_eoc_handler, 0, dev_name(dev), data); if (ret) return ret;
diff --git a/drivers/iio/proximity/hx9023s.c b/drivers/iio/proximity/hx9023s.c index 2918dfc..17e00ee 100644 --- a/drivers/iio/proximity/hx9023s.c +++ b/drivers/iio/proximity/hx9023s.c
@@ -719,6 +719,9 @@ static int hx9023s_set_samp_freq(struct hx9023s_data *data, int val, int val2) struct device *dev = regmap_get_device(data->regmap); unsigned int i, period_ms; + if (!val && !val2) + return -EINVAL; + period_ms = div_u64(NANO, (val * MEGA + val2)); for (i = 0; i < ARRAY_SIZE(hx9023s_samp_freq_table); i++) { @@ -1034,9 +1037,8 @@ static int hx9023s_send_cfg(const struct firmware *fw, struct hx9023s_data *data if (!bin) return -ENOMEM; - memcpy(bin->data, fw->data, fw->size); - bin->fw_size = fw->size; + memcpy(bin->data, fw->data, bin->fw_size); bin->fw_ver = bin->data[FW_VER_OFFSET]; bin->reg_count = get_unaligned_le16(bin->data + FW_REG_CNT_OFFSET);
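The new hx9023s guard exists because the requested frequency arrives as an integer part (val) and a micro part (val2); with both parts zero, the divisor of the period computation collapses to zero. A sketch of the arithmetic with simplified types (samp_period_ms() is an illustrative stand-in, not a driver function):

    #include <stdint.h>

    /* period_ms = NANO / (val * MEGA + val2); the sum is the frequency in uHz. */
    static int samp_period_ms(uint32_t val, uint32_t val2, uint64_t *out)
    {
        uint64_t freq_uhz = (uint64_t)val * 1000000 + val2;

        if (!freq_uhz)           /* 0 Hz requested: would divide by zero */
            return -1;           /* the driver returns -EINVAL here */
        *out = 1000000000ULL / freq_uhz;   /* e.g. 10 Hz -> 100 ms */
        return 0;
    }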
diff --git a/drivers/iio/proximity/rfd77402.c b/drivers/iio/proximity/rfd77402.c index 6afdbfc..81b8daf 100644 --- a/drivers/iio/proximity/rfd77402.c +++ b/drivers/iio/proximity/rfd77402.c
@@ -173,10 +173,8 @@ static int rfd77402_wait_for_result(struct rfd77402_data *data) struct i2c_client *client = data->client; int val, ret; - if (data->irq_en) { - reinit_completion(&data->completion); + if (data->irq_en) return rfd77402_wait_for_irq(data); - } /* * As per RFD77402 datasheet section '3.1.1 Single Measure', the @@ -204,6 +202,9 @@ static int rfd77402_measure(struct rfd77402_data *data) if (ret < 0) return ret; + if (data->irq_en) + reinit_completion(&data->completion); + ret = i2c_smbus_write_byte_data(client, RFD77402_CMD_R, RFD77402_CMD_SINGLE | RFD77402_CMD_VALID);
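The rfd77402 reordering matters because the completion has to be re-armed before the measurement is triggered: re-arming it on the wait side leaves a window where an interrupt that fires early calls complete(), and the subsequent reinit_completion() discards that wakeup, so the wait times out. A sketch of the ordering the patch establishes, where start_single_measure() is a hypothetical stand-in for the RFD77402_CMD_SINGLE register write:

    /* Arm first, then trigger: an immediate IRQ can no longer be lost. */
    reinit_completion(&data->completion);
    ret = start_single_measure(data);
    if (ret)
        return ret;
    if (!wait_for_completion_timeout(&data->completion, timeout))
        return -ETIMEDOUT;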
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 794b977..78ac2ff 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig
@@ -6,6 +6,7 @@ depends on INET depends on m || IPV6 != m depends on !ALPHA + select DMA_SHARED_BUFFER select IRQ_POLL select DIMLIB help
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index b415d4a..ee4a2bc 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c
@@ -926,6 +926,13 @@ static int gid_table_setup_one(struct ib_device *ib_dev) if (err) return err; + /* + * Mark the device as ready for GID cache updates. This allows netdev + * event handlers to update the GID cache even before the device is + * fully registered. + */ + ib_device_enable_gid_updates(ib_dev); + rdma_roce_rescan_device(ib_dev); return err; @@ -1637,6 +1644,12 @@ void ib_cache_release_one(struct ib_device *device) void ib_cache_cleanup_one(struct ib_device *device) { + /* + * Clear the GID updates mark first to prevent event handlers from + * accessing the device while it's being torn down. + */ + ib_device_disable_gid_updates(device); + /* The cleanup function waits for all in-progress workqueue * elements and cleans up the GID cache. This function should be * called after the device was removed from the devices list and
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e54c07c..9480d1a 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c
@@ -2729,6 +2729,9 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, *to_destroy = NULL; if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return 0; + if (id_priv->restricted_node_type != RDMA_NODE_UNSPECIFIED && + id_priv->restricted_node_type != cma_dev->device->node_type) + return 0; dev_id_priv = __rdma_create_id(net, cma_listen_handler, id_priv, @@ -2736,6 +2739,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, if (IS_ERR(dev_id_priv)) return PTR_ERR(dev_id_priv); + dev_id_priv->restricted_node_type = id_priv->restricted_node_type; dev_id_priv->state = RDMA_CM_ADDR_BOUND; memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), rdma_addr_size(cma_src_addr(id_priv))); @@ -4194,7 +4198,7 @@ int rdma_restrict_node_type(struct rdma_cm_id *id, u8 node_type) } mutex_lock(&lock); - if (id_priv->cma_dev) + if (READ_ONCE(id_priv->state) != RDMA_CM_IDLE) ret = -EALREADY; else id_priv->restricted_node_type = node_type;
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 0510276..a2c3666 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h
@@ -100,6 +100,9 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, roce_netdev_callback cb, void *cookie); +void ib_device_enable_gid_updates(struct ib_device *device); +void ib_device_disable_gid_updates(struct ib_device *device); + typedef int (*nldev_callback)(struct ib_device *device, struct sk_buff *skb, struct netlink_callback *cb,
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 1b5f1ee..558b739 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c
@@ -93,6 +93,7 @@ static struct workqueue_struct *ib_unreg_wq; static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC); static DECLARE_RWSEM(devices_rwsem); #define DEVICE_REGISTERED XA_MARK_1 +#define DEVICE_GID_UPDATES XA_MARK_2 static u32 highest_client_id; #define CLIENT_REGISTERED XA_MARK_1 @@ -2412,11 +2413,42 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, unsigned long index; down_read(&devices_rwsem); - xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) + xa_for_each_marked(&devices, index, dev, DEVICE_GID_UPDATES) ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie); up_read(&devices_rwsem); } +/** + * ib_device_enable_gid_updates - Mark device as ready for GID cache updates + * @device: Device to mark + * + * Called after GID table is allocated and initialized. After this mark is set, + * netdevice event handlers can update the device's GID cache. This allows + * events that arrive during device registration to be processed, avoiding + * stale GID entries when netdev properties change during the device + * registration process. + */ +void ib_device_enable_gid_updates(struct ib_device *device) +{ + down_write(&devices_rwsem); + xa_set_mark(&devices, device->index, DEVICE_GID_UPDATES); + up_write(&devices_rwsem); +} + +/** + * ib_device_disable_gid_updates - Clear the GID updates mark + * @device: Device to unmark + * + * Called before GID table cleanup to prevent event handlers from accessing + * the device while it's being torn down. + */ +void ib_device_disable_gid_updates(struct ib_device *device) +{ + down_write(&devices_rwsem); + xa_clear_mark(&devices, device->index, DEVICE_GID_UPDATES); + up_write(&devices_rwsem); +} + /* * ib_enum_all_devs - enumerate all ib_devices * @cb: Callback to call for each found ib_device
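DEVICE_GID_UPDATES reuses the same mechanism as the existing DEVICE_REGISTERED flag: an xarray mark is a per-entry bit that can be set, cleared, and used to filter iteration, so no separate list is needed. A condensed sketch of the lifecycle the two helpers implement, with handle_gid_event() as a hypothetical consumer:

    /* Producer side: flip the mark around the GID table's valid lifetime. */
    down_write(&devices_rwsem);
    xa_set_mark(&devices, device->index, DEVICE_GID_UPDATES);
    up_write(&devices_rwsem);

    /* Consumer side: netdev events only ever touch marked devices. */
    down_read(&devices_rwsem);
    xa_for_each_marked(&devices, index, dev, DEVICE_GID_UPDATES)
        handle_gid_event(dev);
    up_read(&devices_rwsem);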
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index fc45c38..4fafe39 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c
@@ -608,14 +608,29 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) { ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt, sg_offset, remote_addr, rkey, dir); - } else if (sg_cnt > 1) { - ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset, - remote_addr, rkey, dir); - } else { - ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset, - remote_addr, rkey, dir); + /* + * If MR init succeeded or failed for a reason other + * than pool exhaustion, that result is final. + * + * Pool exhaustion (-EAGAIN) from the max_sgl_rd + * optimization is recoverable: fall back to + * direct SGE posting. iWARP and force_mr require + * MRs unconditionally, so -EAGAIN is terminal. + */ + if (ret != -EAGAIN || + rdma_protocol_iwarp(qp->device, port_num) || + unlikely(rdma_rw_force_mr)) + goto out; } + if (sg_cnt > 1) + ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset, + remote_addr, rkey, dir); + else + ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset, + remote_addr, rkey, dir); + +out: if (ret < 0) goto out_unmap_sg; return ret; @@ -686,14 +701,16 @@ int rdma_rw_ctx_init_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return ret; /* - * IOVA mapping not available. Check if MR registration provides - * better performance than multiple SGE entries. + * IOVA not available; fall back to the map_wrs path, which maps + * each bvec as a direct SGE. This is always correct: the MR path + * is a throughput optimization, not a correctness requirement. + * (iWARP, which does require MRs, is handled by the check above.) + * + * The rdma_rw_io_needs_mr() gate is not used here because nr_bvec + * is a raw page count that overstates DMA entry demand -- the bvec + * caller has no post-DMA-coalescing segment count, and feeding the + * inflated count into the MR path exhausts the pool on RDMA READs. */ - if (rdma_rw_io_needs_mr(dev, port_num, dir, nr_bvec)) - return rdma_rw_init_mr_wrs_bvec(ctx, qp, port_num, bvecs, - nr_bvec, &iter, remote_addr, - rkey, dir); - return rdma_rw_init_map_wrs_bvec(ctx, qp, bvecs, nr_bvec, &iter, remote_addr, rkey, dir); }
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index cff4fcc..edc34c6 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c
@@ -55,7 +55,8 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d if (dirty) ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt, - DMA_BIDIRECTIONAL, 0); + DMA_BIDIRECTIONAL, + DMA_ATTR_REQUIRE_COHERENT); for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) { unpin_user_page_range_dirty_lock(sg_page(sg), @@ -169,7 +170,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, unsigned long lock_limit; unsigned long new_pinned; unsigned long cur_base; - unsigned long dma_attr = 0; + unsigned long dma_attr = DMA_ATTR_REQUIRE_COHERENT; struct mm_struct *mm; unsigned long npages; int pinned, ret;
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c index f5298c3..d30f24b 100644 --- a/drivers/infiniband/core/umem_dmabuf.c +++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -218,13 +218,11 @@ ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device, err = ib_umem_dmabuf_map_pages(umem_dmabuf); if (err) - goto err_unpin; + goto err_release; dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); return umem_dmabuf; -err_unpin: - dma_buf_unpin(umem_dmabuf->attach); err_release: dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); ib_umem_release(&umem_dmabuf->umem);
diff --git a/drivers/infiniband/core/uverbs_std_types_dmabuf.c b/drivers/infiniband/core/uverbs_std_types_dmabuf.c index dfdfcd1..4a7f8b6 100644 --- a/drivers/infiniband/core/uverbs_std_types_dmabuf.c +++ b/drivers/infiniband/core/uverbs_std_types_dmabuf.c
@@ -10,6 +10,8 @@ #include "rdma_core.h" #include "uverbs.h" +MODULE_IMPORT_NS("DMA_BUF"); + static int uverbs_dmabuf_attach(struct dma_buf *dmabuf, struct dma_buf_attachment *attachment) {
diff --git a/drivers/infiniband/hw/bng_re/bng_dev.c b/drivers/infiniband/hw/bng_re/bng_dev.c index cf3bf08..71a7ca2 100644 --- a/drivers/infiniband/hw/bng_re/bng_dev.c +++ b/drivers/infiniband/hw/bng_re/bng_dev.c
@@ -54,9 +54,6 @@ static void bng_re_destroy_chip_ctx(struct bng_re_dev *rdev) { struct bng_re_chip_ctx *chip_ctx; - if (!rdev->chip_ctx) - return; - kfree(rdev->dev_attr); rdev->dev_attr = NULL; @@ -124,12 +121,6 @@ static int bng_re_net_ring_free(struct bng_re_dev *rdev, struct bnge_fw_msg fw_msg = {}; int rc = -EINVAL; - if (!rdev) - return rc; - - if (!aux_dev) - return rc; - bng_re_init_hwrm_hdr((void *)&req, HWRM_RING_FREE); req.ring_type = type; req.ring_id = cpu_to_le16(fw_ring_id); @@ -150,10 +141,7 @@ static int bng_re_net_ring_alloc(struct bng_re_dev *rdev, struct hwrm_ring_alloc_input req = {}; struct hwrm_ring_alloc_output resp; struct bnge_fw_msg fw_msg = {}; - int rc = -EINVAL; - - if (!aux_dev) - return rc; + int rc; bng_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC); req.enables = 0; @@ -184,10 +172,7 @@ static int bng_re_stats_ctx_free(struct bng_re_dev *rdev) struct hwrm_stat_ctx_free_input req = {}; struct hwrm_stat_ctx_free_output resp = {}; struct bnge_fw_msg fw_msg = {}; - int rc = -EINVAL; - - if (!aux_dev) - return rc; + int rc; bng_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE); req.stat_ctx_id = cpu_to_le32(rdev->stats_ctx.fw_id); @@ -208,13 +193,10 @@ static int bng_re_stats_ctx_alloc(struct bng_re_dev *rdev) struct hwrm_stat_ctx_alloc_output resp = {}; struct hwrm_stat_ctx_alloc_input req = {}; struct bnge_fw_msg fw_msg = {}; - int rc = -EINVAL; + int rc; stats->fw_id = BNGE_INVALID_STATS_CTX_ID; - if (!aux_dev) - return rc; - bng_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC); req.update_period_ms = cpu_to_le32(1000); req.stats_dma_addr = cpu_to_le64(stats->dma_map); @@ -228,7 +210,7 @@ static int bng_re_stats_ctx_alloc(struct bng_re_dev *rdev) return rc; } -static void bng_re_query_hwrm_version(struct bng_re_dev *rdev) +static int bng_re_query_hwrm_version(struct bng_re_dev *rdev) { struct bnge_auxr_dev *aux_dev = rdev->aux_dev; struct hwrm_ver_get_output ver_get_resp = {}; @@ -248,7 +230,7 @@ static void bng_re_query_hwrm_version(struct bng_re_dev *rdev) if (rc) { ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x", rc); - return; + return rc; } cctx = rdev->chip_ctx; @@ -262,6 +244,8 @@ static void bng_re_query_hwrm_version(struct bng_re_dev *rdev) if (!cctx->hwrm_cmd_max_timeout) cctx->hwrm_cmd_max_timeout = BNG_ROCE_FW_MAX_TIMEOUT; + + return 0; } static void bng_re_dev_uninit(struct bng_re_dev *rdev) @@ -303,7 +287,7 @@ static int bng_re_dev_init(struct bng_re_dev *rdev) if (rc) { ibdev_err(&rdev->ibdev, "Failed to register with netedev: %#x\n", rc); - return -EINVAL; + goto reg_netdev_fail; } set_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); @@ -312,37 +296,34 @@ static int bng_re_dev_init(struct bng_re_dev *rdev) ibdev_err(&rdev->ibdev, "RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n", rdev->aux_dev->auxr_info->msix_requested); - bnge_unregister_dev(rdev->aux_dev); - clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); - return -EINVAL; + rc = -EINVAL; + goto msix_ctx_fail; } ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n", rdev->aux_dev->auxr_info->msix_requested); rc = bng_re_setup_chip_ctx(rdev); if (rc) { - bnge_unregister_dev(rdev->aux_dev); - clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); ibdev_err(&rdev->ibdev, "Failed to get chip context\n"); - return -EINVAL; + goto msix_ctx_fail; } - bng_re_query_hwrm_version(rdev); + rc = bng_re_query_hwrm_version(rdev); + if (rc) + goto destroy_chip_ctx; rc = bng_re_alloc_fw_channel(&rdev->bng_res, &rdev->rcfw); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate RCFW Channel: %#x\n", rc); - goto fail; + goto destroy_chip_ctx; } /* Allocate nq record memory */ rdev->nqr = kzalloc_obj(*rdev->nqr); if (!rdev->nqr) { - bng_re_destroy_chip_ctx(rdev); - bnge_unregister_dev(rdev->aux_dev); - clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); - return -ENOMEM; + rc = -ENOMEM; + goto nq_alloc_fail; } rdev->nqr->num_msix = rdev->aux_dev->auxr_info->msix_requested; @@ -411,9 +392,15 @@ static int bng_re_dev_init(struct bng_re_dev *rdev) free_ring: bng_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type); free_rcfw: + kfree(rdev->nqr); +nq_alloc_fail: bng_re_free_rcfw_channel(&rdev->rcfw); -fail: - bng_re_dev_uninit(rdev); +destroy_chip_ctx: + bng_re_destroy_chip_ctx(rdev); +msix_ctx_fail: + bnge_unregister_dev(rdev->aux_dev); + clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); +reg_netdev_fail: return rc; } @@ -486,8 +473,7 @@ static void bng_re_remove(struct auxiliary_device *adev) rdev = dev_info->rdev; - if (rdev) - bng_re_remove_device(rdev, adev); + bng_re_remove_device(rdev, adev); kfree(dev_info); }
diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 229b0ad..e97b5f0 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c
@@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include <linux/log2.h> @@ -310,23 +310,19 @@ static inline struct efa_comp_ctx *efa_com_get_comp_ctx_by_cmd_id(struct efa_com return &aq->comp_ctx[ctx_id]; } -static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, - struct efa_admin_aq_entry *cmd, - size_t cmd_size_in_bytes, - struct efa_admin_acq_entry *comp, - size_t comp_size_in_bytes) +static void __efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, + struct efa_comp_ctx *comp_ctx, + struct efa_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct efa_admin_acq_entry *comp, + size_t comp_size_in_bytes) { struct efa_admin_aq_entry *aqe; - struct efa_comp_ctx *comp_ctx; u16 queue_size_mask; u16 cmd_id; u16 ctx_id; u16 pi; - comp_ctx = efa_com_alloc_comp_ctx(aq); - if (!comp_ctx) - return ERR_PTR(-EINVAL); - queue_size_mask = aq->depth - 1; pi = aq->sq.pc & queue_size_mask; ctx_id = efa_com_get_comp_ctx_id(aq, comp_ctx); @@ -360,8 +356,6 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu /* barrier not needed in case of writel */ writel(aq->sq.pc, aq->sq.db_addr); - - return comp_ctx; } static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq) @@ -394,28 +388,25 @@ static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq) return 0; } -static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, - struct efa_admin_aq_entry *cmd, - size_t cmd_size_in_bytes, - struct efa_admin_acq_entry *comp, - size_t comp_size_in_bytes) +static int efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, + struct efa_comp_ctx *comp_ctx, + struct efa_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct efa_admin_acq_entry *comp, + size_t comp_size_in_bytes) { - struct efa_comp_ctx *comp_ctx; - spin_lock(&aq->sq.lock); if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) { ibdev_err_ratelimited(aq->efa_dev, "Admin queue is closed\n"); spin_unlock(&aq->sq.lock); - return ERR_PTR(-ENODEV); + return -ENODEV; } - comp_ctx = __efa_com_submit_admin_cmd(aq, cmd, cmd_size_in_bytes, comp, - comp_size_in_bytes); + __efa_com_submit_admin_cmd(aq, comp_ctx, cmd, cmd_size_in_bytes, comp, + comp_size_in_bytes); spin_unlock(&aq->sq.lock); - if (IS_ERR(comp_ctx)) - clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); - return comp_ctx; + return 0; } static int efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq, @@ -512,7 +503,6 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c { unsigned long timeout; unsigned long flags; - int err; timeout = jiffies + usecs_to_jiffies(aq->completion_timeout); @@ -532,24 +522,20 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c atomic64_inc(&aq->stats.no_completion); clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); - err = -ETIME; - goto out; + return -ETIME; } msleep(aq->poll_interval); } - err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status); -out: - efa_com_dealloc_comp_ctx(aq, comp_ctx); - return err; + return efa_com_comp_status_to_errno( + comp_ctx->user_cqe->acq_common_descriptor.status); } static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *comp_ctx, struct efa_com_admin_queue *aq) { unsigned long flags; - int err; 
wait_for_completion_timeout(&comp_ctx->wait_event, usecs_to_jiffies(aq->completion_timeout)); @@ -585,14 +571,11 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com aq->cq.cc); clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); - err = -ETIME; - goto out; + return -ETIME; } - err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status); -out: - efa_com_dealloc_comp_ctx(aq, comp_ctx); - return err; + return efa_com_comp_status_to_errno( + comp_ctx->user_cqe->acq_common_descriptor.status); } /* @@ -642,30 +625,39 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, ibdev_dbg(aq->efa_dev, "%s (opcode %d)\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), cmd->aq_common_descriptor.opcode); - comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size); - if (IS_ERR(comp_ctx)) { + + comp_ctx = efa_com_alloc_comp_ctx(aq); + if (!comp_ctx) { + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + up(&aq->avail_cmds); + return -EINVAL; + } + + err = efa_com_submit_admin_cmd(aq, comp_ctx, cmd, cmd_size, comp, comp_size); + if (err) { ibdev_err_ratelimited( aq->efa_dev, - "Failed to submit command %s (opcode %u) err %pe\n", + "Failed to submit command %s (opcode %u) err %d\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), - cmd->aq_common_descriptor.opcode, comp_ctx); + cmd->aq_common_descriptor.opcode, err); + efa_com_dealloc_comp_ctx(aq, comp_ctx); up(&aq->avail_cmds); atomic64_inc(&aq->stats.cmd_err); - return PTR_ERR(comp_ctx); + return err; } err = efa_com_wait_and_process_admin_cq(comp_ctx, aq); if (err) { ibdev_err_ratelimited( aq->efa_dev, - "Failed to process command %s (opcode %u) comp_status %d err %d\n", + "Failed to process command %s (opcode %u) err %d\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), - cmd->aq_common_descriptor.opcode, - comp_ctx->user_cqe->acq_common_descriptor.status, err); + cmd->aq_common_descriptor.opcode, err); atomic64_inc(&aq->stats.cmd_err); } + efa_com_dealloc_comp_ctx(aq, comp_ctx); up(&aq->avail_cmds); return err;
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index b5b93b4..b0aa7c0 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -1661,7 +1661,7 @@ static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags, struct efa_mr *mr; if (udata && udata->inlen && - !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) { + !ib_is_udata_cleared(udata, 0, udata->inlen)) { ibdev_dbg(&dev->ibdev, "Incompatible ABI params, udata not cleared\n"); return ERR_PTR(-EINVAL);
diff --git a/drivers/infiniband/hw/ionic/ionic_controlpath.c b/drivers/infiniband/hw/ionic/ionic_controlpath.c index 5b3f40b..a5671da 100644 --- a/drivers/infiniband/hw/ionic/ionic_controlpath.c +++ b/drivers/infiniband/hw/ionic/ionic_controlpath.c
@@ -508,6 +508,7 @@ static int ionic_build_hdr(struct ionic_ibdev *dev, { const struct ib_global_route *grh; enum rdma_network_type net; + u8 smac[ETH_ALEN]; u16 vlan; int rc; @@ -518,7 +519,7 @@ static int ionic_build_hdr(struct ionic_ibdev *dev, grh = rdma_ah_read_grh(attr); - rc = rdma_read_gid_l2_fields(grh->sgid_attr, &vlan, &hdr->eth.smac_h[0]); + rc = rdma_read_gid_l2_fields(grh->sgid_attr, &vlan, smac); if (rc) return rc; @@ -536,6 +537,7 @@ static int ionic_build_hdr(struct ionic_ibdev *dev, if (rc) return rc; + ether_addr_copy(hdr->eth.smac_h, smac); ether_addr_copy(hdr->eth.dmac_h, attr->roce.dmac); if (net == RDMA_NETWORK_IPV4) { @@ -1218,7 +1220,7 @@ int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, rdma_udata_to_drv_context(udata, struct ionic_ctx, ibctx); struct ionic_vcq *vcq = to_ionic_vcq(ibcq); struct ionic_tbl_buf buf = {}; - struct ionic_cq_resp resp; + struct ionic_cq_resp resp = {}; struct ionic_cq_req req; int udma_idx = 0, rc;
diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c index 164046d..bd4c73e 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.c +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c
@@ -81,6 +81,8 @@ static int ionic_query_port(struct ib_device *ibdev, u32 port, return -EINVAL; ndev = ib_device_get_netdev(ibdev, port); + if (!ndev) + return -ENODEV; if (netif_running(ndev) && netif_carrier_ok(ndev)) { attr->state = IB_PORT_ACTIVE;
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 3d084d4..91c0e72 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c
@@ -2241,11 +2241,12 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, int oldarpindex; int arpindex; struct net_device *netdev = iwdev->netdev; + int ret; /* create an hte and cm_node for this instance */ cm_node = kzalloc_obj(*cm_node, GFP_ATOMIC); if (!cm_node) - return NULL; + return ERR_PTR(-ENOMEM); /* set our node specific transport info */ cm_node->ipv4 = cm_info->ipv4; @@ -2348,8 +2349,10 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, arpindex = -EINVAL; } - if (arpindex < 0) + if (arpindex < 0) { + ret = -EINVAL; goto err; + } ether_addr_copy(cm_node->rem_mac, iwdev->rf->arp_table[arpindex].mac_addr); @@ -2360,7 +2363,7 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, err: kfree(cm_node); - return NULL; + return ERR_PTR(ret); } static void irdma_destroy_connection(struct irdma_cm_node *cm_node) @@ -3021,8 +3024,8 @@ static int irdma_create_cm_node(struct irdma_cm_core *cm_core, /* create a CM connection node */ cm_node = irdma_make_cm_node(cm_core, iwdev, cm_info, NULL); - if (!cm_node) - return -ENOMEM; + if (IS_ERR(cm_node)) + return PTR_ERR(cm_node); /* set our node side to client (active) side */ cm_node->tcp_cntxt.client = 1; @@ -3219,9 +3222,9 @@ void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf) cm_info.cm_id = listener->cm_id; cm_node = irdma_make_cm_node(cm_core, iwdev, &cm_info, listener); - if (!cm_node) { + if (IS_ERR(cm_node)) { ibdev_dbg(&cm_core->iwdev->ibdev, - "CM: allocate node failed\n"); + "CM: allocate node failed ret=%ld\n", PTR_ERR(cm_node)); refcount_dec(&listener->refcnt); return; } @@ -4239,21 +4242,21 @@ static void irdma_cm_event_handler(struct work_struct *work) irdma_cm_event_reset(event); break; case IRDMA_CM_EVENT_CONNECTED: - if (!event->cm_node->cm_id || - event->cm_node->state != IRDMA_CM_STATE_OFFLOADED) + if (!cm_node->cm_id || + cm_node->state != IRDMA_CM_STATE_OFFLOADED) break; irdma_cm_event_connected(event); break; case IRDMA_CM_EVENT_MPA_REJECT: - if (!event->cm_node->cm_id || + if (!cm_node->cm_id || cm_node->state == IRDMA_CM_STATE_OFFLOADED) break; irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED); break; case IRDMA_CM_EVENT_ABORTED: - if (!event->cm_node->cm_id || - event->cm_node->state == IRDMA_CM_STATE_OFFLOADED) + if (!cm_node->cm_id || + cm_node->state == IRDMA_CM_STATE_OFFLOADED) break; irdma_event_connect_error(event); break; @@ -4263,7 +4266,7 @@ static void irdma_cm_event_handler(struct work_struct *work) break; } - irdma_rem_ref_cm_node(event->cm_node); + irdma_rem_ref_cm_node(cm_node); kfree(event); }
diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index ac3721a..4718acf 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c
@@ -1438,7 +1438,7 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, * irdma_round_up_wq - return round up qp wq depth * @wqdepth: wq depth in quanta to round up */ -static int irdma_round_up_wq(u32 wqdepth) +static u64 irdma_round_up_wq(u64 wqdepth) { int scount = 1; @@ -1491,15 +1491,16 @@ void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *sqdepth) { - u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; + u32 min_hw_quanta = (u32)uk_attrs->min_hw_wq_size << shift; + u64 hw_quanta = + irdma_round_up_wq(((u64)sq_size << shift) + IRDMA_SQ_RSVD); - *sqdepth = irdma_round_up_wq((sq_size << shift) + IRDMA_SQ_RSVD); - - if (*sqdepth < min_size) - *sqdepth = min_size; - else if (*sqdepth > uk_attrs->max_hw_wq_quanta) + if (hw_quanta < min_hw_quanta) + hw_quanta = min_hw_quanta; + else if (hw_quanta > uk_attrs->max_hw_wq_quanta) return -EINVAL; + *sqdepth = hw_quanta; return 0; } @@ -1513,15 +1514,16 @@ int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *rqdepth) { - u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; + u32 min_hw_quanta = (u32)uk_attrs->min_hw_wq_size << shift; + u64 hw_quanta = + irdma_round_up_wq(((u64)rq_size << shift) + IRDMA_RQ_RSVD); - *rqdepth = irdma_round_up_wq((rq_size << shift) + IRDMA_RQ_RSVD); - - if (*rqdepth < min_size) - *rqdepth = min_size; - else if (*rqdepth > uk_attrs->max_hw_rq_quanta) + if (hw_quanta < min_hw_quanta) + hw_quanta = min_hw_quanta; + else if (hw_quanta > uk_attrs->max_hw_rq_quanta) return -EINVAL; + *rqdepth = hw_quanta; return 0; } @@ -1535,13 +1537,16 @@ int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, u32 srq_size, u8 shift, u32 *srqdepth) { - *srqdepth = irdma_round_up_wq((srq_size << shift) + IRDMA_RQ_RSVD); + u32 min_hw_quanta = (u32)uk_attrs->min_hw_wq_size << shift; + u64 hw_quanta = + irdma_round_up_wq(((u64)srq_size << shift) + IRDMA_RQ_RSVD); - if (*srqdepth < ((u32)uk_attrs->min_hw_wq_size << shift)) - *srqdepth = uk_attrs->min_hw_wq_size << shift; - else if (*srqdepth > uk_attrs->max_hw_srq_quanta) + if (hw_quanta < min_hw_quanta) + hw_quanta = min_hw_quanta; + else if (hw_quanta > uk_attrs->max_hw_srq_quanta) return -EINVAL; + *srqdepth = hw_quanta; return 0; }
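The irdma widening matters because the old code shifted a user-influenced 32-bit size before rounding: the shift can wrap in u32 arithmetic, and the resulting small value then sails past the max_hw_wq_quanta check. A self-contained illustration of the wrap:

    #include <stdint.h>

    void demo(void)
    {
        uint32_t sq_size = 0x20000000;                  /* oversized request */
        unsigned int shift = 3;

        uint32_t wrapped = sq_size << shift;            /* 0: top bits silently lost */
        uint64_t widened = (uint64_t)sq_size << shift;  /* 0x100000000: now correctly
                                                           rejected with -EINVAL */
        (void)wrapped; (void)widened;
    }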
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index ab8c528..495e5da 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c
@@ -2322,8 +2322,6 @@ void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp) struct irdma_qp *qp = sc_qp->qp_uk.back_qp; struct ib_qp_attr attr; - if (qp->iwdev->rf->reset) - return; attr.qp_state = IB_QPS_ERR; if (rdma_protocol_roce(qp->ibqp.device, 1))
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 15af532..95f590c 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -558,7 +558,8 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) } irdma_qp_rem_ref(&iwqp->ibqp); - wait_for_completion(&iwqp->free_qp); + if (!iwdev->rf->reset) + wait_for_completion(&iwqp->free_qp); irdma_free_lsmm_rsrc(iwqp); irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp); @@ -1105,6 +1106,7 @@ static int irdma_create_qp(struct ib_qp *ibqp, spin_lock_init(&iwqp->sc_qp.pfpdu.lock); iwqp->sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; rf->qp_table[qp_num] = iwqp; + init_completion(&iwqp->free_qp); if (udata) { /* GEN_1 legacy support with libi40iw does not have expanded uresp struct */ @@ -1129,7 +1131,6 @@ static int irdma_create_qp(struct ib_qp *ibqp, } } - init_completion(&iwqp->free_qp); return 0; error: @@ -1462,8 +1463,6 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, ctx_info->remote_atomics_en = true; } - wait_event(iwqp->mod_qp_waitq, !atomic_read(&iwqp->hw_mod_qp_pend)); - ibdev_dbg(&iwdev->ibdev, "VERBS: caller: %pS qp_id=%d to_ibqpstate=%d ibqpstate=%d irdma_qpstate=%d attr_mask=0x%x\n", __builtin_return_address(0), ibqp->qp_num, attr->qp_state, @@ -1540,6 +1539,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, case IB_QPS_ERR: case IB_QPS_RESET: if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { + iwqp->ibqp_state = attr->qp_state; spin_unlock_irqrestore(&iwqp->lock, flags); if (udata && udata->inlen) { if (ib_copy_from_udata(&ureq, udata, @@ -1745,6 +1745,7 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, case IB_QPS_ERR: case IB_QPS_RESET: if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { + iwqp->ibqp_state = attr->qp_state; spin_unlock_irqrestore(&iwqp->lock, flags); if (udata && udata->inlen) { if (ib_copy_from_udata(&ureq, udata, @@ -3723,6 +3724,7 @@ static int irdma_rereg_mr_trans(struct irdma_mr *iwmr, u64 start, u64 len, err: ib_umem_release(region); + iwmr->region = NULL; return err; } @@ -5212,7 +5214,7 @@ static int irdma_create_user_ah(struct ib_ah *ibah, #define IRDMA_CREATE_AH_MIN_RESP_LEN offsetofend(struct irdma_create_ah_resp, rsvd) struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah); struct irdma_device *iwdev = to_iwdev(ibah->pd->device); - struct irdma_create_ah_resp uresp; + struct irdma_create_ah_resp uresp = {}; struct irdma_ah *parent_ah; int err;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index ef06350..5c182ae 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -428,6 +428,8 @@ static int mthca_create_srq(struct ib_srq *ibsrq, if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) { mthca_free_srq(to_mdev(ibsrq->device), srq); + mthca_unmap_user_db(to_mdev(ibsrq->device), &context->uar, + context->db_tab, ucmd.db_index); return -EFAULT; } @@ -436,6 +438,7 @@ static int mthca_create_srq(struct ib_srq *ibsrq, static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { + mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); if (udata) { struct mthca_ucontext *context = rdma_udata_to_drv_context( @@ -446,8 +449,6 @@ static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) mthca_unmap_user_db(to_mdev(srq->device), &context->uar, context->db_tab, to_msrq(srq)->db_index); } - - mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); return 0; }
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index bf4accf..d6fc3d6 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c
@@ -313,6 +313,8 @@ static const struct xpad_device { { 0x1532, 0x0a00, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x1532, 0x0a03, "Razer Wildcat", 0, XTYPE_XBOXONE }, { 0x1532, 0x0a29, "Razer Wolverine V2", 0, XTYPE_XBOXONE }, + { 0x1532, 0x0a57, "Razer Wolverine V3 Pro (Wired)", 0, XTYPE_XBOX360 }, + { 0x1532, 0x0a59, "Razer Wolverine V3 Pro (2.4 GHz Dongle)", 0, XTYPE_XBOX360 }, { 0x15e4, 0x3f00, "Power A Mini Pro Elite", 0, XTYPE_XBOX360 }, { 0x15e4, 0x3f0a, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 }, { 0x15e4, 0x3f10, "Batarang Xbox 360 controller", 0, XTYPE_XBOX360 }, @@ -360,6 +362,8 @@ static const struct xpad_device { { 0x1bad, 0xfd00, "Razer Onza TE", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd01, "Razer Onza", 0, XTYPE_XBOX360 }, { 0x1ee9, 0x1590, "ZOTAC Gaming Zone", 0, XTYPE_XBOX360 }, + { 0x20bc, 0x5134, "BETOP BTP-KP50B Xinput Dongle", 0, XTYPE_XBOX360 }, + { 0x20bc, 0x514a, "BETOP BTP-KP50C Xinput Dongle", 0, XTYPE_XBOX360 }, { 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE }, { 0x20d6, 0x2009, "PowerA Enhanced Wired Controller for Xbox Series X|S", 0, XTYPE_XBOXONE }, { 0x20d6, 0x2064, "PowerA Wired Controller for Xbox", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, @@ -562,6 +566,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x1a86), /* Nanjing Qinheng Microelectronics (WCH) */ XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band guitar and drums */ XPAD_XBOX360_VENDOR(0x1ee9), /* ZOTAC Technology Limited */ + XPAD_XBOX360_VENDOR(0x20bc), /* BETOP wireless dongles */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA controllers */ XPAD_XBOXONE_VENDOR(0x20d6), /* PowerA controllers */ XPAD_XBOX360_VENDOR(0x2345), /* Machenike Controllers */
diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index 8e3cea9..e772bac 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c
@@ -286,6 +286,8 @@ struct bcm5974 { const struct tp_finger *index[MAX_FINGERS]; /* finger index data */ struct input_mt_pos pos[MAX_FINGERS]; /* position array */ int slots[MAX_FINGERS]; /* slot assignments */ + struct work_struct mode_reset_work; + unsigned long last_mode_reset; }; /* trackpad finger block data, le16-aligned */ @@ -696,6 +698,32 @@ static int bcm5974_wellspring_mode(struct bcm5974 *dev, bool on) return retval; } +/* + * Mode switches sent before the control response are ignored. + * Fixing this state requires switching to normal mode and waiting + * about 1ms before switching back to wellspring mode. + */ +static void bcm5974_mode_reset_work(struct work_struct *work) +{ + struct bcm5974 *dev = container_of(work, struct bcm5974, mode_reset_work); + int error; + + guard(mutex)(&dev->pm_mutex); + dev->last_mode_reset = jiffies; + + error = bcm5974_wellspring_mode(dev, false); + if (error) { + dev_err(&dev->intf->dev, "reset to normal mode failed\n"); + return; + } + + fsleep(1000); + + error = bcm5974_wellspring_mode(dev, true); + if (error) + dev_err(&dev->intf->dev, "mode switch after reset failed\n"); +} + static void bcm5974_irq_button(struct urb *urb) { struct bcm5974 *dev = urb->context; @@ -752,10 +780,20 @@ static void bcm5974_irq_trackpad(struct urb *urb) if (dev->tp_urb->actual_length == 2) goto exit; - if (report_tp_state(dev, dev->tp_urb->actual_length)) + if (report_tp_state(dev, dev->tp_urb->actual_length)) { dprintk(1, "bcm5974: bad trackpad package, length: %d\n", dev->tp_urb->actual_length); + /* + * Receiving a HID packet means we aren't in wellspring mode. + * If we haven't tried a reset in the last second, try now. + */ + if (dev->tp_urb->actual_length == 8 && + time_after(jiffies, dev->last_mode_reset + msecs_to_jiffies(1000))) { + schedule_work(&dev->mode_reset_work); + } + } + exit: error = usb_submit_urb(dev->tp_urb, GFP_ATOMIC); if (error) @@ -906,6 +944,7 @@ static int bcm5974_probe(struct usb_interface *iface, dev->intf = iface; dev->input = input_dev; dev->cfg = *cfg; + INIT_WORK(&dev->mode_reset_work, bcm5974_mode_reset_work); mutex_init(&dev->pm_mutex); /* setup urbs */ @@ -998,6 +1037,7 @@ static void bcm5974_disconnect(struct usb_interface *iface) { struct bcm5974 *dev = usb_get_intfdata(iface); + disable_work_sync(&dev->mode_reset_work); usb_set_intfdata(iface, NULL); input_unregister_device(dev->input);
diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c index ac4041a..61909e1 100644 --- a/drivers/input/rmi4/rmi_f54.c +++ b/drivers/input/rmi4/rmi_f54.c
@@ -538,6 +538,8 @@ static void rmi_f54_work(struct work_struct *work) int error; int i; + mutex_lock(&f54->data_mutex); + report_size = rmi_f54_get_report_size(f54); if (report_size == 0) { dev_err(&fn->dev, "Bad report size, report type=%d\n", @@ -546,8 +548,6 @@ static void rmi_f54_work(struct work_struct *work) goto error; /* retry won't help */ } - mutex_lock(&f54->data_mutex); - /* * Need to check if command has completed. * If not try again later.
diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index d2cf940..8ebdf4f 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h
@@ -1189,6 +1189,13 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, { .matches = { + DMI_MATCH(DMI_BOARD_NAME, "X6KK45xU_X6SP45xU"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "WUJIE Series-X5SP4NAG"), }, .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
diff --git a/drivers/interconnect/qcom/sm8450.c b/drivers/interconnect/qcom/sm8450.c index 669a638..c88327d 100644 --- a/drivers/interconnect/qcom/sm8450.c +++ b/drivers/interconnect/qcom/sm8450.c
@@ -800,7 +800,7 @@ static struct qcom_icc_node qhs_compute_cfg = { .channels = 1, .buswidth = 4, .num_links = 1, - .link_nodes = { MASTER_CDSP_NOC_CFG }, + .link_nodes = { &qhm_nsp_noc_config }, }; static struct qcom_icc_node qhs_cpr_cx = { @@ -874,7 +874,7 @@ static struct qcom_icc_node qhs_lpass_cfg = { .channels = 1, .buswidth = 4, .num_links = 1, - .link_nodes = { MASTER_CNOC_LPASS_AG_NOC }, + .link_nodes = { &qhm_config_noc }, }; static struct qcom_icc_node qhs_mss_cfg = {
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 81c4d77..760d5f46 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c
@@ -2909,8 +2909,21 @@ static struct iommu_domain blocked_domain = { static struct protection_domain identity_domain; +static int amd_iommu_identity_attach(struct iommu_domain *dom, struct device *dev, + struct iommu_domain *old) +{ + /* + * Don't allow attaching a device to the identity domain if SNP is + * enabled. + */ + if (amd_iommu_snp_en) + return -EINVAL; + + return amd_iommu_attach_device(dom, dev, old); +} + static const struct iommu_domain_ops identity_domain_ops = { - .attach_dev = amd_iommu_attach_device, + .attach_dev = amd_iommu_identity_attach, }; void amd_iommu_init_identity_domain(void)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 5dac64b..94d5141 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c
@@ -1211,7 +1211,7 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, */ if (dev_use_swiotlb(dev, size, dir) && iova_unaligned(iovad, phys, size)) { - if (attrs & DMA_ATTR_MMIO) + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) return DMA_MAPPING_ERROR; phys = iommu_dma_map_swiotlb(dev, phys, size, dir, attrs); @@ -1223,7 +1223,8 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, arch_sync_dma_for_device(phys, size, dir); iova = __iommu_dma_map(dev, phys, size, prot, dma_mask); - if (iova == DMA_MAPPING_ERROR && !(attrs & DMA_ATTR_MMIO)) + if (iova == DMA_MAPPING_ERROR && + !(attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))) swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); return iova; } @@ -1233,7 +1234,7 @@ void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle, { phys_addr_t phys; - if (attrs & DMA_ATTR_MMIO) { + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) { __iommu_dma_unmap(dev, dma_handle, size); return; } @@ -1945,9 +1946,21 @@ int dma_iova_link(struct device *dev, struct dma_iova_state *state, if (WARN_ON_ONCE(iova_start_pad && offset > 0)) return -EIO; + /* + * DMA_IOVA_USE_SWIOTLB is set on the state once any entry has + * taken the SWIOTLB path, which DMA_ATTR_REQUIRE_COHERENT is + * supposed to prevent. + */ + if (WARN_ON_ONCE((state->__size & DMA_IOVA_USE_SWIOTLB) && + (attrs & DMA_ATTR_REQUIRE_COHERENT))) + return -EOPNOTSUPP; + + if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT)) + return -EOPNOTSUPP; + if (dev_use_swiotlb(dev, size, dir) && iova_unaligned(iovad, phys, size)) { - if (attrs & DMA_ATTR_MMIO) + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) return -EPERM; return iommu_dma_iova_link_swiotlb(dev, state, phys, offset,
diff --git a/drivers/iommu/generic_pt/fmt/amdv1.h b/drivers/iommu/generic_pt/fmt/amdv1.h index 3b2c41d..8d11b082 100644 --- a/drivers/iommu/generic_pt/fmt/amdv1.h +++ b/drivers/iommu/generic_pt/fmt/amdv1.h
@@ -191,7 +191,7 @@ static inline enum pt_entry_type amdv1pt_load_entry_raw(struct pt_state *pts) } #define pt_load_entry_raw amdv1pt_load_entry_raw -static inline void +static __always_inline void amdv1pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa, unsigned int oasz_lg2, const struct pt_write_attrs *attrs)
diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h index 3e33fe6..7e7a6e7 100644 --- a/drivers/iommu/generic_pt/iommu_pt.h +++ b/drivers/iommu/generic_pt/iommu_pt.h
@@ -1057,7 +1057,7 @@ size_t DOMAIN_NS(unmap_pages)(struct iommu_domain *domain, unsigned long iova, pt_walk_range(&range, __unmap_range, &unmap); - gather_range_pages(iotlb_gather, iommu_table, iova, len, + gather_range_pages(iotlb_gather, iommu_table, iova, unmap.unmapped, &unmap.free_list); return unmap.unmapped;
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index d68c060..69222db 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c
@@ -1314,7 +1314,6 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) if (fault & DMA_FSTS_ITE) { head = readl(iommu->reg + DMAR_IQH_REG); head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH; - head |= 1; tail = readl(iommu->reg + DMAR_IQT_REG); tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH; @@ -1331,7 +1330,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) do { if (qi->desc_status[head] == QI_IN_USE) qi->desc_status[head] = QI_ABORT; - head = (head - 2 + QI_LENGTH) % QI_LENGTH; + head = (head - 1 + QI_LENGTH) % QI_LENGTH; } while (head != tail); /*
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index fea10ac..57cd1db 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c
@@ -164,9 +164,12 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, if (IS_ERR(dev_pasid)) return PTR_ERR(dev_pasid); - ret = iopf_for_domain_replace(domain, old, dev); - if (ret) - goto out_remove_dev_pasid; + /* SVA with non-IOMMU/PRI IOPF handling is allowed. */ + if (info->pri_supported) { + ret = iopf_for_domain_replace(domain, old, dev); + if (ret) + goto out_remove_dev_pasid; + } /* Setup the pasid table: */ sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; @@ -181,7 +184,8 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, return 0; out_unwind_iopf: - iopf_for_domain_replace(old, domain, dev); + if (info->pri_supported) + iopf_for_domain_replace(old, domain, dev); out_remove_dev_pasid: domain_remove_dev_pasid(domain, dev, pasid); return ret;
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 07d6490..bc7c723 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c
@@ -182,13 +182,13 @@ void iommu_sva_unbind_device(struct iommu_sva *handle) iommu_detach_device_pasid(domain, dev, iommu_mm->pasid); if (--domain->users == 0) { list_del(&domain->next); - iommu_domain_free(domain); - } + if (list_empty(&iommu_mm->sva_domains)) { + list_del(&iommu_mm->mm_list_elm); + if (list_empty(&iommu_sva_mms)) + iommu_sva_present = false; + } - if (list_empty(&iommu_mm->sva_domains)) { - list_del(&iommu_mm->mm_list_elm); - if (list_empty(&iommu_sva_mms)) - iommu_sva_present = false; + iommu_domain_free(domain); } mutex_unlock(&iommu_sva_lock);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 35db517..50718ab 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c
@@ -1213,7 +1213,11 @@ static int iommu_create_device_direct_mappings(struct iommu_domain *domain, if (addr == end) goto map_end; - phys_addr = iommu_iova_to_phys(domain, addr); + /* + * iommu_iova_to_phys() returns 0 for both "unmapped" and a + * mapping at physical address 0, so probe address 1 when addr is 0. + */ + phys_addr = iommu_iova_to_phys(domain, addr ? addr : 1); if (!phys_addr) { map_size += pg_size; continue;
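The underlying ambiguity: iommu_iova_to_phys() uses 0 both as its "not mapped" result and as a perfectly valid physical address, and direct mappings are identity mappings, so IOVA 0 really can translate to physical address 0. Probing IOVA 1 stays inside the same page but yields a distinguishable result; a sketch:

    /* 0 is both the error value and a legal physical address. */
    phys_addr_t pa0 = iommu_iova_to_phys(domain, 0);  /* 0: unmapped, or mapped at PA 0? */
    phys_addr_t pa1 = iommu_iova_to_phys(domain, 1);  /* non-zero when the identity-mapped
                                                         page is present */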
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 3078381..291d766 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -3474,6 +3474,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, int lpi_base; int nr_lpis; int nr_ites; + int id_bits; int sz; if (!its_alloc_device_table(its, dev_id)) @@ -3485,7 +3486,10 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, /* * Even if the device wants a single LPI, the ITT must be * sized as a power of two (and you need at least one bit...). + * Also honor the ITS's own EID limit. */ + id_bits = FIELD_GET(GITS_TYPER_IDBITS, its->typer) + 1; + nvecs = min_t(unsigned int, nvecs, BIT(id_bits)); nr_ites = max(2, nvecs); sz = nr_ites * (FIELD_GET(GITS_TYPER_ITT_ENTRY_SIZE, its->typer) + 1); sz = max(sz, ITS_ITT_ALIGN);
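GITS_TYPER.IDbits encodes the number of implemented EventID bits minus one, hence the "+ 1" before clamping. A worked sketch of the clamp (clamp_nvecs() is illustrative only): a field value of 4 means 5 EventID bits, so the device can address at most 32 events, and a 64-vector request must be cut down before the ITT is sized.

    #include <stdint.h>

    /* IDbits is "EventID bits minus one": field 4 -> 5 bits -> 32 events max. */
    static unsigned int clamp_nvecs(uint32_t idbits_field, unsigned int nvecs)
    {
        unsigned int id_bits = idbits_field + 1;
        unsigned int limit = 1u << id_bits;    /* e.g. 1 << 5 == 32 */

        return nvecs < limit ? nvecs : limit;  /* a 64-vector request becomes 32 */
    }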
diff --git a/drivers/irqchip/irq-gic-v5-irs.c b/drivers/irqchip/irq-gic-v5-irs.c index e518e5d..f3fce0b 100644 --- a/drivers/irqchip/irq-gic-v5-irs.c +++ b/drivers/irqchip/irq-gic-v5-irs.c
@@ -699,7 +699,7 @@ static int __init gicv5_irs_init(struct gicv5_irs_chip_data *irs_data) */ if (list_empty(&irs_nodes)) { idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR0); - gicv5_global_data.virt_capable = !FIELD_GET(GICV5_IRS_IDR0_VIRT, idr); + gicv5_global_data.virt_capable = !!FIELD_GET(GICV5_IRS_IDR0_VIRT, idr); idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR1); irs_setup_pri_bits(idr);
diff --git a/drivers/irqchip/irq-ls-extirq.c b/drivers/irqchip/irq-ls-extirq.c index a7e9c38..d724fe8 100644 --- a/drivers/irqchip/irq-ls-extirq.c +++ b/drivers/irqchip/irq-ls-extirq.c
@@ -125,32 +125,45 @@ static const struct irq_domain_ops extirq_domain_ops = { static int ls_extirq_parse_map(struct ls_extirq_data *priv, struct device_node *node) { - struct of_imap_parser imap_parser; - struct of_imap_item imap_item; + const __be32 *map; + u32 mapsize; int ret; - ret = of_imap_parser_init(&imap_parser, node, &imap_item); - if (ret) - return ret; + map = of_get_property(node, "interrupt-map", &mapsize); + if (!map) + return -ENOENT; + if (mapsize % sizeof(*map)) + return -EINVAL; + mapsize /= sizeof(*map); - for_each_of_imap_item(&imap_parser, &imap_item) { + while (mapsize) { struct device_node *ipar; - u32 hwirq; - int i; + u32 hwirq, intsize, j; - hwirq = imap_item.child_imap[0]; - if (hwirq >= MAXIRQ) { - of_node_put(imap_item.parent_args.np); + if (mapsize < 3) return -EINVAL; - } + hwirq = be32_to_cpup(map); + if (hwirq >= MAXIRQ) + return -EINVAL; priv->nirq = max(priv->nirq, hwirq + 1); - ipar = of_node_get(imap_item.parent_args.np); - priv->map[hwirq].fwnode = of_fwnode_handle(ipar); + ipar = of_find_node_by_phandle(be32_to_cpup(map + 2)); + map += 3; + mapsize -= 3; + if (!ipar) + return -EINVAL; + priv->map[hwirq].fwnode = &ipar->fwnode; + ret = of_property_read_u32(ipar, "#interrupt-cells", &intsize); + if (ret) + return ret; - priv->map[hwirq].param_count = imap_item.parent_args.args_count; - for (i = 0; i < priv->map[hwirq].param_count; i++) - priv->map[hwirq].param[i] = imap_item.parent_args.args[i]; + if (intsize > mapsize) + return -EINVAL; + + priv->map[hwirq].param_count = intsize; + for (j = 0; j < intsize; ++j) + priv->map[hwirq].param[j] = be32_to_cpup(map++); + mapsize -= intsize; } return 0; } @@ -177,8 +190,10 @@ static int ls_extirq_probe(struct platform_device *pdev) return dev_err_probe(dev, -ENOMEM, "Failed to allocate memory\n"); priv->intpcr = devm_of_iomap(dev, node, 0, NULL); - if (!priv->intpcr) - return dev_err_probe(dev, -ENOMEM, "Cannot ioremap OF node %pOF\n", node); + if (IS_ERR(priv->intpcr)) { + return dev_err_probe(dev, PTR_ERR(priv->intpcr), + "Cannot ioremap OF node %pOF\n", node); + } ret = ls_extirq_parse_map(priv, node); if (ret)
diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c index 09e6404..8e210cb 100644 --- a/drivers/irqchip/irq-mmp.c +++ b/drivers/irqchip/irq-mmp.c
@@ -136,7 +136,7 @@ static void icu_unmask_irq(struct irq_data *d) } } -struct irq_chip icu_irq_chip = { +static const struct irq_chip icu_irq_chip = { .name = "icu_irq", .irq_mask = icu_mask_irq, .irq_mask_ack = icu_mask_ack_irq,
diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c index 83f31ea..1813205 100644 --- a/drivers/irqchip/irq-qcom-mpm.c +++ b/drivers/irqchip/irq-qcom-mpm.c
@@ -306,6 +306,8 @@ static int mpm_pd_power_off(struct generic_pm_domain *genpd) if (ret < 0) return ret; + mbox_client_txdone(priv->mbox_chan, 0); + return 0; } @@ -434,6 +436,7 @@ static int qcom_mpm_probe(struct platform_device *pdev, struct device_node *pare } priv->mbox_client.dev = dev; + priv->mbox_client.knows_txdone = true; priv->mbox_chan = mbox_request_channel(&priv->mbox_client, 0); if (IS_ERR(priv->mbox_chan)) { ret = PTR_ERR(priv->mbox_chan);
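The qcom-mpm change moves the mailbox client to the client-driven TX-done model: with knows_txdone set, the framework no longer waits for a controller ACK, and the client must report completion itself via mbox_client_txdone(), otherwise the channel queue stalls after the first message. A sketch of how the two halves pair up:

    /* Probe: opt in to client-driven TX completion. */
    priv->mbox_client.knows_txdone = true;

    /* Send path: report completion once the command is known to be done. */
    ret = mbox_send_message(priv->mbox_chan, &msg);
    if (ret < 0)
        return ret;
    mbox_client_txdone(priv->mbox_chan, 0);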
diff --git a/drivers/irqchip/irq-renesas-rzv2h.c b/drivers/irqchip/irq-renesas-rzv2h.c index da2bc43..03e93b0 100644 --- a/drivers/irqchip/irq-renesas-rzv2h.c +++ b/drivers/irqchip/irq-renesas-rzv2h.c
@@ -621,7 +621,7 @@ static int rzv2h_icu_probe_common(struct platform_device *pdev, struct device_no return 0; pm_put: - pm_runtime_put(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); return ret; }
diff --git a/drivers/irqchip/irq-riscv-aplic-main.c b/drivers/irqchip/irq-riscv-aplic-main.c index 4495ca2..d9afb6a 100644 --- a/drivers/irqchip/irq-riscv-aplic-main.c +++ b/drivers/irqchip/irq-riscv-aplic-main.c
@@ -116,6 +116,16 @@ static struct syscore aplic_syscore = { .ops = &aplic_syscore_ops, }; +static bool aplic_syscore_registered __ro_after_init; + +static void aplic_syscore_init(void) +{ + if (!aplic_syscore_registered) { + register_syscore(&aplic_syscore); + aplic_syscore_registered = true; + } +} + static int aplic_pm_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct aplic_priv *priv = container_of(nb, struct aplic_priv, genpd_nb); @@ -140,7 +150,7 @@ static void aplic_pm_remove(void *data) struct device *dev = priv->dev; list_del(&priv->head); - if (dev->pm_domain) + if (dev->pm_domain && dev->of_node) dev_pm_genpd_remove_notifier(dev); } @@ -155,7 +165,7 @@ static int aplic_pm_add(struct device *dev, struct aplic_priv *priv) priv->saved_hw_regs.srcs = srcs; list_add(&priv->head, &aplics); - if (dev->pm_domain) { + if (dev->pm_domain && dev->of_node) { priv->genpd_nb.notifier_call = aplic_pm_notifier; ret = dev_pm_genpd_add_notifier(dev, &priv->genpd_nb); if (ret) @@ -372,18 +382,21 @@ static int aplic_probe(struct platform_device *pdev) rc = aplic_msi_setup(dev, regs); else rc = aplic_direct_setup(dev, regs); - if (rc) + + if (rc) { dev_err_probe(dev, rc, "failed to setup APLIC in %s mode\n", msi_mode ? "MSI" : "direct"); - else - register_syscore(&aplic_syscore); + return rc; + } + + aplic_syscore_init(); #ifdef CONFIG_ACPI if (!acpi_disabled) acpi_dev_clear_dependencies(ACPI_COMPANION(dev)); #endif - return rc; + return 0; } static const struct of_device_id aplic_match[] = {
diff --git a/drivers/irqchip/irq-riscv-rpmi-sysmsi.c b/drivers/irqchip/irq-riscv-rpmi-sysmsi.c index 5c74c56..612f397 100644 --- a/drivers/irqchip/irq-riscv-rpmi-sysmsi.c +++ b/drivers/irqchip/irq-riscv-rpmi-sysmsi.c
@@ -250,6 +250,7 @@ static int rpmi_sysmsi_probe(struct platform_device *pdev) rc = riscv_acpi_get_gsi_info(fwnode, &priv->gsi_base, &id, &nr_irqs, NULL); if (rc) { + mbox_free_channel(priv->chan); dev_err(dev, "failed to find GSI mapping\n"); return rc; }
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 686d392..5b0dac1 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c
@@ -172,8 +172,13 @@ static void plic_irq_disable(struct irq_data *d) static void plic_irq_eoi(struct irq_data *d) { struct plic_handler *handler = this_cpu_ptr(&plic_handlers); + u32 __iomem *reg; + bool enabled; - if (unlikely(irqd_irq_disabled(d))) { + reg = handler->enable_base + (d->hwirq / 32) * sizeof(u32); + enabled = readl(reg) & BIT(d->hwirq % 32); + + if (unlikely(!enabled)) { plic_toggle(handler, d->hwirq, 1); writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); plic_toggle(handler, d->hwirq, 0);
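The PLIC fix reads the live enable bit back from the hardware instead of trusting irqd_irq_disabled(), since the generic flag can disagree with the per-context enable bits. The register math packs 32 sources per 32-bit enable word; a standalone illustration for hwirq 42:

    #include <stdint.h>

    void demo(unsigned int hwirq /* e.g. 42 */)
    {
        unsigned int word_off = (hwirq / 32) * sizeof(uint32_t);  /* word 1 -> offset 4 */
        uint32_t mask = UINT32_C(1) << (hwirq % 32);              /* bit 10 */
        (void)word_off; (void)mask;
    }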
diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index 7cfed24..3c8bc75 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c
@@ -168,7 +168,9 @@ static int dvb_dvr_open(struct inode *inode, struct file *file) mutex_unlock(&dmxdev->mutex); return -ENOMEM; } - dvb_ringbuffer_init(&dmxdev->dvr_buffer, mem, DVR_BUFFER_SIZE); + dmxdev->dvr_buffer.data = mem; + dmxdev->dvr_buffer.size = DVR_BUFFER_SIZE; + dvb_ringbuffer_reset(&dmxdev->dvr_buffer); if (dmxdev->may_do_mmap) dvb_vb2_init(&dmxdev->dvr_vb2_ctx, "dvr", &dmxdev->mutex,
diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c index 8bb8dd3..a2159b2 100644 --- a/drivers/media/dvb-core/dvb_net.c +++ b/drivers/media/dvb-core/dvb_net.c
@@ -228,6 +228,9 @@ static int handle_one_ule_extension( struct dvb_net_priv *p ) unsigned char hlen = (p->ule_sndu_type & 0x0700) >> 8; unsigned char htype = p->ule_sndu_type & 0x00FF; + if (htype >= ARRAY_SIZE(ule_mandatory_ext_handlers)) + return -1; + /* Discriminate mandatory and optional extension headers. */ if (hlen == 0) { /* Mandatory extension header */
diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c
index aa4dd7e..8e25f97 100644
--- a/drivers/media/i2c/ccs/ccs-core.c
+++ b/drivers/media/i2c/ccs/ccs-core.c
@@ -3080,8 +3080,6 @@ static int ccs_init_state(struct v4l2_subdev *sd, struct v4l2_rect *crop = v4l2_subdev_state_get_crop(sd_state, pad); - guard(mutex)(&sensor->mutex); - ccs_get_native_size(ssd, crop); fmt->width = crop->width;
diff --git a/drivers/media/mc/mc-request.c b/drivers/media/mc/mc-request.c
index c7c7d4a8..13e7764 100644
--- a/drivers/media/mc/mc-request.c
+++ b/drivers/media/mc/mc-request.c
@@ -192,6 +192,8 @@ static long media_request_ioctl_reinit(struct media_request *req) struct media_device *mdev = req->mdev; unsigned long flags; + mutex_lock(&mdev->req_queue_mutex); + spin_lock_irqsave(&req->lock, flags); if (req->state != MEDIA_REQUEST_STATE_IDLE && req->state != MEDIA_REQUEST_STATE_COMPLETE) { @@ -199,6 +201,7 @@ static long media_request_ioctl_reinit(struct media_request *req) "request: %s not in idle or complete state, cannot reinit\n", req->debug_str); spin_unlock_irqrestore(&req->lock, flags); + mutex_unlock(&mdev->req_queue_mutex); return -EBUSY; } if (req->access_count) { @@ -206,6 +209,7 @@ static long media_request_ioctl_reinit(struct media_request *req) "request: %s is being accessed, cannot reinit\n", req->debug_str); spin_unlock_irqrestore(&req->lock, flags); + mutex_unlock(&mdev->req_queue_mutex); return -EBUSY; } req->state = MEDIA_REQUEST_STATE_CLEANING; @@ -216,6 +220,7 @@ static long media_request_ioctl_reinit(struct media_request *req) spin_lock_irqsave(&req->lock, flags); req->state = MEDIA_REQUEST_STATE_IDLE; spin_unlock_irqrestore(&req->lock, flags); + mutex_unlock(&mdev->req_queue_mutex); return 0; }
diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c b/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c
index 28267ee..3119f3b 100644
--- a/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c
+++ b/drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c
@@ -500,11 +500,15 @@ void rkvdec_hevc_run_preamble(struct rkvdec_ctx *ctx, ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS); run->ext_sps_st_rps = ctrl ? ctrl->p_cur.p : NULL; + } else { + run->ext_sps_st_rps = NULL; } if (ctx->has_sps_lt_rps) { ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS); run->ext_sps_lt_rps = ctrl ? ctrl->p_cur.p : NULL; + } else { + run->ext_sps_lt_rps = NULL; } rkvdec_run_preamble(ctx, &run->base);
diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c b/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c
index 97f1efd..fb4f849 100644
--- a/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c
+++ b/drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c
@@ -130,7 +130,7 @@ struct rkvdec_h264_ctx { struct vdpu383_regs_h26x regs; }; -static void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_dpb_entry *dpb) +static noinline_for_stack void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_dpb_entry *dpb) { pps->top_field_order_cnt0 = dpb[0].top_field_order_cnt; pps->bot_field_order_cnt0 = dpb[0].bottom_field_order_cnt; @@ -166,6 +166,31 @@ static void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_d pps->bot_field_order_cnt15 = dpb[15].bottom_field_order_cnt; } +static noinline_for_stack void set_dec_params(struct rkvdec_pps *pps, const struct v4l2_ctrl_h264_decode_params *dec_params) +{ + const struct v4l2_h264_dpb_entry *dpb = dec_params->dpb; + + for (int i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) { + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) + pps->is_longterm |= (1 << i); + pps->ref_field_flags |= + (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD)) << i; + pps->ref_colmv_use_flag |= + (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) << i; + pps->ref_topfield_used |= + (!!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF)) << i; + pps->ref_botfield_used |= + (!!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)) << i; + } + pps->pic_field_flag = + !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC); + pps->pic_associated_flag = + !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD); + + pps->cur_top_field = dec_params->top_field_order_cnt; + pps->cur_bot_field = dec_params->bottom_field_order_cnt; +} + static void assemble_hw_pps(struct rkvdec_ctx *ctx, struct rkvdec_h264_run *run) { @@ -177,7 +202,6 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx, struct rkvdec_h264_priv_tbl *priv_tbl = h264_ctx->priv_tbl.cpu; struct rkvdec_sps_pps *hw_ps; u32 pic_width, pic_height; - u32 i; /* * HW read the SPS/PPS information from PPS packet index by PPS id. @@ -261,28 +285,8 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx, !!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT); set_field_order_cnt(&hw_ps->pps, dpb); + set_dec_params(&hw_ps->pps, dec_params); - for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) { - if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) - hw_ps->pps.is_longterm |= (1 << i); - - hw_ps->pps.ref_field_flags |= - (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD)) << i; - hw_ps->pps.ref_colmv_use_flag |= - (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) << i; - hw_ps->pps.ref_topfield_used |= - (!!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF)) << i; - hw_ps->pps.ref_botfield_used |= - (!!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)) << i; - } - - hw_ps->pps.pic_field_flag = - !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC); - hw_ps->pps.pic_associated_flag = - !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD); - - hw_ps->pps.cur_top_field = dec_params->top_field_order_cnt; - hw_ps->pps.cur_bot_field = dec_params->bottom_field_order_cnt; } static void rkvdec_write_regs(struct rkvdec_ctx *ctx)
diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c b/drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c
index e4cdd21..2751f53 100644
--- a/drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c
+++ b/drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c
@@ -893,7 +893,8 @@ static void rkvdec_vp9_done(struct rkvdec_ctx *ctx, update_ctx_last_info(vp9_ctx); } -static void rkvdec_init_v4l2_vp9_count_tbl(struct rkvdec_ctx *ctx) +static noinline_for_stack void +rkvdec_init_v4l2_vp9_count_tbl(struct rkvdec_ctx *ctx) { struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv; struct rkvdec_vp9_intra_frame_symbol_counts *intra_cnts = vp9_ctx->count_tbl.cpu;
diff --git a/drivers/media/platform/synopsys/Kconfig b/drivers/media/platform/synopsys/Kconfig
index e798ec0..bf2ac09 100644
--- a/drivers/media/platform/synopsys/Kconfig
+++ b/drivers/media/platform/synopsys/Kconfig
@@ -7,6 +7,7 @@ depends on VIDEO_DEV depends on V4L_PLATFORM_DRIVERS depends on PM && COMMON_CLK + select GENERIC_PHY_MIPI_DPHY select MEDIA_CONTROLLER select V4L2_FWNODE select VIDEO_V4L2_SUBDEV_API
diff --git a/drivers/media/platform/synopsys/dw-mipi-csi2rx.c b/drivers/media/platform/synopsys/dw-mipi-csi2rx.c
index 170346a..4d96171 100644
--- a/drivers/media/platform/synopsys/dw-mipi-csi2rx.c
+++ b/drivers/media/platform/synopsys/dw-mipi-csi2rx.c
@@ -301,7 +301,7 @@ dw_mipi_csi2rx_enum_mbus_code(struct v4l2_subdev *sd, return 0; case DW_MIPI_CSI2RX_PAD_SINK: - if (code->index > csi2->formats_num) + if (code->index >= csi2->formats_num) return -EINVAL; code->code = csi2->formats[code->index].code;
diff --git a/drivers/media/platform/verisilicon/imx8m_vpu_hw.c b/drivers/media/platform/verisilicon/imx8m_vpu_hw.c
index 6f8e43b..fa4224d 100644
--- a/drivers/media/platform/verisilicon/imx8m_vpu_hw.c
+++ b/drivers/media/platform/verisilicon/imx8m_vpu_hw.c
@@ -343,7 +343,7 @@ const struct hantro_variant imx8mq_vpu_variant = { .num_regs = ARRAY_SIZE(imx8mq_reg_names) }; -static const struct of_device_id imx8mq_vpu_shared_resources[] __initconst = { +static const struct of_device_id imx8mq_vpu_shared_resources[] = { { .compatible = "nxp,imx8mq-vpu-g1", }, { .compatible = "nxp,imx8mq-vpu-g2", }, { /* sentinel */ }
diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c
index 40c76c0..f6c8e32 100644
--- a/drivers/media/usb/uvc/uvc_video.c
+++ b/drivers/media/usb/uvc/uvc_video.c
@@ -1751,7 +1751,8 @@ static void uvc_video_complete(struct urb *urb) /* * Free transfer buffers. */ -static void uvc_free_urb_buffers(struct uvc_streaming *stream) +static void uvc_free_urb_buffers(struct uvc_streaming *stream, + unsigned int size) { struct usb_device *udev = stream->dev->udev; struct uvc_urb *uvc_urb; @@ -1760,7 +1761,7 @@ static void uvc_free_urb_buffers(struct uvc_streaming *stream) if (!uvc_urb->buffer) continue; - usb_free_noncoherent(udev, stream->urb_size, uvc_urb->buffer, + usb_free_noncoherent(udev, size, uvc_urb->buffer, uvc_stream_dir(stream), uvc_urb->sgt); uvc_urb->buffer = NULL; uvc_urb->sgt = NULL; @@ -1820,7 +1821,7 @@ static int uvc_alloc_urb_buffers(struct uvc_streaming *stream, if (!uvc_alloc_urb_buffer(stream, uvc_urb, urb_size, gfp_flags)) { - uvc_free_urb_buffers(stream); + uvc_free_urb_buffers(stream, urb_size); break; } @@ -1868,7 +1869,7 @@ static void uvc_video_stop_transfer(struct uvc_streaming *stream, } if (free_buffers) - uvc_free_urb_buffers(stream); + uvc_free_urb_buffers(stream, stream->urb_size); } /*
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 37d33d4..a2b650f 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -3082,13 +3082,14 @@ static long __video_do_ioctl(struct file *file, } /* - * We need to serialize streamon/off with queueing new requests. + * We need to serialize streamon/off/reqbufs with queueing new requests. * These ioctls may trigger the cancellation of a streaming * operation, and that should not be mixed with queueing a new * request at the same time. */ if (v4l2_device_supports_requests(vfd->v4l2_dev) && - (cmd == VIDIOC_STREAMON || cmd == VIDIOC_STREAMOFF)) { + (cmd == VIDIOC_STREAMON || cmd == VIDIOC_STREAMOFF || + cmd == VIDIOC_REQBUFS)) { req_queue_lock = &vfd->v4l2_dev->mdev->req_queue_mutex; if (mutex_lock_interruptible(req_queue_lock))
diff --git a/drivers/misc/amd-sbi/Kconfig b/drivers/misc/amd-sbi/Kconfig
index be022c7..30e7fad 100644
--- a/drivers/misc/amd-sbi/Kconfig
+++ b/drivers/misc/amd-sbi/Kconfig
@@ -1,10 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only config AMD_SBRMI_I2C tristate "AMD side band RMI support" - depends on I2C + depends on I3C_OR_I2C depends on ARM || ARM64 || COMPILE_TEST select REGMAP_I2C - depends on I3C || !I3C select REGMAP_I3C if I3C help Side band RMI over I2C/I3C support for AMD out of band management.
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 47356a5..1080f9a 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -1401,6 +1401,7 @@ static int fastrpc_init_create_static_process(struct fastrpc_user *fl, } err_map: fastrpc_buf_free(fl->cctx->remote_heap); + fl->cctx->remote_heap = NULL; err_name: kfree(name); err: @@ -2389,8 +2390,10 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev) if (!err) { src_perms = BIT(QCOM_SCM_VMID_HLOS); - qcom_scm_assign_mem(res.start, resource_size(&res), &src_perms, + err = qcom_scm_assign_mem(res.start, resource_size(&res), &src_perms, data->vmperms, data->vmcount); + if (err) + goto err_free_data; } }
diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c
index 9c68f8b..21e8ad0 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d.c
+++ b/drivers/misc/lis3lv02d/lis3lv02d.c
@@ -1230,10 +1230,12 @@ int lis3lv02d_init_device(struct lis3lv02d *lis3) else thread_fn = NULL; + if (thread_fn) + irq_flags |= IRQF_ONESHOT; + err = request_threaded_irq(lis3->irq, lis302dl_interrupt, thread_fn, - IRQF_TRIGGER_RISING | IRQF_ONESHOT | - irq_flags, + irq_flags | IRQF_TRIGGER_RISING, DRIVER_NAME, lis3); if (err < 0) {
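The lis3lv02d change encodes a general rule for request_threaded_irq(): IRQF_ONESHOT keeps the line masked until the threaded handler has run, so it is wanted only when a thread function is actually installed. A minimal sketch of the conditional-ONESHOT pattern, with hypothetical names:

    #include <linux/interrupt.h>

    static int example_request_irq(int irq, irq_handler_t hard_fn,
                                   irq_handler_t thread_fn, void *priv)
    {
            unsigned long flags = IRQF_TRIGGER_RISING;

            /* Keep the line masked until the thread finishes, but only
             * when a thread handler is actually in use.
             */
            if (thread_fn)
                    flags |= IRQF_ONESHOT;

            return request_threaded_irq(irq, hard_fn, thread_fn, flags,
                                        "example", priv);
    }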
diff --git a/drivers/misc/mei/Kconfig b/drivers/misc/mei/Kconfig
index 5902dd1..094fb1d 100644
--- a/drivers/misc/mei/Kconfig
+++ b/drivers/misc/mei/Kconfig
@@ -3,6 +3,7 @@ config INTEL_MEI tristate "Intel Management Engine Interface" depends on PCI + depends on X86 || DRM_XE!=n || COMPILE_TEST default X86_64 || MATOM help The Intel Management Engine (Intel ME) provides Manageability,
diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
index d4612c6..1e4a41a 100644
--- a/drivers/misc/mei/hw-me.c
+++ b/drivers/misc/mei/hw-me.c
@@ -1337,19 +1337,13 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) /* check if we need to start the dev */ if (!mei_host_is_ready(dev)) { if (mei_hw_is_ready(dev)) { - /* synchronized by dev mutex */ - if (waitqueue_active(&dev->wait_hw_ready)) { - dev_dbg(&dev->dev, "we need to start the dev.\n"); - dev->recvd_hw_ready = true; - wake_up(&dev->wait_hw_ready); - } else if (dev->dev_state != MEI_DEV_UNINITIALIZED && - dev->dev_state != MEI_DEV_POWERING_DOWN && - dev->dev_state != MEI_DEV_POWER_DOWN) { + if (dev->dev_state == MEI_DEV_ENABLED) { dev_dbg(&dev->dev, "Force link reset.\n"); schedule_work(&dev->reset_work); } else { - dev_dbg(&dev->dev, "Ignore this interrupt in state = %d\n", - dev->dev_state); + dev_dbg(&dev->dev, "we need to start the dev.\n"); + dev->recvd_hw_ready = true; + wake_up(&dev->wait_hw_ready); } } else { dev_dbg(&dev->dev, "Spurious Interrupt\n");
diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c
index 4e3423a..ac069d0 100644
--- a/drivers/mmc/host/dw_mmc-rockchip.c
+++ b/drivers/mmc/host/dw_mmc-rockchip.c
@@ -36,6 +36,8 @@ struct dw_mci_rockchip_priv_data { int default_sample_phase; int num_phases; bool internal_phase; + int sample_phase; + int drv_phase; }; /* @@ -573,9 +575,43 @@ static void dw_mci_rockchip_remove(struct platform_device *pdev) dw_mci_pltfm_remove(pdev); } +static int dw_mci_rockchip_runtime_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dw_mci *host = platform_get_drvdata(pdev); + struct dw_mci_rockchip_priv_data *priv = host->priv; + + if (priv->internal_phase) { + priv->sample_phase = rockchip_mmc_get_phase(host, true); + priv->drv_phase = rockchip_mmc_get_phase(host, false); + } + + return dw_mci_runtime_suspend(dev); +} + +static int dw_mci_rockchip_runtime_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dw_mci *host = platform_get_drvdata(pdev); + struct dw_mci_rockchip_priv_data *priv = host->priv; + int ret; + + ret = dw_mci_runtime_resume(dev); + if (ret) + return ret; + + if (priv->internal_phase) { + rockchip_mmc_set_phase(host, true, priv->sample_phase); + rockchip_mmc_set_phase(host, false, priv->drv_phase); + mci_writel(host, MISC_CON, MEM_CLK_AUTOGATE_ENABLE); + } + + return ret; +} + static const struct dev_pm_ops dw_mci_rockchip_dev_pm_ops = { SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) - RUNTIME_PM_OPS(dw_mci_runtime_suspend, dw_mci_runtime_resume, NULL) + RUNTIME_PM_OPS(dw_mci_rockchip_runtime_suspend, dw_mci_rockchip_runtime_resume, NULL) }; static struct platform_driver dw_mci_rockchip_pltfm_driver = {
diff --git a/drivers/mmc/host/mmci_qcom_dml.c b/drivers/mmc/host/mmci_qcom_dml.c
index 3da6112..6737138 100644
--- a/drivers/mmc/host/mmci_qcom_dml.c
+++ b/drivers/mmc/host/mmci_qcom_dml.c
@@ -109,6 +109,7 @@ static int of_get_dml_pipe_index(struct device_node *np, const char *name) &dma_spec)) return -ENODEV; + of_node_put(dma_spec.np); if (dma_spec.args_count) return dma_spec.args[0];
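The mmci_qcom_dml fix is the standard OF refcounting pattern: of_parse_phandle_with_args() hands back dma_spec.np with its refcount raised, and a caller that only consumes the argument cells must drop that reference or the node leaks. A reduced sketch under that assumption (names hypothetical):

    #include <linux/of.h>

    static int example_get_pipe_index(struct device_node *np, int idx)
    {
            struct of_phandle_args spec;

            if (of_parse_phandle_with_args(np, "dmas", "#dma-cells", idx, &spec))
                    return -ENODEV;

            /* spec.np came back with an elevated refcount; drop it, since
             * only the argument cells are used here.
             */
            of_node_put(spec.np);

            return spec.args_count ? spec.args[0] : -ENODEV;
    }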
diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c
index c944249..57e4595 100644
--- a/drivers/mmc/host/sdhci-brcmstb.c
+++ b/drivers/mmc/host/sdhci-brcmstb.c
@@ -116,7 +116,7 @@ static void sdhci_brcmstb_restore_regs(struct mmc_host *mmc, enum cfg_core_ver v writel(sr->boot_main_ctl, priv->boot_regs + SDIO_BOOT_MAIN_CTL); if (ver == SDIO_CFG_CORE_V1) { - writel(sr->sd_pin_sel, cr + SDIO_CFG_SD_PIN_SEL); + writel(sr->sd_pin_sel, cr + SDIO_CFG_V1_SD_PIN_SEL); return; }
diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c
index b0f91cc..6e40844 100644
--- a/drivers/mmc/host/sdhci-pci-gli.c
+++ b/drivers/mmc/host/sdhci-pci-gli.c
@@ -68,6 +68,9 @@ #define GLI_9750_MISC_TX1_DLY_VALUE 0x5 #define SDHCI_GLI_9750_MISC_SSC_OFF BIT(26) +#define SDHCI_GLI_9750_GM_BURST_SIZE 0x510 +#define SDHCI_GLI_9750_GM_BURST_SIZE_R_OSRC_LMT GENMASK(17, 16) + #define SDHCI_GLI_9750_TUNING_CONTROL 0x540 #define SDHCI_GLI_9750_TUNING_CONTROL_EN BIT(4) #define GLI_9750_TUNING_CONTROL_EN_ON 0x1 @@ -345,10 +348,16 @@ static void gli_set_9750(struct sdhci_host *host) u32 misc_value; u32 parameter_value; u32 control_value; + u32 burst_value; u16 ctrl2; gl9750_wt_on(host); + /* clear R_OSRC_Lmt to avoid DMA write corruption */ + burst_value = sdhci_readl(host, SDHCI_GLI_9750_GM_BURST_SIZE); + burst_value &= ~SDHCI_GLI_9750_GM_BURST_SIZE_R_OSRC_LMT; + sdhci_writel(host, burst_value, SDHCI_GLI_9750_GM_BURST_SIZE); + driving_value = sdhci_readl(host, SDHCI_GLI_9750_DRIVING); pll_value = sdhci_readl(host, SDHCI_GLI_9750_PLL); sw_ctrl_value = sdhci_readl(host, SDHCI_GLI_9750_SW_CTRL);
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index ac7e11f..fec9329 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -4532,8 +4532,15 @@ int sdhci_setup_host(struct sdhci_host *host) * their platform code before calling sdhci_add_host(), and we * won't assume 8-bit width for hosts without that CAP. */ - if (!(host->quirks & SDHCI_QUIRK_FORCE_1_BIT_DATA)) + if (host->quirks & SDHCI_QUIRK_FORCE_1_BIT_DATA) { + host->caps1 &= ~(SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_DDR50); + if (host->quirks2 & SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400) + host->caps1 &= ~SDHCI_SUPPORT_HS400; + mmc->caps2 &= ~(MMC_CAP2_HS200 | MMC_CAP2_HS400 | MMC_CAP2_HS400_ES); + mmc->caps &= ~(MMC_CAP_DDR | MMC_CAP_UHS); + } else { mmc->caps |= MMC_CAP_4_BIT_DATA; + } if (host->quirks2 & SDHCI_QUIRK2_HOST_NO_CMD23) mmc->caps &= ~MMC_CAP_CMD23;
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index 0427d76..5b9dadd 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -2350,14 +2350,12 @@ static int brcmnand_write(struct mtd_info *mtd, struct nand_chip *chip, for (i = 0; i < ctrl->max_oob; i += 4) oob_reg_write(ctrl, i, 0xffffffff); - if (mtd->oops_panic_write) + if (mtd->oops_panic_write) { /* switch to interrupt polling and PIO mode */ disable_ctrl_irqs(ctrl); - - if (use_dma(ctrl) && (has_edu(ctrl) || !oob) && flash_dma_buf_ok(buf)) { + } else if (use_dma(ctrl) && (has_edu(ctrl) || !oob) && flash_dma_buf_ok(buf)) { if (ctrl->dma_trans(host, addr, (u32 *)buf, oob, mtd->writesize, CMD_PROGRAM_PAGE)) - ret = -EIO; goto out;
diff --git a/drivers/mtd/nand/raw/cadence-nand-controller.c b/drivers/mtd/nand/raw/cadence-nand-controller.c
index 99135ec..d53b35a 100644
--- a/drivers/mtd/nand/raw/cadence-nand-controller.c
+++ b/drivers/mtd/nand/raw/cadence-nand-controller.c
@@ -3133,7 +3133,7 @@ static int cadence_nand_init(struct cdns_nand_ctrl *cdns_ctrl) sizeof(*cdns_ctrl->cdma_desc), &cdns_ctrl->dma_cdma_desc, GFP_KERNEL); - if (!cdns_ctrl->dma_cdma_desc) + if (!cdns_ctrl->cdma_desc) return -ENOMEM; cdns_ctrl->buf_size = SZ_16K;
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 3842936..dfd8361 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -4737,11 +4737,16 @@ static void nand_shutdown(struct mtd_info *mtd) static int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { struct nand_chip *chip = mtd_to_nand(mtd); + int ret; if (!chip->ops.lock_area) return -ENOTSUPP; - return chip->ops.lock_area(chip, ofs, len); + nand_get_device(chip); + ret = chip->ops.lock_area(chip, ofs, len); + nand_release_device(chip); + + return ret; } /** @@ -4753,11 +4758,16 @@ static int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) static int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { struct nand_chip *chip = mtd_to_nand(mtd); + int ret; if (!chip->ops.unlock_area) return -ENOTSUPP; - return chip->ops.unlock_area(chip, ofs, len); + nand_get_device(chip); + ret = chip->ops.unlock_area(chip, ofs, len); + nand_release_device(chip); + + return ret; } /* Set default functions */
diff --git a/drivers/mtd/nand/raw/pl35x-nand-controller.c b/drivers/mtd/nand/raw/pl35x-nand-controller.c
index 947fd86..f2c65eb 100644
--- a/drivers/mtd/nand/raw/pl35x-nand-controller.c
+++ b/drivers/mtd/nand/raw/pl35x-nand-controller.c
@@ -862,6 +862,9 @@ static int pl35x_nfc_setup_interface(struct nand_chip *chip, int cs, PL35X_SMC_NAND_TAR_CYCLES(tmgs.t_ar) | PL35X_SMC_NAND_TRR_CYCLES(tmgs.t_rr); + writel(plnand->timings, nfc->conf_regs + PL35X_SMC_CYCLES); + pl35x_smc_update_regs(nfc); + return 0; }
diff --git a/drivers/mtd/parsers/redboot.c b/drivers/mtd/parsers/redboot.c
index 5589051..bf162c4 100644
--- a/drivers/mtd/parsers/redboot.c
+++ b/drivers/mtd/parsers/redboot.c
@@ -270,9 +270,9 @@ static int parse_redboot_partitions(struct mtd_info *master, strcpy(names, fl->img->name); #ifdef CONFIG_MTD_REDBOOT_PARTS_READONLY - if (!memcmp(names, "RedBoot", 8) || - !memcmp(names, "RedBoot config", 15) || - !memcmp(names, "FIS directory", 14)) { + if (!strcmp(names, "RedBoot") || + !strcmp(names, "RedBoot config") || + !strcmp(names, "FIS directory")) { parts[i].mask_flags = MTD_WRITEABLE; } #endif
diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index 8ffeb41..1eee519 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -2345,15 +2345,15 @@ int spi_nor_hwcaps_pp2cmd(u32 hwcaps) } /** - * spi_nor_spimem_check_op - check if the operation is supported - * by controller + * spi_nor_spimem_check_read_pp_op - check if a read or a page program operation is + * supported by controller *@nor: pointer to a 'struct spi_nor' *@op: pointer to op template to be checked * * Returns 0 if operation is supported, -EOPNOTSUPP otherwise. */ -static int spi_nor_spimem_check_op(struct spi_nor *nor, - struct spi_mem_op *op) +static int spi_nor_spimem_check_read_pp_op(struct spi_nor *nor, + struct spi_mem_op *op) { /* * First test with 4 address bytes. The opcode itself might @@ -2396,7 +2396,7 @@ static int spi_nor_spimem_check_readop(struct spi_nor *nor, if (spi_nor_protocol_is_dtr(nor->read_proto)) op.dummy.nbytes *= 2; - return spi_nor_spimem_check_op(nor, &op); + return spi_nor_spimem_check_read_pp_op(nor, &op); } /** @@ -2414,7 +2414,7 @@ static int spi_nor_spimem_check_pp(struct spi_nor *nor, spi_nor_spimem_setup_op(nor, &op, pp->proto); - return spi_nor_spimem_check_op(nor, &op); + return spi_nor_spimem_check_read_pp_op(nor, &op); } /** @@ -2466,7 +2466,7 @@ spi_nor_spimem_adjust_hwcaps(struct spi_nor *nor, u32 *hwcaps) spi_nor_spimem_setup_op(nor, &op, nor->reg_proto); - if (spi_nor_spimem_check_op(nor, &op)) + if (!spi_mem_supports_op(nor->spimem, &op)) nor->flags |= SNOR_F_NO_READ_CR; } }
diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c
index 8adbec7..8967b65 100644
--- a/drivers/net/bonding/bond_debugfs.c
+++ b/drivers/net/bonding/bond_debugfs.c
@@ -34,11 +34,17 @@ static int bond_debug_rlb_hash_show(struct seq_file *m, void *v) for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->used_next) { client_info = &(bond_info->rx_hashtbl[hash_index]); - seq_printf(m, "%-15pI4 %-15pI4 %-17pM %s\n", - &client_info->ip_src, - &client_info->ip_dst, - &client_info->mac_dst, - client_info->slave->dev->name); + if (client_info->slave) + seq_printf(m, "%-15pI4 %-15pI4 %-17pM %s\n", + &client_info->ip_src, + &client_info->ip_dst, + &client_info->mac_dst, + client_info->slave->dev->name); + else + seq_printf(m, "%-15pI4 %-15pI4 %-17pM (none)\n", + &client_info->ip_src, + &client_info->ip_dst, + &client_info->mac_dst); } spin_unlock_bh(&bond->mode_lock);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index a1de08e..a5484d1 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -324,7 +324,7 @@ static bool bond_sk_check(struct bonding *bond) } } -bool bond_xdp_check(struct bonding *bond, int mode) +bool __bond_xdp_check(int mode, int xmit_policy) { switch (mode) { case BOND_MODE_ROUNDROBIN: @@ -335,7 +335,7 @@ bool bond_xdp_check(struct bonding *bond, int mode) /* vlan+srcmac is not supported with XDP as in most cases the 802.1q * payload is not in the packet due to hardware offload. */ - if (bond->params.xmit_policy != BOND_XMIT_POLICY_VLAN_SRCMAC) + if (xmit_policy != BOND_XMIT_POLICY_VLAN_SRCMAC) return true; fallthrough; default: @@ -343,6 +343,11 @@ bool bond_xdp_check(struct bonding *bond, int mode) } } +bool bond_xdp_check(struct bonding *bond, int mode) +{ + return __bond_xdp_check(mode, bond->params.xmit_policy); +} + /*---------------------------------- VLAN -----------------------------------*/ /* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid, @@ -1504,6 +1509,52 @@ static netdev_features_t bond_fix_features(struct net_device *dev, return features; } +static int bond_header_create(struct sk_buff *skb, struct net_device *bond_dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) +{ + struct bonding *bond = netdev_priv(bond_dev); + const struct header_ops *slave_ops; + struct slave *slave; + int ret = 0; + + rcu_read_lock(); + slave = rcu_dereference(bond->curr_active_slave); + if (slave) { + slave_ops = READ_ONCE(slave->dev->header_ops); + if (slave_ops && slave_ops->create) + ret = slave_ops->create(skb, slave->dev, + type, daddr, saddr, len); + } + rcu_read_unlock(); + return ret; +} + +static int bond_header_parse(const struct sk_buff *skb, + const struct net_device *dev, + unsigned char *haddr) +{ + struct bonding *bond = netdev_priv(dev); + const struct header_ops *slave_ops; + struct slave *slave; + int ret = 0; + + rcu_read_lock(); + slave = rcu_dereference(bond->curr_active_slave); + if (slave) { + slave_ops = READ_ONCE(slave->dev->header_ops); + if (slave_ops && slave_ops->parse) + ret = slave_ops->parse(skb, slave->dev, haddr); + } + rcu_read_unlock(); + return ret; +} + +static const struct header_ops bond_header_ops = { + .create = bond_header_create, + .parse = bond_header_parse, +}; + static void bond_setup_by_slave(struct net_device *bond_dev, struct net_device *slave_dev) { @@ -1511,7 +1562,8 @@ static void bond_setup_by_slave(struct net_device *bond_dev, dev_close(bond_dev); - bond_dev->header_ops = slave_dev->header_ops; + bond_dev->header_ops = slave_dev->header_ops ? 
+ &bond_header_ops : NULL; bond_dev->type = slave_dev->type; bond_dev->hard_header_len = slave_dev->hard_header_len; @@ -2796,8 +2848,14 @@ static void bond_miimon_commit(struct bonding *bond) continue; + case BOND_LINK_FAIL: + case BOND_LINK_BACK: + slave_dbg(bond->dev, slave->dev, "link_new_state %d on slave\n", + slave->link_new_state); + continue; + default: - slave_err(bond->dev, slave->dev, "invalid new link %d on slave\n", + slave_err(bond->dev, slave->dev, "invalid link_new_state %d on slave\n", slave->link_new_state); bond_propose_link_state(slave, BOND_LINK_NOCHANGE); @@ -3372,7 +3430,7 @@ int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond, } else if (is_arp) { return bond_arp_rcv(skb, bond, slave); #if IS_ENABLED(CONFIG_IPV6) - } else if (is_ipv6) { + } else if (is_ipv6 && likely(ipv6_mod_enabled())) { return bond_na_rcv(skb, bond, slave); #endif } else { @@ -5064,13 +5122,18 @@ static void bond_set_slave_arr(struct bonding *bond, { struct bond_up_slave *usable, *all; - usable = rtnl_dereference(bond->usable_slaves); - rcu_assign_pointer(bond->usable_slaves, usable_slaves); - kfree_rcu(usable, rcu); - all = rtnl_dereference(bond->all_slaves); rcu_assign_pointer(bond->all_slaves, all_slaves); kfree_rcu(all, rcu); + + if (BOND_MODE(bond) == BOND_MODE_BROADCAST) { + kfree_rcu(usable_slaves, rcu); + return; + } + + usable = rtnl_dereference(bond->usable_slaves); + rcu_assign_pointer(bond->usable_slaves, usable_slaves); + kfree_rcu(usable, rcu); } static void bond_reset_slave_arr(struct bonding *bond) @@ -5263,7 +5326,7 @@ static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb, if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)) continue; - if (bond_is_last_slave(bond, slave)) { + if (i + 1 == slaves_count) { skb2 = skb; skb_used = true; } else {
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index dcee384..7380cc4 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1575,6 +1575,8 @@ static int bond_option_fail_over_mac_set(struct bonding *bond, static int bond_option_xmit_hash_policy_set(struct bonding *bond, const struct bond_opt_value *newval) { + if (bond->xdp_prog && !__bond_xdp_check(BOND_MODE(bond), newval->value)) + return -EOPNOTSUPP; netdev_dbg(bond->dev, "Setting xmit hash policy to %s (%llu)\n", newval->string, newval->value); bond->params.xmit_policy = newval->value;
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index b908900..1873d82 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -297,6 +297,7 @@ static void ser_release(struct work_struct *work) dev_close(ser->dev); unregister_netdevice(ser->dev); debugfs_deinit(ser); + tty_kref_put(tty->link); tty_kref_put(tty); } rtnl_unlock(); @@ -331,6 +332,7 @@ static int ldisc_open(struct tty_struct *tty) ser = netdev_priv(dev); ser->tty = tty_kref_get(tty); + tty_kref_get(tty->link); ser->dev = dev; debugfs_init(ser, tty); tty->receive_room = 4096; @@ -339,6 +341,7 @@ static int ldisc_open(struct tty_struct *tty) rtnl_lock(); result = register_netdevice(dev); if (result) { + tty_kref_put(tty->link); tty_kref_put(tty); rtnl_unlock(); free_netdev(dev);
diff --git a/drivers/net/can/dev/calc_bittiming.c b/drivers/net/can/dev/calc_bittiming.c
index cc40222..42498e9 100644
--- a/drivers/net/can/dev/calc_bittiming.c
+++ b/drivers/net/can/dev/calc_bittiming.c
@@ -8,7 +8,7 @@ #include <linux/units.h> #include <linux/can/dev.h> -#define CAN_CALC_MAX_ERROR 50 /* in one-tenth of a percent */ +#define CAN_CALC_MAX_ERROR 500 /* max error 5% */ /* CiA recommended sample points for Non Return to Zero encoding. */ static int can_calc_sample_point_nrz(const struct can_bittiming *bt)
diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c
index 0498198..766d455 100644
--- a/drivers/net/can/dev/netlink.c
+++ b/drivers/net/can/dev/netlink.c
@@ -601,7 +601,9 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], /* We need synchronization with dev->stop() */ ASSERT_RTNL(); - can_ctrlmode_changelink(dev, data, extack); + err = can_ctrlmode_changelink(dev, data, extack); + if (err) + return err; if (data[IFLA_CAN_BITTIMING]) { struct can_bittiming bt;
diff --git a/drivers/net/can/dummy_can.c b/drivers/net/can/dummy_can.c
index 4195365..cd23de4 100644
--- a/drivers/net/can/dummy_can.c
+++ b/drivers/net/can/dummy_can.c
@@ -241,6 +241,7 @@ static int __init dummy_can_init(void) dev->netdev_ops = &dummy_can_netdev_ops; dev->ethtool_ops = &dummy_can_ethtool_ops; + dev->flags |= IFF_ECHO; /* enable echo handling */ priv = netdev_priv(dev); priv->can.bittiming_const = &dummy_can_bittiming_const; priv->can.bitrate_max = 20 * MEGA /* BPS */;
diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c
index e00d3db..91b1fa9 100644
--- a/drivers/net/can/spi/hi311x.c
+++ b/drivers/net/can/spi/hi311x.c
@@ -755,7 +755,9 @@ static int hi3110_open(struct net_device *net) return ret; mutex_lock(&priv->hi3110_lock); - hi3110_power_enable(priv->transceiver, 1); + ret = hi3110_power_enable(priv->transceiver, 1); + if (ret) + goto out_close_candev; priv->force_quit = 0; priv->tx_skb = NULL; @@ -790,6 +792,7 @@ static int hi3110_open(struct net_device *net) hi3110_hw_sleep(spi); out_close: hi3110_power_enable(priv->transceiver, 0); + out_close_candev: close_candev(net); mutex_unlock(&priv->hi3110_lock); return ret;
diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index fa97adf..0d0190a 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -1214,6 +1214,7 @@ static int mcp251x_open(struct net_device *net) { struct mcp251x_priv *priv = netdev_priv(net); struct spi_device *spi = priv->spi; + bool release_irq = false; unsigned long flags = 0; int ret; @@ -1224,7 +1225,11 @@ static int mcp251x_open(struct net_device *net) } mutex_lock(&priv->mcp_lock); - mcp251x_power_enable(priv->transceiver, 1); + ret = mcp251x_power_enable(priv->transceiver, 1); + if (ret) { + dev_err(&spi->dev, "failed to enable transceiver power: %pe\n", ERR_PTR(ret)); + goto out_close_candev; + } priv->force_quit = 0; priv->tx_skb = NULL; @@ -1257,12 +1262,25 @@ static int mcp251x_open(struct net_device *net) return 0; out_free_irq: - free_irq(spi->irq, priv); + /* The IRQ handler might be running, and if so it will be waiting + * for the lock. But free_irq() must wait for the handler to finish + * so calling it here would deadlock. + * + * Setting priv->force_quit will let the handler exit right away + * without any access to the hardware. This makes it safe to call + * free_irq() after the lock is released. + */ + priv->force_quit = 1; + release_irq = true; + mcp251x_hw_sleep(spi); out_close: mcp251x_power_enable(priv->transceiver, 0); +out_close_candev: close_candev(net); mutex_unlock(&priv->mcp_lock); + if (release_irq) + free_irq(spi->irq, priv); return ret; } @@ -1503,11 +1521,25 @@ static int __maybe_unused mcp251x_can_resume(struct device *dev) { struct spi_device *spi = to_spi_device(dev); struct mcp251x_priv *priv = spi_get_drvdata(spi); + int ret = 0; - if (priv->after_suspend & AFTER_SUSPEND_POWER) - mcp251x_power_enable(priv->power, 1); - if (priv->after_suspend & AFTER_SUSPEND_UP) - mcp251x_power_enable(priv->transceiver, 1); + if (priv->after_suspend & AFTER_SUSPEND_POWER) { + ret = mcp251x_power_enable(priv->power, 1); + if (ret) { + dev_err(dev, "failed to restore power: %pe\n", ERR_PTR(ret)); + return ret; + } + } + + if (priv->after_suspend & AFTER_SUSPEND_UP) { + ret = mcp251x_power_enable(priv->transceiver, 1); + if (ret) { + dev_err(dev, "failed to restore transceiver power: %pe\n", ERR_PTR(ret)); + if (priv->after_suspend & AFTER_SUSPEND_POWER) + mcp251x_power_enable(priv->power, 0); + return ret; + } + } if (priv->after_suspend & (AFTER_SUSPEND_POWER | AFTER_SUSPEND_UP)) queue_work(priv->wq, &priv->restart_work);
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index 4c219a5..9b25dda 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -445,6 +445,11 @@ static void ems_usb_read_bulk_callback(struct urb *urb) start = CPC_HEADER_SIZE; while (msg_count) { + if (start + CPC_MSG_HEADER_LEN > urb->actual_length) { + netdev_err(netdev, "format error\n"); + break; + } + msg = (struct ems_cpc_msg *)&ibuf[start]; switch (msg->type) { @@ -474,7 +479,7 @@ static void ems_usb_read_bulk_callback(struct urb *urb) start += CPC_MSG_HEADER_LEN + msg->length; msg_count--; - if (start > urb->transfer_buffer_length) { + if (start > urb->actual_length) { netdev_err(netdev, "format error\n"); break; }
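Both ems_usb checks apply the same hardening rule for USB RX parsing: a message header must fit within urb->actual_length (the bytes the device actually sent) before it is dereferenced, and the running offset must be bounded by actual_length rather than transfer_buffer_length (the allocation size). A condensed sketch of such a loop with a hypothetical message layout; note the offset always advances by at least the header size, which is the same family of bug the ucan fix further down guards against with its len == 0 check:

    #include <linux/types.h>

    struct example_hdr {
            u8 type;
            u8 length;      /* payload bytes following this header */
    } __packed;

    /* Parse only what the device actually sent, never what the
     * buffer could hold.
     */
    static void example_parse_rx(const u8 *buf, u32 actual_length)
    {
            u32 pos = 0;

            while (pos + sizeof(struct example_hdr) <= actual_length) {
                    const struct example_hdr *hdr = (const void *)&buf[pos];

                    /* Reject messages whose payload runs past the data. */
                    if (pos + sizeof(*hdr) + hdr->length > actual_length)
                            break;

                    /* ... dispatch on hdr->type ... */

                    pos += sizeof(*hdr) + hdr->length;
            }
    }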
diff --git a/drivers/net/can/usb/esd_usb.c b/drivers/net/can/usb/esd_usb.c
index 2892a68..d257440 100644
--- a/drivers/net/can/usb/esd_usb.c
+++ b/drivers/net/can/usb/esd_usb.c
@@ -272,6 +272,9 @@ struct esd_usb { struct usb_anchor rx_submitted; + unsigned int rx_pipe; + unsigned int tx_pipe; + int net_count; u32 version; int rxinitdone; @@ -537,7 +540,7 @@ static void esd_usb_read_bulk_callback(struct urb *urb) } resubmit_urb: - usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1), + usb_fill_bulk_urb(urb, dev->udev, dev->rx_pipe, urb->transfer_buffer, ESD_USB_RX_BUFFER_SIZE, esd_usb_read_bulk_callback, dev); @@ -626,9 +629,7 @@ static int esd_usb_send_msg(struct esd_usb *dev, union esd_usb_msg *msg) { int actual_length; - return usb_bulk_msg(dev->udev, - usb_sndbulkpipe(dev->udev, 2), - msg, + return usb_bulk_msg(dev->udev, dev->tx_pipe, msg, msg->hdr.len * sizeof(u32), /* convert to # of bytes */ &actual_length, 1000); @@ -639,12 +640,8 @@ static int esd_usb_wait_msg(struct esd_usb *dev, { int actual_length; - return usb_bulk_msg(dev->udev, - usb_rcvbulkpipe(dev->udev, 1), - msg, - sizeof(*msg), - &actual_length, - 1000); + return usb_bulk_msg(dev->udev, dev->rx_pipe, msg, + sizeof(*msg), &actual_length, 1000); } static int esd_usb_setup_rx_urbs(struct esd_usb *dev) @@ -677,8 +674,7 @@ static int esd_usb_setup_rx_urbs(struct esd_usb *dev) urb->transfer_dma = buf_dma; - usb_fill_bulk_urb(urb, dev->udev, - usb_rcvbulkpipe(dev->udev, 1), + usb_fill_bulk_urb(urb, dev->udev, dev->rx_pipe, buf, ESD_USB_RX_BUFFER_SIZE, esd_usb_read_bulk_callback, dev); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; @@ -903,7 +899,7 @@ static netdev_tx_t esd_usb_start_xmit(struct sk_buff *skb, /* hnd must not be 0 - MSB is stripped in txdone handling */ msg->tx.hnd = BIT(31) | i; /* returned in TX done message */ - usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), buf, + usb_fill_bulk_urb(urb, dev->udev, dev->tx_pipe, buf, msg->hdr.len * sizeof(u32), /* convert to # of bytes */ esd_usb_write_bulk_callback, context); @@ -1298,10 +1294,16 @@ static int esd_usb_probe_one_net(struct usb_interface *intf, int index) static int esd_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { + struct usb_endpoint_descriptor *ep_in, *ep_out; struct esd_usb *dev; union esd_usb_msg *msg; int i, err; + err = usb_find_common_endpoints(intf->cur_altsetting, &ep_in, &ep_out, + NULL, NULL); + if (err) + return err; + dev = kzalloc_obj(*dev); if (!dev) { err = -ENOMEM; @@ -1309,6 +1311,8 @@ static int esd_usb_probe(struct usb_interface *intf, } dev->udev = interface_to_usbdev(intf); + dev->rx_pipe = usb_rcvbulkpipe(dev->udev, ep_in->bEndpointAddress); + dev->tx_pipe = usb_sndbulkpipe(dev->udev, ep_out->bEndpointAddress); init_usb_anchor(&dev->rx_submitted);
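The esd_usb rework replaces hardcoded endpoint numbers (usb_rcvbulkpipe(dev->udev, 1), usb_sndbulkpipe(dev->udev, 2)) with endpoints discovered from the interface descriptor, which also makes probe reject malformed devices that lack the expected bulk endpoints. A sketch of the discovery step as a hypothetical helper:

    #include <linux/usb.h>

    static int example_find_pipes(struct usb_interface *intf,
                                  unsigned int *rx_pipe, unsigned int *tx_pipe)
    {
            struct usb_endpoint_descriptor *ep_in, *ep_out;
            struct usb_device *udev = interface_to_usbdev(intf);
            int err;

            /* Locate the first bulk-in and bulk-out endpoints, if any. */
            err = usb_find_common_endpoints(intf->cur_altsetting,
                                            &ep_in, &ep_out, NULL, NULL);
            if (err)
                    return err;

            *rx_pipe = usb_rcvbulkpipe(udev, ep_in->bEndpointAddress);
            *tx_pipe = usb_sndbulkpipe(udev, ep_out->bEndpointAddress);
            return 0;
    }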
diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c
index 2d248de..b259f61 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_core.c
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.c
@@ -1461,12 +1461,18 @@ static void es58x_read_bulk_callback(struct urb *urb) } resubmit_urb: + usb_anchor_urb(urb, &es58x_dev->rx_urbs); ret = usb_submit_urb(urb, GFP_ATOMIC); + if (!ret) + return; + + usb_unanchor_urb(urb); + if (ret == -ENODEV) { for (i = 0; i < es58x_dev->num_can_ch; i++) if (es58x_dev->netdev[i]) netif_device_detach(es58x_dev->netdev[i]); - } else if (ret) + } else dev_err_ratelimited(dev, "Failed resubmitting read bulk urb: %pe\n", ERR_PTR(ret));
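The es58x change, like the f81604 one below, anchors the URB before resubmitting it so that a failed usb_submit_urb() can be unanchored again and device teardown can kill every in-flight URB through the anchor. The pattern reduced to a hypothetical helper:

    #include <linux/err.h>
    #include <linux/netdevice.h>
    #include <linux/usb.h>

    static void example_resubmit_rx(struct urb *urb, struct usb_anchor *anchor,
                                    struct net_device *netdev)
    {
            int ret;

            usb_anchor_urb(urb, anchor);
            ret = usb_submit_urb(urb, GFP_ATOMIC);
            if (!ret)
                    return;

            /* Failed submissions must not stay on the anchor. */
            usb_unanchor_urb(urb);
            if (ret == -ENODEV)
                    netif_device_detach(netdev);
            else
                    netdev_err(netdev, "failed to resubmit urb: %pe\n",
                               ERR_PTR(ret));
    }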
diff --git a/drivers/net/can/usb/f81604.c b/drivers/net/can/usb/f81604.c
index 7657806..f123182 100644
--- a/drivers/net/can/usb/f81604.c
+++ b/drivers/net/can/usb/f81604.c
@@ -413,6 +413,7 @@ static void f81604_read_bulk_callback(struct urb *urb) { struct f81604_can_frame *frame = urb->transfer_buffer; struct net_device *netdev = urb->context; + struct f81604_port_priv *priv = netdev_priv(netdev); int ret; if (!netif_device_present(netdev)) @@ -445,10 +446,15 @@ static void f81604_read_bulk_callback(struct urb *urb) f81604_process_rx_packet(netdev, frame); resubmit_urb: + usb_anchor_urb(urb, &priv->urbs_anchor); ret = usb_submit_urb(urb, GFP_ATOMIC); + if (!ret) + return; + usb_unanchor_urb(urb); + if (ret == -ENODEV) netif_device_detach(netdev); - else if (ret) + else netdev_err(netdev, "%s: failed to resubmit read bulk urb: %pe\n", __func__, ERR_PTR(ret)); @@ -620,6 +626,12 @@ static void f81604_read_int_callback(struct urb *urb) netdev_info(netdev, "%s: Int URB aborted: %pe\n", __func__, ERR_PTR(urb->status)); + if (urb->actual_length < sizeof(*data)) { + netdev_warn(netdev, "%s: short int URB: %u < %zu\n", + __func__, urb->actual_length, sizeof(*data)); + goto resubmit_urb; + } + switch (urb->status) { case 0: /* success */ break; @@ -646,10 +658,15 @@ static void f81604_read_int_callback(struct urb *urb) f81604_handle_tx(priv, data); resubmit_urb: + usb_anchor_urb(urb, &priv->urbs_anchor); ret = usb_submit_urb(urb, GFP_ATOMIC); + if (!ret) + return; + usb_unanchor_urb(urb); + if (ret == -ENODEV) netif_device_detach(netdev); - else if (ret) + else netdev_err(netdev, "%s: failed to resubmit int urb: %pe\n", __func__, ERR_PTR(ret)); } @@ -874,9 +891,27 @@ static void f81604_write_bulk_callback(struct urb *urb) if (!netif_device_present(netdev)) return; - if (urb->status) - netdev_info(netdev, "%s: Tx URB error: %pe\n", __func__, - ERR_PTR(urb->status)); + if (!urb->status) + return; + + switch (urb->status) { + case -ENOENT: + case -ECONNRESET: + case -ESHUTDOWN: + return; + default: + break; + } + + if (net_ratelimit()) + netdev_err(netdev, "%s: Tx URB error: %pe\n", __func__, + ERR_PTR(urb->status)); + + can_free_echo_skb(netdev, 0, NULL); + netdev->stats.tx_dropped++; + netdev->stats.tx_errors++; + + netif_wake_queue(netdev); } static void f81604_clear_reg_work(struct work_struct *work)
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 9d27d6f..ec9a7cb 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -772,9 +772,8 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) } } -static int gs_usb_set_bittiming(struct net_device *netdev) +static int gs_usb_set_bittiming(struct gs_can *dev) { - struct gs_can *dev = netdev_priv(netdev); struct can_bittiming *bt = &dev->can.bittiming; struct gs_device_bittiming dbt = { .prop_seg = cpu_to_le32(bt->prop_seg), @@ -791,9 +790,8 @@ static int gs_usb_set_bittiming(struct net_device *netdev) GFP_KERNEL); } -static int gs_usb_set_data_bittiming(struct net_device *netdev) +static int gs_usb_set_data_bittiming(struct gs_can *dev) { - struct gs_can *dev = netdev_priv(netdev); struct can_bittiming *bt = &dev->can.fd.data_bittiming; struct gs_device_bittiming dbt = { .prop_seg = cpu_to_le32(bt->prop_seg), @@ -1057,6 +1055,20 @@ static int gs_can_open(struct net_device *netdev) if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP) flags |= GS_CAN_MODE_HW_TIMESTAMP; + rc = gs_usb_set_bittiming(dev); + if (rc) { + netdev_err(netdev, "failed to set bittiming: %pe\n", ERR_PTR(rc)); + goto out_usb_kill_anchored_urbs; + } + + if (ctrlmode & CAN_CTRLMODE_FD) { + rc = gs_usb_set_data_bittiming(dev); + if (rc) { + netdev_err(netdev, "failed to set data bittiming: %pe\n", ERR_PTR(rc)); + goto out_usb_kill_anchored_urbs; + } + } + /* finally start device */ dev->can.state = CAN_STATE_ERROR_ACTIVE; dm.flags = cpu_to_le32(flags); @@ -1370,7 +1382,6 @@ static struct gs_can *gs_make_candev(unsigned int channel, dev->can.state = CAN_STATE_STOPPED; dev->can.clock.freq = le32_to_cpu(bt_const.fclk_can); dev->can.bittiming_const = &dev->bt_const; - dev->can.do_set_bittiming = gs_usb_set_bittiming; dev->can.ctrlmode_supported = CAN_CTRLMODE_CC_LEN8_DLC; @@ -1394,7 +1405,6 @@ static struct gs_can *gs_make_candev(unsigned int channel, * GS_CAN_FEATURE_BT_CONST_EXT is set. */ dev->can.fd.data_bittiming_const = &dev->bt_const; - dev->can.fd.do_set_data_bittiming = gs_usb_set_data_bittiming; } if (feature & GS_CAN_FEATURE_TERMINATION) {
diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c
index c79508b..0ea0ac7 100644
--- a/drivers/net/can/usb/ucan.c
+++ b/drivers/net/can/usb/ucan.c
@@ -748,7 +748,7 @@ static void ucan_read_bulk_callback(struct urb *urb) len = le16_to_cpu(m->len); /* check sanity (length of content) */ - if (urb->actual_length - pos < len) { + if ((len == 0) || (urb->actual_length - pos < len)) { netdev_warn(up->netdev, "invalid message (short; no data; l:%d)\n", urb->actual_length);
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 9606855..de3efa3 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -980,15 +980,19 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds) ret = bcm_sf2_sw_rst(priv); if (ret) { pr_err("%s: failed to software reset switch\n", __func__); + if (!priv->wol_ports_mask) + clk_disable_unprepare(priv->clk); return ret; } bcm_sf2_crossbar_setup(priv); ret = bcm_sf2_cfp_resume(ds); - if (ret) + if (ret) { + if (!priv->wol_ports_mask) + clk_disable_unprepare(priv->clk); return ret; - + } if (priv->hw_params.num_gphy == 1) bcm_sf2_gphy_enable_set(ds, true);
diff --git a/drivers/net/dsa/microchip/ksz_ptp.c b/drivers/net/dsa/microchip/ksz_ptp.c
index 4a2cc57..8b98039 100644
--- a/drivers/net/dsa/microchip/ksz_ptp.c
+++ b/drivers/net/dsa/microchip/ksz_ptp.c
@@ -1108,6 +1108,7 @@ static int ksz_ptp_msg_irq_setup(struct ksz_port *port, u8 n) const struct ksz_dev_ops *ops = port->ksz_dev->dev_ops; struct ksz_irq *ptpirq = &port->ptpirq; struct ksz_ptp_irq *ptpmsg_irq; + int ret; ptpmsg_irq = &port->ptpmsg_irq[n]; ptpmsg_irq->num = irq_create_mapping(ptpirq->domain, n); @@ -1119,9 +1120,13 @@ static int ksz_ptp_msg_irq_setup(struct ksz_port *port, u8 n) strscpy(ptpmsg_irq->name, name[n]); - return request_threaded_irq(ptpmsg_irq->num, NULL, - ksz_ptp_msg_thread_fn, IRQF_ONESHOT, - ptpmsg_irq->name, ptpmsg_irq); + ret = request_threaded_irq(ptpmsg_irq->num, NULL, + ksz_ptp_msg_thread_fn, IRQF_ONESHOT, + ptpmsg_irq->name, ptpmsg_irq); + if (ret) + irq_dispose_mapping(ptpmsg_irq->num); + + return ret; } int ksz_ptp_irq_setup(struct dsa_switch *ds, u8 p)
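The ksz_ptp fix pairs irq_create_mapping() with irq_dispose_mapping() on the failure path; without it the virq mapping leaks every time request_threaded_irq() fails and the driver is rebound. The pairing as a hypothetical helper:

    #include <linux/interrupt.h>
    #include <linux/irqdomain.h>

    static int example_setup_mapped_irq(struct irq_domain *domain,
                                        irq_hw_number_t hwirq,
                                        irq_handler_t thread_fn, void *data)
    {
            unsigned int virq = irq_create_mapping(domain, hwirq);
            int ret;

            if (!virq)
                    return -EINVAL;

            ret = request_threaded_irq(virq, NULL, thread_fn, IRQF_ONESHOT,
                                       "example", data);
            if (ret)
                    irq_dispose_mapping(virq);

            return ret;
    }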
diff --git a/drivers/net/dsa/mxl862xx/mxl862xx.c b/drivers/net/dsa/mxl862xx/mxl862xx.c
index b1e2094..d7ab04f 100644
--- a/drivers/net/dsa/mxl862xx/mxl862xx.c
+++ b/drivers/net/dsa/mxl862xx/mxl862xx.c
@@ -149,7 +149,6 @@ static int mxl862xx_setup_mdio(struct dsa_switch *ds) return -ENOMEM; bus->priv = priv; - ds->user_mii_bus = bus; bus->name = KBUILD_MODNAME "-mii"; snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(dev)); bus->read_c45 = mxl862xx_phy_read_c45_mii_bus;
diff --git a/drivers/net/dsa/realtek/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c
index c575e16..31fa94d 100644
--- a/drivers/net/dsa/realtek/rtl8365mb.c
+++ b/drivers/net/dsa/realtek/rtl8365mb.c
@@ -769,7 +769,7 @@ static int rtl8365mb_phy_ocp_write(struct realtek_priv *priv, int phy, out: rtl83xx_unlock(priv); - return 0; + return ret; } static int rtl8365mb_phy_read(struct realtek_priv *priv, int phy, int regnum) @@ -1480,8 +1480,7 @@ static void rtl8365mb_stats_update(struct realtek_priv *priv, int port) stats->rx_packets = cnt[RTL8365MB_MIB_ifInUcastPkts] + cnt[RTL8365MB_MIB_ifInMulticastPkts] + - cnt[RTL8365MB_MIB_ifInBroadcastPkts] - - cnt[RTL8365MB_MIB_ifOutDiscards]; + cnt[RTL8365MB_MIB_ifInBroadcastPkts]; stats->tx_packets = cnt[RTL8365MB_MIB_ifOutUcastPkts] + cnt[RTL8365MB_MIB_ifOutMulticastPkts] +
diff --git a/drivers/net/dsa/realtek/rtl8366rb-leds.c b/drivers/net/dsa/realtek/rtl8366rb-leds.c
index 99c8906..509ffd3f 100644
--- a/drivers/net/dsa/realtek/rtl8366rb-leds.c
+++ b/drivers/net/dsa/realtek/rtl8366rb-leds.c
@@ -12,11 +12,11 @@ static inline u32 rtl8366rb_led_group_port_mask(u8 led_group, u8 port) case 0: return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); case 1: - return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); + return FIELD_PREP(RTL8366RB_LED_X_1_CTRL_MASK, BIT(port)); case 2: - return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); + return FIELD_PREP(RTL8366RB_LED_2_X_CTRL_MASK, BIT(port)); case 3: - return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); + return FIELD_PREP(RTL8366RB_LED_X_3_CTRL_MASK, BIT(port)); default: return 0; }
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 71a817c..c72c2bf 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2278,6 +2278,12 @@ int sja1105_static_config_reload(struct sja1105_private *priv, * change it through the dynamic interface later. */ dsa_switch_for_each_available_port(dp, ds) { + /* May be called during unbind when we unoffload a VLAN-aware + * bridge from port 1 while port 0 was already torn down + */ + if (!dp->pl) + continue; + phylink_replay_link_begin(dp->pl); mac[dp->index].speed = priv->info->port_speed[SJA1105_SPEED_AUTO]; } @@ -2333,13 +2339,13 @@ int sja1105_static_config_reload(struct sja1105_private *priv, goto out; } - dsa_switch_for_each_available_port(dp, ds) - phylink_replay_link_end(dp->pl); - rc = sja1105_reload_cbs(priv); - if (rc < 0) - goto out; + out: + dsa_switch_for_each_available_port(dp, ds) + if (dp->pl) + phylink_replay_link_end(dp->pl); + mutex_unlock(&priv->mgmt_lock); mutex_unlock(&priv->fdb_lock);
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index 62bcbbb..95ba99b 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -794,18 +794,34 @@ static int airoha_qdma_init_rx_queue(struct airoha_queue *q, static void airoha_qdma_cleanup_rx_queue(struct airoha_queue *q) { - struct airoha_eth *eth = q->qdma->eth; + struct airoha_qdma *qdma = q->qdma; + struct airoha_eth *eth = qdma->eth; + int qid = q - &qdma->q_rx[0]; while (q->queued) { struct airoha_queue_entry *e = &q->entry[q->tail]; + struct airoha_qdma_desc *desc = &q->desc[q->tail]; struct page *page = virt_to_head_page(e->buf); dma_sync_single_for_cpu(eth->dev, e->dma_addr, e->dma_len, page_pool_get_dma_dir(q->page_pool)); page_pool_put_full_page(q->page_pool, page, false); + /* Reset DMA descriptor */ + WRITE_ONCE(desc->ctrl, 0); + WRITE_ONCE(desc->addr, 0); + WRITE_ONCE(desc->data, 0); + WRITE_ONCE(desc->msg0, 0); + WRITE_ONCE(desc->msg1, 0); + WRITE_ONCE(desc->msg2, 0); + WRITE_ONCE(desc->msg3, 0); + q->tail = (q->tail + 1) % q->ndesc; q->queued--; } + + q->head = q->tail; + airoha_qdma_rmw(qdma, REG_RX_DMA_IDX(qid), RX_RING_DMA_IDX_MASK, + FIELD_PREP(RX_RING_DMA_IDX_MASK, q->tail)); } static int airoha_qdma_init_rx(struct airoha_qdma *qdma) @@ -2946,6 +2962,8 @@ static int airoha_register_gdm_devices(struct airoha_eth *eth) return err; } + set_bit(DEV_STATE_REGISTERED, ð->state); + return 0; } @@ -3083,7 +3101,6 @@ static void airoha_remove(struct platform_device *pdev) if (!port) continue; - airoha_dev_stop(port->dev); unregister_netdev(port->dev); airoha_metadata_dst_free(port); }
diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h
index 20e602d..a979035 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.h
+++ b/drivers/net/ethernet/airoha/airoha_eth.h
@@ -88,6 +88,7 @@ enum { enum { DEV_STATE_INITIALIZED, + DEV_STATE_REGISTERED, }; enum {
diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c
index 42dbe8f..c2c32b6 100644
--- a/drivers/net/ethernet/airoha/airoha_ppe.c
+++ b/drivers/net/ethernet/airoha/airoha_ppe.c
@@ -227,7 +227,9 @@ static int airoha_ppe_get_wdma_info(struct net_device *dev, const u8 *addr, if (!dev) return -ENODEV; + rcu_read_lock(); err = dev_fill_forward_path(dev, addr, &stack); + rcu_read_unlock(); if (err) return err; @@ -1366,6 +1368,13 @@ int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, void *type_data) struct airoha_eth *eth = ppe->eth; int err = 0; + /* Netfilter flowtable can try to offload flower rules while not all + * the net_devices are registered or initialized. Delay offloading + * until all net_devices are registered in the system. + */ + if (!test_bit(DEV_STATE_REGISTERED, ð->state)) + return -EBUSY; + mutex_lock(&flow_offload_mutex); if (!eth->npu)
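The airoha_ppe fix wraps dev_fill_forward_path() in an RCU read-side section: the forward-path walk dereferences RCU-protected lower devices, so a caller that does not hold RTNL must take rcu_read_lock() itself. Reduced to a hypothetical wrapper:

    #include <linux/netdevice.h>
    #include <linux/rcupdate.h>

    static int example_fill_path(struct net_device *dev, const u8 *addr,
                                 struct net_device_path_stack *stack)
    {
            int err;

            /* The walk over lower devices is RCU-protected. */
            rcu_read_lock();
            err = dev_fill_forward_path(dev, addr, stack);
            rcu_read_unlock();

            return err;
    }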
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index 711f295..80c2c27 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -431,7 +431,7 @@ #define MAC_SSIR_SSINC_INDEX 16 #define MAC_SSIR_SSINC_WIDTH 8 #define MAC_TCR_SS_INDEX 29 -#define MAC_TCR_SS_WIDTH 2 +#define MAC_TCR_SS_WIDTH 3 #define MAC_TCR_TE_INDEX 0 #define MAC_TCR_TE_WIDTH 1 #define MAC_TCR_VNE_INDEX 24
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 62bb4b8..23beea4 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1120,7 +1120,6 @@ int xgbe_powerdown(struct net_device *netdev, unsigned int caller) { struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_hw_if *hw_if = &pdata->hw_if; - unsigned long flags; DBGPR("-->xgbe_powerdown\n"); @@ -1131,8 +1130,6 @@ int xgbe_powerdown(struct net_device *netdev, unsigned int caller) return -EINVAL; } - spin_lock_irqsave(&pdata->lock, flags); - if (caller == XGMAC_DRIVER_CONTEXT) netif_device_detach(netdev); @@ -1148,8 +1145,6 @@ int xgbe_powerdown(struct net_device *netdev, unsigned int caller) pdata->power_down = 1; - spin_unlock_irqrestore(&pdata->lock, flags); - DBGPR("<--xgbe_powerdown\n"); return 0; @@ -1159,7 +1154,6 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller) { struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_hw_if *hw_if = &pdata->hw_if; - unsigned long flags; DBGPR("-->xgbe_powerup\n"); @@ -1170,8 +1164,6 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller) return -EINVAL; } - spin_lock_irqsave(&pdata->lock, flags); - pdata->power_down = 0; xgbe_napi_enable(pdata, 0); @@ -1186,8 +1178,6 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller) xgbe_start_timers(pdata); - spin_unlock_irqrestore(&pdata->lock, flags); - DBGPR("<--xgbe_powerup\n"); return 0; @@ -1281,20 +1271,25 @@ static int xgbe_start(struct xgbe_prv_data *pdata) if (ret) goto err_napi; + /* Reset the phy settings */ + ret = xgbe_phy_reset(pdata); + if (ret) + goto err_irqs; + + /* Start the phy */ ret = phy_if->phy_start(pdata); if (ret) goto err_irqs; hw_if->enable_tx(pdata); hw_if->enable_rx(pdata); + /* Synchronize flag with hardware state after enabling TX/RX. + * This prevents stale state after device restart cycles. + */ + pdata->data_path_stopped = false; udp_tunnel_nic_reset_ntf(netdev); - /* Reset the phy settings */ - ret = xgbe_phy_reset(pdata); - if (ret) - goto err_txrx; - netif_tx_start_all_queues(netdev); xgbe_start_timers(pdata); @@ -1304,10 +1299,6 @@ static int xgbe_start(struct xgbe_prv_data *pdata) return 0; -err_txrx: - hw_if->disable_rx(pdata); - hw_if->disable_tx(pdata); - err_irqs: xgbe_free_irqs(pdata);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index d1f0419..7d45ea2 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -76,7 +76,6 @@ struct xgbe_prv_data *xgbe_alloc_pdata(struct device *dev) pdata->netdev = netdev; pdata->dev = dev; - spin_lock_init(&pdata->lock); spin_lock_init(&pdata->xpcs_lock); mutex_init(&pdata->rss_mutex); spin_lock_init(&pdata->tstamp_lock);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index c63ddb12..b8cf6cc 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -1942,7 +1942,7 @@ static void xgbe_set_rx_adap_mode(struct xgbe_prv_data *pdata, static void xgbe_rx_adaptation(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; - unsigned int reg; + int reg; /* step 2: force PCS to send RX_ADAPT Req to PHY */ XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_RX_EQ_CTRL4, @@ -1964,11 +1964,20 @@ static void xgbe_rx_adaptation(struct xgbe_prv_data *pdata) /* Step 4: Check for Block lock */ - /* Link status is latched low, so read once to clear - * and then read again to get current state + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + if (reg < 0) + goto set_mode; + + /* Link status is latched low so that momentary link drops + * can be detected. If link was already down read again + * to get the latest state. */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + if (!pdata->phy.link && !(reg & MDIO_STAT1_LSTATUS)) { + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + if (reg < 0) + goto set_mode; + } + if (reg & MDIO_STAT1_LSTATUS) { /* If the block lock is found, update the helpers * and declare the link up @@ -2008,6 +2017,48 @@ static void xgbe_phy_rx_adaptation(struct xgbe_prv_data *pdata) xgbe_rx_adaptation(pdata); } +/* + * xgbe_phy_stop_data_path - Stop TX/RX to prevent packet corruption + * @pdata: driver private data + * + * This function stops the data path (TX and RX) to prevent packet + * corruption during critical PHY operations like RX adaptation. + * Must be called before initiating RX adaptation when link goes down. + */ +static void xgbe_phy_stop_data_path(struct xgbe_prv_data *pdata) +{ + if (pdata->data_path_stopped) + return; + + /* Stop TX/RX to prevent packet corruption during RX adaptation */ + pdata->hw_if.disable_tx(pdata); + pdata->hw_if.disable_rx(pdata); + pdata->data_path_stopped = true; + + netif_dbg(pdata, link, pdata->netdev, + "stopping data path for RX adaptation\n"); +} + +/* + * xgbe_phy_start_data_path - Re-enable TX/RX after RX adaptation + * @pdata: driver private data + * + * This function re-enables the data path (TX and RX) after RX adaptation + * has completed successfully. Only called when link is confirmed up. + */ +static void xgbe_phy_start_data_path(struct xgbe_prv_data *pdata) +{ + if (!pdata->data_path_stopped) + return; + + pdata->hw_if.enable_rx(pdata); + pdata->hw_if.enable_tx(pdata); + pdata->data_path_stopped = false; + + netif_dbg(pdata, link, pdata->netdev, + "restarting data path after RX adaptation\n"); +} + static void xgbe_phy_rx_reset(struct xgbe_prv_data *pdata) { int reg; @@ -2801,13 +2852,27 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) if (pdata->en_rx_adap) { /* if the link is available and adaptation is done, * declare link up + * + * Note: When link is up and adaptation is done, we can + * safely re-enable the data path if it was stopped + * for adaptation. */ - if ((reg & MDIO_STAT1_LSTATUS) && pdata->rx_adapt_done) + if ((reg & MDIO_STAT1_LSTATUS) && pdata->rx_adapt_done) { + xgbe_phy_start_data_path(pdata); return 1; + } /* If either link is not available or adaptation is not done, * retrigger the adaptation logic. (if the mode is not set, * then issue mailbox command first) */ + + /* CRITICAL: Stop data path BEFORE triggering RX adaptation + * to prevent CRC errors from packets corrupted during + * the adaptation process. This is especially important + * when AN is OFF in 10G KR mode. 
+ */ + xgbe_phy_stop_data_path(pdata); + if (pdata->mode_set) { xgbe_phy_rx_adaptation(pdata); } else { @@ -2815,8 +2880,11 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) xgbe_phy_set_mode(pdata, phy_data->cur_mode); } - if (pdata->rx_adapt_done) + if (pdata->rx_adapt_done) { + /* Adaptation complete, safe to re-enable data path */ + xgbe_phy_start_data_path(pdata); return 1; + } } else if (reg & MDIO_STAT1_LSTATUS) return 1;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 1269b8c..438033a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -1004,9 +1004,6 @@ struct xgbe_prv_data { unsigned int pp3; unsigned int pp4; - /* Overall device lock */ - spinlock_t lock; - /* XPCS indirect addressing lock */ spinlock_t xpcs_lock; unsigned int xpcs_window_def_reg; @@ -1246,6 +1243,10 @@ struct xgbe_prv_data { bool en_rx_adap; int rx_adapt_retries; bool rx_adapt_done; + /* Flag to track if data path (TX/RX) was stopped for RX adaptation. + * This prevents packet corruption during the adaptation window. + */ + bool data_path_stopped; bool mode_set; bool sph; };
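Editor's note: the xgbe change above hinges on the stop/start helpers being idempotent. The new data_path_stopped flag guarantees the stop is applied at most once per adaptation window and that the start only undoes a stop that actually happened, so the calls can appear on several link-status paths without reference counting. A minimal sketch of that guard-flag pattern follows, using an illustrative ops struct rather than the driver's real types:

/* Sketch only: idempotent stop/start guarded by a state flag.
 * The struct and callbacks are placeholders, not the xgbe ones.
 */
struct datapath {
	bool stopped;
	void (*disable_tx)(struct datapath *dp);
	void (*disable_rx)(struct datapath *dp);
	void (*enable_tx)(struct datapath *dp);
	void (*enable_rx)(struct datapath *dp);
};

static void datapath_stop(struct datapath *dp)
{
	if (dp->stopped)		/* already stopped: no-op */
		return;
	dp->disable_tx(dp);		/* quiesce TX first, then RX */
	dp->disable_rx(dp);
	dp->stopped = true;
}

static void datapath_start(struct datapath *dp)
{
	if (!dp->stopped)		/* never stopped: nothing to undo */
		return;
	dp->enable_rx(dp);		/* mirror of stop, reverse order */
	dp->enable_tx(dp);
	dp->stopped = false;
}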
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 8283aee..dde4046 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c
@@ -934,6 +934,17 @@ int arc_emac_probe(struct net_device *ndev, int interface) /* Set poll rate so that it polls every 1 ms */ arc_reg_set(priv, R_POLLRATE, clock_frequency / 1000000); + /* + * Put the device into a known quiescent state before requesting + * the IRQ. Clear only EMAC interrupt status bits here; leave the + * MDIO completion bit alone and avoid writing TXPL_MASK, which is + * used to force TX polling rather than acknowledge interrupts. + */ + arc_reg_set(priv, R_ENABLE, 0); + arc_reg_set(priv, R_STATUS, RXINT_MASK | TXINT_MASK | ERR_MASK | + TXCH_MASK | MSER_MASK | RXCR_MASK | + RXFR_MASK | RXFL_MASK); + ndev->irq = irq; dev_info(dev, "IRQ is %d\n", ndev->irq);
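Editor's note: the arc_emac hunk is an instance of a general probe-time rule: force the block into a known quiescent state (mask interrupt sources, write-1-to-clear stale status) before request_irq(), so a leftover interrupt cannot fire into a half-initialized driver. A rough sketch of the shape, with hypothetical register names:

/* Sketch only: quiesce before attaching the handler.
 * REG_* names and their W1C semantics are assumptions.
 */
writel(0, priv->base + REG_INT_ENABLE);		/* mask all sources */
/* Ack only genuine W1C status bits; blindly writing ~0 can hit
 * control bits sharing the register (cf. TXPL_MASK above).
 */
writel(IRQ_STATUS_BITS, priv->base + REG_INT_STATUS);

err = request_irq(irq, my_isr, 0, dev_name(dev), priv);
if (err)
	return err;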
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index cd7ddde..9787c18 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -25,7 +25,7 @@ select SSB select MII select PHYLIB - select FIXED_PHY if BCM47XX + select FIXED_PHY help If you have a network (Ethernet) controller of this type, say Y or M here.
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c index aa6d860..9724748 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
@@ -1152,12 +1152,6 @@ void bcmasp_enable_wol(struct bcmasp_intf *intf, bool en) } } -static void bcmasp_wol_irq_destroy(struct bcmasp_priv *priv) -{ - if (priv->wol_irq > 0) - free_irq(priv->wol_irq, priv); -} - static void bcmasp_eee_fixup(struct bcmasp_intf *intf, bool en) { u32 reg, phy_lpi_overwrite; @@ -1255,7 +1249,7 @@ static int bcmasp_probe(struct platform_device *pdev) if (priv->irq <= 0) return -EINVAL; - priv->clk = devm_clk_get_optional_enabled(dev, "sw_asp"); + priv->clk = devm_clk_get_optional(dev, "sw_asp"); if (IS_ERR(priv->clk)) return dev_err_probe(dev, PTR_ERR(priv->clk), "failed to request clock\n"); @@ -1283,6 +1277,10 @@ static int bcmasp_probe(struct platform_device *pdev) bcmasp_set_pdata(priv, pdata); + ret = clk_prepare_enable(priv->clk); + if (ret) + return dev_err_probe(dev, ret, "failed to start clock\n"); + /* Enable all clocks to ensure successful probing */ bcmasp_core_clock_set(priv, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE, 0); @@ -1294,8 +1292,10 @@ static int bcmasp_probe(struct platform_device *pdev) ret = devm_request_irq(&pdev->dev, priv->irq, bcmasp_isr, 0, pdev->name, priv); - if (ret) - return dev_err_probe(dev, ret, "failed to request ASP interrupt: %d", ret); + if (ret) { + dev_err(dev, "Failed to request ASP interrupt: %d", ret); + goto err_clock_disable; + } /* Register mdio child nodes */ of_platform_populate(dev->of_node, bcmasp_mdio_of_match, NULL, dev); @@ -1307,13 +1307,17 @@ static int bcmasp_probe(struct platform_device *pdev) priv->mda_filters = devm_kcalloc(dev, priv->num_mda_filters, sizeof(*priv->mda_filters), GFP_KERNEL); - if (!priv->mda_filters) - return -ENOMEM; + if (!priv->mda_filters) { + ret = -ENOMEM; + goto err_clock_disable; + } priv->net_filters = devm_kcalloc(dev, priv->num_net_filters, sizeof(*priv->net_filters), GFP_KERNEL); - if (!priv->net_filters) - return -ENOMEM; + if (!priv->net_filters) { + ret = -ENOMEM; + goto err_clock_disable; + } bcmasp_core_init_filters(priv); @@ -1322,7 +1326,8 @@ static int bcmasp_probe(struct platform_device *pdev) ports_node = of_find_node_by_name(dev->of_node, "ethernet-ports"); if (!ports_node) { dev_warn(dev, "No ports found\n"); - return -EINVAL; + ret = -EINVAL; + goto err_clock_disable; } i = 0; @@ -1344,8 +1349,6 @@ static int bcmasp_probe(struct platform_device *pdev) */ bcmasp_core_clock_set(priv, 0, ASP_CTRL_CLOCK_CTRL_ASP_ALL_DISABLE); - clk_disable_unprepare(priv->clk); - /* Now do the registration of the network ports which will take care * of managing the clock properly. */ @@ -1358,13 +1361,16 @@ static int bcmasp_probe(struct platform_device *pdev) count++; } + clk_disable_unprepare(priv->clk); + dev_info(dev, "Initialized %d port(s)\n", count); return ret; err_cleanup: - bcmasp_wol_irq_destroy(priv); bcmasp_remove_intfs(priv); +err_clock_disable: + clk_disable_unprepare(priv->clk); return ret; } @@ -1376,7 +1382,6 @@ static void bcmasp_remove(struct platform_device *pdev) if (!priv) return; - bcmasp_wol_irq_destroy(priv); bcmasp_remove_intfs(priv); }
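Editor's note: the bcmasp conversion trades devm_clk_get_optional_enabled() for devm_clk_get_optional() plus explicit clk_prepare_enable()/clk_disable_unprepare(), because the clock only needs to run across a bounded window of probe and every failure inside that window must unwind it. The resulting shape, sketched with a hypothetical setup helper:

priv->clk = devm_clk_get_optional(dev, "sw_asp");
if (IS_ERR(priv->clk))
	return PTR_ERR(priv->clk);

ret = clk_prepare_enable(priv->clk);	/* clock window opens */
if (ret)
	return ret;

ret = setup_needing_clock(priv);	/* hypothetical: any step in the window */
if (ret)
	goto err_clock_disable;

clk_disable_unprepare(priv->clk);	/* window closes on success, too */
return 0;

err_clock_disable:
clk_disable_unprepare(priv->clk);
return ret;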
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e062d5d..3f77519 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2929,6 +2929,8 @@ static int bnxt_async_event_process(struct bnxt *bp, u16 type = (u16)BNXT_EVENT_BUF_PRODUCER_TYPE(data1); u32 offset = BNXT_EVENT_BUF_PRODUCER_OFFSET(data2); + if (type >= ARRAY_SIZE(bp->bs_trace)) + goto async_event_process_exit; bnxt_bs_trace_check_wrap(&bp->bs_trace[type], offset); goto async_event_process_exit; } @@ -6232,6 +6234,9 @@ int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp, int rc; set_bit(BNXT_FLTR_FW_DELETED, &fltr->base.state); + if (!test_bit(BNXT_STATE_OPEN, &bp->state)) + return 0; + rc = hwrm_req_init(bp, req, HWRM_CFA_NTUPLE_FILTER_FREE); if (rc) return rc; @@ -8040,6 +8045,8 @@ static int __bnxt_reserve_rings(struct bnxt *bp) ulp_msix = bnxt_get_avail_msix(bp, bp->ulp_num_msix_want); if (!ulp_msix) bnxt_set_ulp_stat_ctxs(bp, 0); + else + bnxt_set_dflt_ulp_stat_ctxs(bp); if (ulp_msix > bp->ulp_num_msix_want) ulp_msix = bp->ulp_num_msix_want; @@ -8666,7 +8673,7 @@ static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp) struct hwrm_func_backing_store_qcaps_v2_output *resp; struct hwrm_func_backing_store_qcaps_v2_input *req; struct bnxt_ctx_mem_info *ctx = bp->ctx; - u16 type; + u16 type, next_type = 0; int rc; rc = hwrm_req_init(bp, req, HWRM_FUNC_BACKING_STORE_QCAPS_V2); @@ -8682,7 +8689,7 @@ static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp) resp = hwrm_req_hold(bp, req); - for (type = 0; type < BNXT_CTX_V2_MAX; ) { + for (type = 0; type < BNXT_CTX_V2_MAX; type = next_type) { struct bnxt_ctx_mem_type *ctxm = &ctx->ctx_arr[type]; u8 init_val, init_off, i; u32 max_entries; @@ -8695,7 +8702,7 @@ static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp) if (rc) goto ctx_done; flags = le32_to_cpu(resp->flags); - type = le16_to_cpu(resp->next_valid_type); + next_type = le16_to_cpu(resp->next_valid_type); if (!(flags & BNXT_CTX_MEM_TYPE_VALID)) { bnxt_free_one_ctx_mem(bp, ctxm, true); continue; @@ -8710,7 +8717,7 @@ static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp) else continue; } - ctxm->type = le16_to_cpu(resp->type); + ctxm->type = type; ctxm->entry_size = entry_size; ctxm->flags = flags; ctxm->instance_bmap = le32_to_cpu(resp->instance_bit_map); @@ -10879,12 +10886,10 @@ void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, struct bnxt_ntuple_filter *ntp_fltr; int i; - if (netif_running(bp->dev)) { - bnxt_hwrm_vnic_free_one(bp, &rss_ctx->vnic); - for (i = 0; i < BNXT_MAX_CTX_PER_VNIC; i++) { - if (vnic->fw_rss_cos_lb_ctx[i] != INVALID_HW_RING_ID) - bnxt_hwrm_vnic_ctx_free_one(bp, vnic, i); - } + bnxt_hwrm_vnic_free_one(bp, &rss_ctx->vnic); + for (i = 0; i < BNXT_MAX_CTX_PER_VNIC; i++) { + if (vnic->fw_rss_cos_lb_ctx[i] != INVALID_HW_RING_ID) + bnxt_hwrm_vnic_ctx_free_one(bp, vnic, i); } if (!all) return; @@ -12989,6 +12994,21 @@ static int bnxt_tx_nr_rings_per_tc(struct bnxt *bp) return bp->num_tc ? 
bp->tx_nr_rings / bp->num_tc : bp->tx_nr_rings; } +static void bnxt_set_xdp_tx_rings(struct bnxt *bp) +{ + bp->tx_nr_rings_xdp = bp->tx_nr_rings_per_tc; + bp->tx_nr_rings += bp->tx_nr_rings_xdp; +} + +static void bnxt_adj_tx_rings(struct bnxt *bp) +{ + /* Make adjustments if reserved TX rings are less than requested */ + bp->tx_nr_rings -= bp->tx_nr_rings_xdp; + bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); + if (bp->tx_nr_rings_xdp) + bnxt_set_xdp_tx_rings(bp); +} + static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; @@ -13006,13 +13026,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) if (rc) return rc; - /* Make adjustments if reserved TX rings are less than requested */ - bp->tx_nr_rings -= bp->tx_nr_rings_xdp; - bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); - if (bp->tx_nr_rings_xdp) { - bp->tx_nr_rings_xdp = bp->tx_nr_rings_per_tc; - bp->tx_nr_rings += bp->tx_nr_rings_xdp; - } + bnxt_adj_tx_rings(bp); rc = bnxt_alloc_mem(bp, irq_re_init); if (rc) { netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc); @@ -15433,11 +15447,19 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) return 0; } +void bnxt_set_cp_rings(struct bnxt *bp, bool sh) +{ + int tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); + + bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) : + tx_cp + bp->rx_nr_rings; +} + int bnxt_setup_mq_tc(struct net_device *dev, u8 tc) { struct bnxt *bp = netdev_priv(dev); bool sh = false; - int rc, tx_cp; + int rc; if (tc > bp->max_tc) { netdev_err(dev, "Too many traffic classes requested: %d. Max supported is %d.\n", @@ -15470,9 +15492,7 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc) bp->num_tc = 0; } bp->tx_nr_rings += bp->tx_nr_rings_xdp; - tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); - bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) : - tx_cp + bp->rx_nr_rings; + bnxt_set_cp_rings(bp, sh); if (netif_running(bp->dev)) return bnxt_open_nic(bp, true, false); @@ -16522,6 +16542,19 @@ static void bnxt_trim_dflt_sh_rings(struct bnxt *bp) bp->tx_nr_rings = bnxt_tx_nr_rings(bp); } +static void bnxt_adj_dflt_rings(struct bnxt *bp, bool sh) +{ + if (sh) + bnxt_trim_dflt_sh_rings(bp); + else + bp->cp_nr_rings = bp->tx_nr_rings_per_tc + bp->rx_nr_rings; + bp->tx_nr_rings = bnxt_tx_nr_rings(bp); + if (sh && READ_ONCE(bp->xdp_prog)) { + bnxt_set_xdp_tx_rings(bp); + bnxt_set_cp_rings(bp, true); + } +} + static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) { int dflt_rings, max_rx_rings, max_tx_rings, rc; @@ -16547,11 +16580,8 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) return rc; bp->rx_nr_rings = min_t(int, dflt_rings, max_rx_rings); bp->tx_nr_rings_per_tc = min_t(int, dflt_rings, max_tx_rings); - if (sh) - bnxt_trim_dflt_sh_rings(bp); - else - bp->cp_nr_rings = bp->tx_nr_rings_per_tc + bp->rx_nr_rings; - bp->tx_nr_rings = bnxt_tx_nr_rings(bp); + + bnxt_adj_dflt_rings(bp, sh); avail_msix = bnxt_get_max_func_irqs(bp) - bp->cp_nr_rings; if (avail_msix >= BNXT_MIN_ROCE_CP_RINGS) { @@ -16564,16 +16594,17 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) rc = __bnxt_reserve_rings(bp); if (rc && rc != -ENODEV) netdev_warn(bp->dev, "Unable to reserve tx rings\n"); - bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); + + bnxt_adj_tx_rings(bp); if (sh) - bnxt_trim_dflt_sh_rings(bp); + bnxt_adj_dflt_rings(bp, true); /* Rings may have been trimmed, re-reserve the trimmed rings. 
*/ if (bnxt_need_reserve_rings(bp)) { rc = __bnxt_reserve_rings(bp); if (rc && rc != -ENODEV) netdev_warn(bp->dev, "2nd rings reservation failed.\n"); - bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); + bnxt_adj_tx_rings(bp); } if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { bp->rx_nr_rings++; @@ -16607,7 +16638,7 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp) if (rc) goto init_dflt_ring_err; - bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); + bnxt_adj_tx_rings(bp); bnxt_set_dflt_rfs(bp);
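Editor's note: one subtle piece of the bnxt.c changes above is the backing-store qcaps_v2 loop. The old body advanced the loop variable from resp->next_valid_type in the middle of the iteration, so the later ctxm->type assignment could observe the successor instead of the type just queried. Caching the successor in next_type and advancing only in the for-update keeps the loop variable stable; a stripped-down sketch of the pitfall:

/* Sketch: don't mutate the loop variable mid-body.
 *
 * buggy:  for (type = 0; type < MAX; ) {
 *                 query(type, &resp);
 *                 type = resp.next_valid_type;  // advanced too early
 *                 ctx[type].type = type;        // stamps the *next* type
 *         }
 */
for (type = 0, next = 0; type < MAX; type = next) {
	query(type, &resp);		/* hypothetical firmware query */
	next = resp.next_valid_type;	/* stash the successor */
	ctx[type].type = type;		/* 'type' stays valid all iteration */
}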
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 9a41b9e..4bc7f7a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2146,7 +2146,7 @@ enum board_idx { }; #define BNXT_TRACE_BUF_MAGIC_BYTE ((u8)0xbc) -#define BNXT_TRACE_MAX 11 +#define BNXT_TRACE_MAX (DBG_LOG_BUFFER_FLUSH_REQ_TYPE_ERR_QPC_TRACE + 1) struct bnxt_bs_trace_info { u8 *magic_byte; @@ -2985,6 +2985,7 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int tx_xdp); int bnxt_fw_init_one(struct bnxt *bp); bool bnxt_hwrm_reset_permitted(struct bnxt *bp); +void bnxt_set_cp_rings(struct bnxt *bp, bool sh); int bnxt_setup_mq_tc(struct net_device *dev, u8 tc); struct bnxt_ntuple_filter *bnxt_lookup_ntp_filter_from_idx(struct bnxt *bp, struct bnxt_ntuple_filter *fltr, u32 idx);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index ba47e82..0407aa1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -945,7 +945,6 @@ static int bnxt_set_channels(struct net_device *dev, bool sh = false; int tx_xdp = 0; int rc = 0; - int tx_cp; if (channel->other_count) return -EINVAL; @@ -979,8 +978,8 @@ static int bnxt_set_channels(struct net_device *dev, if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) != bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) && - netif_is_rxfh_configured(dev)) { - netdev_warn(dev, "RSS table size change required, RSS table entries must be default to proceed\n"); + (netif_is_rxfh_configured(dev) || bp->num_rss_ctx)) { + netdev_warn(dev, "RSS table size change required, RSS table entries must be default (with no additional RSS contexts present) to proceed\n"); return -EINVAL; } @@ -1013,9 +1012,7 @@ static int bnxt_set_channels(struct net_device *dev, if (tcs > 1) bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs + tx_xdp; - tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); - bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) : - tx_cp + bp->rx_nr_rings; + bnxt_set_cp_rings(bp, sh); /* After changing number of rx channels, update NTUPLE feature. */ netdev_update_features(dev);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 85cbeb3..bebe37e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -384,7 +384,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) { struct net_device *dev = bp->dev; - int tx_xdp = 0, tx_cp, rc, tc; + int tx_xdp = 0, rc, tc; struct bpf_prog *old; netdev_assert_locked(dev); @@ -431,8 +431,7 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) } bp->tx_nr_rings_xdp = tx_xdp; bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc + tx_xdp; - tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); - bp->cp_nr_rings = max_t(int, tx_cp, bp->rx_nr_rings); + bnxt_set_cp_rings(bp, true); bnxt_set_tpa_flags(bp); bnxt_set_ring_params(bp);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index a71cd72..482a31e 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1342,8 +1342,7 @@ static void bcmgenet_get_ethtool_stats(struct net_device *dev, } } -void bcmgenet_eee_enable_set(struct net_device *dev, bool enable, - bool tx_lpi_enabled) +void bcmgenet_eee_enable_set(struct net_device *dev, bool enable) { struct bcmgenet_priv *priv = netdev_priv(dev); u32 off = priv->hw_params->tbuf_offset + TBUF_ENERGY_CTRL; @@ -1363,7 +1362,7 @@ void bcmgenet_eee_enable_set(struct net_device *dev, bool enable, /* Enable EEE and switch to a 27Mhz clock automatically */ reg = bcmgenet_readl(priv->base + off); - if (tx_lpi_enabled) + if (enable) reg |= TBUF_EEE_EN | TBUF_PM_EN; else reg &= ~(TBUF_EEE_EN | TBUF_PM_EN); @@ -1382,14 +1381,12 @@ void bcmgenet_eee_enable_set(struct net_device *dev, bool enable, priv->clk_eee_enabled = false; } - priv->eee.eee_enabled = enable; - priv->eee.tx_lpi_enabled = tx_lpi_enabled; } static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_keee *e) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct ethtool_keee *p = &priv->eee; + int ret; if (GENET_IS_V1(priv)) return -EOPNOTSUPP; @@ -1397,17 +1394,21 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_keee *e) if (!dev->phydev) return -ENODEV; - e->tx_lpi_enabled = p->tx_lpi_enabled; + ret = phy_ethtool_get_eee(dev->phydev, e); + if (ret) + return ret; + + /* tx_lpi_timer is maintained by the MAC hardware register; the + * PHY-level eee_cfg timer is not set for GENET. + */ e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER); - return phy_ethtool_get_eee(dev->phydev, e); + return 0; } static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_keee *e) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct ethtool_keee *p = &priv->eee; - bool active; if (GENET_IS_V1(priv)) return -EOPNOTSUPP; @@ -1415,15 +1416,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_keee *e) if (!dev->phydev) return -ENODEV; - p->eee_enabled = e->eee_enabled; - - if (!p->eee_enabled) { - bcmgenet_eee_enable_set(dev, false, false); - } else { - active = phy_init_eee(dev->phydev, false) >= 0; - bcmgenet_umac_writel(priv, e->tx_lpi_timer, UMAC_EEE_LPI_TIMER); - bcmgenet_eee_enable_set(dev, active, e->tx_lpi_enabled); - } + bcmgenet_umac_writel(priv, e->tx_lpi_timer, UMAC_EEE_LPI_TIMER); return phy_ethtool_set_eee(dev->phydev, e); }
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 5ec3979..9e4110c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -665,8 +665,6 @@ struct bcmgenet_priv { u8 sopass[SOPASS_MAX]; struct bcmgenet_mib_counters mib; - - struct ethtool_keee eee; }; static inline bool bcmgenet_has_40bits(struct bcmgenet_priv *priv) @@ -749,7 +747,6 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, int bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv, enum bcmgenet_power_mode mode); -void bcmgenet_eee_enable_set(struct net_device *dev, bool enable, - bool tx_lpi_enabled); +void bcmgenet_eee_enable_set(struct net_device *dev, bool enable); #endif /* __BCMGENET_H__ */
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 8fb5512..96d5d4f 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
@@ -123,7 +123,7 @@ static int bcmgenet_poll_wol_status(struct bcmgenet_priv *priv) while (!(bcmgenet_rbuf_readl(priv, RBUF_STATUS) & RBUF_STATUS_WOL)) { retries++; - if (retries > 5) { + if (retries > 50) { netdev_crit(dev, "polling wol mode timeout\n"); return -ETIMEDOUT; }
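Editor's note: the bcmgenet_wol.c change simply raises the retry budget of an open-coded polling loop from 5 to 50. Such loops are easy to mistune; where a sleeping context is available, the same wait can be written declaratively with read_poll_timeout() from <linux/iopoll.h>, which makes interval and deadline explicit. A sketch, with the 100 us / 10 ms figures chosen for illustration only:

#include <linux/iopoll.h>

u32 status;
int err;

err = read_poll_timeout(bcmgenet_rbuf_readl, status,
			status & RBUF_STATUS_WOL,
			100, 10000, false,	/* 100 us poll, 10 ms cap */
			priv, RBUF_STATUS);
if (err)
	netdev_crit(dev, "polling wol mode timeout\n");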
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 38f854b..a4e0d5a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -29,7 +29,6 @@ static void bcmgenet_mac_config(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); struct phy_device *phydev = dev->phydev; u32 reg, cmd_bits = 0; - bool active; /* speed */ if (phydev->speed == SPEED_1000) @@ -90,10 +89,6 @@ static void bcmgenet_mac_config(struct net_device *dev) bcmgenet_umac_writel(priv, reg, UMAC_CMD); spin_unlock_bh(&priv->reg_lock); - active = phy_init_eee(phydev, 0) >= 0; - bcmgenet_eee_enable_set(dev, - priv->eee.eee_enabled && active, - priv->eee.tx_lpi_enabled); } /* setup netdev link state when PHY link status change and @@ -113,6 +108,8 @@ void bcmgenet_mii_setup(struct net_device *dev) bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); } + bcmgenet_eee_enable_set(dev, phydev->enable_tx_lpi); + phy_print_status(phydev); } @@ -412,6 +409,9 @@ int bcmgenet_mii_probe(struct net_device *dev) /* Indicate that the MAC is responsible for PHY PM */ dev->phydev->mac_managed_pm = true; + if (!GENET_IS_V1(priv)) + phy_support_eee(dev->phydev); + return 0; }
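Editor's note: taken together, the three GENET diffs move EEE ownership from the driver into phylib: the private ethtool_keee copy is deleted, get/set_eee become thin wrappers over phy_ethtool_get_eee()/phy_ethtool_set_eee(), and the MAC is programmed from the PHY's resolved phydev->enable_tx_lpi on every link change. The resulting driver-side shape, sketched with placeholder names:

static int my_mii_probe(struct net_device *dev)
{
	/* declare LPI capability once; phylib then owns the EEE config */
	phy_support_eee(dev->phydev);
	return 0;
}

static void my_link_change_handler(struct net_device *dev)
{
	struct phy_device *phydev = dev->phydev;

	/* mirror the PHY's negotiated + configured LPI state into the MAC */
	my_mac_eee_enable(dev, phydev->enable_tx_lpi);
	phy_print_status(phydev);
}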
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 2328fce..73a4b56 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12299,7 +12299,7 @@ static int tg3_get_link_ksettings(struct net_device *dev, ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, advertising); - if (netif_running(dev) && tp->link_up) { + if (netif_running(dev) && netif_carrier_ok(dev)) { cmd->base.speed = tp->link_config.active_speed; cmd->base.duplex = tp->link_config.active_duplex; ethtool_convert_legacy_u32_to_link_mode( @@ -17029,6 +17029,13 @@ static int tg3_get_invariants(struct tg3 *tp, const struct pci_device_id *ent) return err; } +static int tg3_is_default_mac_address(u8 *addr) +{ + static const u8 default_mac_address[ETH_ALEN] = { 0x00, 0x10, 0x18, 0x00, 0x00, 0x00 }; + + return ether_addr_equal(default_mac_address, addr); +} + static int tg3_get_device_address(struct tg3 *tp, u8 *addr) { u32 hi, lo, mac_offset; @@ -17102,6 +17109,10 @@ static int tg3_get_device_address(struct tg3 *tp, u8 *addr) if (!is_valid_ether_addr(addr)) return -EINVAL; + + if (tg3_is_default_mac_address(addr)) + return device_get_mac_address(&tp->pdev->dev, addr); + return 0; }
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 5bc35f6..99e7d5c 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -36,6 +36,7 @@ #include <linux/tcp.h> #include <linux/types.h> #include <linux/udp.h> +#include <linux/gcd.h> #include <net/pkt_sched.h> #include "macb.h" @@ -668,6 +669,97 @@ static void macb_mac_link_down(struct phylink_config *config, unsigned int mode, netif_tx_stop_all_queues(ndev); } +/* Use juggling algorithm to left rotate tx ring and tx skb array */ +static void gem_shuffle_tx_one_ring(struct macb_queue *queue) +{ + unsigned int head, tail, count, ring_size, desc_size; + struct macb_tx_skb tx_skb, *skb_curr, *skb_next; + struct macb_dma_desc *desc_curr, *desc_next; + unsigned int i, cycles, shift, curr, next; + struct macb *bp = queue->bp; + unsigned char desc[24]; + unsigned long flags; + + desc_size = macb_dma_desc_get_size(bp); + + if (WARN_ON_ONCE(desc_size > ARRAY_SIZE(desc))) + return; + + spin_lock_irqsave(&queue->tx_ptr_lock, flags); + head = queue->tx_head; + tail = queue->tx_tail; + ring_size = bp->tx_ring_size; + count = CIRC_CNT(head, tail, ring_size); + + if (!(tail % ring_size)) + goto unlock; + + if (!count) { + queue->tx_head = 0; + queue->tx_tail = 0; + goto unlock; + } + + shift = tail % ring_size; + cycles = gcd(ring_size, shift); + + for (i = 0; i < cycles; i++) { + memcpy(&desc, macb_tx_desc(queue, i), desc_size); + memcpy(&tx_skb, macb_tx_skb(queue, i), + sizeof(struct macb_tx_skb)); + + curr = i; + next = (curr + shift) % ring_size; + + while (next != i) { + desc_curr = macb_tx_desc(queue, curr); + desc_next = macb_tx_desc(queue, next); + + memcpy(desc_curr, desc_next, desc_size); + + if (next == ring_size - 1) + desc_curr->ctrl &= ~MACB_BIT(TX_WRAP); + if (curr == ring_size - 1) + desc_curr->ctrl |= MACB_BIT(TX_WRAP); + + skb_curr = macb_tx_skb(queue, curr); + skb_next = macb_tx_skb(queue, next); + memcpy(skb_curr, skb_next, sizeof(struct macb_tx_skb)); + + curr = next; + next = (curr + shift) % ring_size; + } + + desc_curr = macb_tx_desc(queue, curr); + memcpy(desc_curr, &desc, desc_size); + if (i == ring_size - 1) + desc_curr->ctrl &= ~MACB_BIT(TX_WRAP); + if (curr == ring_size - 1) + desc_curr->ctrl |= MACB_BIT(TX_WRAP); + memcpy(macb_tx_skb(queue, curr), &tx_skb, + sizeof(struct macb_tx_skb)); + } + + queue->tx_head = count; + queue->tx_tail = 0; + + /* Make descriptor updates visible to hardware */ + wmb(); + +unlock: + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); +} + +/* Rotate the queue so that the tail is at index 0 */ +static void gem_shuffle_tx_rings(struct macb *bp) +{ + struct macb_queue *queue; + int q; + + for (q = 0, queue = bp->queues; q < bp->num_queues; q++, queue++) + gem_shuffle_tx_one_ring(queue); +} + static void macb_mac_link_up(struct phylink_config *config, struct phy_device *phy, unsigned int mode, phy_interface_t interface, @@ -706,8 +798,6 @@ static void macb_mac_link_up(struct phylink_config *config, ctrl |= MACB_BIT(PAE); for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - queue->tx_head = 0; - queue->tx_tail = 0; queue_writel(queue, IER, bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); } @@ -721,8 +811,10 @@ static void macb_mac_link_up(struct phylink_config *config, spin_unlock_irqrestore(&bp->lock, flags); - if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) + if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) { macb_set_tx_clk(bp, speed); + gem_shuffle_tx_rings(bp); + } /* Enable Rx and Tx; Enable PTP unicast */ ctrl = macb_readl(bp, NCR); @@ -979,7 +1071,7 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb, int budge } if (tx_skb->skb) { - napi_consume_skb(tx_skb->skb, budget); + 
dev_consume_skb_any(tx_skb->skb); tx_skb->skb = NULL; } } @@ -2577,6 +2669,14 @@ static void macb_init_tieoff(struct macb *bp) desc->ctrl = 0; } +static void gem_init_rx_ring(struct macb_queue *queue) +{ + queue->rx_tail = 0; + queue->rx_prepared_head = 0; + + gem_rx_refill(queue); +} + static void gem_init_rings(struct macb *bp) { struct macb_queue *queue; @@ -2594,10 +2694,7 @@ static void gem_init_rings(struct macb *bp) queue->tx_head = 0; queue->tx_tail = 0; - queue->rx_tail = 0; - queue->rx_prepared_head = 0; - - gem_rx_refill(queue); + gem_init_rx_ring(queue); } macb_init_tieoff(bp); @@ -3127,7 +3224,7 @@ static void gem_get_ethtool_stats(struct net_device *dev, spin_lock_irq(&bp->stats_lock); gem_update_stats(bp); memcpy(data, &bp->ethtool_stats, sizeof(u64) - * (GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES)); + * (GEM_STATS_LEN + QUEUE_STATS_LEN * bp->num_queues)); spin_unlock_irq(&bp->stats_lock); } @@ -3886,6 +3983,9 @@ static int gem_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) struct macb *bp = netdev_priv(netdev); int ret; + if (!(netdev->hw_features & NETIF_F_NTUPLE)) + return -EOPNOTSUPP; + switch (cmd->cmd) { case ETHTOOL_SRXCLSRLINS: if ((cmd->fs.location >= bp->max_tuples) @@ -5676,9 +5776,9 @@ static int __maybe_unused macb_suspend(struct device *dev) struct macb_queue *queue; struct in_device *idev; unsigned long flags; + u32 tmp, ifa_local; unsigned int q; int err; - u32 tmp; if (!device_may_wakeup(&bp->dev->dev)) phy_exit(bp->phy); @@ -5687,14 +5787,21 @@ static int __maybe_unused macb_suspend(struct device *dev) return 0; if (bp->wol & MACB_WOL_ENABLED) { - /* Check for IP address in WOL ARP mode */ - idev = __in_dev_get_rcu(bp->dev); - if (idev) - ifa = rcu_dereference(idev->ifa_list); - if ((bp->wolopts & WAKE_ARP) && !ifa) { - netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n"); - return -EOPNOTSUPP; + if (bp->wolopts & WAKE_ARP) { + /* Check for IP address in WOL ARP mode */ + rcu_read_lock(); + idev = __in_dev_get_rcu(bp->dev); + if (idev) + ifa = rcu_dereference(idev->ifa_list); + if (!ifa) { + rcu_read_unlock(); + netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n"); + return -EOPNOTSUPP; + } + ifa_local = be32_to_cpu(ifa->ifa_local); + rcu_read_unlock(); } + spin_lock_irqsave(&bp->lock, flags); /* Disable Tx and Rx engines before disabling the queues, @@ -5733,8 +5840,9 @@ static int __maybe_unused macb_suspend(struct device *dev) if (bp->wolopts & WAKE_ARP) { tmp |= MACB_BIT(ARP); /* write IP address into register */ - tmp |= MACB_BFEXT(IP, be32_to_cpu(ifa->ifa_local)); + tmp |= MACB_BFEXT(IP, ifa_local); } + spin_unlock_irqrestore(&bp->lock, flags); /* Change interrupt handler and * Enable WoL IRQ on queue 0 @@ -5747,11 +5855,12 @@ static int __maybe_unused macb_suspend(struct device *dev) dev_err(dev, "Unable to request IRQ %d (error %d)\n", bp->queues[0].irq, err); - spin_unlock_irqrestore(&bp->lock, flags); return err; } + spin_lock_irqsave(&bp->lock, flags); queue_writel(bp->queues, IER, GEM_BIT(WOL)); gem_writel(bp, WOL, tmp); + spin_unlock_irqrestore(&bp->lock, flags); } else { err = devm_request_irq(dev, bp->queues[0].irq, macb_wol_interrupt, IRQF_SHARED, netdev->name, bp->queues); @@ -5759,13 +5868,13 @@ static int __maybe_unused macb_suspend(struct device *dev) dev_err(dev, "Unable to request IRQ %d (error %d)\n", bp->queues[0].irq, err); - spin_unlock_irqrestore(&bp->lock, flags); return err; } + spin_lock_irqsave(&bp->lock, flags); queue_writel(bp->queues, IER, MACB_BIT(WOL)); 
macb_writel(bp, WOL, tmp); + spin_unlock_irqrestore(&bp->lock, flags); } - spin_unlock_irqrestore(&bp->lock, flags); enable_irq_wake(bp->queues[0].irq); } @@ -5832,6 +5941,8 @@ static int __maybe_unused macb_resume(struct device *dev) queue_readl(bp->queues, ISR); if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(bp->queues, ISR, -1); + spin_unlock_irqrestore(&bp->lock, flags); + /* Replace interrupt handler on queue 0 */ devm_free_irq(dev, bp->queues[0].irq, bp->queues); err = devm_request_irq(dev, bp->queues[0].irq, macb_interrupt, @@ -5840,10 +5951,8 @@ static int __maybe_unused macb_resume(struct device *dev) dev_err(dev, "Unable to request IRQ %d (error %d)\n", bp->queues[0].irq, err); - spin_unlock_irqrestore(&bp->lock, flags); return err; } - spin_unlock_irqrestore(&bp->lock, flags); disable_irq_wake(bp->queues[0].irq); @@ -5855,8 +5964,18 @@ static int __maybe_unused macb_resume(struct device *dev) rtnl_unlock(); } + if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) + macb_init_buffers(bp); + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { + if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) { + if (macb_is_gem(bp)) + gem_init_rx_ring(queue); + else + macb_init_rx_ring(queue); + } + napi_enable(&queue->napi_rx); napi_enable(&queue->napi_tx); }
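Editor's note: gem_shuffle_tx_one_ring() above is the most algorithmic piece in this batch. It left-rotates the TX descriptor ring (and the parallel skb array) so the consumer index lands on slot 0, using the classic juggling rotation: gcd(ring_size, shift) disjoint cycles, each element moved exactly once, O(n) copies with O(1) scratch. A self-contained userspace demonstration of the same rotation on a plain int array:

#include <stdio.h>

static unsigned int gcd(unsigned int a, unsigned int b)
{
	while (b) {
		unsigned int t = a % b;
		a = b;
		b = t;
	}
	return a;
}

/* Left-rotate arr[0..n-1] by 'shift' using the juggling algorithm:
 * gcd(n, shift) cycles, each element moved exactly once.
 */
static void rotate_left(int *arr, unsigned int n, unsigned int shift)
{
	unsigned int cycles, i, curr, next;

	shift %= n;
	if (!shift)
		return;

	cycles = gcd(n, shift);
	for (i = 0; i < cycles; i++) {
		int saved = arr[i];	/* first element of this cycle */

		curr = i;
		next = (curr + shift) % n;
		while (next != i) {	/* pull each successor back */
			arr[curr] = arr[next];
			curr = next;
			next = (curr + shift) % n;
		}
		arr[curr] = saved;	/* close the cycle */
	}
}

int main(void)
{
	int ring[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
	unsigned int i;

	rotate_left(ring, 8, 3);	/* tail was at index 3 */
	for (i = 0; i < 8; i++)
		printf("%d ", ring[i]);	/* prints: 3 4 5 6 7 0 1 2 */
	printf("\n");
	return 0;
}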
diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c index fc4f5ae..b79dec1 100644 --- a/drivers/net/ethernet/cadence/macb_pci.c +++ b/drivers/net/ethernet/cadence/macb_pci.c
@@ -96,10 +96,10 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_plat_dev_register: - clk_unregister(plat_data.hclk); + clk_unregister_fixed_rate(plat_data.hclk); err_hclk_register: - clk_unregister(plat_data.pclk); + clk_unregister_fixed_rate(plat_data.pclk); err_pclk_register: return err; @@ -109,10 +109,12 @@ static void macb_remove(struct pci_dev *pdev) { struct platform_device *plat_dev = pci_get_drvdata(pdev); struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev); + struct clk *pclk = plat_data->pclk; + struct clk *hclk = plat_data->hclk; - clk_unregister(plat_data->pclk); - clk_unregister(plat_data->hclk); platform_device_unregister(plat_dev); + clk_unregister_fixed_rate(pclk); + clk_unregister_fixed_rate(hclk); } static const struct pci_device_id dev_id_table[] = {
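Editor's note: besides switching to clk_unregister_fixed_rate(), the macb_pci remove path reorders its teardown: plat_data is platform data owned by plat_dev, so it must not be dereferenced after platform_device_unregister() frees it. The pattern, in miniature:

/* Take local copies of everything the device owns *before* it dies. */
struct clk *pclk = plat_data->pclk;
struct clk *hclk = plat_data->hclk;

platform_device_unregister(plat_dev);	/* plat_data is gone after this */
clk_unregister_fixed_rate(pclk);	/* still safe: local copies */
clk_unregister_fixed_rate(hclk);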
diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index c9e7781..d91f7b1 100644 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c
@@ -357,8 +357,10 @@ void gem_ptp_remove(struct net_device *ndev) { struct macb *bp = netdev_priv(ndev); - if (bp->ptp_clock) + if (bp->ptp_clock) { ptp_clock_unregister(bp->ptp_clock); + bp->ptp_clock = NULL; + } gem_ptp_clear_timer(bp);
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 1e91e79..6d2fe5c 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -977,19 +977,19 @@ static int ftgmac100_alloc_rings(struct ftgmac100 *priv) priv->tx_skbs = kcalloc(MAX_TX_QUEUE_ENTRIES, sizeof(void *), GFP_KERNEL); if (!priv->tx_skbs) - return -ENOMEM; + goto err_free_rx_skbs; /* Allocate descriptors */ priv->rxdes = dma_alloc_coherent(priv->dev, MAX_RX_QUEUE_ENTRIES * sizeof(struct ftgmac100_rxdes), &priv->rxdes_dma, GFP_KERNEL); if (!priv->rxdes) - return -ENOMEM; + goto err_free_tx_skbs; priv->txdes = dma_alloc_coherent(priv->dev, MAX_TX_QUEUE_ENTRIES * sizeof(struct ftgmac100_txdes), &priv->txdes_dma, GFP_KERNEL); if (!priv->txdes) - return -ENOMEM; + goto err_free_rxdes; /* Allocate scratch packet buffer */ priv->rx_scratch = dma_alloc_coherent(priv->dev, @@ -997,9 +997,29 @@ static int ftgmac100_alloc_rings(struct ftgmac100 *priv) &priv->rx_scratch_dma, GFP_KERNEL); if (!priv->rx_scratch) - return -ENOMEM; + goto err_free_txdes; return 0; + +err_free_txdes: + dma_free_coherent(priv->dev, + MAX_TX_QUEUE_ENTRIES * + sizeof(struct ftgmac100_txdes), + priv->txdes, priv->txdes_dma); + priv->txdes = NULL; +err_free_rxdes: + dma_free_coherent(priv->dev, + MAX_RX_QUEUE_ENTRIES * + sizeof(struct ftgmac100_rxdes), + priv->rxdes, priv->rxdes_dma); + priv->rxdes = NULL; +err_free_tx_skbs: + kfree(priv->tx_skbs); + priv->tx_skbs = NULL; +err_free_rx_skbs: + kfree(priv->rx_skbs); + priv->rx_skbs = NULL; + return -ENOMEM; } static void ftgmac100_init_rings(struct ftgmac100 *priv)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index def2dd2..52c1cb9cb 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -1533,7 +1533,7 @@ static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg) if_id = (status & 0xFFFF0000) >> 16; if (if_id >= ethsw->sw_attr.num_ifs) { dev_err(dev, "Invalid if_id %d in IRQ status\n", if_id); - goto out; + goto out_clear; } port_priv = ethsw->ports[if_id]; @@ -1553,6 +1553,7 @@ static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg) dpaa2_switch_port_connect_mac(port_priv); } +out_clear: err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, DPSW_IRQ_INDEX_IF, status); if (err) @@ -3034,6 +3035,13 @@ static int dpaa2_switch_init(struct fsl_mc_device *sw_dev) goto err_close; } + if (ethsw->sw_attr.num_ifs >= DPSW_MAX_IF) { + dev_err(dev, "DPSW num_ifs %u exceeds max %u\n", + ethsw->sw_attr.num_ifs, DPSW_MAX_IF); + err = -EINVAL; + goto err_close; + } + err = dpsw_get_api_version(ethsw->mc_io, 0, &ethsw->major, &ethsw->minor);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 7076839..aa8a871 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -2578,6 +2578,7 @@ EXPORT_SYMBOL_GPL(enetc_free_si_resources); static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring) { + struct enetc_si *si = container_of(hw, struct enetc_si, hw); int idx = tx_ring->index; u32 tbmr; @@ -2591,10 +2592,20 @@ static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring) enetc_txbdr_wr(hw, idx, ENETC_TBLENR, ENETC_RTBLENR_LEN(tx_ring->bd_count)); - /* clearing PI/CI registers for Tx not supported, adjust sw indexes */ + /* For ENETC v1, clearing PI/CI registers for Tx not supported, + * adjust sw indexes + */ tx_ring->next_to_use = enetc_txbdr_rd(hw, idx, ENETC_TBPIR); tx_ring->next_to_clean = enetc_txbdr_rd(hw, idx, ENETC_TBCIR); + if (tx_ring->next_to_use != tx_ring->next_to_clean && + !is_enetc_rev1(si)) { + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + enetc_txbdr_wr(hw, idx, ENETC_TBPIR, 0); + enetc_txbdr_wr(hw, idx, ENETC_TBCIR, 0); + } + /* enable Tx ints by setting pkt thr to 1 */ enetc_txbdr_wr(hw, idx, ENETC_TBICR0, ENETC_TBICR0_ICEN | 0x1); @@ -3467,7 +3478,7 @@ static int enetc_int_vector_init(struct enetc_ndev_priv *priv, int i, priv->rx_ring[i] = bdr; err = __xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0, - ENETC_RXB_DMA_SIZE_XDP); + ENETC_RXB_TRUESIZE); if (err) goto free_vector;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h index 3ed0f7a..719c88c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
@@ -134,6 +134,12 @@ /* Port operational register */ #define ENETC4_POR 0x4100 +#define POR_TXDIS BIT(0) +#define POR_RXDIS BIT(1) + +/* Port status register */ +#define ENETC4_PSR 0x4104 +#define PSR_RX_BUSY BIT(1) /* Port traffic class a transmit maximum SDU register */ #define ENETC4_PTCTMSDUR(a) ((a) * 0x20 + 0x4208) @@ -173,6 +179,11 @@ /* Port internal MDIO base address, use to access PCS */ #define ENETC4_PM_IMDIO_BASE 0x5030 +/* Port MAC 0/1 Interrupt Event Register */ +#define ENETC4_PM_IEVENT(mac) (0x5040 + (mac) * 0x400) +#define PM_IEVENT_TX_EMPTY BIT(5) +#define PM_IEVENT_RX_EMPTY BIT(6) + /* Port MAC 0/1 Pause Quanta Register */ #define ENETC4_PM_PAUSE_QUANTA(mac) (0x5054 + (mac) * 0x400)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c index 689b9f1..56899f2 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
@@ -444,20 +444,11 @@ static void enetc4_set_trx_frame_size(struct enetc_pf *pf) enetc4_pf_reset_tc_msdu(&si->hw); } -static void enetc4_enable_trx(struct enetc_pf *pf) -{ - struct enetc_hw *hw = &pf->si->hw; - - /* Enable port transmit/receive */ - enetc_port_wr(hw, ENETC4_POR, 0); -} - static void enetc4_configure_port(struct enetc_pf *pf) { enetc4_configure_port_si(pf); enetc4_set_trx_frame_size(pf); enetc_set_default_rss_key(pf); - enetc4_enable_trx(pf); } static int enetc4_init_ntmp_user(struct enetc_si *si) @@ -801,15 +792,112 @@ static void enetc4_set_tx_pause(struct enetc_pf *pf, int num_rxbdr, bool tx_paus enetc_port_wr(hw, ENETC4_PPAUOFFTR, pause_off_thresh); } -static void enetc4_enable_mac(struct enetc_pf *pf, bool en) +static void enetc4_mac_wait_tx_empty(struct enetc_si *si, int mac) { + u32 val; + + if (read_poll_timeout(enetc_port_rd, val, + val & PM_IEVENT_TX_EMPTY, + 100, 10000, false, &si->hw, + ENETC4_PM_IEVENT(mac))) + dev_warn(&si->pdev->dev, + "MAC %d TX is not empty\n", mac); +} + +static void enetc4_mac_tx_graceful_stop(struct enetc_pf *pf) +{ + struct enetc_hw *hw = &pf->si->hw; + struct enetc_si *si = pf->si; + u32 val; + + val = enetc_port_rd(hw, ENETC4_POR); + val |= POR_TXDIS; + enetc_port_wr(hw, ENETC4_POR, val); + + if (enetc_is_pseudo_mac(si)) + return; + + enetc4_mac_wait_tx_empty(si, 0); + if (si->hw_features & ENETC_SI_F_QBU) + enetc4_mac_wait_tx_empty(si, 1); + + val = enetc_port_mac_rd(si, ENETC4_PM_CMD_CFG(0)); + val &= ~PM_CMD_CFG_TX_EN; + enetc_port_mac_wr(si, ENETC4_PM_CMD_CFG(0), val); +} + +static void enetc4_mac_tx_enable(struct enetc_pf *pf) +{ + struct enetc_hw *hw = &pf->si->hw; struct enetc_si *si = pf->si; u32 val; val = enetc_port_mac_rd(si, ENETC4_PM_CMD_CFG(0)); - val &= ~(PM_CMD_CFG_TX_EN | PM_CMD_CFG_RX_EN); - val |= en ? 
(PM_CMD_CFG_TX_EN | PM_CMD_CFG_RX_EN) : 0; + val |= PM_CMD_CFG_TX_EN; + enetc_port_mac_wr(si, ENETC4_PM_CMD_CFG(0), val); + val = enetc_port_rd(hw, ENETC4_POR); + val &= ~POR_TXDIS; + enetc_port_wr(hw, ENETC4_POR, val); +} + +static void enetc4_mac_wait_rx_empty(struct enetc_si *si, int mac) +{ + u32 val; + + if (read_poll_timeout(enetc_port_rd, val, + val & PM_IEVENT_RX_EMPTY, + 100, 10000, false, &si->hw, + ENETC4_PM_IEVENT(mac))) + dev_warn(&si->pdev->dev, + "MAC %d RX is not empty\n", mac); +} + +static void enetc4_mac_rx_graceful_stop(struct enetc_pf *pf) +{ + struct enetc_hw *hw = &pf->si->hw; + struct enetc_si *si = pf->si; + u32 val; + + if (enetc_is_pseudo_mac(si)) + goto check_rx_busy; + + if (si->hw_features & ENETC_SI_F_QBU) { + val = enetc_port_rd(hw, ENETC4_PM_CMD_CFG(1)); + val &= ~PM_CMD_CFG_RX_EN; + enetc_port_wr(hw, ENETC4_PM_CMD_CFG(1), val); + enetc4_mac_wait_rx_empty(si, 1); + } + + val = enetc_port_rd(hw, ENETC4_PM_CMD_CFG(0)); + val &= ~PM_CMD_CFG_RX_EN; + enetc_port_wr(hw, ENETC4_PM_CMD_CFG(0), val); + enetc4_mac_wait_rx_empty(si, 0); + +check_rx_busy: + if (read_poll_timeout(enetc_port_rd, val, + !(val & PSR_RX_BUSY), + 100, 10000, false, hw, + ENETC4_PSR)) + dev_warn(&si->pdev->dev, "Port RX busy\n"); + + val = enetc_port_rd(hw, ENETC4_POR); + val |= POR_RXDIS; + enetc_port_wr(hw, ENETC4_POR, val); +} + +static void enetc4_mac_rx_enable(struct enetc_pf *pf) +{ + struct enetc_hw *hw = &pf->si->hw; + struct enetc_si *si = pf->si; + u32 val; + + val = enetc_port_rd(hw, ENETC4_POR); + val &= ~POR_RXDIS; + enetc_port_wr(hw, ENETC4_POR, val); + + val = enetc_port_mac_rd(si, ENETC4_PM_CMD_CFG(0)); + val |= PM_CMD_CFG_RX_EN; enetc_port_mac_wr(si, ENETC4_PM_CMD_CFG(0), val); } @@ -853,7 +941,8 @@ static void enetc4_pl_mac_link_up(struct phylink_config *config, enetc4_set_hd_flow_control(pf, hd_fc); enetc4_set_tx_pause(pf, priv->num_rx_rings, tx_pause); enetc4_set_rx_pause(pf, rx_pause); - enetc4_enable_mac(pf, true); + enetc4_mac_tx_enable(pf); + enetc4_mac_rx_enable(pf); } static void enetc4_pl_mac_link_down(struct phylink_config *config, @@ -862,7 +951,8 @@ static void enetc4_pl_mac_link_down(struct phylink_config *config, { struct enetc_pf *pf = phylink_to_enetc_pf(config); - enetc4_enable_mac(pf, false); + enetc4_mac_rx_graceful_stop(pf); + enetc4_mac_tx_graceful_stop(pf); } static const struct phylink_mac_ops enetc_pl_mac_ops = {
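Editor's note: the enetc4 link-down path above is deliberately ordered: TX is blocked at the port first, the MAC FIFOs are drained (waiting on the TX_EMPTY/RX_EMPTY events via read_poll_timeout()), and only then is the MAC itself gated; the enable paths run the exact mirror image. Reduced to its skeleton, with hypothetical helpers standing in for the register writes:

static void mac_tx_graceful_stop(struct mac *m)
{
	port_block_tx(m);	/* 1. stop feeding the MAC (cf. POR_TXDIS) */
	wait_tx_fifo_empty(m);	/* 2. drain frames already queued          */
	mac_disable_tx(m);	/* 3. only now gate the transmitter        */
}

static void mac_tx_enable(struct mac *m)
{
	mac_enable_tx(m);	/* exact reverse of the stop sequence */
	port_unblock_tx(m);
}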
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index fed89d4..7c17aca 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -795,9 +795,17 @@ static int enetc_set_rxfh(struct net_device *ndev, struct enetc_si *si = priv->si; int err = 0; + if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && + rxfh->hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + /* set hash key, if PF */ - if (rxfh->key && enetc_si_is_pf(si)) + if (rxfh->key) { + if (!enetc_si_is_pf(si)) + return -EOPNOTSUPP; + enetc_set_rss_key(si, rxfh->key); + } /* set RSS table */ if (rxfh->indir) @@ -813,6 +821,8 @@ static void enetc_get_ringparam(struct net_device *ndev, { struct enetc_ndev_priv *priv = netdev_priv(ndev); + ring->rx_max_pending = priv->rx_bd_count; + ring->tx_max_pending = priv->tx_bd_count; ring->rx_pending = priv->rx_bd_count; ring->tx_pending = priv->tx_bd_count;
diff --git a/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c b/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c index 7fd39f8..92a0f82 100644 --- a/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c +++ b/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c
@@ -333,11 +333,13 @@ static int netc_get_phy_addr(struct device_node *np) mdio_node = of_get_child_by_name(np, "mdio"); if (!mdio_node) - return 0; + return -ENODEV; phy_node = of_get_next_child(mdio_node, NULL); - if (!phy_node) + if (!phy_node) { + err = -ENODEV; goto of_put_mdio_node; + } err = of_property_read_u32(phy_node, "reg", &addr); if (err) @@ -423,6 +425,9 @@ static int imx95_enetc_mdio_phyaddr_config(struct platform_device *pdev) addr = netc_get_phy_addr(gchild); if (addr < 0) { + if (addr == -ENODEV) + continue; + dev_err(dev, "Failed to get PHY address\n"); return addr; } @@ -433,12 +438,6 @@ static int imx95_enetc_mdio_phyaddr_config(struct platform_device *pdev) return -EINVAL; } - /* The default value of LaBCR[MDIO_PHYAD_PRTAD ] is - * 0, so no need to set the register. - */ - if (!addr) - continue; - switch (bus_devfn) { case IMX95_ENETC0_BUS_DEVFN: netc_reg_write(priv->ierb, IERB_LBCR(0), @@ -578,16 +577,13 @@ static int imx94_enetc_mdio_phyaddr_config(struct netc_blk_ctrl *priv, addr = netc_get_phy_addr(np); if (addr < 0) { + if (addr == -ENODEV) + return 0; + dev_err(dev, "Failed to get PHY address\n"); return addr; } - /* The default value of LaBCR[MDIO_PHYAD_PRTAD] is 0, - * so no need to set the register. - */ - if (!addr) - return 0; - if (phy_mask & BIT(addr)) { dev_err(dev, "Find same PHY address in EMDIO and ENETC node\n");
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 4b7bad9..56801c2 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -545,9 +545,6 @@ static int fec_ptp_enable(struct ptp_clock_info *ptp, if (rq->perout.flags) return -EOPNOTSUPP; - if (rq->perout.index != fep->pps_channel) - return -EOPNOTSUPP; - period.tv_sec = rq->perout.period.sec; period.tv_nsec = rq->perout.period.nsec; period_ns = timespec64_to_ns(&period);
diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 9031dd6..4db9d4f 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
@@ -167,6 +167,25 @@ gve_free_pending_packet(struct gve_tx_ring *tx, } } +static void gve_unmap_packet(struct device *dev, + struct gve_tx_pending_packet_dqo *pkt) +{ + int i; + + if (!pkt->num_bufs) + return; + + /* SKB linear portion is guaranteed to be mapped */ + dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]), + dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE); + for (i = 1; i < pkt->num_bufs; i++) { + netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(pkt, dma[i]), + dma_unmap_len(pkt, len[i]), + DMA_TO_DEVICE, 0); + } + pkt->num_bufs = 0; +} + /* gve_tx_free_desc - Cleans up all pending tx requests and buffers. */ static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx) @@ -176,21 +195,12 @@ static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx) for (i = 0; i < tx->dqo.num_pending_packets; i++) { struct gve_tx_pending_packet_dqo *cur_state = &tx->dqo.pending_packets[i]; - int j; - for (j = 0; j < cur_state->num_bufs; j++) { - if (j == 0) { - dma_unmap_single(tx->dev, - dma_unmap_addr(cur_state, dma[j]), - dma_unmap_len(cur_state, len[j]), - DMA_TO_DEVICE); - } else { - dma_unmap_page(tx->dev, - dma_unmap_addr(cur_state, dma[j]), - dma_unmap_len(cur_state, len[j]), - DMA_TO_DEVICE); - } - } + if (tx->dqo.qpl) + gve_free_tx_qpl_bufs(tx, cur_state); + else + gve_unmap_packet(tx->dev, cur_state); + if (cur_state->skb) { dev_consume_skb_any(cur_state->skb); cur_state->skb = NULL; @@ -1154,22 +1164,6 @@ static void remove_from_list(struct gve_tx_ring *tx, } } -static void gve_unmap_packet(struct device *dev, - struct gve_tx_pending_packet_dqo *pkt) -{ - int i; - - /* SKB linear portion is guaranteed to be mapped */ - dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]), - dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE); - for (i = 1; i < pkt->num_bufs; i++) { - netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(pkt, dma[i]), - dma_unmap_len(pkt, len[i]), - DMA_TO_DEVICE, 0); - } - pkt->num_bufs = 0; -} - /* Completion types and expected behavior: * No Miss compl + Packet compl = Packet completed normally. * Miss compl + Re-inject compl = Packet completed normally.
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 3d7648a..9b09eb1 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -2952,8 +2952,6 @@ static int e1000_tx_map(struct e1000_adapter *adapter, dma_error: dev_err(&pdev->dev, "TX DMA map failed\n"); buffer_info->dma = 0; - if (count) - count--; while (count--) { if (i == 0)
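Editor's note: the one-line e1000 deletion (mirrored in e1000e below) fixes an off-by-one in the DMA-error unwind: while (count--) already executes exactly count times, so the preceding "if (count) count--;" skipped unmapping one successfully mapped buffer on every failure. A tiny standalone program makes the iteration count obvious:

#include <stdio.h>

/* while (count--) runs exactly 'count' times; a preceding
 * "if (count) count--;" silently drops one iteration.
 */
static int unwind(int count, int extra_decrement)
{
	int undone = 0;

	if (extra_decrement && count)
		count--;		/* the removed off-by-one */
	while (count--)
		undone++;		/* stands in for dma_unmap_*() */
	return undone;
}

int main(void)
{
	printf("mapped=3 buggy=%d fixed=%d\n",
	       unwind(3, 1), unwind(3, 0));	/* buggy=2 fixed=3 */
	return 0;
}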
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index ba33189..d4a1041 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -33,6 +33,7 @@ /* Extended Device Control */ #define E1000_CTRL_EXT_LPCD 0x00000004 /* LCD Power Cycle Done */ +#define E1000_CTRL_EXT_DPG_EN 0x00000008 /* Dynamic Power Gating Enable */ #define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* Value of SW Definable Pin 3 */ #define E1000_CTRL_EXT_FORCE_SMBUS 0x00000800 /* Force SMBus mode */ #define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index aa08f39..63ebe00 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -117,7 +117,8 @@ enum e1000_boards { board_pch_cnp, board_pch_tgp, board_pch_adp, - board_pch_mtp + board_pch_mtp, + board_pch_ptp }; struct e1000_ps_page { @@ -527,6 +528,7 @@ extern const struct e1000_info e1000_pch_cnp_info; extern const struct e1000_info e1000_pch_tgp_info; extern const struct e1000_info e1000_pch_adp_info; extern const struct e1000_info e1000_pch_mtp_info; +extern const struct e1000_info e1000_pch_ptp_info; extern const struct e1000_info e1000_es2_info; void e1000e_ptp_init(struct e1000_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index fc8ed38..c7ac599 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -118,8 +118,6 @@ struct e1000_hw; #define E1000_DEV_ID_PCH_ARL_I219_V24 0x57A1 #define E1000_DEV_ID_PCH_PTP_I219_LM25 0x57B3 #define E1000_DEV_ID_PCH_PTP_I219_V25 0x57B4 -#define E1000_DEV_ID_PCH_PTP_I219_LM26 0x57B5 -#define E1000_DEV_ID_PCH_PTP_I219_V26 0x57B6 #define E1000_DEV_ID_PCH_PTP_I219_LM27 0x57B7 #define E1000_DEV_ID_PCH_PTP_I219_V27 0x57B8 #define E1000_DEV_ID_PCH_NVL_I219_LM29 0x57B9
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 0ff8688a..dea208d 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -528,7 +528,7 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) phy->id = e1000_phy_unknown; - if (hw->mac.type == e1000_pch_mtp) { + if (hw->mac.type == e1000_pch_mtp || hw->mac.type == e1000_pch_ptp) { phy->retry_count = 2; e1000e_enable_phy_retry(hw); } @@ -4932,6 +4932,15 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) reg |= E1000_KABGTXD_BGSQLBIAS; ew32(KABGTXD, reg); + /* The hardware reset value of the DPG_EN bit is 1. + * Clear DPG_EN to prevent unexpected autonomous power gating. + */ + if (hw->mac.type >= e1000_pch_ptp) { + reg = er32(CTRL_EXT); + reg &= ~E1000_CTRL_EXT_DPG_EN; + ew32(CTRL_EXT, reg); + } + return 0; } @@ -6208,3 +6217,23 @@ const struct e1000_info e1000_pch_mtp_info = { .phy_ops = &ich8_phy_ops, .nvm_ops = &spt_nvm_ops, }; + +const struct e1000_info e1000_pch_ptp_info = { + .mac = e1000_pch_ptp, + .flags = FLAG_IS_ICH + | FLAG_HAS_WOL + | FLAG_HAS_HW_TIMESTAMP + | FLAG_HAS_CTRLEXT_ON_LOAD + | FLAG_HAS_AMT + | FLAG_HAS_FLASH + | FLAG_HAS_JUMBO_FRAMES + | FLAG_APME_IN_WUC, + .flags2 = FLAG2_HAS_PHY_STATS + | FLAG2_HAS_EEE, + .pba = 26, + .max_hw_frame_size = 9022, + .get_variants = e1000_get_variants_ich8lan, + .mac_ops = &ich8_mac_ops, + .phy_ops = &ich8_phy_ops, + .nvm_ops = &spt_nvm_ops, +};
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index fd4b117..9befdac 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -55,6 +55,7 @@ static const struct e1000_info *e1000_info_tbl[] = { [board_pch_tgp] = &e1000_pch_tgp_info, [board_pch_adp] = &e1000_pch_adp_info, [board_pch_mtp] = &e1000_pch_mtp_info, + [board_pch_ptp] = &e1000_pch_ptp_info, }; struct e1000_reg_info { @@ -5651,8 +5652,6 @@ static int e1000_tx_map(struct e1000_ring *tx_ring, struct sk_buff *skb, dma_error: dev_err(&pdev->dev, "Tx DMA map failed\n"); buffer_info->dma = 0; - if (count) - count--; while (count--) { if (i == 0) @@ -7922,14 +7921,12 @@ static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_mtp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ARL_I219_LM24), board_pch_mtp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ARL_I219_V24), board_pch_mtp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM25), board_pch_mtp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V25), board_pch_mtp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM26), board_pch_mtp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V26), board_pch_mtp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM27), board_pch_mtp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V27), board_pch_mtp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_LM29), board_pch_mtp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_V29), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM25), board_pch_ptp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V25), board_pch_ptp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM27), board_pch_ptp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V27), board_pch_ptp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_LM29), board_pch_ptp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_V29), board_pch_ptp }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ };
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 7b9e147..926d001 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3569,6 +3569,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) u16 pf_q = vsi->base_queue + ring->queue_index; struct i40e_hw *hw = &vsi->back->hw; struct i40e_hmc_obj_rxq rx_ctx; + u32 xdp_frame_sz; int err = 0; bool ok; @@ -3578,49 +3579,47 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) memset(&rx_ctx, 0, sizeof(rx_ctx)); ring->rx_buf_len = vsi->rx_buf_len; + xdp_frame_sz = i40e_rx_pg_size(ring) / 2; /* XDP RX-queue info only needed for RX rings exposed to XDP */ if (ring->vsi->type != I40E_VSI_MAIN) goto skip; - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { - err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, - ring->queue_index, - ring->q_vector->napi.napi_id, - ring->rx_buf_len); - if (err) - return err; - } - ring->xsk_pool = i40e_xsk_pool(ring); if (ring->xsk_pool) { - xdp_rxq_info_unreg(&ring->xdp_rxq); + xdp_frame_sz = xsk_pool_get_rx_frag_step(ring->xsk_pool); ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->queue_index, ring->q_vector->napi.napi_id, - ring->rx_buf_len); + xdp_frame_sz); if (err) return err; err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); if (err) - return err; + goto unreg_xdp; dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", ring->queue_index); } else { + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->queue_index, + ring->q_vector->napi.napi_id, + xdp_frame_sz); + if (err) + return err; err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL); if (err) - return err; + goto unreg_xdp; } skip: - xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq); + xdp_init_buff(&ring->xdp, xdp_frame_sz, &ring->xdp_rxq); rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len, BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT)); @@ -3654,7 +3653,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) dev_info(&vsi->back->pdev->dev, "Failed to clear LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n", ring->queue_index, pf_q, err); - return -ENOMEM; + err = -ENOMEM; + goto unreg_xdp; } /* set the context in the HMC */ @@ -3663,7 +3663,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) dev_info(&vsi->back->pdev->dev, "Failed to set LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n", ring->queue_index, pf_q, err); - return -ENOMEM; + err = -ENOMEM; + goto unreg_xdp; } /* configure Rx buffer alignment */ @@ -3671,7 +3672,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) if (I40E_2K_TOO_SMALL_WITH_PADDING) { dev_info(&vsi->back->pdev->dev, "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto unreg_xdp; } clear_ring_build_skb_enabled(ring); } else { @@ -3701,6 +3703,11 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) } return 0; +unreg_xdp: + if (ring->vsi->type == I40E_VSI_MAIN) + xdp_rxq_info_unreg(&ring->xdp_rxq); + + return err; } /**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_trace.h b/drivers/net/ethernet/intel/i40e/i40e_trace.h index 759f3d1c..dde0ccd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_trace.h +++ b/drivers/net/ethernet/intel/i40e/i40e_trace.h
@@ -88,7 +88,7 @@ TRACE_EVENT(i40e_napi_poll, __entry->rx_clean_complete = rx_clean_complete; __entry->tx_clean_complete = tx_clean_complete; __entry->irq_num = q->irq_num; - __entry->curr_cpu = get_cpu(); + __entry->curr_cpu = smp_processor_id(); __assign_str(qname); __assign_str(dev_name); __assign_bitmask(irq_affinity, cpumask_bits(&q->affinity_mask),
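The i40e_trace change fixes a preemption-count leak: get_cpu() returns the current CPU id but also disables preemption and must be paired with put_cpu(), which the tracepoint assignment never issued. smp_processor_id() is a plain read with no counter side effect, and the context is already stable while the event fires. A userspace analog of the imbalance, with an ordinary counter standing in for the kernel's preempt count (demo names, not kernel API):

#include <stdio.h>

static int preempt_count;	/* userspace stand-in for the kernel counter */

static int get_cpu_demo(void)
{
	preempt_count++;	/* get_cpu() disables preemption... */
	return 0;		/* ...and returns the CPU id */
}

static void put_cpu_demo(void)
{
	preempt_count--;	/* the pairing the tracepoint never had */
}

static int smp_processor_id_demo(void)
{
	return 0;		/* a plain read, no side effect */
}

int main(void)
{
	(void)get_cpu_demo();
	printf("after unbalanced get_cpu: preempt_count=%d\n", preempt_count);
	put_cpu_demo();		/* restore balance for the demo */

	(void)smp_processor_id_demo();
	printf("after smp_processor_id: preempt_count=%d\n", preempt_count);
	return 0;
}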
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 34db7d8..894f2d0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1470,6 +1470,9 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) if (!rx_ring->rx_bi) return; + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + if (rx_ring->xsk_pool) { i40e_xsk_clean_rx_ring(rx_ring); goto skip_free; @@ -1527,8 +1530,6 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) void i40e_free_rx_resources(struct i40e_ring *rx_ring) { i40e_clean_rx_ring(rx_ring); - if (rx_ring->vsi->type == I40E_VSI_MAIN) - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); rx_ring->xdp_prog = NULL; kfree(rx_ring->rx_bi); rx_ring->rx_bi = NULL;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index fdf40f8..a26c3d4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -3833,10 +3833,10 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg) cfilter.n_proto = ETH_P_IP; if (mask.dst_ip[0] & tcf.dst_ip[0]) memcpy(&cfilter.ip.v4.dst_ip, tcf.dst_ip, - ARRAY_SIZE(tcf.dst_ip)); - else if (mask.src_ip[0] & tcf.dst_ip[0]) + sizeof(cfilter.ip.v4.dst_ip)); + else if (mask.src_ip[0] & tcf.src_ip[0]) memcpy(&cfilter.ip.v4.src_ip, tcf.src_ip, - ARRAY_SIZE(tcf.dst_ip)); + sizeof(cfilter.ip.v4.src_ip)); break; case VIRTCHNL_TCP_V6_FLOW: cfilter.n_proto = ETH_P_IPV6; @@ -3891,7 +3891,7 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg) /* for ipv6, mask is set for all sixteen bytes (4 words) */ if (cfilter.n_proto == ETH_P_IPV6 && mask.dst_ip[3]) if (memcmp(&cfilter.ip.v6.dst_ip6, &cf->ip.v6.dst_ip6, - sizeof(cfilter.ip.v6.src_ip6))) + sizeof(cfilter.ip.v6.dst_ip6))) continue; if (mask.vlan_id) if (cfilter.vlan_id != cf->vlan_id) @@ -3979,10 +3979,10 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) cfilter->n_proto = ETH_P_IP; if (mask.dst_ip[0] & tcf.dst_ip[0]) memcpy(&cfilter->ip.v4.dst_ip, tcf.dst_ip, - ARRAY_SIZE(tcf.dst_ip)); - else if (mask.src_ip[0] & tcf.dst_ip[0]) + sizeof(cfilter->ip.v4.dst_ip)); + else if (mask.src_ip[0] & tcf.src_ip[0]) memcpy(&cfilter->ip.v4.src_ip, tcf.src_ip, - ARRAY_SIZE(tcf.dst_ip)); + sizeof(cfilter->ip.v4.src_ip)); break; case VIRTCHNL_TCP_V6_FLOW: cfilter->n_proto = ETH_P_IPV6;
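The i40e_virtchnl hunks fix two distinct bugs per branch: ARRAY_SIZE() yields an element count rather than a byte count, so it is the wrong unit for a memcpy() length even where the numbers happen to coincide, and the src_ip branch tested mask.src_ip against tcf.dst_ip. Using sizeof(destination) states the byte count directly and stays correct if either side changes type; the IPv6 memcmp likewise now names the field it actually compares. The unit mix-up in one runnable demo (generic types, no driver structures):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

int main(void)
{
	uint32_t words[4] = { 1, 2, 3, 4 };
	uint32_t dst;

	/* Element count vs byte count: off by a factor of sizeof(uint32_t). */
	printf("ARRAY_SIZE=%zu sizeof=%zu\n", ARRAY_SIZE(words), sizeof(words));

	/* sizeof(destination) cannot silently under- or over-copy. */
	memcpy(&dst, words, sizeof(dst));
	printf("dst=%u\n", (unsigned int)dst);
	return 0;
}

With uint32_t words[4], ARRAY_SIZE is 4 while sizeof is 16; passing the former to memcpy() copies a quarter of the array.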
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index a87e0c6..e9fb0a0 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -260,7 +260,6 @@ struct iavf_adapter { struct work_struct adminq_task; struct work_struct finish_config; wait_queue_head_t down_waitqueue; - wait_queue_head_t reset_waitqueue; wait_queue_head_t vc_waitqueue; struct iavf_q_vector *q_vectors; struct list_head vlan_filter_list; @@ -626,5 +625,5 @@ void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter); void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter); struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, const u8 *macaddr); -int iavf_wait_for_reset(struct iavf_adapter *adapter); +void iavf_reset_step(struct iavf_adapter *adapter); #endif /* _IAVF_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index f3a1b2f..1cd1f3f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -313,14 +313,13 @@ static int iavf_get_sset_count(struct net_device *netdev, int sset) { /* Report the maximum number queues, even if not every queue is * currently configured. Since allocation of queues is in pairs, - * use netdev->real_num_tx_queues * 2. The real_num_tx_queues is set - * at device creation and never changes. + * use netdev->num_tx_queues * 2. The num_tx_queues is set at + * device creation and never changes. */ if (sset == ETH_SS_STATS) return IAVF_STATS_LEN + - (IAVF_QUEUE_STATS_LEN * 2 * - netdev->real_num_tx_queues); + (IAVF_QUEUE_STATS_LEN * 2 * netdev->num_tx_queues); else return -EINVAL; } @@ -345,19 +344,19 @@ static void iavf_get_ethtool_stats(struct net_device *netdev, iavf_add_ethtool_stats(&data, adapter, iavf_gstrings_stats); rcu_read_lock(); - /* As num_active_queues describe both tx and rx queues, we can use - * it to iterate over rings' stats. + /* Use num_tx_queues to report stats for the maximum number of queues. + * Queues beyond num_active_queues will report zero. */ - for (i = 0; i < adapter->num_active_queues; i++) { - struct iavf_ring *ring; + for (i = 0; i < netdev->num_tx_queues; i++) { + struct iavf_ring *tx_ring = NULL, *rx_ring = NULL; - /* Tx rings stats */ - ring = &adapter->tx_rings[i]; - iavf_add_queue_stats(&data, ring); + if (i < adapter->num_active_queues) { + tx_ring = &adapter->tx_rings[i]; + rx_ring = &adapter->rx_rings[i]; + } - /* Rx rings stats */ - ring = &adapter->rx_rings[i]; - iavf_add_queue_stats(&data, ring); + iavf_add_queue_stats(&data, tx_ring); + iavf_add_queue_stats(&data, rx_ring); } rcu_read_unlock(); } @@ -376,9 +375,9 @@ static void iavf_get_stat_strings(struct net_device *netdev, u8 *data) iavf_add_stat_strings(&data, iavf_gstrings_stats); /* Queues are always allocated in pairs, so we just use - * real_num_tx_queues for both Tx and Rx queues. + * num_tx_queues for both Tx and Rx queues. */ - for (i = 0; i < netdev->real_num_tx_queues; i++) { + for (i = 0; i < netdev->num_tx_queues; i++) { iavf_add_stat_strings(&data, iavf_gstrings_queue_stats, "tx", i); iavf_add_stat_strings(&data, iavf_gstrings_queue_stats, @@ -492,7 +491,6 @@ static int iavf_set_ringparam(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); u32 new_rx_count, new_tx_count; - int ret = 0; if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) return -EINVAL; @@ -537,13 +535,11 @@ static int iavf_set_ringparam(struct net_device *netdev, } if (netif_running(netdev)) { - iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); - ret = iavf_wait_for_reset(adapter); - if (ret) - netdev_warn(netdev, "Changing ring parameters timeout or interrupted waiting for reset"); + adapter->flags |= IAVF_FLAG_RESET_NEEDED; + iavf_reset_step(adapter); } - return ret; + return 0; } /** @@ -1723,7 +1719,6 @@ static int iavf_set_channels(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); u32 num_req = ch->combined_count; - int ret = 0; if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && adapter->num_tc) { @@ -1745,13 +1740,10 @@ static int iavf_set_channels(struct net_device *netdev, adapter->num_req_queues = num_req; adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED; - iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); + adapter->flags |= IAVF_FLAG_RESET_NEEDED; + iavf_reset_step(adapter); - ret = iavf_wait_for_reset(adapter); - if (ret) - netdev_warn(netdev, "Changing channel count timeout or interrupted waiting for reset"); - - return ret; + return 0; } /**
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index bceaf4b..dad001a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -186,31 +186,6 @@ static bool iavf_is_reset_in_progress(struct iavf_adapter *adapter) } /** - * iavf_wait_for_reset - Wait for reset to finish. - * @adapter: board private structure - * - * Returns 0 if reset finished successfully, negative on timeout or interrupt. - */ -int iavf_wait_for_reset(struct iavf_adapter *adapter) -{ - int ret = wait_event_interruptible_timeout(adapter->reset_waitqueue, - !iavf_is_reset_in_progress(adapter), - msecs_to_jiffies(5000)); - - /* If ret < 0 then it means wait was interrupted. - * If ret == 0 then it means we got a timeout while waiting - * for reset to finish. - * If ret > 0 it means reset has finished. - */ - if (ret > 0) - return 0; - else if (ret < 0) - return -EINTR; - else - return -EBUSY; -} - -/** * iavf_allocate_dma_mem_d - OS specific memory alloc for shared code * @hw: pointer to the HW structure * @mem: ptr to mem struct to fill out @@ -782,10 +757,13 @@ iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter, adapter->num_vlan_filters++; iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_VLAN_FILTER); } else if (f->state == IAVF_VLAN_REMOVE) { - /* IAVF_VLAN_REMOVE means that VLAN wasn't yet removed. - * We can safely only change the state here. + /* Re-add the filter since we cannot tell whether the + * pending delete has already been processed by the PF. + * A duplicate add is harmless. */ - f->state = IAVF_VLAN_ACTIVE; + f->state = IAVF_VLAN_ADD; + iavf_schedule_aq_request(adapter, + IAVF_FLAG_AQ_ADD_VLAN_FILTER); } clearout: @@ -2793,7 +2771,22 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) netdev->watchdog_timeo = 5 * HZ; netdev->min_mtu = ETH_MIN_MTU; - netdev->max_mtu = LIBIE_MAX_MTU; + + /* PF/VF API: vf_res->max_mtu is max frame size (not MTU). + * Convert to MTU. + */ + if (!adapter->vf_res->max_mtu) { + netdev->max_mtu = LIBIE_MAX_MTU; + } else if (adapter->vf_res->max_mtu < LIBETH_RX_LL_LEN + ETH_MIN_MTU || + adapter->vf_res->max_mtu > + LIBETH_RX_LL_LEN + LIBIE_MAX_MTU) { + netdev_warn_once(adapter->netdev, + "invalid max frame size %d from PF, using default MTU %d", + adapter->vf_res->max_mtu, LIBIE_MAX_MTU); + netdev->max_mtu = LIBIE_MAX_MTU; + } else { + netdev->max_mtu = adapter->vf_res->max_mtu - LIBETH_RX_LL_LEN; + } if (!is_valid_ether_addr(adapter->hw.mac.addr)) { dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n", @@ -3021,6 +3014,8 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; + iavf_ptp_release(adapter); + /* We don't use netif_running() because it may be true prior to * ndo_open() returning, so we can't assume it means all our open * tasks have finished, since we're not holding the rtnl_lock here. @@ -3096,18 +3091,16 @@ static void iavf_reconfig_qs_bw(struct iavf_adapter *adapter) } /** - * iavf_reset_task - Call-back task to handle hardware reset - * @work: pointer to work_struct + * iavf_reset_step - Perform the VF reset sequence + * @adapter: board private structure * - * During reset we need to shut down and reinitialize the admin queue - * before we can use it to communicate with the PF again. We also clear - * and reinit the rings because that context is lost as well. - **/ -static void iavf_reset_task(struct work_struct *work) + * Requests a reset from PF, polls for completion, and reconfigures + * the driver. Caller must hold the netdev instance lock. + * + * This can sleep for several seconds while polling HW registers. 
+ */ +void iavf_reset_step(struct iavf_adapter *adapter) { - struct iavf_adapter *adapter = container_of(work, - struct iavf_adapter, - reset_task); struct virtchnl_vf_resource *vfres = adapter->vf_res; struct net_device *netdev = adapter->netdev; struct iavf_hw *hw = &adapter->hw; @@ -3118,7 +3111,7 @@ static void iavf_reset_task(struct work_struct *work) int i = 0, err; bool running; - netdev_lock(netdev); + netdev_assert_locked(netdev); iavf_misc_irq_disable(adapter); if (adapter->flags & IAVF_FLAG_RESET_NEEDED) { @@ -3163,7 +3156,6 @@ static void iavf_reset_task(struct work_struct *work) dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n", reg_val); iavf_disable_vf(adapter); - netdev_unlock(netdev); return; /* Do not attempt to reinit. It's dead, Jim. */ } @@ -3175,7 +3167,6 @@ static void iavf_reset_task(struct work_struct *work) iavf_startup(adapter); queue_delayed_work(adapter->wq, &adapter->watchdog_task, msecs_to_jiffies(30)); - netdev_unlock(netdev); return; } @@ -3196,6 +3187,8 @@ static void iavf_reset_task(struct work_struct *work) iavf_change_state(adapter, __IAVF_RESETTING); adapter->flags &= ~IAVF_FLAG_RESET_PENDING; + iavf_ptp_release(adapter); + /* free the Tx/Rx rings and descriptors, might be better to just * re-use them sometime in the future */ @@ -3316,9 +3309,6 @@ static void iavf_reset_task(struct work_struct *work) adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; - wake_up(&adapter->reset_waitqueue); - netdev_unlock(netdev); - return; reset_err: if (running) { @@ -3327,10 +3317,21 @@ static void iavf_reset_task(struct work_struct *work) } iavf_disable_vf(adapter); - netdev_unlock(netdev); dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); } +static void iavf_reset_task(struct work_struct *work) +{ + struct iavf_adapter *adapter = container_of(work, + struct iavf_adapter, + reset_task); + struct net_device *netdev = adapter->netdev; + + netdev_lock(netdev); + iavf_reset_step(adapter); + netdev_unlock(netdev); +} + /** * iavf_adminq_task - worker thread to clean the admin queue * @work: pointer to work_struct containing our data @@ -4596,22 +4597,17 @@ static int iavf_close(struct net_device *netdev) static int iavf_change_mtu(struct net_device *netdev, int new_mtu) { struct iavf_adapter *adapter = netdev_priv(netdev); - int ret = 0; netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); WRITE_ONCE(netdev->mtu, new_mtu); if (netif_running(netdev)) { - iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); - ret = iavf_wait_for_reset(adapter); - if (ret < 0) - netdev_warn(netdev, "MTU change interrupted waiting for reset"); - else if (ret) - netdev_warn(netdev, "MTU change timed out waiting for reset"); + adapter->flags |= IAVF_FLAG_RESET_NEEDED; + iavf_reset_step(adapter); } - return ret; + return 0; } /** @@ -5416,9 +5412,6 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Setup the wait queue for indicating transition to down status */ init_waitqueue_head(&adapter->down_waitqueue); - /* Setup the wait queue for indicating transition to running state */ - init_waitqueue_head(&adapter->reset_waitqueue); - /* Setup the wait queue for indicating virtchannel events */ init_waitqueue_head(&adapter->vc_waitqueue);
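The iavf rework inverts the old reset control flow: rather than scheduling reset_task and sleeping on reset_waitqueue with a 5-second timeout, callers that already hold the netdev lock (the ethtool ring/channel paths and MTU changes) run iavf_reset_step() inline, and the work item becomes a thin take-lock-and-call wrapper. The timeout and -EINTR failure modes disappear from those paths entirely. The body-plus-wrapper split, sketched with pthreads standing in for the kernel work queue and netdev lock (illustrative only; link with -lpthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t dev_lock = PTHREAD_MUTEX_INITIALIZER;

/* The body: performs the reset; the caller must hold dev_lock. */
static void reset_step(void)
{
	puts("reset sequence (lock held by caller)");
}

/* The async wrapper: the old task shrinks to take-lock-and-call. */
static void *reset_task(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&dev_lock);
	reset_step();
	pthread_mutex_unlock(&dev_lock);
	return NULL;
}

int main(void)
{
	pthread_t worker;

	/* Synchronous caller (the ethtool/MTU paths) runs the body inline. */
	pthread_mutex_lock(&dev_lock);
	reset_step();
	pthread_mutex_unlock(&dev_lock);

	/* Event-driven resets still go through the deferred wrapper. */
	pthread_create(&worker, NULL, reset_task, NULL);
	pthread_join(worker, NULL);
	return 0;
}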
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 8815608..a52c100 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -2736,7 +2736,6 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, case VIRTCHNL_OP_ENABLE_QUEUES: /* enable transmits */ iavf_irq_enable(adapter, true); - wake_up(&adapter->reset_waitqueue); adapter->flags &= ~IAVF_FLAG_QUEUES_DISABLED; break; case VIRTCHNL_OP_DISABLE_QUEUES:
diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index 6c72bd1..6144cee 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c
@@ -1360,7 +1360,7 @@ ice_devlink_enable_roce_get(struct devlink *devlink, u32 id, cdev = pf->cdev_info; if (!cdev) - return -ENODEV; + return -EOPNOTSUPP; ctx->val.vbool = !!(cdev->rdma_protocol & IIDC_RDMA_PROTOCOL_ROCEV2); @@ -1427,7 +1427,7 @@ ice_devlink_enable_iw_get(struct devlink *devlink, u32 id, cdev = pf->cdev_info; if (!cdev) - return -ENODEV; + return -EOPNOTSUPP; ctx->val.vbool = !!(cdev->rdma_protocol & IIDC_RDMA_PROTOCOL_IWARP);
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index def7efa..eb3a483 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -840,6 +840,28 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid) } /** + * ice_get_max_txq - return the maximum number of Tx queues in a PF + * @pf: PF structure + * + * Return: maximum number of Tx queues + */ +static inline int ice_get_max_txq(struct ice_pf *pf) +{ + return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_txq); +} + +/** + * ice_get_max_rxq - return the maximum number of Rx queues in a PF + * @pf: PF structure + * + * Return: maximum number of Rx queues + */ +static inline int ice_get_max_rxq(struct ice_pf *pf) +{ + return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_rxq); +} + +/** * ice_get_main_vsi - Get the PF VSI * @pf: PF instance * @@ -987,6 +1009,7 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset); void ice_print_link_msg(struct ice_vsi *vsi, bool isup); int ice_plug_aux_dev(struct ice_pf *pf); void ice_unplug_aux_dev(struct ice_pf *pf); +void ice_rdma_finalize_setup(struct ice_pf *pf); int ice_init_rdma(struct ice_pf *pf); void ice_deinit_rdma(struct ice_pf *pf); bool ice_is_wol_supported(struct ice_hw *hw);
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index eb77dfa..1667f68 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -124,6 +124,8 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx) if (vsi->type == ICE_VSI_VF) { ice_calc_vf_reg_idx(vsi->vf, q_vector); goto out; + } else if (vsi->type == ICE_VSI_LB) { + goto skip_alloc; } else if (vsi->type == ICE_VSI_CTRL && vsi->vf) { struct ice_vsi *ctrl_vsi = ice_get_vf_ctrl_vsi(pf, vsi); @@ -659,33 +661,22 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) { struct device *dev = ice_pf_to_dev(ring->vsi->back); u32 num_bufs = ICE_DESC_UNUSED(ring); - u32 rx_buf_len; int err; - if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF) { - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { - err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, - ring->q_index, - ring->q_vector->napi.napi_id, - ring->rx_buf_len); - if (err) - return err; - } - + if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF || + ring->vsi->type == ICE_VSI_LB) { ice_rx_xsk_pool(ring); err = ice_realloc_rx_xdp_bufs(ring, ring->xsk_pool); if (err) return err; if (ring->xsk_pool) { - xdp_rxq_info_unreg(&ring->xdp_rxq); - - rx_buf_len = - xsk_pool_get_rx_frame_size(ring->xsk_pool); + u32 frag_size = + xsk_pool_get_rx_frag_step(ring->xsk_pool); err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index, ring->q_vector->napi.napi_id, - rx_buf_len); + frag_size); if (err) return err; err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, @@ -702,14 +693,13 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) if (err) return err; - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { - err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, - ring->q_index, - ring->q_vector->napi.napi_id, - ring->rx_buf_len); - if (err) - goto err_destroy_fq; - } + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->q_index, + ring->q_vector->napi.napi_id, + ring->truesize); + if (err) + goto err_destroy_fq; + xdp_rxq_info_attach_page_pool(&ring->xdp_rxq, ring->pp); }
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 6cd6319..ce11fea 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1816,6 +1816,7 @@ static bool ice_should_retry_sq_send_cmd(u16 opcode) case ice_aqc_opc_lldp_stop: case ice_aqc_opc_lldp_start: case ice_aqc_opc_lldp_filter_ctrl: + case ice_aqc_opc_sff_eeprom: return true; } @@ -1841,6 +1842,7 @@ ice_sq_send_cmd_retry(struct ice_hw *hw, struct ice_ctl_q_info *cq, { struct libie_aq_desc desc_cpy; bool is_cmd_for_retry; + u8 *buf_cpy = NULL; u8 idx = 0; u16 opcode; int status; @@ -1850,8 +1852,11 @@ ice_sq_send_cmd_retry(struct ice_hw *hw, struct ice_ctl_q_info *cq, memset(&desc_cpy, 0, sizeof(desc_cpy)); if (is_cmd_for_retry) { - /* All retryable cmds are direct, without buf. */ - WARN_ON(buf); + if (buf) { + buf_cpy = kmemdup(buf, buf_size, GFP_KERNEL); + if (!buf_cpy) + return -ENOMEM; + } memcpy(&desc_cpy, desc, sizeof(desc_cpy)); } @@ -1863,12 +1868,14 @@ ice_sq_send_cmd_retry(struct ice_hw *hw, struct ice_ctl_q_info *cq, hw->adminq.sq_last_status != LIBIE_AQ_RC_EBUSY) break; + if (buf_cpy) + memcpy(buf, buf_cpy, buf_size); memcpy(desc, &desc_cpy, sizeof(desc_cpy)); - msleep(ICE_SQ_SEND_DELAY_TIME_MS); } while (++idx < ICE_SQ_SEND_MAX_EXECUTE); + kfree(buf_cpy); return status; } @@ -6391,7 +6398,7 @@ int ice_lldp_fltr_add_remove(struct ice_hw *hw, struct ice_vsi *vsi, bool add) struct ice_aqc_lldp_filter_ctrl *cmd; struct libie_aq_desc desc; - if (vsi->type != ICE_VSI_PF || !ice_fw_supports_lldp_fltr_ctrl(hw)) + if (!ice_fw_supports_lldp_fltr_ctrl(hw)) return -EOPNOTSUPP; cmd = libie_aq_raw(&desc);
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 0ea64b3..e6a20af 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -1289,6 +1289,10 @@ static u64 ice_loopback_test(struct net_device *netdev) test_vsi->netdev = netdev; tx_ring = test_vsi->tx_rings[0]; rx_ring = test_vsi->rx_rings[0]; + /* Dummy q_vector and napi. Fill the minimum required for + * ice_rxq_pp_create(). + */ + rx_ring->q_vector->napi.dev = netdev; if (ice_lbtest_prepare_rings(test_vsi)) { ret = 2; @@ -1926,6 +1930,17 @@ __ice_get_ethtool_stats(struct net_device *netdev, int i = 0; char *p; + if (ice_is_port_repr_netdev(netdev)) { + ice_update_eth_stats(vsi); + + for (j = 0; j < ICE_VSI_STATS_LEN; j++) { + p = (char *)vsi + ice_gstrings_vsi_stats[j].stat_offset; + data[i++] = (ice_gstrings_vsi_stats[j].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + return; + } + ice_update_pf_stats(pf); ice_update_vsi_stats(vsi); @@ -1935,9 +1950,6 @@ __ice_get_ethtool_stats(struct net_device *netdev, sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } - if (ice_is_port_repr_netdev(netdev)) - return; - /* populate per queue stats */ rcu_read_lock(); @@ -3328,7 +3340,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, rx_rings = kzalloc_objs(*rx_rings, vsi->num_rxq); if (!rx_rings) { err = -ENOMEM; - goto done; + goto free_xdp; } ice_for_each_rxq(vsi, i) { @@ -3338,6 +3350,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, rx_rings[i].cached_phctime = pf->ptp.cached_phc_time; rx_rings[i].desc = NULL; rx_rings[i].xdp_buf = NULL; + rx_rings[i].xdp_rxq = (struct xdp_rxq_info){ }; /* this is to allow wr32 to have something to write to * during early allocation of Rx buffers @@ -3355,7 +3368,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, } kfree(rx_rings); err = -ENOMEM; - goto free_tx; + goto free_xdp; } } @@ -3407,6 +3420,13 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, } goto done; +free_xdp: + if (xdp_rings) { + ice_for_each_xdp_txq(vsi, i) + ice_free_tx_ring(&xdp_rings[i]); + kfree(xdp_rings); + } + free_tx: /* error cleanup if the Rx allocations failed after getting Tx */ if (tx_rings) { @@ -3762,24 +3782,6 @@ ice_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) } /** - * ice_get_max_txq - return the maximum number of Tx queues for in a PF - * @pf: PF structure - */ -static int ice_get_max_txq(struct ice_pf *pf) -{ - return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_txq); -} - -/** - * ice_get_max_rxq - return the maximum number of Rx queues for in a PF - * @pf: PF structure - */ -static int ice_get_max_rxq(struct ice_pf *pf) -{ - return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_rxq); -} - -/** * ice_get_combined_cnt - return the current number of combined channels * @vsi: PF VSI pointer * @@ -4505,7 +4507,7 @@ ice_get_module_eeprom(struct net_device *netdev, u8 addr = ICE_I2C_EEPROM_DEV_ADDR; struct ice_hw *hw = &pf->hw; bool is_sfp = false; - unsigned int i, j; + unsigned int i; u16 offset = 0; u8 page = 0; int status; @@ -4547,26 +4549,19 @@ ice_get_module_eeprom(struct net_device *netdev, if (page == 0 || !(data[0x2] & 0x4)) { u32 copy_len; - /* If i2c bus is busy due to slow page change or - * link management access, call can fail. This is normal. - * So we retry this a few times. 
- */ - for (j = 0; j < 4; j++) { - status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, - !is_sfp, value, - SFF_READ_BLOCK_SIZE, - 0, NULL); - netdev_dbg(netdev, "SFF %02X %02X %02X %X = %02X%02X%02X%02X.%02X%02X%02X%02X (%X)\n", - addr, offset, page, is_sfp, - value[0], value[1], value[2], value[3], - value[4], value[5], value[6], value[7], - status); - if (status) { - usleep_range(1500, 2500); - memset(value, 0, SFF_READ_BLOCK_SIZE); - continue; - } - break; + status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, + !is_sfp, value, + SFF_READ_BLOCK_SIZE, + 0, NULL); + netdev_dbg(netdev, "SFF %02X %02X %02X %X = %02X%02X%02X%02X.%02X%02X%02X%02X (%pe)\n", + addr, offset, page, is_sfp, + value[0], value[1], value[2], value[3], + value[4], value[5], value[6], value[7], + ERR_PTR(status)); + if (status) { + netdev_err(netdev, "%s: error reading module EEPROM: status %pe\n", + __func__, ERR_PTR(status)); + return status; } /* Make sure we have enough room for the new block */
diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index 3572b0e..102d63c 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c
@@ -361,6 +361,39 @@ void ice_unplug_aux_dev(struct ice_pf *pf) } /** + * ice_rdma_finalize_setup - Complete RDMA setup after VSI is ready + * @pf: ptr to ice_pf + * + * Sets VSI-dependent information and plugs aux device. + * Must be called after ice_init_rdma(), ice_vsi_rebuild(), and + * ice_dcb_rebuild() complete. + */ +void ice_rdma_finalize_setup(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + struct iidc_rdma_priv_dev_info *privd; + int ret; + + if (!ice_is_rdma_ena(pf) || !pf->cdev_info) + return; + + privd = pf->cdev_info->iidc_priv; + if (!privd || !pf->vsi || !pf->vsi[0] || !pf->vsi[0]->netdev) + return; + + /* Assign VSI info now that VSI is valid */ + privd->netdev = pf->vsi[0]->netdev; + privd->vport_id = pf->vsi[0]->vsi_num; + + /* Update QoS info after DCB has been rebuilt */ + ice_setup_dcb_qos_info(pf, &privd->qos_info); + + ret = ice_plug_aux_dev(pf); + if (ret) + dev_warn(dev, "Failed to plug RDMA aux device: %d\n", ret); +} + +/** * ice_init_rdma - initializes PF for RDMA use * @pf: ptr to ice_pf */ @@ -398,22 +431,14 @@ int ice_init_rdma(struct ice_pf *pf) } cdev->iidc_priv = privd; - privd->netdev = pf->vsi[0]->netdev; privd->hw_addr = (u8 __iomem *)pf->hw.hw_addr; cdev->pdev = pf->pdev; - privd->vport_id = pf->vsi[0]->vsi_num; pf->cdev_info->rdma_protocol |= IIDC_RDMA_PROTOCOL_ROCEV2; - ice_setup_dcb_qos_info(pf, &privd->qos_info); - ret = ice_plug_aux_dev(pf); - if (ret) - goto err_plug_aux_dev; + return 0; -err_plug_aux_dev: - pf->cdev_info->adev = NULL; - xa_erase(&ice_aux_id, pf->aux_idx); err_alloc_xa: kfree(privd); err_privd_alloc: @@ -432,7 +457,6 @@ void ice_deinit_rdma(struct ice_pf *pf) if (!ice_is_rdma_ena(pf)) return; - ice_unplug_aux_dev(pf); xa_erase(&ice_aux_id, pf->aux_idx); kfree(pf->cdev_info->iidc_priv); kfree(pf->cdev_info);
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index f91f8b6..689c602 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -107,10 +107,6 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi) if (!vsi->rxq_map) goto err_rxq_map; - /* There is no need to allocate q_vectors for a loopback VSI. */ - if (vsi->type == ICE_VSI_LB) - return 0; - /* allocate memory for q_vector pointers */ vsi->q_vectors = devm_kcalloc(dev, vsi->num_q_vectors, sizeof(*vsi->q_vectors), GFP_KERNEL); @@ -241,6 +237,8 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi) case ICE_VSI_LB: vsi->alloc_txq = 1; vsi->alloc_rxq = 1; + /* A dummy q_vector, no actual IRQ. */ + vsi->num_q_vectors = 1; break; default: dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi_type); @@ -2426,14 +2424,21 @@ static int ice_vsi_cfg_def(struct ice_vsi *vsi) } break; case ICE_VSI_LB: - ret = ice_vsi_alloc_rings(vsi); + ret = ice_vsi_alloc_q_vectors(vsi); if (ret) goto unroll_vsi_init; + ret = ice_vsi_alloc_rings(vsi); + if (ret) + goto unroll_alloc_q_vector; + ret = ice_vsi_alloc_ring_stats(vsi); if (ret) goto unroll_vector_base; + /* Simply map the dummy q_vector to the only rx_ring */ + vsi->rx_rings[0]->q_vector = vsi->q_vectors[0]; + break; default: /* clean up the resources and exit */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ebf48fe..3c36e36 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4699,8 +4699,8 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) struct net_device *netdev; u8 mac_addr[ETH_ALEN]; - netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq, - vsi->alloc_rxq); + netdev = alloc_etherdev_mqs(sizeof(*np), ice_get_max_txq(vsi->back), + ice_get_max_rxq(vsi->back)); if (!netdev) return -ENOMEM; @@ -5138,6 +5138,9 @@ int ice_load(struct ice_pf *pf) if (err) goto err_init_rdma; + /* Finalize RDMA: VSI already created, assign info and plug device */ + ice_rdma_finalize_setup(pf); + ice_service_task_restart(pf); clear_bit(ICE_DOWN, pf->state); @@ -5169,6 +5172,7 @@ void ice_unload(struct ice_pf *pf) devl_assert_locked(priv_to_devlink(pf)); + ice_unplug_aux_dev(pf); ice_deinit_rdma(pf); ice_deinit_features(pf); ice_tc_indir_block_unregister(vsi); @@ -5595,6 +5599,7 @@ static int ice_suspend(struct device *dev) */ disabled = ice_service_task_stop(pf); + ice_unplug_aux_dev(pf); ice_deinit_rdma(pf); /* Already suspended?, then there is nothing to do */ @@ -7859,7 +7864,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) ice_health_clear(pf); - ice_plug_aux_dev(pf); + ice_rdma_finalize_setup(pf); if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) ice_lag_rebuild(pf);
diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index 90f9944..096566c 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c
@@ -2,6 +2,7 @@ /* Copyright (C) 2019-2021, Intel Corporation. */ #include "ice.h" +#include "ice_lib.h" #include "ice_eswitch.h" #include "devlink/devlink.h" #include "devlink/port.h" @@ -67,7 +68,7 @@ ice_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) return; vsi = repr->src_vsi; - ice_update_vsi_stats(vsi); + ice_update_eth_stats(vsi); eth_stats = &vsi->eth_stats; stats->tx_packets = eth_stats->tx_unicast + eth_stats->tx_broadcast + @@ -315,7 +316,7 @@ ice_repr_reg_netdev(struct net_device *netdev, const struct net_device_ops *ops) static int ice_repr_ready_vf(struct ice_repr *repr) { - return !ice_check_vf_ready_for_cfg(repr->vf); + return ice_check_vf_ready_for_cfg(repr->vf); } static int ice_repr_ready_sf(struct ice_repr *repr)
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index a5bbce6..a2cd4cf 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -560,7 +560,9 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) i = 0; } - if (rx_ring->vsi->type == ICE_VSI_PF && + if ((rx_ring->vsi->type == ICE_VSI_PF || + rx_ring->vsi->type == ICE_VSI_SF || + rx_ring->vsi->type == ICE_VSI_LB) && xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) { xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq); xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index c673094..0643017 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -899,6 +899,9 @@ void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) u16 ntc = rx_ring->next_to_clean; u16 ntu = rx_ring->next_to_use; + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + while (ntc != ntu) { struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc);
diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index b206fba..ec1b75f 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -1066,7 +1066,7 @@ bool idpf_vport_set_hsplit(const struct idpf_vport *vport, u8 val); int idpf_idc_init(struct idpf_adapter *adapter); int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter, enum iidc_function_type ftype); -void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info); +void idpf_idc_deinit_core_aux_device(struct idpf_adapter *adapter); void idpf_idc_deinit_vport_aux_device(struct iidc_rdma_vport_dev_info *vdev_info); void idpf_idc_issue_reset_event(struct iidc_rdma_core_dev_info *cdev_info); void idpf_idc_vdev_mtu_event(struct iidc_rdma_vport_dev_info *vdev_info,
diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c index 40e08a7..bb99d9e 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
@@ -307,9 +307,6 @@ static int idpf_del_flow_steer(struct net_device *netdev, vport_config = vport->adapter->vport_config[np->vport_idx]; user_config = &vport_config->user_config; - if (!idpf_sideband_action_ena(vport, fsp)) - return -EOPNOTSUPP; - rule = kzalloc_flex(*rule, rule_info, 1); if (!rule) return -ENOMEM;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c index bd4785f..7e4f4ac 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_idc.c +++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c
@@ -470,10 +470,11 @@ int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter, /** * idpf_idc_deinit_core_aux_device - de-initialize Auxiliary Device(s) - * @cdev_info: IDC core device info pointer + * @adapter: driver private data structure */ -void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info) +void idpf_idc_deinit_core_aux_device(struct idpf_adapter *adapter) { + struct iidc_rdma_core_dev_info *cdev_info = adapter->cdev_info; struct iidc_rdma_priv_dev_info *privd; if (!cdev_info) @@ -485,6 +486,7 @@ void idpf_idc_deinit_core_aux_device(struct iidc_rdma_core_dev_info *cdev_info) kfree(privd->mapped_mem_regions); kfree(privd); kfree(cdev_info); + adapter->cdev_info = NULL; } /**
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index de3df70..cf966fe 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -1318,6 +1318,7 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, free_rss_key: kfree(rss_data->rss_key); + rss_data->rss_key = NULL; free_qreg_chunks: idpf_vport_deinit_queue_reg_chunks(adapter->vport_config[idx]); free_vector_idxs:
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 05a1620..f6b3b15 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -1314,6 +1314,9 @@ static void idpf_txq_group_rel(struct idpf_q_vec_rsrc *rsrc) struct idpf_txq_group *txq_grp = &rsrc->txq_grps[i]; for (unsigned int j = 0; j < txq_grp->num_txq; j++) { + if (!txq_grp->txqs[j]) + continue; + if (idpf_queue_has(FLOW_SCH_EN, txq_grp->txqs[j])) { kfree(txq_grp->txqs[j]->refillq); txq_grp->txqs[j]->refillq = NULL; @@ -1339,6 +1342,9 @@ static void idpf_txq_group_rel(struct idpf_q_vec_rsrc *rsrc) */ static void idpf_rxq_sw_queue_rel(struct idpf_rxq_group *rx_qgrp) { + if (!rx_qgrp->splitq.bufq_sets) + return; + for (unsigned int i = 0; i < rx_qgrp->splitq.num_bufq_sets; i++) { struct idpf_bufq_set *bufq_set = &rx_qgrp->splitq.bufq_sets[i]; @@ -1854,13 +1860,13 @@ static int idpf_rxq_group_alloc(struct idpf_vport *vport, idpf_queue_assign(HSPLIT_EN, q, hs); idpf_queue_assign(RSC_EN, q, rsc); - bufq_set->num_refillqs = num_rxq; bufq_set->refillqs = kcalloc(num_rxq, swq_size, GFP_KERNEL); if (!bufq_set->refillqs) { err = -ENOMEM; goto err_alloc; } + bufq_set->num_refillqs = num_rxq; for (unsigned int k = 0; k < bufq_set->num_refillqs; k++) { struct idpf_sw_queue *refillq = &bufq_set->refillqs[k]; @@ -2336,7 +2342,7 @@ void idpf_wait_for_sw_marker_completion(const struct idpf_tx_queue *txq) do { struct idpf_splitq_4b_tx_compl_desc *tx_desc; - struct idpf_tx_queue *target; + struct idpf_tx_queue *target = NULL; u32 ctype_gen, id; tx_desc = flow ? &complq->comp[ntc].common : @@ -2356,14 +2362,14 @@ void idpf_wait_for_sw_marker_completion(const struct idpf_tx_queue *txq) target = complq->txq_grp->txqs[id]; idpf_queue_clear(SW_MARKER, target); - if (target == txq) - break; next: if (unlikely(++ntc == complq->desc_count)) { ntc = 0; gen_flag = !gen_flag; } + if (target == txq) + break; } while (time_before(jiffies, timeout)); idpf_queue_assign(GEN_CHK, complq, gen_flag); @@ -4059,7 +4065,7 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport, continue; name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, if_name, - vec_name, vidx); + vec_name, vector); err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0, name, q_vector);
diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index d5a877e1..113ecfc 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
@@ -3668,7 +3668,7 @@ void idpf_vc_core_deinit(struct idpf_adapter *adapter) idpf_ptp_release(adapter); idpf_deinit_task(adapter); - idpf_idc_deinit_core_aux_device(adapter->cdev_info); + idpf_idc_deinit_core_aux_device(adapter); idpf_rel_rx_pt_lkup(adapter); idpf_intr_rel(adapter);
diff --git a/drivers/net/ethernet/intel/idpf/xdp.c b/drivers/net/ethernet/intel/idpf/xdp.c index 6ac9c66..cbccd45 100644 --- a/drivers/net/ethernet/intel/idpf/xdp.c +++ b/drivers/net/ethernet/intel/idpf/xdp.c
@@ -47,12 +47,16 @@ static int __idpf_xdp_rxq_info_init(struct idpf_rx_queue *rxq, void *arg) { const struct idpf_vport *vport = rxq->q_vector->vport; const struct idpf_q_vec_rsrc *rsrc; + u32 frag_size = 0; bool split; int err; + if (idpf_queue_has(XSK, rxq)) + frag_size = rxq->bufq_sets[0].bufq.truesize; + err = __xdp_rxq_info_reg(&rxq->xdp_rxq, vport->netdev, rxq->idx, rxq->q_vector->napi.napi_id, - rxq->rx_buf_size); + frag_size); if (err) return err;
diff --git a/drivers/net/ethernet/intel/idpf/xsk.c b/drivers/net/ethernet/intel/idpf/xsk.c index 676cbd8..d95d3ef 100644 --- a/drivers/net/ethernet/intel/idpf/xsk.c +++ b/drivers/net/ethernet/intel/idpf/xsk.c
@@ -403,6 +403,7 @@ int idpf_xskfq_init(struct idpf_buf_queue *bufq) bufq->pending = fq.pending; bufq->thresh = fq.thresh; bufq->rx_buf_size = fq.buf_len; + bufq->truesize = fq.truesize; if (!idpf_xskfq_refill(bufq)) netdev_err(bufq->pool->netdev,
diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c b/drivers/net/ethernet/intel/igb/igb_xsk.c index 30ce5fb..ce4a7b5 100644 --- a/drivers/net/ethernet/intel/igb/igb_xsk.c +++ b/drivers/net/ethernet/intel/igb/igb_xsk.c
@@ -524,6 +524,16 @@ bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool) return nb_pkts < budget; } +static u32 igb_sw_irq_prep(struct igb_q_vector *q_vector) +{ + u32 eics = 0; + + if (!napi_if_scheduled_mark_missed(&q_vector->napi)) + eics = q_vector->eims_value; + + return eics; +} + int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) { struct igb_adapter *adapter = netdev_priv(dev); @@ -542,20 +552,32 @@ int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) ring = adapter->tx_ring[qid]; - if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags)) - return -ENETDOWN; - if (!READ_ONCE(ring->xsk_pool)) return -EINVAL; - if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) { + if (flags & XDP_WAKEUP_TX) { + if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags)) + return -ENETDOWN; + + eics |= igb_sw_irq_prep(ring->q_vector); + } + + if (flags & XDP_WAKEUP_RX) { + /* If IGB_FLAG_QUEUE_PAIRS is active, the q_vector + * and NAPI is shared between RX and TX. + * If NAPI is already running it would be marked as missed + * from the TX path, making this RX call a NOP + */ + ring = adapter->rx_ring[qid]; + eics |= igb_sw_irq_prep(ring->q_vector); + } + + if (eics) { /* Cause software interrupt */ - if (adapter->flags & IGB_FLAG_HAS_MSIX) { - eics |= ring->q_vector->eims_value; + if (adapter->flags & IGB_FLAG_HAS_MSIX) wr32(E1000_EICS, eics); - } else { + else wr32(E1000_ICS, E1000_ICS_RXDMT0); - } } return 0;
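igb_xsk_wakeup() now honors XDP_WAKEUP_TX and XDP_WAKEUP_RX independently, collecting each queue's interrupt-cause bit through igb_sw_irq_prep() and issuing one EICS write for whatever was requested; before this, every wakeup targeted only the Tx ring's vector. (The igc hunk further down applies the same shape.) Accumulate-then-kick in miniature, with the register write simulated:

#include <stdio.h>

#define WAKEUP_TX 0x1	/* stand-ins for XDP_WAKEUP_TX/RX */
#define WAKEUP_RX 0x2

/* Return the vector's cause bit; the driver instead returns 0 when
 * NAPI is already scheduled, so no interrupt is wasted. */
static unsigned int sw_irq_prep(unsigned int vector)
{
	return 1u << vector;
}

int main(void)
{
	unsigned int flags = WAKEUP_TX | WAKEUP_RX;
	unsigned int eics = 0;

	if (flags & WAKEUP_TX)
		eics |= sw_irq_prep(0);	/* Tx queue's vector */
	if (flags & WAKEUP_RX)
		eics |= sw_irq_prep(1);	/* Rx queue's vector */

	if (eics)
		printf("single EICS write: 0x%x\n", eics);
	return 0;
}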
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index a427f05..1723681 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -781,6 +781,8 @@ int igc_ptp_hwtstamp_set(struct net_device *netdev, struct kernel_hwtstamp_config *config, struct netlink_ext_ack *extack); void igc_ptp_tx_hang(struct igc_adapter *adapter); +void igc_ptp_clear_xsk_tx_tstamp_queue(struct igc_adapter *adapter, + u16 queue_id); void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts); void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 27e5c21..72bc5128 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -264,6 +264,13 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) /* reset next_to_use and next_to_clean */ tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; + + /* Clear any lingering XSK TX timestamp requests */ + if (test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags)) { + struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); + + igc_ptp_clear_xsk_tx_tstamp_queue(adapter, tx_ring->queue_index); + } } /** @@ -1730,11 +1737,8 @@ static netdev_tx_t igc_xmit_frame(struct sk_buff *skb, /* The minimum packet size with TCTL.PSP set is 17 so pad the skb * in order to meet this minimum size requirement. */ - if (skb->len < 17) { - if (skb_padto(skb, 17)) - return NETDEV_TX_OK; - skb->len = 17; - } + if (skb_put_padto(skb, 17)) + return NETDEV_TX_OK; return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb)); } @@ -6906,28 +6910,29 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, return nxmit; } -static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, - struct igc_q_vector *q_vector) +static u32 igc_sw_irq_prep(struct igc_q_vector *q_vector) { - struct igc_hw *hw = &adapter->hw; u32 eics = 0; - eics |= q_vector->eims_value; - wr32(IGC_EICS, eics); + if (!napi_if_scheduled_mark_missed(&q_vector->napi)) + eics = q_vector->eims_value; + + return eics; } int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) { struct igc_adapter *adapter = netdev_priv(dev); - struct igc_q_vector *q_vector; + struct igc_hw *hw = &adapter->hw; struct igc_ring *ring; + u32 eics = 0; if (test_bit(__IGC_DOWN, &adapter->state)) return -ENETDOWN; if (!igc_xdp_is_enabled(adapter)) return -ENXIO; - + /* Check if queue_id is valid. Tx and Rx queue numbers are always same */ if (queue_id >= adapter->num_rx_queues) return -EINVAL; @@ -6936,9 +6941,22 @@ int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) if (!ring->xsk_pool) return -ENXIO; - q_vector = adapter->q_vector[queue_id]; - if (!napi_if_scheduled_mark_missed(&q_vector->napi)) - igc_trigger_rxtxq_interrupt(adapter, q_vector); + if (flags & XDP_WAKEUP_RX) + eics |= igc_sw_irq_prep(ring->q_vector); + + if (flags & XDP_WAKEUP_TX) { + /* If IGC_FLAG_QUEUE_PAIRS is active, the q_vector + * and NAPI is shared between RX and TX. + * If NAPI is already running it would be marked as missed + * from the RX path, making this TX call a NOP + */ + ring = adapter->tx_ring[queue_id]; + eics |= igc_sw_irq_prep(ring->q_vector); + } + + if (eics) + /* Cause software interrupt */ + wr32(IGC_EICS, eics); return 0; }
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 7aae83c..3d6b226 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -550,7 +550,8 @@ static void igc_ptp_free_tx_buffer(struct igc_adapter *adapter, tstamp->buffer_type = 0; /* Trigger txrx interrupt for transmit completion */ - igc_xsk_wakeup(adapter->netdev, tstamp->xsk_queue_index, 0); + igc_xsk_wakeup(adapter->netdev, tstamp->xsk_queue_index, + XDP_WAKEUP_TX); return; } @@ -576,6 +577,39 @@ static void igc_ptp_clear_tx_tstamp(struct igc_adapter *adapter) spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } +/** + * igc_ptp_clear_xsk_tx_tstamp_queue - Clear pending XSK TX timestamps for a queue + * @adapter: Board private structure + * @queue_id: TX queue index to clear timestamps for + * + * Iterates over all TX timestamp registers and releases any pending + * timestamp requests associated with the given TX queue. This is + * called when an XDP pool is being disabled to ensure no stale + * timestamp references remain. + */ +void igc_ptp_clear_xsk_tx_tstamp_queue(struct igc_adapter *adapter, u16 queue_id) +{ + unsigned long flags; + int i; + + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + + for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { + struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; + + if (tstamp->buffer_type != IGC_TX_BUFFER_TYPE_XSK) + continue; + if (tstamp->xsk_queue_index != queue_id) + continue; + if (!tstamp->xsk_tx_buffer) + continue; + + igc_ptp_free_tx_buffer(adapter, tstamp); + } + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); +} + static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw;
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 74d3208..b67b580 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -852,7 +852,8 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, if (!mac->get_link_status) goto out; - if (hw->mac.type == ixgbe_mac_e610_vf) { + if (hw->mac.type == ixgbe_mac_e610_vf && + hw->api_version >= ixgbe_mbox_api_16) { ret_val = ixgbevf_get_pf_link_state(hw, speed, link_up); if (ret_val) goto out;
diff --git a/drivers/net/ethernet/intel/libeth/xsk.c b/drivers/net/ethernet/intel/libeth/xsk.c index 846e902..4882951 100644 --- a/drivers/net/ethernet/intel/libeth/xsk.c +++ b/drivers/net/ethernet/intel/libeth/xsk.c
@@ -167,6 +167,7 @@ int libeth_xskfq_create(struct libeth_xskfq *fq) fq->pending = fq->count; fq->thresh = libeth_xdp_queue_threshold(fq->count); fq->buf_len = xsk_pool_get_rx_frame_size(fq->pool); + fq->truesize = xsk_pool_get_rx_frag_step(fq->pool); return 0; }
diff --git a/drivers/net/ethernet/intel/libie/fwlog.c b/drivers/net/ethernet/intel/libie/fwlog.c index 79020d9..96bba57 100644 --- a/drivers/net/ethernet/intel/libie/fwlog.c +++ b/drivers/net/ethernet/intel/libie/fwlog.c
@@ -433,17 +433,21 @@ libie_debugfs_module_write(struct file *filp, const char __user *buf, module = libie_find_module_by_dentry(fwlog->debugfs_modules, dentry); if (module < 0) { dev_info(dev, "unknown module\n"); - return -EINVAL; + count = -EINVAL; + goto free_cmd_buf; } cnt = sscanf(cmd_buf, "%s", user_val); - if (cnt != 1) - return -EINVAL; + if (cnt != 1) { + count = -EINVAL; + goto free_cmd_buf; + } log_level = sysfs_match_string(libie_fwlog_level_string, user_val); if (log_level < 0) { dev_info(dev, "unknown log level '%s'\n", user_val); - return -EINVAL; + count = -EINVAL; + goto free_cmd_buf; } if (module != LIBIE_AQC_FW_LOG_ID_MAX) { @@ -458,6 +462,9 @@ libie_debugfs_module_write(struct file *filp, const char __user *buf, fwlog->cfg.module_entries[i].log_level = log_level; } +free_cmd_buf: + kfree(cmd_buf); + return count; } @@ -515,23 +522,31 @@ libie_debugfs_nr_messages_write(struct file *filp, const char __user *buf, return PTR_ERR(cmd_buf); ret = sscanf(cmd_buf, "%s", user_val); - if (ret != 1) - return -EINVAL; + if (ret != 1) { + count = -EINVAL; + goto free_cmd_buf; + } ret = kstrtos16(user_val, 0, &nr_messages); - if (ret) - return ret; + if (ret) { + count = ret; + goto free_cmd_buf; + } if (nr_messages < LIBIE_AQC_FW_LOG_MIN_RESOLUTION || nr_messages > LIBIE_AQC_FW_LOG_MAX_RESOLUTION) { dev_err(dev, "Invalid FW log number of messages %d, value must be between %d - %d\n", nr_messages, LIBIE_AQC_FW_LOG_MIN_RESOLUTION, LIBIE_AQC_FW_LOG_MAX_RESOLUTION); - return -EINVAL; + count = -EINVAL; + goto free_cmd_buf; } fwlog->cfg.log_resolution = nr_messages; +free_cmd_buf: + kfree(cmd_buf); + return count; } @@ -588,8 +603,10 @@ libie_debugfs_enable_write(struct file *filp, const char __user *buf, return PTR_ERR(cmd_buf); ret = sscanf(cmd_buf, "%s", user_val); - if (ret != 1) - return -EINVAL; + if (ret != 1) { + ret = -EINVAL; + goto free_cmd_buf; + } ret = kstrtobool(user_val, &enable); if (ret) @@ -624,6 +641,8 @@ libie_debugfs_enable_write(struct file *filp, const char __user *buf, */ if (WARN_ON(ret != (ssize_t)count && ret >= 0)) ret = -EIO; +free_cmd_buf: + kfree(cmd_buf); return ret; } @@ -682,8 +701,10 @@ libie_debugfs_log_size_write(struct file *filp, const char __user *buf, return PTR_ERR(cmd_buf); ret = sscanf(cmd_buf, "%s", user_val); - if (ret != 1) - return -EINVAL; + if (ret != 1) { + ret = -EINVAL; + goto free_cmd_buf; + } index = sysfs_match_string(libie_fwlog_log_size, user_val); if (index < 0) { @@ -712,6 +733,8 @@ libie_debugfs_log_size_write(struct file *filp, const char __user *buf, */ if (WARN_ON(ret != (ssize_t)count && ret >= 0)) ret = -EIO; +free_cmd_buf: + kfree(cmd_buf); return ret; } @@ -1049,6 +1072,10 @@ void libie_fwlog_deinit(struct libie_fwlog *fwlog) { int status; + /* if FW logging isn't supported it means no configuration was done */ + if (!libie_fwlog_supported(fwlog)) + return; + /* make sure FW logging is disabled to not put the FW in a weird state * for the next driver load */
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index d1b8650c..f442b87 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -5016,7 +5016,7 @@ static int mvpp2_bm_switch_buffers(struct mvpp2 *priv, bool percpu) if (priv->percpu_pools) numbufs = port->nrxqs * 2; - if (change_percpu) + if (change_percpu && priv->global_tx_fc) mvpp2_bm_pool_update_priv_fc(priv, false); for (i = 0; i < numbufs; i++) @@ -5041,7 +5041,7 @@ static int mvpp2_bm_switch_buffers(struct mvpp2 *priv, bool percpu) mvpp2_open(port->dev); } - if (change_percpu) + if (change_percpu && priv->global_tx_fc) mvpp2_bm_pool_update_priv_fc(priv, true); return 0;
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index ec55eb2..028dd55 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -554,6 +554,36 @@ static void octep_clean_irqs(struct octep_device *oct) } /** + * octep_update_pkt() - Update IQ/OQ IN/OUT_CNT registers. + * + * @iq: Octeon Tx queue data structure. + * @oq: Octeon Rx queue data structure. + */ +static void octep_update_pkt(struct octep_iq *iq, struct octep_oq *oq) +{ + u32 pkts_pend = READ_ONCE(oq->pkts_pending); + u32 last_pkt_count = READ_ONCE(oq->last_pkt_count); + u32 pkts_processed = READ_ONCE(iq->pkts_processed); + u32 pkt_in_done = READ_ONCE(iq->pkt_in_done); + + netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); + if (pkts_processed) { + writel(pkts_processed, iq->inst_cnt_reg); + readl(iq->inst_cnt_reg); + WRITE_ONCE(iq->pkt_in_done, (pkt_in_done - pkts_processed)); + WRITE_ONCE(iq->pkts_processed, 0); + } + if (last_pkt_count - pkts_pend) { + writel(last_pkt_count - pkts_pend, oq->pkts_sent_reg); + readl(oq->pkts_sent_reg); + WRITE_ONCE(oq->last_pkt_count, pkts_pend); + } + + /* Flush the previous writes before writing to RESEND bit */ + smp_wmb(); +} + +/** * octep_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue. * * @iq: Octeon Tx queue data structure. @@ -561,21 +591,6 @@ static void octep_clean_irqs(struct octep_device *oct) */ static void octep_enable_ioq_irq(struct octep_iq *iq, struct octep_oq *oq) { - u32 pkts_pend = oq->pkts_pending; - - netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); - if (iq->pkts_processed) { - writel(iq->pkts_processed, iq->inst_cnt_reg); - iq->pkt_in_done -= iq->pkts_processed; - iq->pkts_processed = 0; - } - if (oq->last_pkt_count - pkts_pend) { - writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg); - oq->last_pkt_count = pkts_pend; - } - - /* Flush the previous wrties before writing to RESEND bit */ - wmb(); writeq(1UL << OCTEP_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg); writeq(1UL << OCTEP_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg); } @@ -601,7 +616,8 @@ static int octep_napi_poll(struct napi_struct *napi, int budget) if (tx_pending || rx_done >= budget) return budget; - napi_complete(napi); + octep_update_pkt(ioq_vector->iq, ioq_vector->oq); + napi_complete_done(napi, rx_done); octep_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq); return rx_done; }
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c index f2a7c6a..74de191 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
@@ -324,10 +324,16 @@ static int octep_oq_check_hw_for_pkts(struct octep_device *oct, struct octep_oq *oq) { u32 pkt_count, new_pkts; + u32 last_pkt_count, pkts_pending; pkt_count = readl(oq->pkts_sent_reg); - new_pkts = pkt_count - oq->last_pkt_count; + last_pkt_count = READ_ONCE(oq->last_pkt_count); + new_pkts = pkt_count - last_pkt_count; + if (pkt_count < last_pkt_count) { + dev_err(oq->dev, "OQ-%u pkt_count(%u) < oq->last_pkt_count(%u)\n", + oq->q_no, pkt_count, last_pkt_count); + } /* Clear the hardware packets counter register if the rx queue is * being processed continuously with-in a single interrupt and * reached half its max value. @@ -338,8 +344,9 @@ static int octep_oq_check_hw_for_pkts(struct octep_device *oct, pkt_count = readl(oq->pkts_sent_reg); new_pkts += pkt_count; } - oq->last_pkt_count = pkt_count; - oq->pkts_pending += new_pkts; + WRITE_ONCE(oq->last_pkt_count, pkt_count); + pkts_pending = READ_ONCE(oq->pkts_pending); + WRITE_ONCE(oq->pkts_pending, (pkts_pending + new_pkts)); return new_pkts; } @@ -414,7 +421,7 @@ static int __octep_oq_process_rx(struct octep_device *oct, u16 rx_ol_flags; u32 read_idx; - read_idx = oq->host_read_idx; + read_idx = READ_ONCE(oq->host_read_idx); rx_bytes = 0; desc_used = 0; for (pkt = 0; pkt < pkts_to_process; pkt++) { @@ -499,7 +506,7 @@ static int __octep_oq_process_rx(struct octep_device *oct, napi_gro_receive(oq->napi, skb); } - oq->host_read_idx = read_idx; + WRITE_ONCE(oq->host_read_idx, read_idx); oq->refill_count += desc_used; oq->stats->packets += pkt; oq->stats->bytes += rx_bytes; @@ -522,22 +529,26 @@ int octep_oq_process_rx(struct octep_oq *oq, int budget) { u32 pkts_available, pkts_processed, total_pkts_processed; struct octep_device *oct = oq->octep_dev; + u32 pkts_pending; pkts_available = 0; pkts_processed = 0; total_pkts_processed = 0; while (total_pkts_processed < budget) { /* update pending count only when current one exhausted */ - if (oq->pkts_pending == 0) + pkts_pending = READ_ONCE(oq->pkts_pending); + if (pkts_pending == 0) octep_oq_check_hw_for_pkts(oct, oq); + pkts_pending = READ_ONCE(oq->pkts_pending); pkts_available = min(budget - total_pkts_processed, - oq->pkts_pending); + pkts_pending); if (!pkts_available) break; pkts_processed = __octep_oq_process_rx(oct, oq, pkts_available); - oq->pkts_pending -= pkts_processed; + pkts_pending = READ_ONCE(oq->pkts_pending); + WRITE_ONCE(oq->pkts_pending, (pkts_pending - pkts_processed)); total_pkts_processed += pkts_processed; }
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c index 562fe94..7f1b93cf 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c
@@ -286,28 +286,45 @@ static void octep_vf_clean_irqs(struct octep_vf_device *oct) } /** + * octep_vf_update_pkt() - Update IQ/OQ IN/OUT_CNT registers. + * + * @iq: Octeon Tx queue data structure. + * @oq: Octeon Rx queue data structure. + */ + +static void octep_vf_update_pkt(struct octep_vf_iq *iq, struct octep_vf_oq *oq) +{ + u32 pkts_pend = READ_ONCE(oq->pkts_pending); + u32 last_pkt_count = READ_ONCE(oq->last_pkt_count); + u32 pkts_processed = READ_ONCE(iq->pkts_processed); + u32 pkt_in_done = READ_ONCE(iq->pkt_in_done); + + netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); + if (pkts_processed) { + writel(pkts_processed, iq->inst_cnt_reg); + readl(iq->inst_cnt_reg); + WRITE_ONCE(iq->pkt_in_done, (pkt_in_done - pkts_processed)); + WRITE_ONCE(iq->pkts_processed, 0); + } + if (last_pkt_count - pkts_pend) { + writel(last_pkt_count - pkts_pend, oq->pkts_sent_reg); + readl(oq->pkts_sent_reg); + WRITE_ONCE(oq->last_pkt_count, pkts_pend); + } + + /* Flush the previous writes before writing to RESEND bit */ + smp_wmb(); +} + +/** * octep_vf_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue. * * @iq: Octeon Tx queue data structure. * @oq: Octeon Rx queue data structure. */ -static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq, struct octep_vf_oq *oq) +static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq, + struct octep_vf_oq *oq) { - u32 pkts_pend = oq->pkts_pending; - - netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); - if (iq->pkts_processed) { - writel(iq->pkts_processed, iq->inst_cnt_reg); - iq->pkt_in_done -= iq->pkts_processed; - iq->pkts_processed = 0; - } - if (oq->last_pkt_count - pkts_pend) { - writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg); - oq->last_pkt_count = pkts_pend; - } - - /* Flush the previous wrties before writing to RESEND bit */ - smp_wmb(); writeq(1UL << OCTEP_VF_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg); writeq(1UL << OCTEP_VF_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg); } @@ -333,6 +350,7 @@ static int octep_vf_napi_poll(struct napi_struct *napi, int budget) if (tx_pending || rx_done >= budget) return budget; + octep_vf_update_pkt(ioq_vector->iq, ioq_vector->oq); if (likely(napi_complete_done(napi, rx_done))) octep_vf_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq);
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c index 6f865db..b579d5b 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c
@@ -325,9 +325,16 @@ static int octep_vf_oq_check_hw_for_pkts(struct octep_vf_device *oct, struct octep_vf_oq *oq) { u32 pkt_count, new_pkts; + u32 last_pkt_count, pkts_pending; pkt_count = readl(oq->pkts_sent_reg); - new_pkts = pkt_count - oq->last_pkt_count; + last_pkt_count = READ_ONCE(oq->last_pkt_count); + new_pkts = pkt_count - last_pkt_count; + + if (pkt_count < last_pkt_count) { + dev_err(oq->dev, "OQ-%u pkt_count(%u) < oq->last_pkt_count(%u)\n", + oq->q_no, pkt_count, last_pkt_count); + } /* Clear the hardware packets counter register if the rx queue is * being processed continuously with-in a single interrupt and @@ -339,8 +346,9 @@ static int octep_vf_oq_check_hw_for_pkts(struct octep_vf_device *oct, pkt_count = readl(oq->pkts_sent_reg); new_pkts += pkt_count; } - oq->last_pkt_count = pkt_count; - oq->pkts_pending += new_pkts; + WRITE_ONCE(oq->last_pkt_count, pkt_count); + pkts_pending = READ_ONCE(oq->pkts_pending); + WRITE_ONCE(oq->pkts_pending, (pkts_pending + new_pkts)); return new_pkts; } @@ -369,7 +377,7 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct, struct sk_buff *skb; u32 read_idx; - read_idx = oq->host_read_idx; + read_idx = READ_ONCE(oq->host_read_idx); rx_bytes = 0; desc_used = 0; for (pkt = 0; pkt < pkts_to_process; pkt++) { @@ -463,7 +471,7 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct, napi_gro_receive(oq->napi, skb); } - oq->host_read_idx = read_idx; + WRITE_ONCE(oq->host_read_idx, read_idx); oq->refill_count += desc_used; oq->stats->packets += pkt; oq->stats->bytes += rx_bytes; @@ -486,22 +494,26 @@ int octep_vf_oq_process_rx(struct octep_vf_oq *oq, int budget) { u32 pkts_available, pkts_processed, total_pkts_processed; struct octep_vf_device *oct = oq->octep_vf_dev; + u32 pkts_pending; pkts_available = 0; pkts_processed = 0; total_pkts_processed = 0; while (total_pkts_processed < budget) { /* update pending count only when current one exhausted */ - if (oq->pkts_pending == 0) + pkts_pending = READ_ONCE(oq->pkts_pending); + if (pkts_pending == 0) octep_vf_oq_check_hw_for_pkts(oct, oq); + pkts_pending = READ_ONCE(oq->pkts_pending); pkts_available = min(budget - total_pkts_processed, - oq->pkts_pending); + pkts_pending); if (!pkts_available) break; pkts_processed = __octep_vf_oq_process_rx(oct, oq, pkts_available); - oq->pkts_pending -= pkts_processed; + pkts_pending = READ_ONCE(oq->pkts_pending); + WRITE_ONCE(oq->pkts_pending, (pkts_pending - pkts_processed)); total_pkts_processed += pkts_processed; }
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index fb15c79..a29f1ea 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
@@ -327,10 +327,10 @@ static int rvu_nix_report_show(struct devlink_fmsg *fmsg, void *ctx, rvu_report_pair_end(fmsg); break; case NIX_AF_RVU_RAS: - intr_val = nix_event_context->nix_af_rvu_err; + intr_val = nix_event_context->nix_af_rvu_ras; rvu_report_pair_start(fmsg, "NIX_AF_RAS"); devlink_fmsg_u64_pair_put(fmsg, "\tNIX RAS Interrupt Reg ", - nix_event_context->nix_af_rvu_err); + nix_event_context->nix_af_rvu_ras); devlink_fmsg_string_put(fmsg, "\n\tPoison Data on:"); if (intr_val & BIT_ULL(34)) devlink_fmsg_string_put(fmsg, "\n\tNIX_AQ_INST_S"); @@ -475,7 +475,7 @@ static int rvu_hw_nix_ras_recover(struct devlink_health_reporter *reporter, if (blkaddr < 0) return blkaddr; - if (nix_event_ctx->nix_af_rvu_int) + if (nix_event_ctx->nix_af_rvu_ras) rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1S, ~0ULL); return 0;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index e5e2ffa..ddc321a 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -3748,12 +3748,21 @@ static int mtk_xdp_setup(struct net_device *dev, struct bpf_prog *prog, mtk_stop(dev); old_prog = rcu_replace_pointer(eth->prog, prog, lockdep_rtnl_is_held()); + + if (netif_running(dev) && need_update) { + int err; + + err = mtk_open(dev); + if (err) { + rcu_assign_pointer(eth->prog, old_prog); + + return err; + } + } + if (old_prog) bpf_prog_put(old_prog); - if (netif_running(dev) && need_update) - return mtk_open(dev); - return 0; }
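The mtk_eth_soc hunk reorders the XDP program swap so the old program's reference is dropped only after the device has been reopened successfully; on failure the old pointer is restored and the error propagated, instead of leaving the device with a program whose reference was already released. A sketch of that swap/rollback/commit shape, with reopen_device() standing in for mtk_open() (illustrative only):

static int xdp_swap_prog(struct mtk_eth *eth, struct net_device *dev,
			 struct bpf_prog *new, bool need_update)
{
	struct bpf_prog *old;
	int err;

	old = rcu_replace_pointer(eth->prog, new, lockdep_rtnl_is_held());

	if (netif_running(dev) && need_update) {
		err = reopen_device(dev);	/* stand-in for mtk_open() */
		if (err) {
			/* Roll back: 'old' is still alive because its
			 * reference has not been dropped yet.
			 */
			rcu_assign_pointer(eth->prog, old);
			return err;
		}
	}

	if (old)
		bpf_prog_put(old);	/* commit: release the old prog last */
	return 0;
}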
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index cb30108..cc8c4ef 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -244,6 +244,25 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe, return 0; } +static bool +mtk_flow_is_valid_idev(const struct mtk_eth *eth, const struct net_device *idev) +{ + size_t i; + + if (!idev) + return false; + + for (i = 0; i < ARRAY_SIZE(eth->netdev); i++) { + if (!eth->netdev[i]) + continue; + + if (idev->netdev_ops == eth->netdev[i]->netdev_ops) + return true; + } + + return false; +} + static int mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f, int ppe_index) @@ -270,7 +289,7 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f, flow_rule_match_meta(rule, &match); if (mtk_is_netsys_v2_or_greater(eth)) { idev = __dev_get_by_index(&init_net, match.key->ingress_ifindex); - if (idev && idev->netdev_ops == eth->netdev[0]->netdev_ops) { + if (mtk_flow_is_valid_idev(eth, idev)) { struct mtk_mac *mac = netdev_priv(idev); if (WARN_ON(mac->ppe_idx >= eth->soc->ppe_num))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 6698ac5..73cf032 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -107,9 +107,7 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, if (err) return err; - err = mlx5_fw_version_query(dev, &running_fw, &stored_fw); - if (err) - return err; + mlx5_fw_version_query(dev, &running_fw, &stored_fw); snprintf(version_str, sizeof(version_str), "%d.%d.%04d", mlx5_fw_ver_major(running_fw), mlx5_fw_ver_minor(running_fw),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 60ba840..afdeb1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -47,7 +47,6 @@ static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", sq->sqn, sq->cc, sq->pc); sq->cc = 0; - sq->dma_fifo_cc = 0; sq->pc = 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 415208a..64e1374 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -259,7 +259,6 @@ static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry, static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_accel_esp_xfrm_attrs *attrs) { - struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); struct mlx5e_ipsec_addr *addrs = &attrs->addrs; struct net_device *netdev = sa_entry->dev; struct xfrm_state *x = sa_entry->x; @@ -276,7 +275,7 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, attrs->type != XFRM_DEV_OFFLOAD_PACKET) return; - mlx5_query_mac_address(mdev, addr); + ether_addr_copy(addr, netdev->dev_addr); switch (attrs->dir) { case XFRM_DEV_OFFLOAD_IN: src = attrs->dmac;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index f8eaaf3..abcbd38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -287,6 +287,7 @@ struct mlx5e_ipsec_sa_entry { struct mlx5e_ipsec_dwork *dwork; struct mlx5e_ipsec_limits limits; u32 rx_mapped_id; + u8 ctx[MLX5_ST_SZ_BYTES(ipsec_aso)]; }; struct mlx5_accel_pol_xfrm_attrs {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 197a1c6..329608c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -2912,7 +2912,7 @@ void mlx5e_ipsec_disable_events(struct mlx5e_priv *priv) goto out; peer_priv = mlx5_devcom_get_next_peer_data(priv->devcom, &tmp); - if (peer_priv) + if (peer_priv && peer_priv->ipsec) complete_all(&peer_priv->ipsec->comp); mlx5_devcom_for_each_peer_end(priv->devcom);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 33344e00..05faad5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
@@ -310,10 +310,11 @@ static void mlx5e_ipsec_aso_update(struct mlx5e_ipsec_sa_entry *sa_entry, mlx5e_ipsec_aso_query(sa_entry, data); } -static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry, - u32 mode_param) +static void +mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry, + u32 mode_param, + struct mlx5_accel_esp_xfrm_attrs *attrs) { - struct mlx5_accel_esp_xfrm_attrs attrs = {}; struct mlx5_wqe_aso_ctrl_seg data = {}; if (mode_param < MLX5E_IPSEC_ESN_SCOPE_MID) { @@ -323,18 +324,7 @@ static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry, sa_entry->esn_state.overlap = 1; } - mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs); - - /* It is safe to execute the modify below unlocked since the only flows - * that could affect this HW object, are create, destroy and this work. - * - * Creation flow can't co-exist with this modify work, the destruction - * flow would cancel this work, and this work is a single entity that - * can't conflict with it self. - */ - spin_unlock_bh(&sa_entry->x->lock); - mlx5_accel_esp_modify_xfrm(sa_entry, &attrs); - spin_lock_bh(&sa_entry->x->lock); + mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, attrs); data.data_offset_condition_operand = MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET; @@ -370,20 +360,18 @@ static void mlx5e_ipsec_aso_update_soft(struct mlx5e_ipsec_sa_entry *sa_entry, static void mlx5e_ipsec_handle_limits(struct mlx5e_ipsec_sa_entry *sa_entry) { struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; - struct mlx5e_ipsec *ipsec = sa_entry->ipsec; - struct mlx5e_ipsec_aso *aso = ipsec->aso; bool soft_arm, hard_arm; u64 hard_cnt; lockdep_assert_held(&sa_entry->x->lock); - soft_arm = !MLX5_GET(ipsec_aso, aso->ctx, soft_lft_arm); - hard_arm = !MLX5_GET(ipsec_aso, aso->ctx, hard_lft_arm); + soft_arm = !MLX5_GET(ipsec_aso, sa_entry->ctx, soft_lft_arm); + hard_arm = !MLX5_GET(ipsec_aso, sa_entry->ctx, hard_lft_arm); if (!soft_arm && !hard_arm) /* It is not lifetime event */ return; - hard_cnt = MLX5_GET(ipsec_aso, aso->ctx, remove_flow_pkt_cnt); + hard_cnt = MLX5_GET(ipsec_aso, sa_entry->ctx, remove_flow_pkt_cnt); if (!hard_cnt || hard_arm) { /* It is possible to see packet counter equal to zero without * hard limit event armed. 
Such situation can be if packet @@ -453,11 +441,11 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work) struct mlx5e_ipsec_work *work = container_of(_work, struct mlx5e_ipsec_work, work); struct mlx5e_ipsec_sa_entry *sa_entry = work->data; + struct mlx5_accel_esp_xfrm_attrs tmp = {}; struct mlx5_accel_esp_xfrm_attrs *attrs; - struct mlx5e_ipsec_aso *aso; + bool need_modify = false; int ret; - aso = sa_entry->ipsec->aso; attrs = &sa_entry->attrs; spin_lock_bh(&sa_entry->x->lock); @@ -465,18 +453,22 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work) if (ret) goto unlock; - if (attrs->replay_esn.trigger && - !MLX5_GET(ipsec_aso, aso->ctx, esn_event_arm)) { - u32 mode_param = MLX5_GET(ipsec_aso, aso->ctx, mode_parameter); - - mlx5e_ipsec_update_esn_state(sa_entry, mode_param); - } - if (attrs->lft.soft_packet_limit != XFRM_INF) mlx5e_ipsec_handle_limits(sa_entry); + if (attrs->replay_esn.trigger && + !MLX5_GET(ipsec_aso, sa_entry->ctx, esn_event_arm)) { + u32 mode_param = MLX5_GET(ipsec_aso, sa_entry->ctx, + mode_parameter); + + mlx5e_ipsec_update_esn_state(sa_entry, mode_param, &tmp); + need_modify = true; + } + unlock: spin_unlock_bh(&sa_entry->x->lock); + if (need_modify) + mlx5_accel_esp_modify_xfrm(sa_entry, &tmp); kfree(work); } @@ -629,6 +621,8 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry, /* We are in atomic context */ udelay(10); } while (ret && time_is_after_jiffies(expires)); + if (!ret) + memcpy(sa_entry->ctx, aso->ctx, MLX5_ST_SZ_BYTES(ipsec_aso)); spin_unlock_bh(&aso->lock); return ret; }
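Taken together, the ipsec.h and ipsec_offload.c hunks replace the old pattern of dropping x->lock mid-work to issue a firmware command with a prepare/commit split: the handler snapshots the shared ASO context into the per-SA sa_entry->ctx at query time, builds the new attrs into a stack-local tmp under the lock, and calls the potentially sleeping mlx5_accel_esp_modify_xfrm() only after unlocking. Condensed shape, with the predicate and builder names invented for illustration:

struct mlx5_accel_esp_xfrm_attrs tmp = {};	/* filled under the lock */
bool need_modify = false;

spin_lock_bh(&sa->x->lock);
if (esn_needs_update(sa)) {		/* illustrative predicate */
	build_accel_attrs(sa, &tmp);	/* prepare under the lock */
	need_modify = true;
}
spin_unlock_bh(&sa->x->lock);

/* Commit outside the lock: the firmware command may sleep, and the
 * stack copy guarantees nobody observes a half-built attrs struct.
 */
if (need_modify)
	mlx5_accel_esp_modify_xfrm(sa, &tmp);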
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index efcfcdd..268e208 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1589,6 +1589,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi struct skb_shared_info *sinfo; u32 frag_consumed_bytes; struct bpf_prog *prog; + u8 nr_frags_free = 0; struct sk_buff *skb; dma_addr_t addr; u32 truesize; @@ -1631,15 +1632,13 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi prog = rcu_dereference(rq->xdp_prog); if (prog) { - u8 nr_frags_free, old_nr_frags = sinfo->nr_frags; + u8 old_nr_frags = sinfo->nr_frags; if (mlx5e_xdp_handle(rq, prog, mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { struct mlx5e_wqe_frag_info *pwi; - wi -= old_nr_frags - sinfo->nr_frags; - for (pwi = head_wi; pwi < wi; pwi++) pwi->frag_page->frags++; } @@ -1647,10 +1646,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi } nr_frags_free = old_nr_frags - sinfo->nr_frags; - if (unlikely(nr_frags_free)) { - wi -= nr_frags_free; + if (unlikely(nr_frags_free)) truesize -= nr_frags_free * frag_info->frag_stride; - } } skb = mlx5e_build_linear_skb( @@ -1666,7 +1663,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi if (xdp_buff_has_frags(&mxbuf->xdp)) { /* sinfo->nr_frags is reset by build_skb, calculate again. */ - xdp_update_skb_frags_info(skb, wi - head_wi - 1, + xdp_update_skb_frags_info(skb, wi - head_wi - nr_frags_free - 1, sinfo->xdp_frags_size, truesize, xdp_buff_get_skb_flags(&mxbuf->xdp)); @@ -1957,14 +1954,13 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w if (prog) { u8 nr_frags_free, old_nr_frags = sinfo->nr_frags; + u8 new_nr_frags; u32 len; if (mlx5e_xdp_handle(rq, prog, mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { struct mlx5e_frag_page *pfp; - frag_page -= old_nr_frags - sinfo->nr_frags; - for (pfp = head_page; pfp < frag_page; pfp++) pfp->frags++; @@ -1975,13 +1971,12 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w return NULL; /* page/packet was consumed by XDP */ } - nr_frags_free = old_nr_frags - sinfo->nr_frags; - if (unlikely(nr_frags_free)) { - frag_page -= nr_frags_free; + new_nr_frags = sinfo->nr_frags; + nr_frags_free = old_nr_frags - new_nr_frags; + if (unlikely(nr_frags_free)) truesize -= (nr_frags_free - 1) * PAGE_SIZE + ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); - } len = mxbuf->xdp.data_end - mxbuf->xdp.data; @@ -2003,7 +1998,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w struct mlx5e_frag_page *pagep; /* sinfo->nr_frags is reset by build_skb, calculate again. */ - xdp_update_skb_frags_info(skb, frag_page - head_page, + xdp_update_skb_frags_info(skb, new_nr_frags, sinfo->xdp_frags_size, truesize, xdp_buff_get_skb_flags(&mxbuf->xdp));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 26178d0..faccc60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -1489,24 +1489,24 @@ static int esw_qos_node_enable_tc_arbitration(struct mlx5_esw_sched_node *node, return err; } -static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev) +static u32 mlx5_esw_qos_lag_link_speed_get(struct mlx5_core_dev *mdev, + bool take_rtnl) { struct ethtool_link_ksettings lksettings; struct net_device *slave, *master; u32 speed = SPEED_UNKNOWN; - /* Lock ensures a stable reference to master and slave netdevice - * while port speed of master is queried. - */ - ASSERT_RTNL(); - slave = mlx5_uplink_netdev_get(mdev); if (!slave) goto out; + if (take_rtnl) + rtnl_lock(); master = netdev_master_upper_dev_get(slave); if (master && !__ethtool_get_link_ksettings(master, &lksettings)) speed = lksettings.base.speed; + if (take_rtnl) + rtnl_unlock(); out: mlx5_uplink_netdev_put(mdev, slave); @@ -1514,20 +1514,15 @@ static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev) } static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max, - bool hold_rtnl_lock, struct netlink_ext_ack *extack) + bool take_rtnl, + struct netlink_ext_ack *extack) { int err; if (!mlx5_lag_is_active(mdev)) goto skip_lag; - if (hold_rtnl_lock) - rtnl_lock(); - - *link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev); - - if (hold_rtnl_lock) - rtnl_unlock(); + *link_speed_max = mlx5_esw_qos_lag_link_speed_get(mdev, take_rtnl); if (*link_speed_max != (u32)SPEED_UNKNOWN) return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d3af87a..123c967 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1072,10 +1072,11 @@ static void mlx5_eswitch_event_handler_register(struct mlx5_eswitch *esw) static void mlx5_eswitch_event_handler_unregister(struct mlx5_eswitch *esw) { - if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) + if (esw->mode == MLX5_ESWITCH_OFFLOADS && + mlx5_eswitch_is_funcs_handler(esw->dev)) { mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); - - flush_workqueue(esw->work_queue); + atomic_inc(&esw->esw_funcs.generation); + } } static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 6841cae..c2563be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -335,10 +335,12 @@ struct esw_mc_addr { /* SRIOV only */ struct mlx5_host_work { struct work_struct work; struct mlx5_eswitch *esw; + int work_gen; }; struct mlx5_esw_functions { struct mlx5_nb nb; + atomic_t generation; bool host_funcs_disabled; u16 num_vfs; u16 num_ec_vfs;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 82d4c14..01f6aec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1241,21 +1241,17 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, flows[peer_vport->index] = flow; } - if (mlx5_esw_host_functions_enabled(esw->dev)) { - mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, - mlx5_core_max_vfs(peer_dev)) { - esw_set_peer_miss_rule_source_port(esw, peer_esw, - spec, - peer_vport->vport); - - flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), - spec, &flow_act, &dest, 1); - if (IS_ERR(flow)) { - err = PTR_ERR(flow); - goto add_vf_flow_err; - } - flows[peer_vport->index] = flow; + mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_vfs(peer_dev)) { + esw_set_peer_miss_rule_source_port(esw, peer_esw, spec, + peer_vport->vport); + flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_vf_flow_err; } + flows[peer_vport->index] = flow; } if (mlx5_core_ec_sriov_enabled(peer_dev)) { @@ -1347,7 +1343,8 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, mlx5_del_flow_rules(flows[peer_vport->index]); } - if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { + if (mlx5_core_is_ecpf_esw_manager(peer_dev) && + mlx5_esw_host_functions_enabled(peer_dev)) { peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); mlx5_del_flow_rules(flows[peer_vport->index]); } @@ -3582,22 +3579,28 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) } static void -esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) +esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, int work_gen, + const u32 *out) { struct devlink *devlink; bool host_pf_disabled; u16 new_num_vfs; + devlink = priv_to_devlink(esw->dev); + devl_lock(devlink); + + /* Stale work from one or more mode changes ago. Bail out. */ + if (work_gen != atomic_read(&esw->esw_funcs.generation)) + goto unlock; + new_num_vfs = MLX5_GET(query_esw_functions_out, out, host_params_context.host_num_of_vfs); host_pf_disabled = MLX5_GET(query_esw_functions_out, out, host_params_context.host_pf_disabled); if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled) - return; + goto unlock; - devlink = priv_to_devlink(esw->dev); - devl_lock(devlink); /* Number of VFs can only change from "0 to x" or "x to 0". 
*/ if (esw->esw_funcs.num_vfs > 0) { mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); @@ -3612,6 +3615,7 @@ esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) } } esw->esw_funcs.num_vfs = new_num_vfs; +unlock: devl_unlock(devlink); } @@ -3628,7 +3632,7 @@ static void esw_functions_changed_event_handler(struct work_struct *work) if (IS_ERR(out)) goto out; - esw_vfs_changed_event_handler(esw, out); + esw_vfs_changed_event_handler(esw, host_work->work_gen, out); kvfree(out); out: kfree(host_work); @@ -3648,6 +3652,7 @@ int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs); host_work->esw = esw; + host_work->work_gen = atomic_read(&esw_funcs->generation); INIT_WORK(&host_work->work, esw_functions_changed_event_handler); queue_work(esw->work_queue, &host_work->work); @@ -3756,6 +3761,8 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) return 0; err_vports: + /* rollback to legacy, indicates don't unregister the uplink netdev */ + esw->dev->priv.flags |= MLX5_PRIV_FLAGS_SWITCH_LEGACY; mlx5_esw_offloads_rep_unload(esw, MLX5_VPORT_UPLINK); err_uplink: esw_offloads_steering_cleanup(esw); @@ -4068,6 +4075,8 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (mlx5_mode == MLX5_ESWITCH_LEGACY) esw->dev->priv.flags |= MLX5_PRIV_FLAGS_SWITCH_LEGACY; + if (mlx5_mode == MLX5_ESWITCH_OFFLOADS) + esw->dev->priv.flags &= ~MLX5_PRIV_FLAGS_SWITCH_LEGACY; mlx5_eswitch_disable_locked(esw); if (mlx5_mode == MLX5_ESWITCH_OFFLOADS) { if (mlx5_devlink_trap_get_num_active(esw->dev)) {
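The three eswitch hunks form one mechanism: teardown no longer flushes the workqueue (which risks waiting on work that needs locks the caller holds); instead each queued item snapshots esw_funcs.generation, and the handler, under the devlink lock, discards the work if the generation has moved on. A condensed sketch, with the devlink/esw context assumed:

static void funcs_changed_work(struct work_struct *w)
{
	struct mlx5_host_work *hw =
		container_of(w, struct mlx5_host_work, work);
	struct devlink *devlink = priv_to_devlink(hw->esw->dev);

	devl_lock(devlink);
	/* Queued before the last mode change: the snapshot no longer
	 * matches, so this work is stale and must not touch vports.
	 */
	if (hw->work_gen != atomic_read(&hw->esw->esw_funcs.generation))
		goto out;
	/* ... apply the num_vfs change ... */
out:
	devl_unlock(devlink);
	kfree(hw);
}

/* On unregister, one increment invalidates everything already queued: */
atomic_inc(&esw->esw_funcs.generation);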
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index eeb44379..c1f220e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -822,48 +822,63 @@ mlx5_fw_image_pending(struct mlx5_core_dev *dev, return 0; } -int mlx5_fw_version_query(struct mlx5_core_dev *dev, - u32 *running_ver, u32 *pending_ver) +void mlx5_fw_version_query(struct mlx5_core_dev *dev, + u32 *running_ver, u32 *pending_ver) { u32 reg_mcqi_version[MLX5_ST_SZ_DW(mcqi_version)] = {}; bool pending_version_exists; int component_index; int err; + *running_ver = 0; + *pending_ver = 0; + if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi) || !MLX5_CAP_MCAM_REG(dev, mcqs)) { mlx5_core_warn(dev, "fw query isn't supported by the FW\n"); - return -EOPNOTSUPP; + return; } component_index = mlx5_get_boot_img_component_index(dev); - if (component_index < 0) - return component_index; + if (component_index < 0) { + mlx5_core_warn(dev, "fw query failed to find boot img component index, err %d\n", + component_index); + return; + } + *running_ver = U32_MAX; /* indicate failure */ err = mlx5_reg_mcqi_version_query(dev, component_index, MCQI_FW_RUNNING_VERSION, reg_mcqi_version); - if (err) - return err; + if (!err) + *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, + version); + else + mlx5_core_warn(dev, "failed to query running version, err %d\n", + err); - *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); - + *pending_ver = U32_MAX; /* indicate failure */ err = mlx5_fw_image_pending(dev, component_index, &pending_version_exists); - if (err) - return err; + if (err) { + mlx5_core_warn(dev, "failed to query pending image, err %d\n", + err); + return; + } if (!pending_version_exists) { *pending_ver = 0; - return 0; + return; } err = mlx5_reg_mcqi_version_query(dev, component_index, MCQI_FW_STORED_VERSION, reg_mcqi_version); - if (err) - return err; + if (!err) + *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, + version); + else + mlx5_core_warn(dev, "failed to query pending version, err %d\n", + err); - *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); - - return 0; + return; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c index 62b6faa..b8d5f6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
@@ -160,8 +160,11 @@ DEFINE_SHOW_ATTRIBUTE(members); void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev) { + struct mlx5_lag *ldev = mlx5_lag_dev(dev); struct dentry *dbg; + if (!ldev) + return; dbg = debugfs_create_dir("lag", mlx5_debugfs_get_dev_root(dev)); dev->priv.dbg.lag_debugfs = dbg;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index c326adb..044adfd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -1869,8 +1869,12 @@ void mlx5_lag_disable_change(struct mlx5_core_dev *dev) mutex_lock(&ldev->lock); ldev->mode_changes_in_progress++; - if (__mlx5_lag_is_active(ldev)) - mlx5_disable_lag(ldev); + if (__mlx5_lag_is_active(ldev)) { + if (ldev->mode == MLX5_LAG_MODE_MPESW) + mlx5_lag_disable_mpesw(ldev); + else + mlx5_disable_lag(ldev); + } mutex_unlock(&ldev->lock); mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index 24a91e7..74d5c2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -65,7 +65,7 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev) return err; } -static int enable_mpesw(struct mlx5_lag *ldev) +static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0; int err; @@ -126,7 +126,7 @@ static int enable_mpesw(struct mlx5_lag *ldev) return err; } -static void disable_mpesw(struct mlx5_lag *ldev) +void mlx5_lag_disable_mpesw(struct mlx5_lag *ldev) { if (ldev->mode == MLX5_LAG_MODE_MPESW) { mlx5_mpesw_metadata_cleanup(ldev); @@ -152,9 +152,9 @@ static void mlx5_mpesw_work(struct work_struct *work) } if (mpesww->op == MLX5_MPESW_OP_ENABLE) - mpesww->result = enable_mpesw(ldev); + mpesww->result = mlx5_lag_enable_mpesw(ldev); else if (mpesww->op == MLX5_MPESW_OP_DISABLE) - disable_mpesw(ldev); + mlx5_lag_disable_mpesw(ldev); unlock: mutex_unlock(&ldev->lock); mlx5_devcom_comp_unlock(devcom);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h index f5d9b5c..b767dbb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
@@ -31,6 +31,11 @@ int mlx5_lag_mpesw_do_mirred(struct mlx5_core_dev *mdev, bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev); void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev); int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev); +#ifdef CONFIG_MLX5_ESWITCH +void mlx5_lag_disable_mpesw(struct mlx5_lag *ldev); +#else +static inline void mlx5_lag_disable_mpesw(struct mlx5_lag *ldev) {} +#endif /* CONFIG_MLX5_ESWITCH */ #ifdef CONFIG_MLX5_ESWITCH void mlx5_mpesw_speed_update_work(struct work_struct *work);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index b635b42..1507e88 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -393,8 +393,8 @@ int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw, struct netlink_ext_ack *extack); -int mlx5_fw_version_query(struct mlx5_core_dev *dev, - u32 *running_ver, u32 *stored_ver); +void mlx5_fw_version_query(struct mlx5_core_dev *dev, u32 *running_ver, + u32 *stored_ver); #ifdef CONFIG_MLX5_CORE_EN int mlx5e_init(void);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 4af8267..bf6f631 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -193,7 +193,9 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) err = pci_enable_sriov(pdev, num_vfs); if (err) { mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); + devl_lock(devlink); mlx5_device_disable_sriov(dev, num_vfs, true, true); + devl_unlock(devlink); } return err; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c index 1b08bbb..0ed8135 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c
@@ -1051,8 +1051,8 @@ static int dr_dump_domain_all(struct seq_file *file, struct mlx5dr_domain *dmn) struct mlx5dr_table *tbl; int ret; - mutex_lock(&dmn->dump_info.dbg_mutex); mlx5dr_domain_lock(dmn); + mutex_lock(&dmn->dump_info.dbg_mutex); ret = dr_dump_domain(file, dmn); if (ret < 0) @@ -1065,8 +1065,8 @@ static int dr_dump_domain_all(struct seq_file *file, struct mlx5dr_domain *dmn) } unlock_mutex: - mlx5dr_domain_unlock(dmn); mutex_unlock(&dmn->dump_info.dbg_mutex); + mlx5dr_domain_unlock(dmn); return ret; }
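The dr_dbg.c change is a pure lock-ordering fix: the dump path used to take dbg_mutex before the domain lock, and if any other path nests the two the opposite way, that is the classic ABBA recipe. The fix makes the domain lock the outer lock on this path, with acquire and release both reordered:

/* ABBA hazard, illustrated:
 *
 *   dumper (old order)            other path (assumed order)
 *   mutex_lock(&dbg_mutex);       mlx5dr_domain_lock(dmn);
 *   mlx5dr_domain_lock(dmn);  <-- mutex_lock(&dbg_mutex);  <-- deadlock
 *
 * Consistent nesting removes the cycle: domain lock outer, mutex inner.
 */
mlx5dr_domain_lock(dmn);
mutex_lock(&dmn->dump_info.dbg_mutex);
/* ... dump ... */
mutex_unlock(&dmn->dump_info.dbg_mutex);
mlx5dr_domain_unlock(dmn);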
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c index 08270db..3c4563c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c
@@ -197,7 +197,7 @@ static int fbnic_dbg_bdq_desc_seq_show(struct seq_file *s, void *v) return 0; } - for (i = 0; i <= ring->size_mask; i++) { + for (i = 0; i < (ring->size_mask + 1) * FBNIC_BD_FRAG_COUNT; i++) { u64 bd = le64_to_cpu(ring->desc[i]); seq_printf(s, "%04x %#04llx %#014llx\n", i,
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index 9fb91d4..9cd85a0 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
@@ -927,7 +927,7 @@ static void fbnic_fill_bdq(struct fbnic_ring *bdq) /* Force DMA writes to flush before writing to tail */ dma_wmb(); - writel(i, bdq->doorbell); + writel(i * FBNIC_BD_FRAG_COUNT, bdq->doorbell); } } @@ -2564,7 +2564,7 @@ static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq) hpq->tail = 0; hpq->head = 0; - log_size = fls(hpq->size_mask); + log_size = fls(hpq->size_mask) + ilog2(FBNIC_BD_FRAG_COUNT); /* Store descriptor ring address and size */ fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAL, lower_32_bits(hpq->dma)); @@ -2576,7 +2576,7 @@ static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq) if (!ppq->size_mask) goto write_ctl; - log_size = fls(ppq->size_mask); + log_size = fls(ppq->size_mask) + ilog2(FBNIC_BD_FRAG_COUNT); /* Add enabling of PPQ to BDQ control */ bdq_ctl |= FBNIC_QUEUE_BDQ_CTL_PPQ_ENABLE;
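These fbnic hunks all scale by FBNIC_BD_FRAG_COUNT because each buffer-descriptor queue entry now expands to that many hardware descriptors: the debugfs dump walks (size_mask + 1) * FBNIC_BD_FRAG_COUNT slots, the tail doorbell is written in descriptor units, and the log2 ring size programmed into hardware grows by ilog2(FBNIC_BD_FRAG_COUNT). A standalone check of the arithmetic, taking FBNIC_BD_FRAG_COUNT as 4 purely for illustration:

#include <stdio.h>

#define BD_FRAG_COUNT 4u	/* assumed value, illustration only */

static unsigned int fls32(unsigned int x)	/* like kernel fls() */
{
	unsigned int r = 0;

	while (x) {
		r++;
		x >>= 1;
	}
	return r;
}

int main(void)
{
	unsigned int size_mask = 1023;	/* a 1024-entry ring */
	unsigned int log_size = fls32(size_mask) + __builtin_ctz(BD_FRAG_COUNT);

	/* 1024 entries x 4 descriptors each = 4096 hardware descriptors */
	printf("log_size=%u -> %u descriptors\n", log_size, 1u << log_size);
	return 0;
}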
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h index 9809652..e03c9d2 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
@@ -38,7 +38,7 @@ struct fbnic_net; #define FBNIC_MAX_XDPQS 128u /* These apply to TWQs, TCQ, RCQ */ -#define FBNIC_QUEUE_SIZE_MIN 16u +#define FBNIC_QUEUE_SIZE_MIN 64u #define FBNIC_QUEUE_SIZE_MAX SZ_64K #define FBNIC_TXQ_SIZE_DEFAULT 1024
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index a3845ed..f0b5dd7 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -3053,6 +3053,11 @@ static void lan743x_phylink_mac_link_up(struct phylink_config *config, else if (speed == SPEED_100) mac_cr |= MAC_CR_CFG_L_; + if (duplex == DUPLEX_FULL) + mac_cr |= MAC_CR_DPX_; + else + mac_cr &= ~MAC_CR_DPX_; + lan743x_csr_write(adapter, MAC_CR, mac_cr); lan743x_ptp_update_latency(adapter, speed);
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index f5deed3..786186c 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -1934,6 +1934,7 @@ static int mana_gd_setup(struct pci_dev *pdev) mana_gd_remove_irqs(pdev); free_workqueue: destroy_workqueue(gc->service_wq); + gc->service_wq = NULL; dev_err(&pdev->dev, "%s failed (error %d)\n", __func__, err); return err; } @@ -1946,7 +1947,10 @@ static void mana_gd_cleanup(struct pci_dev *pdev) mana_gd_remove_irqs(pdev); - destroy_workqueue(gc->service_wq); + if (gc->service_wq) { + destroy_workqueue(gc->service_wq); + gc->service_wq = NULL; + } dev_dbg(&pdev->dev, "mana gdma cleanup successful\n"); }
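The gdma_main.c hunk makes the workqueue teardown idempotent: the setup error path destroys the queue and clears the pointer, and cleanup destroys it only if still set, so the two paths can no longer double-destroy (and mana_rdma_remove further down can safely test the pointer before flushing). As a generic helper, not present in the driver, the pattern might look like:

/* Destroy a workqueue at most once, however many teardown paths run. */
static void destroy_wq_once(struct workqueue_struct **wq)
{
	if (*wq) {
		destroy_workqueue(*wq);
		*wq = NULL;
	}
}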
diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index ba3467f..48a9ace 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
@@ -814,9 +814,6 @@ void mana_hwc_destroy_channel(struct gdma_context *gc) gc->max_num_cqs = 0; } - kfree(hwc->caller_ctx); - hwc->caller_ctx = NULL; - if (hwc->txq) mana_hwc_destroy_wq(hwc, hwc->txq); @@ -826,6 +823,9 @@ void mana_hwc_destroy_channel(struct gdma_context *gc) if (hwc->cq) mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc->cq); + kfree(hwc->caller_ctx); + hwc->caller_ctx = NULL; + mana_gd_free_res_map(&hwc->inflight_msg_res); hwc->num_inflight_msg = 0;
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 9919183..09a53c9 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -766,6 +766,13 @@ static void mana_get_rxbuf_cfg(struct mana_port_context *apc, } *frag_count = 1; + + /* In the single-buffer path, napi_build_skb() must see the + * actual backing allocation size so skb->truesize reflects + * the full page (or higher-order page), not just the usable + * packet area. + */ + *alloc_size = PAGE_SIZE << get_order(*alloc_size); return; } @@ -1770,8 +1777,14 @@ static void mana_poll_tx_cq(struct mana_cq *cq) ndev = txq->ndev; apc = netdev_priv(ndev); + /* Limit CQEs polled to 4 wraparounds of the CQ to ensure the + * doorbell can be rung in time for the hardware's requirement + * of at least one doorbell ring every 8 wraparounds. + */ comp_read = mana_gd_poll_cq(cq->gdma_cq, completions, - CQE_POLLING_BUFFER); + min((cq->gdma_cq->queue_size / + COMP_ENTRY_SIZE) * 4, + CQE_POLLING_BUFFER)); if (comp_read < 1) return; @@ -2156,7 +2169,14 @@ static void mana_poll_rx_cq(struct mana_cq *cq) struct mana_rxq *rxq = cq->rxq; int comp_read, i; - comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER); + /* Limit CQEs polled to 4 wraparounds of the CQ to ensure the + * doorbell can be rung in time for the hardware's requirement + * of at least one doorbell ring every 8 wraparounds. + */ + comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, + min((cq->gdma_cq->queue_size / + COMP_ENTRY_SIZE) * 4, + CQE_POLLING_BUFFER)); WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); rxq->xdp_flush = false; @@ -2201,11 +2221,11 @@ static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) mana_gd_ring_cq(gdma_queue, SET_ARM_BIT); cq->work_done_since_doorbell = 0; napi_complete_done(&cq->napi, w); - } else if (cq->work_done_since_doorbell > - cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) { + } else if (cq->work_done_since_doorbell >= + (cq->gdma_cq->queue_size / COMP_ENTRY_SIZE) * 4) { /* MANA hardware requires at least one doorbell ring every 8 * wraparounds of CQ even if there is no need to arm the CQ. - * This driver rings the doorbell as soon as we have exceeded + * This driver rings the doorbell as soon as it has processed * 4 wraparounds. */ mana_gd_ring_cq(gdma_queue, 0); @@ -3412,6 +3432,7 @@ static int add_adev(struct gdma_dev *gd, const char *name) struct auxiliary_device *adev; struct mana_adev *madev; int ret; + int id; madev = kzalloc_obj(*madev); if (!madev) @@ -3421,7 +3442,8 @@ static int add_adev(struct gdma_dev *gd, const char *name) ret = mana_adev_idx_alloc(); if (ret < 0) goto idx_fail; - adev->id = ret; + id = ret; + adev->id = id; adev->name = name; adev->dev.parent = gd->gdma_context->dev; @@ -3447,7 +3469,7 @@ static int add_adev(struct gdma_dev *gd, const char *name) auxiliary_device_uninit(adev); init_fail: - mana_adev_idx_free(adev->id); + mana_adev_idx_free(id); idx_fail: kfree(madev); @@ -3757,7 +3779,9 @@ void mana_rdma_remove(struct gdma_dev *gd) } WRITE_ONCE(gd->rdma_teardown, true); - flush_workqueue(gc->service_wq); + + if (gc->service_wq) + flush_workqueue(gc->service_wq); if (gd->adev) remove_adev(gd);
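The mana polling changes bound one poll batch to four CQ wraparounds' worth of CQEs. Per the diff's own comments, the hardware wants a doorbell at least once every eight wraparounds; capping a batch at four and ringing the doorbell once work_done_since_doorbell reaches that same threshold (note the > changed to >=) keeps the driver inside the deadline. The arithmetic as standalone C, with made-up sizes:

#include <stdio.h>

int main(void)
{
	/* Illustrative values only; real sizes come from the queue. */
	unsigned int queue_size = 4096;		/* CQ size in bytes   */
	unsigned int comp_entry_size = 64;	/* bytes per CQE      */
	unsigned int cqe_budget = 256;		/* poll buffer length */

	unsigned int per_wrap = queue_size / comp_entry_size;	/* 64  */
	unsigned int cap = per_wrap * 4;			/* 256 */
	unsigned int batch = cap < cqe_budget ? cap : cqe_budget;

	/* One poll consumes at most 4 wraparounds of CQEs, so the
	 * ">= 4 wraparounds, ring doorbell" check always fires before
	 * the hardware's 8-wraparound limit.
	 */
	printf("poll at most %u CQEs (%u per wraparound)\n", batch, per_wrap);
	return 0;
}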
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 8d040e6..637e635 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1719,13 +1719,18 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa) if (ether_addr_equal(netdev->dev_addr, mac)) return 0; - err = ionic_program_mac(lif, mac); - if (err < 0) - return err; + /* Only program MACs for virtual functions to avoid losing the permanent + * MAC across warm reset/reboot. + */ + if (lif->ionic->pdev->is_virtfn) { + err = ionic_program_mac(lif, mac); + if (err < 0) + return err; - if (err > 0) - netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal-due to old FW running\n", - __func__); + if (err > 0) + netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal-due to old FW running\n", + __func__); + } err = eth_prepare_mac_addr_change(netdev, addr); if (err)
diff --git a/drivers/net/ethernet/spacemit/k1_emac.c b/drivers/net/ethernet/spacemit/k1_emac.c index 338a263..15d43e4 100644 --- a/drivers/net/ethernet/spacemit/k1_emac.c +++ b/drivers/net/ethernet/spacemit/k1_emac.c
@@ -565,7 +565,9 @@ static void emac_alloc_rx_desc_buffers(struct emac_priv *priv) DMA_FROM_DEVICE); if (dma_mapping_error(&priv->pdev->dev, rx_buf->dma_addr)) { dev_err_ratelimited(&ndev->dev, "Mapping skb failed\n"); - goto err_free_skb; + dev_kfree_skb_any(skb); + rx_buf->skb = NULL; + break; } rx_desc_addr = &((struct emac_desc *)rx_ring->desc_addr)[i]; @@ -590,10 +592,6 @@ static void emac_alloc_rx_desc_buffers(struct emac_priv *priv) rx_ring->head = i; return; - -err_free_skb: - dev_kfree_skb_any(skb); - rx_buf->skb = NULL; } /* Returns number of packets received */ @@ -735,7 +733,7 @@ static void emac_tx_mem_map(struct emac_priv *priv, struct sk_buff *skb) struct emac_desc tx_desc, *tx_desc_addr; struct device *dev = &priv->pdev->dev; struct emac_tx_desc_buffer *tx_buf; - u32 head, old_head, frag_num, f; + u32 head, old_head, frag_num, f, i; bool buf_idx; frag_num = skb_shinfo(skb)->nr_frags; @@ -803,6 +801,15 @@ static void emac_tx_mem_map(struct emac_priv *priv, struct sk_buff *skb) err_free_skb: dev_dstats_tx_dropped(priv->ndev); + + i = old_head; + while (i != head) { + emac_free_tx_buf(priv, i); + + if (++i == tx_ring->total_cnt) + i = 0; + } + dev_kfree_skb_any(skb); }
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 51c96a7..33667a2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -323,6 +323,7 @@ struct stmmac_priv { void __iomem *ptpaddr; void __iomem *estaddr; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + unsigned int num_double_vlans; int sfty_irq; int sfty_ce_irq; int sfty_ue_irq;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index edf0799..13d3cac 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -156,6 +156,7 @@ static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue); static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue); static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, u32 rxmode, u32 chan); +static void stmmac_vlan_restore(struct stmmac_priv *priv); #ifdef CONFIG_DEBUG_FS static const struct net_device_ops stmmac_netdev_ops; @@ -853,6 +854,7 @@ static int stmmac_init_timestamping(struct stmmac_priv *priv) netdev_info(priv->dev, "IEEE 1588-2008 Advanced Timestamp supported\n"); + memset(&priv->tstamp_config, 0, sizeof(priv->tstamp_config)); priv->hwts_tx_en = 0; priv->hwts_rx_en = 0; @@ -4106,6 +4108,8 @@ static int __stmmac_open(struct net_device *dev, phylink_start(priv->phylink); + stmmac_vlan_restore(priv); + ret = stmmac_request_irq(dev); if (ret) goto irq_error; @@ -6765,6 +6769,9 @@ static int stmmac_vlan_update(struct stmmac_priv *priv, bool is_double) hash = 0; } + if (!netif_running(priv->dev)) + return 0; + return stmmac_update_vlan_hash(priv, priv->hw, hash, pmatch, is_double); } @@ -6774,6 +6781,7 @@ static int stmmac_vlan_update(struct stmmac_priv *priv, bool is_double) static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) { struct stmmac_priv *priv = netdev_priv(ndev); + unsigned int num_double_vlans; bool is_double = false; int ret; @@ -6785,7 +6793,8 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid is_double = true; set_bit(vid, priv->active_vlans); - ret = stmmac_vlan_update(priv, is_double); + num_double_vlans = priv->num_double_vlans + is_double; + ret = stmmac_vlan_update(priv, num_double_vlans); if (ret) { clear_bit(vid, priv->active_vlans); goto err_pm_put; @@ -6793,9 +6802,15 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid if (priv->hw->num_vlan) { ret = stmmac_add_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid); - if (ret) + if (ret) { + clear_bit(vid, priv->active_vlans); + stmmac_vlan_update(priv, priv->num_double_vlans); goto err_pm_put; + } } + + priv->num_double_vlans = num_double_vlans; + err_pm_put: pm_runtime_put(priv->device); @@ -6808,6 +6823,7 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) { struct stmmac_priv *priv = netdev_priv(ndev); + unsigned int num_double_vlans; bool is_double = false; int ret; @@ -6819,14 +6835,23 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi is_double = true; clear_bit(vid, priv->active_vlans); + num_double_vlans = priv->num_double_vlans - is_double; + ret = stmmac_vlan_update(priv, num_double_vlans); + if (ret) { + set_bit(vid, priv->active_vlans); + goto del_vlan_error; + } if (priv->hw->num_vlan) { ret = stmmac_del_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid); - if (ret) + if (ret) { + set_bit(vid, priv->active_vlans); + stmmac_vlan_update(priv, priv->num_double_vlans); goto del_vlan_error; + } } - ret = stmmac_vlan_update(priv, is_double); + priv->num_double_vlans = num_double_vlans; del_vlan_error: pm_runtime_put(priv->device); @@ -6834,6 +6859,17 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi return ret; } +static void stmmac_vlan_restore(struct stmmac_priv *priv) +{ + if (!(priv->dev->features & NETIF_F_VLAN_FEATURES)) + return; + + if (priv->hw->num_vlan) + stmmac_restore_hw_vlan_rx_fltr(priv, priv->dev, priv->hw); + + stmmac_vlan_update(priv, 
priv->num_double_vlans); +} + static int stmmac_bpf(struct net_device *dev, struct netdev_bpf *bpf) { struct stmmac_priv *priv = netdev_priv(dev); @@ -8258,10 +8294,10 @@ int stmmac_resume(struct device *dev) stmmac_init_coalesce(priv); phylink_rx_clk_stop_block(priv->phylink); stmmac_set_rx_mode(ndev); - - stmmac_restore_hw_vlan_rx_fltr(priv, ndev, priv->hw); phylink_rx_clk_stop_unblock(priv->phylink); + stmmac_vlan_restore(priv); + stmmac_enable_all_queues(priv); stmmac_enable_all_dma_irq(priv);
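The stmmac_main.c rework turns VLAN add/remove into a prepare/commit sequence around the new num_double_vlans counter: compute the prospective count, reprogram the hash, then the perfect filter, and commit the counter only when both succeed; any failure rolls the bit and the hash back to the previous state. The add path, condensed (pm_runtime handling omitted):

set_bit(vid, priv->active_vlans);
new_cnt = priv->num_double_vlans + is_double;

ret = stmmac_vlan_update(priv, new_cnt);	/* step 1: hash/config */
if (ret)
	goto undo_bit;

ret = stmmac_add_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid);
if (ret) {
	stmmac_vlan_update(priv, priv->num_double_vlans); /* undo step 1 */
	goto undo_bit;
}

priv->num_double_vlans = new_cnt;	/* commit only on full success */
return 0;

undo_bit:
clear_bit(vid, priv->active_vlans);
return ret;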
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c index b18404d..e24efe3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c
@@ -76,7 +76,9 @@ static int vlan_add_hw_rx_fltr(struct net_device *dev, } hw->vlan_filter[0] = vid; - vlan_write_single(dev, vid); + + if (netif_running(dev)) + vlan_write_single(dev, vid); return 0; } @@ -97,12 +99,15 @@ static int vlan_add_hw_rx_fltr(struct net_device *dev, return -EPERM; } - ret = vlan_write_filter(dev, hw, index, val); + if (netif_running(dev)) { + ret = vlan_write_filter(dev, hw, index, val); + if (ret) + return ret; + } - if (!ret) - hw->vlan_filter[index] = val; + hw->vlan_filter[index] = val; - return ret; + return 0; } static int vlan_del_hw_rx_fltr(struct net_device *dev, @@ -115,7 +120,9 @@ static int vlan_del_hw_rx_fltr(struct net_device *dev, if (hw->num_vlan == 1) { if ((hw->vlan_filter[0] & VLAN_TAG_VID) == vid) { hw->vlan_filter[0] = 0; - vlan_write_single(dev, 0); + + if (netif_running(dev)) + vlan_write_single(dev, 0); } return 0; } @@ -124,25 +131,23 @@ static int vlan_del_hw_rx_fltr(struct net_device *dev, for (i = 0; i < hw->num_vlan; i++) { if ((hw->vlan_filter[i] & VLAN_TAG_DATA_VEN) && ((hw->vlan_filter[i] & VLAN_TAG_DATA_VID) == vid)) { - ret = vlan_write_filter(dev, hw, i, 0); - if (!ret) - hw->vlan_filter[i] = 0; - else - return ret; + if (netif_running(dev)) { + ret = vlan_write_filter(dev, hw, i, 0); + if (ret) + return ret; + } + + hw->vlan_filter[i] = 0; } } - return ret; + return 0; } static void vlan_restore_hw_rx_fltr(struct net_device *dev, struct mac_device_info *hw) { - void __iomem *ioaddr = hw->pcsr; - u32 value; - u32 hash; - u32 val; int i; /* Single Rx VLAN Filter */ @@ -152,19 +157,8 @@ static void vlan_restore_hw_rx_fltr(struct net_device *dev, } /* Extended Rx VLAN Filter Enable */ - for (i = 0; i < hw->num_vlan; i++) { - if (hw->vlan_filter[i] & VLAN_TAG_DATA_VEN) { - val = hw->vlan_filter[i]; - vlan_write_filter(dev, hw, i, val); - } - } - - hash = readl(ioaddr + VLAN_HASH_TABLE); - if (hash & VLAN_VLHT) { - value = readl(ioaddr + VLAN_TAG); - value |= VLAN_VTHM; - writel(value, ioaddr + VLAN_TAG); - } + for (i = 0; i < hw->num_vlan; i++) + vlan_write_filter(dev, hw, i, hw->vlan_filter[i]); } static void vlan_update_hash(struct mac_device_info *hw, u32 hash, @@ -183,6 +177,10 @@ static void vlan_update_hash(struct mac_device_info *hw, u32 hash, value |= VLAN_EDVLP; value |= VLAN_ESVL; value |= VLAN_DOVLTC; + } else { + value &= ~VLAN_EDVLP; + value &= ~VLAN_ESVL; + value &= ~VLAN_DOVLTC; } writel(value, ioaddr + VLAN_TAG); @@ -193,6 +191,10 @@ static void vlan_update_hash(struct mac_device_info *hw, u32 hash, value |= VLAN_EDVLP; value |= VLAN_ESVL; value |= VLAN_DOVLTC; + } else { + value &= ~VLAN_EDVLP; + value &= ~VLAN_ESVL; + value &= ~VLAN_DOVLTC; } writel(value | perfect_match, ioaddr + VLAN_TAG);
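The stmmac_vlan.c side makes hw->vlan_filter[] a true shadow of the MAC's VLAN filter registers: the cache is always updated, the registers are written only while the interface is running, and the restore path simply replays the whole cache on open/resume (which is why the manual hash re-enable could be dropped; stmmac_vlan_restore() reruns stmmac_vlan_update() instead). The invariant in miniature:

/* Shadow-register invariant: the cache is authoritative, hardware is
 * only touched while the interface is up, and open()/resume() replay
 * the cache wholesale.
 */
if (netif_running(dev)) {
	ret = vlan_write_filter(dev, hw, index, val);
	if (ret)
		return ret;	/* hw refused: keep the old shadow */
}
hw->vlan_filter[index] = val;	/* commit the shadow last */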
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 5924db6..265ce54 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -391,7 +391,7 @@ static void am65_cpsw_nuss_ndo_slave_set_rx_mode(struct net_device *ndev) cpsw_ale_set_allmulti(common->ale, ndev->flags & IFF_ALLMULTI, port->port_id); - port_mask = ALE_PORT_HOST; + port_mask = BIT(port->port_id) | ALE_PORT_HOST; /* Clear all mcast from ALE */ cpsw_ale_flush_multicast(common->ale, port_mask, -1); @@ -1351,7 +1351,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, ndev_priv = netdev_priv(ndev); am65_cpsw_nuss_set_offload_fwd_mark(skb, ndev_priv->offload_fwd_mark); skb_put(skb, pkt_len); - if (port->rx_ts_enabled) + if (port->rx_ts_filter) am65_cpts_rx_timestamp(common->cpts, skb); skb_mark_for_recycle(skb); skb->protocol = eth_type_trans(skb, ndev); @@ -1811,11 +1811,14 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: - port->rx_ts_enabled = false; + port->rx_ts_filter = HWTSTAMP_FILTER_NONE; break; case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + port->rx_ts_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + break; case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: @@ -1825,8 +1828,8 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - port->rx_ts_enabled = true; - cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT | HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + port->rx_ts_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@ -1863,7 +1866,7 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, ts_ctrl |= AM65_CPSW_TS_TX_ANX_ALL_EN | AM65_CPSW_PN_TS_CTL_TX_VLAN_LT1_EN; - if (port->rx_ts_enabled) + if (port->rx_ts_filter) ts_ctrl |= AM65_CPSW_TS_RX_ANX_ALL_EN | AM65_CPSW_PN_TS_CTL_RX_VLAN_LT1_EN; @@ -1888,8 +1891,7 @@ static int am65_cpsw_nuss_hwtstamp_get(struct net_device *ndev, cfg->flags = 0; cfg->tx_type = port->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - cfg->rx_filter = port->rx_ts_enabled ? HWTSTAMP_FILTER_PTP_V2_EVENT | - HWTSTAMP_FILTER_PTP_V1_L4_EVENT : HWTSTAMP_FILTER_NONE; + cfg->rx_filter = port->rx_ts_filter; return 0; }
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index 917c37e..7750448 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
@@ -52,7 +52,7 @@ struct am65_cpsw_port { bool disabled; struct am65_cpsw_slave_data slave; bool tx_ts_enabled; - bool rx_ts_enabled; + enum hwtstamp_rx_filters rx_ts_filter; struct am65_cpsw_qos qos; struct devlink_port devlink_port; struct bpf_prog *xdp_prog;
diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index bb969dd..be7b693 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c
@@ -450,14 +450,13 @@ static void cpsw_ale_flush_mcast(struct cpsw_ale *ale, u32 *ale_entry, ale->port_mask_bits); if ((mask & port_mask) == 0) return; /* ports dont intersect, not interested */ - mask &= ~port_mask; + mask &= (~port_mask | ALE_PORT_HOST); - /* free if only remaining port is host port */ - if (mask) + if (mask == 0x0 || mask == ALE_PORT_HOST) + cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE); + else cpsw_ale_set_port_mask(ale_entry, mask, ale->port_mask_bits); - else - cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE); } int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask, int vid)
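Together with the wider port_mask now passed from am65-cpsw-nuss.c, the ALE flush strips a slave port's membership while deliberately preserving the host bit, then frees the entry only when the host is the sole remaining member. Worked through with concrete masks, taking ALE_PORT_HOST as BIT(0) (the host is ALE port 0; treat the value as an assumption here):

#include <stdio.h>

#define BIT(n)		(1u << (n))
#define ALE_PORT_HOST	BIT(0)	/* assumed bit value */

static void flush(unsigned int entry, unsigned int port_mask)
{
	if ((entry & port_mask) == 0)
		return;	/* ports don't intersect, entry untouched */

	entry &= (~port_mask | ALE_PORT_HOST);	/* strip ports, keep host */

	if (entry == 0 || entry == ALE_PORT_HOST)
		printf("%#x: free entry (host-only leftover)\n", entry);
	else
		printf("%#x: keep with reduced mask\n", entry);
}

int main(void)
{
	unsigned int port_mask = BIT(1) | ALE_PORT_HOST; /* flush port 1 */

	flush(BIT(2) | BIT(1) | ALE_PORT_HOST, port_mask); /* kept, 0x5 */
	flush(BIT(1) | ALE_PORT_HOST, port_mask);	   /* freed     */
	return 0;
}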
diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 0cf9dfe..a28a608 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c
@@ -902,6 +902,7 @@ static void emac_dispatch_skb_zc(struct prueth_emac *emac, struct xdp_buff *xdp, skb_reserve(skb, headroom); skb_put(skb, pkt_len); + skb_copy_to_linear_data(skb, xdp->data, pkt_len); skb->dev = ndev; /* RX HW timestamp */ @@ -912,7 +913,6 @@ static void emac_dispatch_skb_zc(struct prueth_emac *emac, struct xdp_buff *xdp, skb->offload_fwd_mark = emac->offload_fwd_mark; skb->protocol = eth_type_trans(skb, ndev); - skb_mark_for_recycle(skb); napi_gro_receive(&emac->napi_rx, skb); ndev->stats.rx_bytes += pkt_len; ndev->stats.rx_packets++; @@ -962,7 +962,6 @@ static int emac_rx_packet_zc(struct prueth_emac *emac, u32 flow_id, pkt_len -= 4; cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); psdata = cppi5_hdesc_get_psdata(desc_rx); - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); count++; xsk_buff_set_size(xdp, pkt_len); xsk_buff_dma_sync_for_cpu(xdp); @@ -988,6 +987,7 @@ static int emac_rx_packet_zc(struct prueth_emac *emac, u32 flow_id, emac_dispatch_skb_zc(emac, xdp, psdata); xsk_buff_free(xdp); } + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); } if (xdp_status & ICSSG_XDP_REDIR) @@ -1057,7 +1057,6 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) /* firmware adds 4 CRC bytes, strip them */ pkt_len -= 4; cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); /* if allocation fails we drop the packet but push the * descriptor back to the ring with old page to prevent a stall @@ -1075,6 +1074,11 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false); *xdp_state = emac_run_xdp(emac, &xdp, &pkt_len); + if (*xdp_state == ICSSG_XDP_CONSUMED) { + page_pool_recycle_direct(pool, page); + goto requeue; + } + if (*xdp_state != ICSSG_XDP_PASS) goto requeue; headroom = xdp.data - xdp.data_hard_start; @@ -1110,6 +1114,7 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) ndev->stats.rx_packets++; requeue: + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); /* queue another RX DMA */ ret = prueth_dma_rx_push_mapped(emac, &emac->rx_chns, new_page, PRUETH_MAX_PKT_SIZE);
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 0939994..42a881b 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
@@ -273,6 +273,14 @@ static int prueth_emac_common_start(struct prueth *prueth) if (ret) goto disable_class; + /* Reset link state to force reconfiguration in + * emac_adjust_link(). Without this, if the link was already up + * before restart, emac_adjust_link() won't detect any state + * change and will skip critical configuration like writing + * speed to firmware. + */ + emac->link = 0; + mutex_lock(&emac->ndev->phydev->lock); emac_adjust_link(emac->ndev); mutex_unlock(&emac->ndev->phydev->lock);
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 5ff7421..fcd3aae 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
@@ -105,7 +105,7 @@ #define XAXIDMA_BD_HAS_DRE_MASK 0xF00 /* Whether has DRE mask */ #define XAXIDMA_BD_WORDLEN_MASK 0xFF /* Whether has DRE mask */ -#define XAXIDMA_BD_CTRL_LENGTH_MASK 0x007FFFFF /* Requested len */ +#define XAXIDMA_BD_CTRL_LENGTH_MASK GENMASK(25, 0) /* Requested len */ #define XAXIDMA_BD_CTRL_TXSOF_MASK 0x08000000 /* First tx packet */ #define XAXIDMA_BD_CTRL_TXEOF_MASK 0x04000000 /* Last tx packet */ #define XAXIDMA_BD_CTRL_ALL_MASK 0x0C000000 /* All control bits */ @@ -130,7 +130,7 @@ #define XAXIDMA_BD_CTRL_TXEOF_MASK 0x04000000 /* Last tx packet */ #define XAXIDMA_BD_CTRL_ALL_MASK 0x0C000000 /* All control bits */ -#define XAXIDMA_BD_STS_ACTUAL_LEN_MASK 0x007FFFFF /* Actual len */ +#define XAXIDMA_BD_STS_ACTUAL_LEN_MASK GENMASK(25, 0) /* Actual len */ #define XAXIDMA_BD_STS_COMPLETE_MASK 0x80000000 /* Completed */ #define XAXIDMA_BD_STS_DEC_ERR_MASK 0x40000000 /* Decode error */ #define XAXIDMA_BD_STS_SLV_ERR_MASK 0x20000000 /* Slave error */
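The axienet header change widens both DMA buffer-length masks from the literal 0x007FFFFF, which is only 23 bits, to GENMASK(25, 0), i.e. 26 bits (0x03FFFFFF); this is a behavioral widening to match the DMA engine's length field, not a cosmetic rewrite. A quick standalone check:

#include <stdio.h>

#define GENMASK_U32(h, l) ((~0u >> (31 - (h))) & (~0u << (l)))

int main(void)
{
	/* old literal: 23 bits; new mask: 26 bits */
	printf("old=%#010x new=%#010x\n", 0x007FFFFFu, GENMASK_U32(25, 0));
	return 0;
}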
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index b06e4c3..263c4b6 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -770,8 +770,8 @@ static int axienet_device_reset(struct net_device *ndev) * @first_bd: Index of first descriptor to clean up * @nr_bds: Max number of descriptors to clean up * @force: Whether to clean descriptors even if not complete - * @sizep: Pointer to a u32 filled with the total sum of all bytes - * in all cleaned-up descriptors. Ignored if NULL. + * @sizep: Pointer to a u32 accumulating the total byte count of + * completed packets (using skb->len). Ignored if NULL. * @budget: NAPI budget (use 0 when not called from NAPI poll) * * Would either be called after a successful transmit operation, or after @@ -805,6 +805,8 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd, DMA_TO_DEVICE); if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK)) { + if (sizep) + *sizep += cur_p->skb->len; napi_consume_skb(cur_p->skb, budget); packets++; } @@ -818,9 +820,6 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd, wmb(); cur_p->cntrl = 0; cur_p->status = 0; - - if (sizep) - *sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK; } if (!force) {
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index e1e7f65..b0faa0f 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -403,15 +403,12 @@ static int ixp4xx_hwtstamp_set(struct net_device *netdev, int ret; int ch; - if (!cpu_is_ixp46x()) - return -EOPNOTSUPP; - if (!netif_running(netdev)) return -EINVAL; ret = ixp46x_ptp_find(&port->timesync_regs, &port->phc_index); if (ret) - return ret; + return -EOPNOTSUPP; ch = PORT2CHANNEL(port); regs = port->timesync_regs;
diff --git a/drivers/net/ethernet/xscale/ptp_ixp46x.c b/drivers/net/ethernet/xscale/ptp_ixp46x.c index 94203eb..93c64db 100644 --- a/drivers/net/ethernet/xscale/ptp_ixp46x.c +++ b/drivers/net/ethernet/xscale/ptp_ixp46x.c
@@ -232,6 +232,9 @@ static struct ixp_clock ixp_clock; int ixp46x_ptp_find(struct ixp46x_ts_regs *__iomem *regs, int *phc_index) { + if (!cpu_is_ixp46x()) + return -ENODEV; + *regs = ixp_clock.regs; *phc_index = ptp_clock_index(ixp_clock.ptp_clock);
diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c index de6bc17..15fe4d1 100644 --- a/drivers/net/mctp/mctp-i2c.c +++ b/drivers/net/mctp/mctp-i2c.c
@@ -343,6 +343,7 @@ static int mctp_i2c_recv(struct mctp_i2c_dev *midev) } else { status = NET_RX_DROP; spin_unlock_irqrestore(&midev->lock, flags); + kfree_skb(skb); } if (status == NET_RX_SUCCESS) {
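The mctp-i2c hunk plugs a classic receive-path leak: once the driver decides on NET_RX_DROP, it still owns the skb and must free it itself, since only a successful hand-off transfers ownership to the stack. A generic kernel-style sketch of that ownership rule follows; example_recv() and frame_is_valid() are invented for illustration and are not the driver's functions:

#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Placeholder validation, standing in for the driver's real checks. */
static bool frame_is_valid(const struct sk_buff *skb)
{
	return skb->len >= ETH_HLEN;
}

static void example_recv(struct sk_buff *skb)
{
	int status;

	if (frame_is_valid(skb)) {
		status = netif_rx(skb);	/* stack now owns skb */
	} else {
		status = NET_RX_DROP;
		kfree_skb(skb);		/* we still own it: free or leak */
	}

	if (status == NET_RX_SUCCESS)
		pr_debug("frame delivered\n");
}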
diff --git a/drivers/net/mctp/mctp-usb.c b/drivers/net/mctp/mctp-usb.c index ef860cf..3b5dff1 100644 --- a/drivers/net/mctp/mctp-usb.c +++ b/drivers/net/mctp/mctp-usb.c
@@ -329,7 +329,7 @@ static int mctp_usb_probe(struct usb_interface *intf, SET_NETDEV_DEV(netdev, &intf->dev); dev = netdev_priv(netdev); dev->netdev = netdev; - dev->usbdev = usb_get_dev(interface_to_usbdev(intf)); + dev->usbdev = interface_to_usbdev(intf); dev->intf = intf; usb_set_intfdata(intf, dev); @@ -365,7 +365,6 @@ static void mctp_usb_disconnect(struct usb_interface *intf) mctp_unregister_netdev(dev->netdev); usb_free_urb(dev->tx_urb); usb_free_urb(dev->rx_urb); - usb_put_dev(dev->usbdev); free_netdev(dev->netdev); }
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 37415ca..3c9acd6e 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c
@@ -617,7 +617,7 @@ static ssize_t sysdata_release_enabled_show(struct config_item *item, bool release_enabled; dynamic_netconsole_mutex_lock(); - release_enabled = !!(nt->sysdata_fields & SYSDATA_TASKNAME); + release_enabled = !!(nt->sysdata_fields & SYSDATA_RELEASE); dynamic_netconsole_mutex_unlock(); return sysfs_emit(buf, "%d\n", release_enabled); @@ -1679,7 +1679,8 @@ static void send_msg_no_fragmentation(struct netconsole_target *nt, if (release_len) { release = init_utsname()->release; - scnprintf(nt->buf, MAX_PRINT_CHUNK, "%s,%s", release, msg); + scnprintf(nt->buf, MAX_PRINT_CHUNK, "%s,%.*s", release, + msg_len, msg); msg_len += release_len; } else { memcpy(nt->buf, msg, msg_len);
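The send_msg_no_fragmentation() fix leans on the printf precision specifier: "%.*s" consumes an int argument and reads at most that many bytes of the string, which matters because the log payload is only msg_len bytes long and is not guaranteed to be NUL-terminated there. A small userspace demonstration with invented strings:

#include <stdio.h>

int main(void)
{
	/* pretend only the first 12 bytes of this buffer are the message */
	const char msg[] = "kernel panic: trailing junk";
	int msg_len = 12;
	char buf[64];

	/* "%.*s" takes its precision from an int argument, so at most
	 * msg_len bytes of msg are read; "%s" would run past them */
	snprintf(buf, sizeof(buf), "%s,%.*s", "6.19.0", msg_len, msg);
	puts(buf);	/* prints "6.19.0,kernel panic" */
	return 0;
}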
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 5ec028a..3645ebd 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c
@@ -109,8 +109,11 @@ static int nsim_forward_skb(struct net_device *tx_dev, int ret; ret = __dev_forward_skb(rx_dev, skb); - if (ret) + if (ret) { + if (psp_ext) + __skb_ext_put(psp_ext); return ret; + } nsim_psp_handle_ext(skb, psp_ext);
diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c index ec2bbc2..5499c15 100644 --- a/drivers/net/ovpn/tcp.c +++ b/drivers/net/ovpn/tcp.c
@@ -70,37 +70,56 @@ static void ovpn_tcp_to_userspace(struct ovpn_peer *peer, struct sock *sk, peer->tcp.sk_cb.sk_data_ready(sk); } +static struct sk_buff *ovpn_tcp_skb_packet(const struct ovpn_peer *peer, + struct sk_buff *orig_skb, + const int pkt_len, const int pkt_off) +{ + struct sk_buff *ovpn_skb; + int err; + + /* create a new skb with only the content of the current packet */ + ovpn_skb = netdev_alloc_skb(peer->ovpn->dev, pkt_len); + if (unlikely(!ovpn_skb)) + goto err; + + skb_copy_header(ovpn_skb, orig_skb); + err = skb_copy_bits(orig_skb, pkt_off, skb_put(ovpn_skb, pkt_len), + pkt_len); + if (unlikely(err)) { + net_warn_ratelimited("%s: skb_copy_bits failed for peer %u\n", + netdev_name(peer->ovpn->dev), peer->id); + kfree_skb(ovpn_skb); + goto err; + } + + consume_skb(orig_skb); + return ovpn_skb; +err: + kfree_skb(orig_skb); + return NULL; +} + static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb) { struct ovpn_peer *peer = container_of(strp, struct ovpn_peer, tcp.strp); struct strp_msg *msg = strp_msg(skb); - size_t pkt_len = msg->full_len - 2; - size_t off = msg->offset + 2; + int pkt_len = msg->full_len - 2; u8 opcode; - /* ensure skb->data points to the beginning of the openvpn packet */ - if (!pskb_pull(skb, off)) { - net_warn_ratelimited("%s: packet too small for peer %u\n", - netdev_name(peer->ovpn->dev), peer->id); - goto err; - } - - /* strparser does not trim the skb for us, therefore we do it now */ - if (pskb_trim(skb, pkt_len) != 0) { - net_warn_ratelimited("%s: trimming skb failed for peer %u\n", - netdev_name(peer->ovpn->dev), peer->id); - goto err; - } - - /* we need the first 4 bytes of data to be accessible + /* we need at least 4 bytes of data in the packet * to extract the opcode and the key ID later on */ - if (!pskb_may_pull(skb, OVPN_OPCODE_SIZE)) { + if (unlikely(pkt_len < OVPN_OPCODE_SIZE)) { net_warn_ratelimited("%s: packet too small to fetch opcode for peer %u\n", netdev_name(peer->ovpn->dev), peer->id); goto err; } + /* extract the packet into a new skb */ + skb = ovpn_tcp_skb_packet(peer, skb, pkt_len, msg->offset + 2); + if (unlikely(!skb)) + goto err; + /* DATA_V2 packets are handled in kernel, the rest goes to user space */ opcode = ovpn_opcode_from_skb(skb, 0); if (unlikely(opcode != OVPN_DATA_V2)) { @@ -113,7 +132,7 @@ static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb) /* The packet size header must be there when sending the packet * to userspace, therefore we put it back */ - skb_push(skb, 2); + *(__be16 *)__skb_push(skb, sizeof(u16)) = htons(pkt_len); ovpn_tcp_to_userspace(peer, strp->sk, skb); return; }
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 02fc013..3bd4157 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c
@@ -1866,8 +1866,6 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, goto error; phy_resume(phydev); - if (!phydev->is_on_sfp_module) - phy_led_triggers_register(phydev); /** * If the external phy used by current mac interface is managed by @@ -1982,9 +1980,6 @@ void phy_detach(struct phy_device *phydev) phydev->phy_link_change = NULL; phydev->phylink = NULL; - if (!phydev->is_on_sfp_module) - phy_led_triggers_unregister(phydev); - if (phydev->mdio.dev.driver) module_put(phydev->mdio.dev.driver->owner); @@ -3778,16 +3773,27 @@ static int phy_probe(struct device *dev) /* Set the state to READY by default */ phydev->state = PHY_READY; + /* Register the PHY LED triggers */ + if (!phydev->is_on_sfp_module) + phy_led_triggers_register(phydev); + /* Get the LEDs from the device tree, and instantiate standard * LEDs for them. */ - if (IS_ENABLED(CONFIG_PHYLIB_LEDS) && !phy_driver_is_genphy(phydev)) + if (IS_ENABLED(CONFIG_PHYLIB_LEDS) && !phy_driver_is_genphy(phydev)) { err = of_phy_leds(phydev); + if (err) + goto out; + } + + return 0; out: + if (!phydev->is_on_sfp_module) + phy_led_triggers_unregister(phydev); + /* Re-assert the reset signal on error */ - if (err) - phy_device_reset(phydev, 1); + phy_device_reset(phydev, 1); return err; } @@ -3801,6 +3807,9 @@ static int phy_remove(struct device *dev) if (IS_ENABLED(CONFIG_PHYLIB_LEDS) && !phy_driver_is_genphy(phydev)) phy_leds_unregister(phydev); + if (!phydev->is_on_sfp_module) + phy_led_triggers_unregister(phydev); + phydev->state = PHY_DOWN; phy_cleanup_ports(phydev);
diff --git a/drivers/net/phy/qcom/qca807x.c b/drivers/net/phy/qcom/qca807x.c index d8f1ce5..6004da5 100644 --- a/drivers/net/phy/qcom/qca807x.c +++ b/drivers/net/phy/qcom/qca807x.c
@@ -375,7 +375,7 @@ static int qca807x_gpio_get(struct gpio_chip *gc, unsigned int offset) reg = QCA807X_MMD7_LED_FORCE_CTRL(offset); val = phy_read_mmd(priv->phy, MDIO_MMD_AN, reg); - return FIELD_GET(QCA807X_GPIO_FORCE_MODE_MASK, val); + return !!FIELD_GET(QCA807X_GPIO_FORCE_MODE_MASK, val); } static int qca807x_gpio_set(struct gpio_chip *gc, unsigned int offset, int value)
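A gpio_chip .get() callback is expected to return 0 or 1 (or a negative errno), while FIELD_GET() yields the raw field value, which for a field wider than one bit can exceed 1; the added !! collapses any nonzero value to 1. A userspace model of the difference (the mask value and the simplified FIELD_GET() are illustrative stand-ins, not the qca807x definitions):

#include <stdio.h>

/* Simplified stand-in for the kernel's FIELD_GET(): divide the masked
 * value by the mask's lowest set bit to shift the field down. */
#define FIELD_GET(mask, val) (((val) & (mask)) / ((mask) & -(mask)))

/* hypothetical two-bit field occupying bits 5..6 */
#define FORCE_MODE_MASK 0x0060u

int main(void)
{
	unsigned int reg = 0x0060;	/* both bits of the field set */

	printf("raw field value: %u\n", FIELD_GET(FORCE_MODE_MASK, reg));
	printf("normalized:      %u\n", !!FIELD_GET(FORCE_MODE_MASK, reg));
	return 0;
}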
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index f4bf53d..ce89246 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c
@@ -367,6 +367,12 @@ static void sfp_fixup_ignore_tx_fault(struct sfp *sfp) sfp->state_ignore_mask |= SFP_F_TX_FAULT; } +static void sfp_fixup_ignore_tx_fault_and_los(struct sfp *sfp) +{ + sfp_fixup_ignore_tx_fault(sfp); + sfp_fixup_ignore_los(sfp); +} + static void sfp_fixup_ignore_hw(struct sfp *sfp, unsigned int mask) { sfp->state_hw_mask &= ~mask; @@ -474,11 +480,16 @@ static void sfp_quirk_ubnt_uf_instant(const struct sfp_eeprom_id *id, { /* Ubiquiti U-Fiber Instant module claims that support all transceiver * types including 10G Ethernet which is not truth. So clear all claimed - * modes and set only one mode which module supports: 1000baseX_Full. + * modes and set only one mode which module supports: 1000baseX_Full, + * along with the Autoneg and pause bits. */ linkmode_zero(caps->link_modes); linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, caps->link_modes); + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, caps->link_modes); + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, caps->link_modes); + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, caps->link_modes); + phy_interface_zero(caps->interfaces); __set_bit(PHY_INTERFACE_MODE_1000BASEX, caps->interfaces); } @@ -530,7 +541,7 @@ static const struct sfp_quirk sfp_quirks[] = { // Huawei MA5671A can operate at 2500base-X, but report 1.2GBd NRZ in // their EEPROM SFP_QUIRK("HUAWEI", "MA5671A", sfp_quirk_2500basex, - sfp_fixup_ignore_tx_fault), + sfp_fixup_ignore_tx_fault_and_los), // Lantech 8330-262D-E and 8330-265D can operate at 2500base-X, but // incorrectly report 2500MBd NRZ in their EEPROM.
diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index 937dd6d..120aeb5 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c
@@ -1290,7 +1290,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev, static void __team_port_change_port_removed(struct team_port *port); -static int team_port_del(struct team *team, struct net_device *port_dev) +static int team_port_del(struct team *team, struct net_device *port_dev, bool unregister) { struct net_device *dev = team->dev; struct team_port *port; @@ -1328,7 +1328,13 @@ static int team_port_del(struct team *team, struct net_device *port_dev) __team_port_change_port_removed(port); team_port_set_orig_dev_addr(port); - dev_set_mtu(port_dev, port->orig.mtu); + if (unregister) { + netdev_lock_ops(port_dev); + __netif_set_mtu(port_dev, port->orig.mtu); + netdev_unlock_ops(port_dev); + } else { + dev_set_mtu(port_dev, port->orig.mtu); + } kfree_rcu(port, rcu); netdev_info(dev, "Port device %s removed\n", portname); netdev_compute_master_upper_features(team->dev, true); @@ -1632,7 +1638,7 @@ static void team_uninit(struct net_device *dev) ASSERT_RTNL(); list_for_each_entry_safe(port, tmp, &team->port_list, list) - team_port_del(team, port->dev); + team_port_del(team, port->dev, false); __team_change_mode(team, NULL); /* cleanup */ __team_options_unregister(team, team_options, ARRAY_SIZE(team_options)); @@ -1931,7 +1937,16 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev) ASSERT_RTNL(); - return team_port_del(team, port_dev); + return team_port_del(team, port_dev, false); +} + +static int team_del_slave_on_unregister(struct net_device *dev, struct net_device *port_dev) +{ + struct team *team = netdev_priv(dev); + + ASSERT_RTNL(); + + return team_port_del(team, port_dev, true); } static netdev_features_t team_fix_features(struct net_device *dev, @@ -2043,6 +2058,68 @@ static const struct ethtool_ops team_ethtool_ops = { * rt netlink interface ***********************/ +/* For the tx path we need a link-up and enabled port; for parsing, any + * port suffices. + */ +static struct team_port *team_header_port_get_rcu(struct team *team, + bool txable) +{ + struct team_port *port; + + list_for_each_entry_rcu(port, &team->port_list, list) { + if (!txable || team_port_txable(port)) + return port; + } + + return NULL; +} + +static int team_header_create(struct sk_buff *skb, struct net_device *team_dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) +{ + struct team *team = netdev_priv(team_dev); + const struct header_ops *port_ops; + struct team_port *port; + int ret = 0; + + rcu_read_lock(); + port = team_header_port_get_rcu(team, true); + if (port) { + port_ops = READ_ONCE(port->dev->header_ops); + if (port_ops && port_ops->create) + ret = port_ops->create(skb, port->dev, + type, daddr, saddr, len); + } + rcu_read_unlock(); + return ret; +} + +static int team_header_parse(const struct sk_buff *skb, + const struct net_device *team_dev, + unsigned char *haddr) +{ + struct team *team = netdev_priv(team_dev); + const struct header_ops *port_ops; + struct team_port *port; + int ret = 0; + + rcu_read_lock(); + port = team_header_port_get_rcu(team, false); + if (port) { + port_ops = READ_ONCE(port->dev->header_ops); + if (port_ops && port_ops->parse) + ret = port_ops->parse(skb, port->dev, haddr); + } + rcu_read_unlock(); + return ret; +} + +static const struct header_ops team_header_ops = { + .create = team_header_create, + .parse = team_header_parse, +}; + static void team_setup_by_port(struct net_device *dev, struct net_device *port_dev) { @@ -2051,7 +2128,8 @@ static void team_setup_by_port(struct net_device *dev, if (port_dev->type == ARPHRD_ETHER) dev->header_ops = team->header_ops_cache; else - dev->header_ops = port_dev->header_ops; + dev->header_ops = port_dev->header_ops ? + &team_header_ops : NULL; dev->type = port_dev->type; dev->hard_header_len = port_dev->hard_header_len; dev->needed_headroom = port_dev->needed_headroom; @@ -2924,7 +3002,7 @@ static int team_device_event(struct notifier_block *unused, !!netif_oper_up(port->dev)); break; case NETDEV_UNREGISTER: - team_del_slave(port->team->dev, dev); + team_del_slave_on_unregister(port->team->dev, dev); break; case NETDEV_FEAT_CHANGE: if (!port->team->notifier_ctx) { @@ -2997,3 +3075,4 @@ MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>"); MODULE_DESCRIPTION("Ethernet team device driver"); MODULE_ALIAS_RTNL_LINK(DRV_NAME); +MODULE_IMPORT_NS("NETDEV_INTERNAL");
diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h index a5f93b6..fa5cab9 100644 --- a/drivers/net/tun_vnet.h +++ b/drivers/net/tun_vnet.h
@@ -244,7 +244,7 @@ tun_vnet_hdr_tnl_from_skb(unsigned int flags, if (virtio_net_hdr_tnl_from_skb(skb, tnl_hdr, has_tnl_offload, tun_vnet_is_little_endian(flags), - vlan_hlen, true)) { + vlan_hlen, true, false)) { struct virtio_net_hdr_v1 *hdr = &tnl_hdr->hash_hdr.hdr; struct skb_shared_info *sinfo = skb_shinfo(skb);
diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index cbffa9a..dd53f41 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c
@@ -1395,14 +1395,14 @@ static int aqc111_suspend(struct usb_interface *intf, pm_message_t message) aqc111_write16_cmd_nopm(dev, AQ_ACCESS_MAC, SFR_MEDIUM_STATUS_MODE, 2, ®16); - aqc111_write_cmd(dev, AQ_WOL_CFG, 0, 0, - WOL_CFG_SIZE, &wol_cfg); - aqc111_write32_cmd(dev, AQ_PHY_OPS, 0, 0, - &aqc111_data->phy_cfg); + aqc111_write_cmd_nopm(dev, AQ_WOL_CFG, 0, 0, + WOL_CFG_SIZE, &wol_cfg); + aqc111_write32_cmd_nopm(dev, AQ_PHY_OPS, 0, 0, + &aqc111_data->phy_cfg); } else { aqc111_data->phy_cfg |= AQ_LOW_POWER; - aqc111_write32_cmd(dev, AQ_PHY_OPS, 0, 0, - &aqc111_data->phy_cfg); + aqc111_write32_cmd_nopm(dev, AQ_PHY_OPS, 0, 0, + &aqc111_data->phy_cfg); /* Disable RX path */ aqc111_read16_cmd_nopm(dev, AQ_ACCESS_MAC,
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 7057c6c..bb99297 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c
@@ -1656,6 +1656,7 @@ int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset) struct usbnet *dev = netdev_priv(skb_in->dev); struct usb_cdc_ncm_ndp16 *ndp16; int ret = -EINVAL; + size_t ndp_len; if ((ndpoffset + sizeof(struct usb_cdc_ncm_ndp16)) > skb_in->len) { netif_dbg(dev, rx_err, dev->net, "invalid NDP offset <%u>\n", @@ -1675,8 +1676,8 @@ int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset) sizeof(struct usb_cdc_ncm_dpe16)); ret--; /* we process NDP entries except for the last one */ - if ((sizeof(struct usb_cdc_ncm_ndp16) + - ret * (sizeof(struct usb_cdc_ncm_dpe16))) > skb_in->len) { + ndp_len = struct_size_t(struct usb_cdc_ncm_ndp16, dpe16, ret); + if (ndpoffset + ndp_len > skb_in->len) { netif_dbg(dev, rx_err, dev->net, "Invalid nframes = %d\n", ret); ret = -EINVAL; } @@ -1692,6 +1693,7 @@ int cdc_ncm_rx_verify_ndp32(struct sk_buff *skb_in, int ndpoffset) struct usbnet *dev = netdev_priv(skb_in->dev); struct usb_cdc_ncm_ndp32 *ndp32; int ret = -EINVAL; + size_t ndp_len; if ((ndpoffset + sizeof(struct usb_cdc_ncm_ndp32)) > skb_in->len) { netif_dbg(dev, rx_err, dev->net, "invalid NDP offset <%u>\n", @@ -1711,8 +1713,8 @@ int cdc_ncm_rx_verify_ndp32(struct sk_buff *skb_in, int ndpoffset) sizeof(struct usb_cdc_ncm_dpe32)); ret--; /* we process NDP entries except for the last one */ - if ((sizeof(struct usb_cdc_ncm_ndp32) + - ret * (sizeof(struct usb_cdc_ncm_dpe32))) > skb_in->len) { + ndp_len = struct_size_t(struct usb_cdc_ncm_ndp32, dpe32, ret); + if (ndpoffset + ndp_len > skb_in->len) { netif_dbg(dev, rx_err, dev->net, "Invalid nframes = %d\n", ret); ret = -EINVAL; }
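struct_size_t() from <linux/overflow.h> computes the size of a structure ending in a flexible array for a given element count, and the rewritten check also anchors the comparison at ndpoffset so the NDP's absolute end is tested against the skb length rather than its size alone. A self-contained userspace sketch; the macro is reimplemented without the kernel version's overflow guards, and the struct abbreviates the CDC NCM NDP16 layout:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Userspace stand-in for struct_size_t(); the kernel version also
 * saturates on arithmetic overflow. */
#define struct_size_t(type, member, count) \
	(offsetof(type, member) + sizeof(((type *)0)->member[0]) * (count))

/* abbreviated sketch of the NDP16 header with its flexible array */
struct ndp16 {
	uint32_t signature;
	uint16_t length;
	uint16_t next_index;
	struct { uint16_t index, len; } dpe16[];
};

int main(void)
{
	size_t nframes = 4, ndpoffset = 12, skb_len = 64;
	size_t ndp_len = struct_size_t(struct ndp16, dpe16, nframes);

	/* bound the absolute end of the NDP, not just its size, so a
	 * large ndpoffset can no longer slip past the check */
	if (ndpoffset + ndp_len > skb_len)
		puts("invalid NDP, drop");
	else
		printf("NDP of %zu bytes fits in the frame\n", ndp_len);
	return 0;
}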
diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c index 613fc69..ee9c48f 100644 --- a/drivers/net/usb/kalmia.c +++ b/drivers/net/usb/kalmia.c
@@ -132,11 +132,18 @@ kalmia_bind(struct usbnet *dev, struct usb_interface *intf) { int status; u8 ethernet_addr[ETH_ALEN]; + static const u8 ep_addr[] = { + 1 | USB_DIR_IN, + 2 | USB_DIR_OUT, + 0}; /* Don't bind to AT command interface */ if (intf->cur_altsetting->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC) return -EINVAL; + if (!usb_check_bulk_endpoints(intf, ep_addr)) + return -ENODEV; + dev->in = usb_rcvbulkpipe(dev->udev, 0x81 & USB_ENDPOINT_NUMBER_MASK); dev->out = usb_sndbulkpipe(dev->udev, 0x02 & USB_ENDPOINT_NUMBER_MASK); dev->status = NULL;
diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index c9efb7d..cb2472b 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c
@@ -765,7 +765,6 @@ static void kaweth_set_rx_mode(struct net_device *net) netdev_dbg(net, "Setting Rx mode to %d\n", packet_filter_bitmap); - netif_stop_queue(net); if (net->flags & IFF_PROMISC) { packet_filter_bitmap |= KAWETH_PACKET_FILTER_PROMISCUOUS; @@ -775,7 +774,6 @@ static void kaweth_set_rx_mode(struct net_device *net) } kaweth->packet_filter_bitmap = packet_filter_bitmap; - netif_wake_queue(net); } /**************************************************************** @@ -885,6 +883,13 @@ static int kaweth_probe( const eth_addr_t bcast_addr = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; int result = 0; int rv = -EIO; + static const u8 bulk_ep_addr[] = { + 1 | USB_DIR_IN, + 2 | USB_DIR_OUT, + 0}; + static const u8 int_ep_addr[] = { + 3 | USB_DIR_IN, + 0}; dev_dbg(dev, "Kawasaki Device Probe (Device number:%d): 0x%4.4x:0x%4.4x:0x%4.4x\n", @@ -898,6 +903,12 @@ static int kaweth_probe( (int)udev->descriptor.bLength, (int)udev->descriptor.bDescriptorType); + if (!usb_check_bulk_endpoints(intf, bulk_ep_addr) || + !usb_check_int_endpoints(intf, int_ep_addr)) { + dev_err(dev, "couldn't find required endpoints\n"); + return -ENODEV; + } + netdev = alloc_etherdev(sizeof(*kaweth)); if (!netdev) return -ENOMEM;
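kalmia, kaweth and (further down) pegasus all gain the same hardening: before wiring hard-coded endpoint numbers into URBs, probe asks the USB core whether the interface really exposes endpoints of the right type at those addresses. The kernel helpers take a zero-terminated array of endpoint addresses (endpoint number ORed with the direction bit); here is a userspace model of the membership check they perform:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define USB_DIR_IN  0x80
#define USB_DIR_OUT 0x00

/* Model of the helpers' core test: every address in the
 * zero-terminated wanted[] list must appear among the endpoint
 * addresses the interface actually describes. */
static bool check_endpoints(const unsigned char *have, size_t n_have,
			    const unsigned char *wanted)
{
	for (; *wanted; wanted++) {
		bool found = false;

		for (size_t i = 0; i < n_have; i++)
			if (have[i] == *wanted)
				found = true;
		if (!found)
			return false;
	}
	return true;
}

int main(void)
{
	/* endpoints a well-behaved device exposes */
	const unsigned char dev_eps[] = { 1 | USB_DIR_IN, 2 | USB_DIR_OUT };
	/* addresses the driver is about to hard-code into its URBs */
	static const unsigned char wanted[] =
		{ 1 | USB_DIR_IN, 2 | USB_DIR_OUT, 0 };

	printf("endpoints present: %d\n",
	       check_endpoints(dev_eps, 2, wanted));
	return 0;
}

Rejecting a malicious or broken device at probe time is cheaper and safer than submitting URBs to endpoints that do not exist.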
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 8da8e04..19cdf69 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c
@@ -2094,8 +2094,6 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev) dev->mdiobus->phy_mask = ~(1 << 1); break; case ID_REV_CHIP_ID_7801_: - /* scan thru PHYAD[2..0] */ - dev->mdiobus->phy_mask = ~(0xFF); break; } @@ -3121,6 +3119,10 @@ static int lan78xx_init_ltm(struct lan78xx_net *dev) int ret; u32 buf; + /* LAN7850 is USB 2.0 and does not support LTM */ + if (dev->chipid == ID_REV_CHIP_ID_7850_) + return 0; + ret = lan78xx_read_reg(dev, USB_CFG1, &buf); if (ret < 0) goto init_ltm_failed; @@ -3831,6 +3833,7 @@ static void lan78xx_rx_csum_offload(struct lan78xx_net *dev, */ if (!(dev->net->features & NETIF_F_RXCSUM) || unlikely(rx_cmd_a & RX_CMD_A_ICSM_) || + unlikely(rx_cmd_a & RX_CMD_A_CSE_MASK_) || ((rx_cmd_a & RX_CMD_A_FVTG_) && !(dev->net->features & NETIF_F_HW_VLAN_CTAG_RX))) { skb->ip_summed = CHECKSUM_NONE; @@ -3903,7 +3906,8 @@ static int lan78xx_rx(struct lan78xx_net *dev, struct sk_buff *skb, return 0; } - if (unlikely(rx_cmd_a & RX_CMD_A_RED_)) { + if (unlikely(rx_cmd_a & RX_CMD_A_RED_) && + (rx_cmd_a & RX_CMD_A_RX_HARD_ERRS_MASK_)) { netif_dbg(dev, rx_err, dev->net, "Error rx_cmd_a=0x%08x", rx_cmd_a); } else { @@ -4178,7 +4182,7 @@ static struct skb_data *lan78xx_tx_buf_fill(struct lan78xx_net *dev, } tx_data += len; - entry->length += len; + entry->length += max_t(unsigned int, len, ETH_ZLEN); entry->num_of_packet += skb_shinfo(skb)->gso_segs ?: 1; dev_kfree_skb_any(skb); @@ -4548,8 +4552,6 @@ static void lan78xx_disconnect(struct usb_interface *intf) phylink_disconnect_phy(dev->phylink); rtnl_unlock(); - netif_napi_del(&dev->napi); - unregister_netdev(net); timer_shutdown_sync(&dev->stat_monitor);
diff --git a/drivers/net/usb/lan78xx.h b/drivers/net/usb/lan78xx.h index 968e5e5..17a934a 100644 --- a/drivers/net/usb/lan78xx.h +++ b/drivers/net/usb/lan78xx.h
@@ -74,6 +74,9 @@ #define RX_CMD_A_ICSM_ (0x00004000) #define RX_CMD_A_LEN_MASK_ (0x00003FFF) +#define RX_CMD_A_RX_HARD_ERRS_MASK_ \ + (RX_CMD_A_RX_ERRS_MASK_ & ~RX_CMD_A_CSE_MASK_) + /* Rx Command B */ #define RX_CMD_B_CSUM_SHIFT_ (16) #define RX_CMD_B_CSUM_MASK_ (0xFFFF0000)
diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 4f539b5..db85f40 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c
@@ -28,6 +28,17 @@ static const char driver_name[] = "pegasus"; BMSR_100FULL | BMSR_ANEGCAPABLE) #define CARRIER_CHECK_DELAY (2 * HZ) +/* + * USB endpoints. + */ + +enum pegasus_usb_ep { + PEGASUS_USB_EP_CONTROL = 0, + PEGASUS_USB_EP_BULK_IN = 1, + PEGASUS_USB_EP_BULK_OUT = 2, + PEGASUS_USB_EP_INT_IN = 3, +}; + static bool loopback; static bool mii_mode; static char *devid; @@ -542,7 +553,7 @@ static void read_bulk_callback(struct urb *urb) goto tl_sched; goon: usb_fill_bulk_urb(pegasus->rx_urb, pegasus->usb, - usb_rcvbulkpipe(pegasus->usb, 1), + usb_rcvbulkpipe(pegasus->usb, PEGASUS_USB_EP_BULK_IN), pegasus->rx_skb->data, PEGASUS_MTU, read_bulk_callback, pegasus); rx_status = usb_submit_urb(pegasus->rx_urb, GFP_ATOMIC); @@ -582,7 +593,7 @@ static void rx_fixup(struct tasklet_struct *t) return; } usb_fill_bulk_urb(pegasus->rx_urb, pegasus->usb, - usb_rcvbulkpipe(pegasus->usb, 1), + usb_rcvbulkpipe(pegasus->usb, PEGASUS_USB_EP_BULK_IN), pegasus->rx_skb->data, PEGASUS_MTU, read_bulk_callback, pegasus); try_again: @@ -710,7 +721,7 @@ static netdev_tx_t pegasus_start_xmit(struct sk_buff *skb, ((__le16 *) pegasus->tx_buff)[0] = cpu_to_le16(l16); skb_copy_from_linear_data(skb, pegasus->tx_buff + 2, skb->len); usb_fill_bulk_urb(pegasus->tx_urb, pegasus->usb, - usb_sndbulkpipe(pegasus->usb, 2), + usb_sndbulkpipe(pegasus->usb, PEGASUS_USB_EP_BULK_OUT), pegasus->tx_buff, count, write_bulk_callback, pegasus); if ((res = usb_submit_urb(pegasus->tx_urb, GFP_ATOMIC))) { @@ -801,8 +812,19 @@ static void unlink_all_urbs(pegasus_t *pegasus) static int alloc_urbs(pegasus_t *pegasus) { + static const u8 bulk_ep_addr[] = { + 1 | USB_DIR_IN, + 2 | USB_DIR_OUT, + 0}; + static const u8 int_ep_addr[] = { + 3 | USB_DIR_IN, + 0}; int res = -ENOMEM; + if (!usb_check_bulk_endpoints(pegasus->intf, bulk_ep_addr) || + !usb_check_int_endpoints(pegasus->intf, int_ep_addr)) + return -ENODEV; + pegasus->rx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!pegasus->rx_urb) { return res; @@ -837,7 +859,7 @@ static int pegasus_open(struct net_device *net) set_registers(pegasus, EthID, 6, net->dev_addr); usb_fill_bulk_urb(pegasus->rx_urb, pegasus->usb, - usb_rcvbulkpipe(pegasus->usb, 1), + usb_rcvbulkpipe(pegasus->usb, PEGASUS_USB_EP_BULK_IN), pegasus->rx_skb->data, PEGASUS_MTU, read_bulk_callback, pegasus); if ((res = usb_submit_urb(pegasus->rx_urb, GFP_KERNEL))) { @@ -848,7 +870,7 @@ static int pegasus_open(struct net_device *net) } usb_fill_int_urb(pegasus->intr_urb, pegasus->usb, - usb_rcvintpipe(pegasus->usb, 3), + usb_rcvintpipe(pegasus->usb, PEGASUS_USB_EP_INT_IN), pegasus->intr_buff, sizeof(pegasus->intr_buff), intr_callback, pegasus, pegasus->intr_interval); if ((res = usb_submit_urb(pegasus->intr_urb, GFP_KERNEL))) { @@ -1133,16 +1155,31 @@ static int pegasus_probe(struct usb_interface *intf, pegasus_t *pegasus; int dev_index = id - pegasus_ids; int res = -ENOMEM; + static const u8 bulk_ep_addr[] = { + PEGASUS_USB_EP_BULK_IN | USB_DIR_IN, + PEGASUS_USB_EP_BULK_OUT | USB_DIR_OUT, + 0}; + static const u8 int_ep_addr[] = { + PEGASUS_USB_EP_INT_IN | USB_DIR_IN, + 0}; if (pegasus_blacklisted(dev)) return -ENODEV; + /* Verify that all required endpoints are present */ + if (!usb_check_bulk_endpoints(intf, bulk_ep_addr) || + !usb_check_int_endpoints(intf, int_ep_addr)) { + dev_err(&intf->dev, "Missing or invalid endpoints\n"); + return -ENODEV; + } + net = alloc_etherdev(sizeof(struct pegasus)); if (!net) goto out; pegasus = netdev_priv(net); pegasus->dev_index = dev_index; + pegasus->intf = intf; res = alloc_urbs(pegasus); if (res < 0) { @@ -1154,7 +1191,6 @@ static int pegasus_probe(struct usb_interface *intf, INIT_DELAYED_WORK(&pegasus->carrier_check, check_carrier); - pegasus->intf = intf; pegasus->usb = dev; pegasus->net = net;
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 3a4985b..05acac1 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c
@@ -928,7 +928,7 @@ static int qmi_wwan_resume(struct usb_interface *intf) static const struct driver_info qmi_wwan_info = { .description = "WWAN/QMI device", - .flags = FLAG_WWAN | FLAG_SEND_ZLP, + .flags = FLAG_WWAN | FLAG_NOMAXMTU | FLAG_SEND_ZLP, .bind = qmi_wwan_bind, .unbind = qmi_wwan_unbind, .manage_power = qmi_wwan_manage_power, @@ -937,7 +937,7 @@ static const struct driver_info qmi_wwan_info = { static const struct driver_info qmi_wwan_info_quirk_dtr = { .description = "WWAN/QMI device", - .flags = FLAG_WWAN | FLAG_SEND_ZLP, + .flags = FLAG_WWAN | FLAG_NOMAXMTU | FLAG_SEND_ZLP, .bind = qmi_wwan_bind, .unbind = qmi_wwan_unbind, .manage_power = qmi_wwan_manage_power,
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 4af8572..0c83bbb 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c
@@ -10054,6 +10054,7 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_DLINK, 0xb301) }, { USB_DEVICE(VENDOR_ID_DELL, 0xb097) }, { USB_DEVICE(VENDOR_ID_ASUS, 0x1976) }, + { USB_DEVICE(VENDOR_ID_TRENDNET, 0xe02b) }, {} };
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index ed86ba8..b72ba08 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c
@@ -1829,11 +1829,12 @@ usbnet_probe(struct usb_interface *udev, const struct usb_device_id *prod) if ((dev->driver_info->flags & FLAG_NOARP) != 0) net->flags |= IFF_NOARP; - if (net->max_mtu > (dev->hard_mtu - net->hard_header_len)) + if ((dev->driver_info->flags & FLAG_NOMAXMTU) == 0 && + net->max_mtu > (dev->hard_mtu - net->hard_header_len)) net->max_mtu = dev->hard_mtu - net->hard_header_len; - if (net->mtu > net->max_mtu) - net->mtu = net->max_mtu; + if (net->mtu > (dev->hard_mtu - net->hard_header_len)) + net->mtu = dev->hard_mtu - net->hard_header_len; } else if (!info->in || !info->out) status = usbnet_get_endpoints(dev, udev);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 72d6a9c..c0b9bc5 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c
@@ -381,8 +381,6 @@ struct receive_queue { struct xdp_buff **xsk_buffs; }; -#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 - /* Control VQ buffers: protected by the rtnl lock */ struct control_buf { struct virtio_net_ctrl_hdr hdr; @@ -486,7 +484,7 @@ struct virtnet_info { /* Must be last as it ends in a flexible-array member. */ TRAILING_OVERLAP(struct virtio_net_rss_config_trailer, rss_trailer, hash_key_data, - u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE]; + u8 rss_hash_key_data[NETDEV_RSS_KEY_LEN]; ); }; static_assert(offsetof(struct virtnet_info, rss_trailer.hash_key_data) == @@ -3267,8 +3265,12 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) struct virtio_net_hdr_v1_hash_tunnel *hdr; int num_sg; unsigned hdr_len = vi->hdr_len; + bool feature_hdrlen; bool can_push; + feature_hdrlen = virtio_has_feature(vi->vdev, + VIRTIO_NET_F_GUEST_HDRLEN); + pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); /* Make sure it's safe to cast between formats */ @@ -3288,7 +3290,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) if (virtio_net_hdr_tnl_from_skb(skb, hdr, vi->tx_tnl, virtio_is_little_endian(vi->vdev), 0, - false)) + false, feature_hdrlen)) return -EPROTO; if (vi->mergeable_rx_bufs) @@ -3351,6 +3353,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) /* Don't wait up for transmitted skbs to be freed. */ if (!use_napi) { skb_orphan(skb); + skb_dst_drop(skb); nf_reset_ct(skb); } @@ -6703,6 +6706,7 @@ static int virtnet_probe(struct virtio_device *vdev) struct virtnet_info *vi; u16 max_queue_pairs; int mtu = 0; + u16 key_sz; /* Find if host supports multiqueue/rss virtio_net device */ max_queue_pairs = 1; @@ -6837,14 +6841,13 @@ static int virtnet_probe(struct virtio_device *vdev) } if (vi->has_rss || vi->has_rss_hash_report) { - vi->rss_key_size = - virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); - if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) { - dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n", - vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE); - err = -EINVAL; - goto free; - } + key_sz = virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); + + vi->rss_key_size = min_t(u16, key_sz, NETDEV_RSS_KEY_LEN); + if (key_sz > vi->rss_key_size) + dev_warn(&vdev->dev, + "rss_max_key_size=%u exceeds driver limit %u, clamping\n", + key_sz, vi->rss_key_size); vi->rss_hash_types_supported = virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types));
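Rather than failing the probe when a device advertises an RSS key bigger than the driver can hold, virtnet_probe() now clamps to NETDEV_RSS_KEY_LEN and warns. The pattern in miniature as userspace C (NETDEV_RSS_KEY_LEN is 52 in current kernels; the device-reported value is invented):

#include <stdio.h>

#define NETDEV_RSS_KEY_LEN 52	/* driver-side storage limit */

int main(void)
{
	unsigned int key_sz = 64;	/* invented device-reported size */
	unsigned int rss_key_size =
		key_sz < NETDEV_RSS_KEY_LEN ? key_sz : NETDEV_RSS_KEY_LEN;

	/* clamp and warn instead of failing with -EINVAL */
	if (key_sz > rss_key_size)
		fprintf(stderr,
			"rss_max_key_size=%u exceeds driver limit %u, clamping\n",
			key_sz, rss_key_size);
	printf("using a %u-byte RSS key\n", rss_key_size);
	return 0;
}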
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 05558b6..a94ac82 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c
@@ -1965,12 +1965,14 @@ static struct sk_buff *vxlan_na_create(struct sk_buff *request, ns_olen = request->len - skb_network_offset(request) - sizeof(struct ipv6hdr) - sizeof(*ns); for (i = 0; i < ns_olen-1; i += (ns->opt[i+1]<<3)) { - if (!ns->opt[i + 1]) { + if (!ns->opt[i + 1] || i + (ns->opt[i + 1] << 3) > ns_olen) { kfree_skb(reply); return NULL; } if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) { - daddr = ns->opt + i + sizeof(struct nd_opt_hdr); + if ((ns->opt[i + 1] << 3) >= + sizeof(struct nd_opt_hdr) + ETH_ALEN) + daddr = ns->opt + i + sizeof(struct nd_opt_hdr); break; } } @@ -2130,6 +2132,11 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) { struct ipv6hdr *pip6; + /* check if nd_tbl is not initialized due to + * ipv6.disable=1 set during boot + */ + if (!ipv6_stub->nd_tbl) + return false; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) return false; pip6 = ipv6_hdr(skb);
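Both halves of the vxlan fix are bounds checks on IPv6 neighbour-discovery options, which are TLVs whose second byte encodes the option length in 8-byte units: a zero length would loop forever, an overlong length would walk past the buffer, and the source link-layer option must additionally be long enough to hold a MAC before its address is taken. A userspace model of the validated walk (find_src_lladdr() is an invented name):

#include <stdio.h>
#include <stddef.h>

#define ND_OPT_SOURCE_LL_ADDR 1

/* Walk ND options: opt[i] is the type, opt[i + 1] the length in
 * 8-byte units. Reject a zero length (otherwise the loop never
 * advances) and a length that runs past the buffer. */
static const unsigned char *find_src_lladdr(const unsigned char *opt,
					    size_t olen)
{
	for (size_t i = 0; i + 1 < olen; i += opt[i + 1] << 3) {
		if (!opt[i + 1] || i + (opt[i + 1] << 3) > olen)
			return NULL;	/* malformed: drop the packet */
		if (opt[i] == ND_OPT_SOURCE_LL_ADDR)
			return opt + i + 2;	/* MAC follows the 2-byte header */
	}
	return NULL;
}

int main(void)
{
	/* type 1 (source lladdr), length 1 (= 8 bytes), 6-byte MAC */
	const unsigned char opts[8] =
		{ 1, 1, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff };
	const unsigned char *mac = find_src_lladdr(opts, sizeof(opts));

	if (mac)
		printf("lladdr starts %02x:%02x\n", mac[0], mac[1]);
	return 0;
}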
diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 957e0b2..73f3939 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c
@@ -2550,6 +2550,8 @@ fst_remove_one(struct pci_dev *pdev) fst_disable_intr(card); free_irq(card->irq, card); + tasklet_kill(&fst_tx_task); + tasklet_kill(&fst_int_task); iounmap(card->ctlmem); iounmap(card->mem);
diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 49d959b..85defe1 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include <linux/ieee80211.h> @@ -1110,9 +1110,8 @@ int ath11k_dp_rx_ampdu_stop(struct ath11k *ar, struct ath11k_base *ab = ar->ab; struct ath11k_peer *peer; struct ath11k_sta *arsta = ath11k_sta_to_arsta(params->sta); + struct dp_rx_tid *rx_tid; int vdev_id = arsta->arvif->vdev_id; - dma_addr_t paddr; - bool active; int ret; spin_lock_bh(&ab->base_lock); @@ -1124,15 +1123,14 @@ int ath11k_dp_rx_ampdu_stop(struct ath11k *ar, return -ENOENT; } - paddr = peer->rx_tid[params->tid].paddr; - active = peer->rx_tid[params->tid].active; + rx_tid = &peer->rx_tid[params->tid]; - if (!active) { + if (!rx_tid->active) { spin_unlock_bh(&ab->base_lock); return 0; } - ret = ath11k_peer_rx_tid_reo_update(ar, peer, peer->rx_tid, 1, 0, false); + ret = ath11k_peer_rx_tid_reo_update(ar, peer, rx_tid, 1, 0, false); spin_unlock_bh(&ab->base_lock); if (ret) { ath11k_warn(ab, "failed to update reo for rx tid %d: %d\n", @@ -1141,7 +1139,8 @@ int ath11k_dp_rx_ampdu_stop(struct ath11k *ar, } ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, - params->sta->addr, paddr, + params->sta->addr, + rx_tid->paddr, params->tid, 1, 1); if (ret) ath11k_warn(ab, "failed to send wmi to delete rx tid %d\n",
diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 5a82ede..244d523 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c
@@ -735,6 +735,7 @@ int ath12k_dp_rx_ampdu_stop(struct ath12k *ar, struct ath12k_dp *dp = ath12k_ab_to_dp(ab); struct ath12k_dp_link_peer *peer; struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(params->sta); + struct ath12k_dp_rx_tid *rx_tid; struct ath12k_link_sta *arsta; int vdev_id; bool active; @@ -770,7 +771,8 @@ int ath12k_dp_rx_ampdu_stop(struct ath12k *ar, return 0; } - ret = ath12k_dp_arch_peer_rx_tid_reo_update(dp, peer, peer->dp_peer->rx_tid, + rx_tid = &peer->dp_peer->rx_tid[params->tid]; + ret = ath12k_dp_arch_peer_rx_tid_reo_update(dp, peer, rx_tid, 1, 0, false); spin_unlock_bh(&dp->dp_lock); if (ret) {
diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index c6b8890..b253d1e 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c
@@ -5430,7 +5430,7 @@ int ath12k_mac_op_get_txpower(struct ieee80211_hw *hw, ar->last_tx_power_update)) goto send_tx_power; - params.pdev_id = ar->pdev->pdev_id; + params.pdev_id = ath12k_mac_get_target_pdev_id(ar); params.vdev_id = arvif->vdev_id; params.stats_id = WMI_REQUEST_PDEV_STAT; ret = ath12k_mac_get_fw_stats(ar, ¶ms); @@ -13452,7 +13452,7 @@ void ath12k_mac_op_sta_statistics(struct ieee80211_hw *hw, /* TODO: Use real NF instead of default one. */ signal = rate_info.rssi_comb; - params.pdev_id = ar->pdev->pdev_id; + params.pdev_id = ath12k_mac_get_target_pdev_id(ar); params.vdev_id = 0; params.stats_id = WMI_REQUEST_VDEV_STAT; @@ -13580,7 +13580,7 @@ void ath12k_mac_op_link_sta_statistics(struct ieee80211_hw *hw, spin_unlock_bh(&ar->ab->dp->dp_lock); if (!signal && ahsta->ahvif->vdev_type == WMI_VDEV_TYPE_STA) { - params.pdev_id = ar->pdev->pdev_id; + params.pdev_id = ath12k_mac_get_target_pdev_id(ar); params.vdev_id = 0; params.stats_id = WMI_REQUEST_VDEV_STAT;
diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index eb7615a..48fee93 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c
@@ -8241,8 +8241,6 @@ static int ath12k_wmi_tlv_fw_stats_data_parse(struct ath12k_base *ab, struct ath12k_fw_stats *stats = parse->stats; struct ath12k *ar; struct ath12k_link_vif *arvif; - struct ieee80211_sta *sta; - struct ath12k_sta *ahsta; struct ath12k_link_sta *arsta; int i, ret = 0; const void *data = ptr; @@ -8278,21 +8276,19 @@ static int ath12k_wmi_tlv_fw_stats_data_parse(struct ath12k_base *ab, arvif = ath12k_mac_get_arvif(ar, le32_to_cpu(src->vdev_id)); if (arvif) { - sta = ieee80211_find_sta_by_ifaddr(ath12k_ar_to_hw(ar), - arvif->bssid, - NULL); - if (sta) { - ahsta = ath12k_sta_to_ahsta(sta); - arsta = &ahsta->deflink; + spin_lock_bh(&ab->base_lock); + arsta = ath12k_link_sta_find_by_addr(ab, arvif->bssid); + if (arsta) { arsta->rssi_beacon = le32_to_cpu(src->beacon_snr); ath12k_dbg(ab, ATH12K_DBG_WMI, "wmi stats vdev id %d snr %d\n", src->vdev_id, src->beacon_snr); } else { - ath12k_dbg(ab, ATH12K_DBG_WMI, - "not found station bssid %pM for vdev stat\n", - arvif->bssid); + ath12k_warn(ab, + "not found link sta with bssid %pM for vdev stat\n", + arvif->bssid); } + spin_unlock_bh(&ab->base_lock); } data += sizeof(*src); @@ -8363,8 +8359,6 @@ static int ath12k_wmi_tlv_rssi_chain_parse(struct ath12k_base *ab, struct ath12k_fw_stats *stats = parse->stats; struct ath12k_link_vif *arvif; struct ath12k_link_sta *arsta; - struct ieee80211_sta *sta; - struct ath12k_sta *ahsta; struct ath12k *ar; int vdev_id; int j; @@ -8400,19 +8394,15 @@ static int ath12k_wmi_tlv_rssi_chain_parse(struct ath12k_base *ab, "stats bssid %pM vif %p\n", arvif->bssid, arvif->ahvif->vif); - sta = ieee80211_find_sta_by_ifaddr(ath12k_ar_to_hw(ar), - arvif->bssid, - NULL); - if (!sta) { - ath12k_dbg(ab, ATH12K_DBG_WMI, - "not found station of bssid %pM for rssi chain\n", - arvif->bssid); + guard(spinlock_bh)(&ab->base_lock); + arsta = ath12k_link_sta_find_by_addr(ab, arvif->bssid); + if (!arsta) { + ath12k_warn(ab, + "not found link sta with bssid %pM for rssi chain\n", + arvif->bssid); return -EPROTO; } - ahsta = ath12k_sta_to_ahsta(sta); - arsta = &ahsta->deflink; - BUILD_BUG_ON(ARRAY_SIZE(arsta->chain_signal) > ARRAY_SIZE(stats_rssi->rssi_avg_beacon));
diff --git a/drivers/net/wireless/ath/ath9k/channel.c b/drivers/net/wireless/ath/ath9k/channel.c index 121e51c..8b27d8c 100644 --- a/drivers/net/wireless/ath/ath9k/channel.c +++ b/drivers/net/wireless/ath/ath9k/channel.c
@@ -1006,7 +1006,7 @@ static void ath_scan_send_probe(struct ath_softc *sc, skb_set_queue_mapping(skb, IEEE80211_AC_VO); if (!ieee80211_tx_prepare_skb(sc->hw, vif, skb, band, NULL)) - goto error; + return; txctl.txq = sc->tx.txq_map[IEEE80211_AC_VO]; if (ath_tx_start(sc->hw, skb, &txctl)) @@ -1119,10 +1119,8 @@ ath_chanctx_send_vif_ps_frame(struct ath_softc *sc, struct ath_vif *avp, skb->priority = 7; skb_set_queue_mapping(skb, IEEE80211_AC_VO); - if (!ieee80211_tx_prepare_skb(sc->hw, vif, skb, band, &sta)) { - dev_kfree_skb_any(skb); + if (!ieee80211_tx_prepare_skb(sc->hw, vif, skb, band, &sta)) return false; - } break; default: return false;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index 28e5df0..d24b80e 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
@@ -951,11 +951,10 @@ int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev) goto out; /* try to attach to the target device */ - sdiodev->bus = brcmf_sdio_probe(sdiodev); - if (IS_ERR(sdiodev->bus)) { - ret = PTR_ERR(sdiodev->bus); + ret = brcmf_sdio_probe(sdiodev); + if (ret) goto out; - } + brcmf_sdiod_host_fixup(sdiodev->func2->card->host); out: if (ret)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 30c1959..30f6fcb 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -4445,7 +4445,7 @@ brcmf_sdio_prepare_fw_request(struct brcmf_sdio *bus) return fwreq; } -struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) +int brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) { int ret; struct brcmf_sdio *bus; @@ -4551,11 +4551,12 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) goto fail; } - return bus; + return 0; fail: brcmf_sdio_remove(bus); - return ERR_PTR(ret); + sdiodev->bus = NULL; + return ret; } /* Detach and free everything */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h index 0d18ed1..80180d5 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h
@@ -358,7 +358,7 @@ void brcmf_sdiod_freezer_uncount(struct brcmf_sdio_dev *sdiodev); int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev); int brcmf_sdiod_remove(struct brcmf_sdio_dev *sdiodev); -struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev); +int brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev); void brcmf_sdio_remove(struct brcmf_sdio *bus); void brcmf_sdio_isr(struct brcmf_sdio *bus, bool in_isr);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h index 8d64a27..36159a7 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
@@ -297,6 +297,11 @@ enum iwl_legacy_cmds { SCAN_OFFLOAD_UPDATE_PROFILES_CMD = 0x6E, /** + * @SCAN_START_NOTIFICATION_UMAC: uses &struct iwl_umac_scan_start + */ + SCAN_START_NOTIFICATION_UMAC = 0xb2, + + /** * @MATCH_FOUND_NOTIFICATION: scan match found */ MATCH_FOUND_NOTIFICATION = 0xd9,
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h index 60f0a49..46fcc32 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
@@ -1157,6 +1157,16 @@ enum iwl_umac_scan_abort_status { }; /** + * struct iwl_umac_scan_start - scan start notification + * @uid: scan id, &enum iwl_umac_scan_uid_offsets + * @reserved: for future use + */ +struct iwl_umac_scan_start { + __le32 uid; + __le32 reserved; +} __packed; /* SCAN_START_UMAC_API_S_VER_1 */ + +/** * struct iwl_umac_scan_complete - scan complete notification * @uid: scan id, &enum iwl_umac_scan_uid_offsets * @last_schedule: last scheduling line
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.c b/drivers/net/wireless/intel/iwlwifi/mld/iface.c index 29df747..9215fc7e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.c
@@ -111,14 +111,75 @@ static bool iwl_mld_is_nic_ack_enabled(struct iwl_mld *mld, IEEE80211_HE_MAC_CAP2_ACK_EN); } -static void iwl_mld_set_he_support(struct iwl_mld *mld, - struct ieee80211_vif *vif, - struct iwl_mac_config_cmd *cmd) +struct iwl_mld_mac_wifi_gen_sta_iter_data { + struct ieee80211_vif *vif; + struct iwl_mac_wifi_gen_support *support; +}; + +static void iwl_mld_mac_wifi_gen_sta_iter(void *_data, + struct ieee80211_sta *sta) { - if (vif->type == NL80211_IFTYPE_AP) - cmd->wifi_gen.he_ap_support = 1; - else - cmd->wifi_gen.he_support = 1; + struct iwl_mld_sta *mld_sta = iwl_mld_sta_from_mac80211(sta); + struct iwl_mld_mac_wifi_gen_sta_iter_data *data = _data; + struct ieee80211_link_sta *link_sta; + unsigned int link_id; + + if (mld_sta->vif != data->vif) + return; + + for_each_sta_active_link(data->vif, sta, link_sta, link_id) { + if (link_sta->he_cap.has_he) + data->support->he_support = 1; + if (link_sta->eht_cap.has_eht) + data->support->eht_support = 1; + } +} + +static void iwl_mld_set_wifi_gen(struct iwl_mld *mld, + struct ieee80211_vif *vif, + struct iwl_mac_wifi_gen_support *support) +{ + struct iwl_mld_mac_wifi_gen_sta_iter_data sta_iter_data = { + .vif = vif, + .support = support, + }; + struct ieee80211_bss_conf *link_conf; + unsigned int link_id; + + switch (vif->type) { + case NL80211_IFTYPE_MONITOR: + /* for sniffer, set to HW capabilities */ + support->he_support = 1; + support->eht_support = mld->trans->cfg->eht_supported; + break; + case NL80211_IFTYPE_AP: + /* for AP set according to the link configs */ + for_each_vif_active_link(vif, link_conf, link_id) { + support->he_ap_support |= link_conf->he_support; + support->eht_support |= link_conf->eht_support; + } + break; + default: + /* + * If we have MLO enabled, then the firmware needs to enable + * address translation for the station(s) we add. That depends + * on having EHT enabled in firmware, which in turn depends on + * mac80211 in the iteration below. + * However, mac80211 doesn't enable capabilities on the AP STA + * until it has parsed the association response successfully, + * so set EHT (and HE as a pre-requisite for EHT) when the vif + * is an MLD. + */ + if (ieee80211_vif_is_mld(vif)) { + support->he_support = 1; + support->eht_support = 1; + } + + ieee80211_iterate_stations_mtx(mld->hw, + iwl_mld_mac_wifi_gen_sta_iter, + &sta_iter_data); + break; + } } /* fill the common part for all interface types */ @@ -128,8 +189,6 @@ static void iwl_mld_mac_cmd_fill_common(struct iwl_mld *mld, u32 action) { struct iwl_mld_vif *mld_vif = iwl_mld_vif_from_mac80211(vif); - struct ieee80211_bss_conf *link_conf; - unsigned int link_id; lockdep_assert_wiphy(mld->wiphy); @@ -147,29 +206,7 @@ static void iwl_mld_mac_cmd_fill_common(struct iwl_mld *mld, cmd->nic_not_ack_enabled = cpu_to_le32(!iwl_mld_is_nic_ack_enabled(mld, vif)); - /* If we have MLO enabled, then the firmware needs to enable - * address translation for the station(s) we add. That depends - * on having EHT enabled in firmware, which in turn depends on - * mac80211 in the code below. - * However, mac80211 doesn't enable HE/EHT until it has parsed - * the association response successfully, so just skip all that - * and enable both when we have MLO. - */ - if (ieee80211_vif_is_mld(vif)) { - iwl_mld_set_he_support(mld, vif, cmd); - cmd->wifi_gen.eht_support = 1; - return; - } - - for_each_vif_active_link(vif, link_conf, link_id) { - if (!link_conf->he_support) - continue; - - iwl_mld_set_he_support(mld, vif, cmd); - - /* EHT, if supported, was already set above */ - break; - } + iwl_mld_set_wifi_gen(mld, vif, &cmd->wifi_gen); } static void iwl_mld_fill_mac_cmd_sta(struct iwl_mld *mld,
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 0c53d6b..71a9a72 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c
@@ -1761,6 +1761,16 @@ static int iwl_mld_move_sta_state_up(struct iwl_mld *mld, if (vif->type == NL80211_IFTYPE_STATION) iwl_mld_link_set_2mhz_block(mld, vif, sta); + + if (sta->tdls) { + /* + * update MAC since wifi generation flags may change, + * we also update MAC on association to the AP via the + * vif assoc change + */ + iwl_mld_mac_fw_action(mld, vif, FW_CTXT_ACTION_MODIFY); + } + /* Now the link_sta's capabilities are set, update the FW */ iwl_mld_config_tlc(mld, vif, sta); @@ -1873,6 +1883,15 @@ static int iwl_mld_move_sta_state_down(struct iwl_mld *mld, /* just removed last TDLS STA, so enable PM */ iwl_mld_update_mac_power(mld, vif, false); } + + if (sta->tdls) { + /* + * update MAC since wifi generation flags may change, + * we also update MAC on disassociation to the AP via + * the vif assoc change + */ + iwl_mld_mac_fw_action(mld, vif, FW_CTXT_ACTION_MODIFY); + } } else { return -EINVAL; }
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c index 495e9d8f..9af7929 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c
@@ -171,6 +171,7 @@ static const struct iwl_hcmd_names iwl_mld_legacy_names[] = { HCMD_NAME(MISSED_BEACONS_NOTIFICATION), HCMD_NAME(MAC_PM_POWER_TABLE), HCMD_NAME(MFUART_LOAD_NOTIFICATION), + HCMD_NAME(SCAN_START_NOTIFICATION_UMAC), HCMD_NAME(RSS_CONFIG_CMD), HCMD_NAME(SCAN_ITERATION_COMPLETE_UMAC), HCMD_NAME(REPLY_RX_MPDU_CMD),
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c index f842f51..fbff591 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c
@@ -739,7 +739,7 @@ iwl_mld_set_link_sel_data(struct iwl_mld *mld, /* Ignore any BSS that was not seen in the last MLO scan */ if (ktime_before(link_conf->bss->ts_boottime, - mld->scan.last_mlo_scan_time)) + mld->scan.last_mlo_scan_start_time)) continue; data[n_data].link_id = link_id; @@ -945,7 +945,7 @@ static void _iwl_mld_select_links(struct iwl_mld *mld, if (!mld_vif->authorized || hweight16(usable_links) <= 1) return; - if (WARN(ktime_before(mld->scan.last_mlo_scan_time, + if (WARN(ktime_before(mld->scan.last_mlo_scan_start_time, ktime_sub_ns(ktime_get_boottime_ns(), 5ULL * NSEC_PER_SEC)), "Last MLO scan was too long ago, can't select links\n"))
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/notif.c b/drivers/net/wireless/intel/iwlwifi/mld/notif.c index 240526d..9c88a85 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/notif.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/notif.c
@@ -287,6 +287,8 @@ static void iwl_mld_handle_beacon_notification(struct iwl_mld *mld, * at least enough bytes to cover the structure listed in the CMD_VER_ENTRY. */ +CMD_VERSIONS(scan_start_notif, + CMD_VER_ENTRY(1, iwl_umac_scan_start)) CMD_VERSIONS(scan_complete_notif, CMD_VER_ENTRY(1, iwl_umac_scan_complete)) CMD_VERSIONS(scan_iter_complete_notif, @@ -360,6 +362,7 @@ DEFINE_SIMPLE_CANCELLATION(datapath_monitor, iwl_datapath_monitor_notif, link_id) DEFINE_SIMPLE_CANCELLATION(roc, iwl_roc_notif, activity) DEFINE_SIMPLE_CANCELLATION(scan_complete, iwl_umac_scan_complete, uid) +DEFINE_SIMPLE_CANCELLATION(scan_start, iwl_umac_scan_start, uid) DEFINE_SIMPLE_CANCELLATION(probe_resp_data, iwl_probe_resp_data_notif, mac_id) DEFINE_SIMPLE_CANCELLATION(uapsd_misbehaving_ap, iwl_uapsd_misbehaving_ap_notif, @@ -402,6 +405,8 @@ const struct iwl_rx_handler iwl_mld_rx_handlers[] = { RX_HANDLER_SYNC) RX_HANDLER_NO_OBJECT(LEGACY_GROUP, BA_NOTIF, compressed_ba_notif, RX_HANDLER_SYNC) + RX_HANDLER_OF_SCAN(LEGACY_GROUP, SCAN_START_NOTIFICATION_UMAC, + scan_start_notif) RX_HANDLER_OF_SCAN(LEGACY_GROUP, SCAN_COMPLETE_UMAC, scan_complete_notif) RX_HANDLER_NO_OBJECT(LEGACY_GROUP, SCAN_ITERATION_COMPLETE_UMAC,
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/scan.c b/drivers/net/wireless/intel/iwlwifi/mld/scan.c index a1a4cf3..abd4281 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/scan.c
@@ -473,6 +473,9 @@ iwl_mld_scan_get_cmd_gen_flags(struct iwl_mld *mld, params->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ) flags |= IWL_UMAC_SCAN_GEN_FLAGS_V2_TRIGGER_UHB_SCAN; + if (scan_status == IWL_MLD_SCAN_INT_MLO) + flags |= IWL_UMAC_SCAN_GEN_FLAGS_V2_NTF_START; + if (params->enable_6ghz_passive) flags |= IWL_UMAC_SCAN_GEN_FLAGS_V2_6GHZ_PASSIVE_SCAN; @@ -1817,9 +1820,6 @@ static void iwl_mld_int_mlo_scan_start(struct iwl_mld *mld, ret = _iwl_mld_single_scan_start(mld, vif, req, &ies, IWL_MLD_SCAN_INT_MLO); - if (!ret) - mld->scan.last_mlo_scan_time = ktime_get_boottime_ns(); - IWL_DEBUG_SCAN(mld, "Internal MLO scan: ret=%d\n", ret); } @@ -1904,6 +1904,30 @@ void iwl_mld_handle_match_found_notif(struct iwl_mld *mld, ieee80211_sched_scan_results(mld->hw); } +void iwl_mld_handle_scan_start_notif(struct iwl_mld *mld, + struct iwl_rx_packet *pkt) +{ + struct iwl_umac_scan_start *notif = (void *)pkt->data; + u32 uid = le32_to_cpu(notif->uid); + + if (IWL_FW_CHECK(mld, uid >= ARRAY_SIZE(mld->scan.uid_status), + "FW reports out-of-range scan UID %d\n", uid)) + return; + + if (IWL_FW_CHECK(mld, !(mld->scan.uid_status[uid] & mld->scan.status), + "FW reports scan UID %d we didn't trigger\n", uid)) + return; + + IWL_DEBUG_SCAN(mld, "Scan started: uid=%u type=%u\n", uid, + mld->scan.uid_status[uid]); + if (IWL_FW_CHECK(mld, mld->scan.uid_status[uid] != IWL_MLD_SCAN_INT_MLO, + "FW reports scan start notification %d we didn't trigger\n", + mld->scan.uid_status[uid])) + return; + + mld->scan.last_mlo_scan_start_time = ktime_get_boottime_ns(); +} + void iwl_mld_handle_scan_complete_notif(struct iwl_mld *mld, struct iwl_rx_packet *pkt) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mld/scan.h b/drivers/net/wireless/intel/iwlwifi/mld/scan.h index 69110f0..de5620e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/scan.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/scan.h
@@ -27,6 +27,9 @@ int iwl_mld_sched_scan_start(struct iwl_mld *mld, void iwl_mld_handle_match_found_notif(struct iwl_mld *mld, struct iwl_rx_packet *pkt); +void iwl_mld_handle_scan_start_notif(struct iwl_mld *mld, + struct iwl_rx_packet *pkt); + void iwl_mld_handle_scan_complete_notif(struct iwl_mld *mld, struct iwl_rx_packet *pkt); @@ -114,8 +117,8 @@ enum iwl_mld_traffic_load { * in jiffies. * @last_start_time_jiffies: stores the last start time in jiffies * (interface up/reset/resume). - * @last_mlo_scan_time: start time of the last MLO scan in nanoseconds since - * boot. + * @last_mlo_scan_start_time: start time of the last MLO scan in nanoseconds + * since boot. */ struct iwl_mld_scan { /* Add here fields that need clean up on restart */ @@ -136,7 +139,7 @@ struct iwl_mld_scan { void *cmd; unsigned long last_6ghz_passive_jiffies; unsigned long last_start_time_jiffies; - u64 last_mlo_scan_time; + u64 last_mlo_scan_start_time; }; /**
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index a19f9d2..9a74f60 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -2807,7 +2807,7 @@ static void iwl_mvm_nd_match_info_handler(struct iwl_mvm *mvm, if (IS_ERR_OR_NULL(vif)) return; - if (len < sizeof(struct iwl_scan_offload_match_info)) { + if (len < sizeof(struct iwl_scan_offload_match_info) + matches_len) { IWL_ERR(mvm, "Invalid scan match info notification\n"); return; }
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 43cf94c..6cc7866 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -470,7 +470,8 @@ static void iwl_mvm_uats_init(struct iwl_mvm *mvm) .dataflags[0] = IWL_HCMD_DFL_NOCOPY, }; - if (mvm->trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210) { + if (mvm->trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210 || + !mvm->trans->cfg->uhb_supported) { IWL_DEBUG_RADIO(mvm, "UATS feature is not supported\n"); return; }
diff --git a/drivers/net/wireless/marvell/libertas/main.c b/drivers/net/wireless/marvell/libertas/main.c index d44e02c6..dd97f1b 100644 --- a/drivers/net/wireless/marvell/libertas/main.c +++ b/drivers/net/wireless/marvell/libertas/main.c
@@ -799,8 +799,8 @@ static void lbs_free_adapter(struct lbs_private *priv) { lbs_free_cmd_buffer(priv); kfifo_free(&priv->event_fifo); - timer_delete(&priv->command_timer); - timer_delete(&priv->tx_lockup_timer); + timer_delete_sync(&priv->command_timer); + timer_delete_sync(&priv->tx_lockup_timer); } static const struct net_device_ops lbs_netdev_ops = {
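timer_delete() only deactivates a timer that has not fired yet; timer_delete_sync() additionally waits for a handler already running on another CPU to finish, which is exactly what matters here, immediately before the memory the handler touches is freed. A generic kernel-style sketch of the ordering (struct foo and its functions are invented, not libertas code):

#include <linux/container_of.h>
#include <linux/slab.h>
#include <linux/timer.h>

struct foo {
	struct timer_list timer;
	unsigned long expired_count;
};

static void foo_timer_fn(struct timer_list *t)
{
	struct foo *f = container_of(t, struct foo, timer);

	f->expired_count++;	/* touches memory that teardown frees */
}

static struct foo *foo_create(void)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (f)
		timer_setup(&f->timer, foo_timer_fn, 0);
	return f;
}

static void foo_teardown(struct foo *f)
{
	/* timer_delete(&f->timer) could return while foo_timer_fn()
	 * still runs on another CPU and then dereferences freed
	 * memory; the _sync variant waits for the handler to finish */
	timer_delete_sync(&f->timer);
	kfree(f);
}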
diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 4370410..eb28fe7 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
@@ -3148,7 +3148,7 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, SET_NETDEV_DEV(dev, adapter->dev); ret = dev_alloc_name(dev, name); - if (ret) + if (ret < 0) goto err_alloc_name; priv->dfs_cac_workqueue = alloc_workqueue("MWIFIEX_DFS_CAC-%s",
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c index 3304b59..b41ca14 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
@@ -413,6 +413,7 @@ mt76_connac2_mac_write_txwi_80211(struct mt76_dev *dev, __le32 *txwi, u32 val; if (ieee80211_is_action(fc) && + skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + 1 + 2 && mgmt->u.action.category == WLAN_CATEGORY_BACK && mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) { u16 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c index 871b671..0d94359 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c
@@ -668,6 +668,7 @@ mt7925_mac_write_txwi_80211(struct mt76_dev *dev, __le32 *txwi, u32 val; if (ieee80211_is_action(fc) && + skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 && mgmt->u.action.category == WLAN_CATEGORY_BACK && mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) tid = MT_TX_ADDBA;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 2560e2f..d4f3ee9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
@@ -800,6 +800,7 @@ mt7996_mac_write_txwi_80211(struct mt7996_dev *dev, __le32 *txwi, u32 val; if (ieee80211_is_action(fc) && + skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 && mgmt->u.action.category == WLAN_CATEGORY_BACK && mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) { if (is_mt7990(&dev->mt76))
diff --git a/drivers/net/wireless/mediatek/mt76/scan.c b/drivers/net/wireless/mediatek/mt76/scan.c index ff9176c..63b0447 100644 --- a/drivers/net/wireless/mediatek/mt76/scan.c +++ b/drivers/net/wireless/mediatek/mt76/scan.c
@@ -63,10 +63,8 @@ mt76_scan_send_probe(struct mt76_dev *dev, struct cfg80211_ssid *ssid) rcu_read_lock(); - if (!ieee80211_tx_prepare_skb(phy->hw, vif, skb, band, NULL)) { - ieee80211_free_txskb(phy->hw, skb); + if (!ieee80211_tx_prepare_skb(phy->hw, vif, skb, band, NULL)) goto out; - } info = IEEE80211_SKB_CB(skb); if (req->no_cck)
diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index f354b11..944b2a8 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c
@@ -163,7 +163,7 @@ int wilc_scan(struct wilc_vif *vif, u8 scan_source, u32 index = 0; u32 i, scan_timeout; u8 *buffer; - u8 valuesize = 0; + u32 valuesize = 0; u8 *search_ssid_vals = NULL; const u8 ch_list_len = request->n_channels; struct host_if_drv *hif_drv = vif->hif_drv;
diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 8c8e074..c7ae803 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
@@ -668,7 +668,7 @@ static int rsi_mac80211_config(struct ieee80211_hw *hw, struct rsi_hw *adapter = hw->priv; struct rsi_common *common = adapter->priv; struct ieee80211_conf *conf = &hw->conf; - int status = -EOPNOTSUPP; + int status = 0; mutex_lock(&common->mutex);
diff --git a/drivers/net/wireless/st/cw1200/pm.c b/drivers/net/wireless/st/cw1200/pm.c index 120f037..84eb15d 100644 --- a/drivers/net/wireless/st/cw1200/pm.c +++ b/drivers/net/wireless/st/cw1200/pm.c
@@ -264,12 +264,14 @@ int cw1200_wow_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) wiphy_err(priv->hw->wiphy, "PM request failed: %d. WoW is disabled.\n", ret); cw1200_wow_resume(hw); + mutex_unlock(&priv->conf_mutex); return -EBUSY; } /* Force resume if event is coming from the device. */ if (atomic_read(&priv->bh_rx)) { cw1200_wow_resume(hw); + mutex_unlock(&priv->conf_mutex); return -EAGAIN; }
diff --git a/drivers/net/wireless/ti/wl1251/tx.c b/drivers/net/wireless/ti/wl1251/tx.c index 2da8c0d..4489aa7 100644 --- a/drivers/net/wireless/ti/wl1251/tx.c +++ b/drivers/net/wireless/ti/wl1251/tx.c
@@ -402,12 +402,14 @@ static void wl1251_tx_packet_cb(struct wl1251 *wl, int hdrlen; u8 *frame; - skb = wl->tx_frames[result->id]; - if (skb == NULL) { - wl1251_error("SKB for packet %d is NULL", result->id); + if (unlikely(result->id >= ARRAY_SIZE(wl->tx_frames) || + wl->tx_frames[result->id] == NULL)) { + wl1251_error("invalid packet id %u", result->id); return; } + skb = wl->tx_frames[result->id]; + info = IEEE80211_SKB_CB(skb); if (!(info->flags & IEEE80211_TX_CTL_NO_ACK) &&
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 17dd417..1c340a4 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -1875,6 +1875,8 @@ static int __maybe_unused wl1271_op_resume(struct ieee80211_hw *hw) wl->wow_enabled); WARN_ON(!wl->wow_enabled); + mutex_lock(&wl->mutex); + ret = pm_runtime_force_resume(wl->dev); if (ret < 0) { wl1271_error("ELP wakeup failure!"); @@ -1891,8 +1893,6 @@ static int __maybe_unused wl1271_op_resume(struct ieee80211_hw *hw) run_irq_work = true; spin_unlock_irqrestore(&wl->wl_lock, flags); - mutex_lock(&wl->mutex); - /* test the recovery flag before calling any SDIO functions */ pending_recovery = test_bit(WL1271_FLAG_RECOVERY_IN_PROGRESS, &wl->flags);
diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c index 6241866..75cfbcf 100644 --- a/drivers/net/wireless/ti/wlcore/tx.c +++ b/drivers/net/wireless/ti/wlcore/tx.c
@@ -210,7 +210,7 @@ static int wl1271_tx_allocate(struct wl1271 *wl, struct wl12xx_vif *wlvif, if (skb_headroom(skb) < (total_len - skb->len) && pskb_expand_head(skb, (total_len - skb->len), 0, GFP_ATOMIC)) { wl1271_free_tx_id(wl, id); - return -EAGAIN; + return -ENOMEM; } desc = skb_push(skb, total_len - skb->len);
diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index e89173f..1b6e55e 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c
@@ -3021,7 +3021,6 @@ static void hw_scan_work(struct work_struct *work) hwsim->tmp_chan->band, NULL)) { rcu_read_unlock(); - kfree_skb(probe); continue; } @@ -6489,7 +6488,7 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) if (info->attrs[HWSIM_ATTR_PMSR_SUPPORT]) { struct cfg80211_pmsr_capabilities *pmsr_capa; - pmsr_capa = kmalloc_obj(*pmsr_capa); + pmsr_capa = kzalloc_obj(*pmsr_capa); if (!pmsr_capa) { ret = -ENOMEM; goto out_free;
diff --git a/drivers/net/wireless/virtual/virt_wifi.c b/drivers/net/wireless/virtual/virt_wifi.c index 885dc72..97bd39d 100644 --- a/drivers/net/wireless/virtual/virt_wifi.c +++ b/drivers/net/wireless/virtual/virt_wifi.c
@@ -557,7 +557,6 @@ static int virt_wifi_newlink(struct net_device *dev, eth_hw_addr_inherit(dev, priv->lowerdev); netif_stacked_transfer_operstate(priv->lowerdev, dev); - SET_NETDEV_DEV(dev, &priv->lowerdev->dev); dev->ieee80211_ptr = kzalloc_obj(*dev->ieee80211_ptr); if (!dev->ieee80211_ptr) {
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index d316b35..2ed6736 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c
@@ -1646,7 +1646,7 @@ static int xennet_xdp_set(struct net_device *dev, struct bpf_prog *prog, /* avoid the race with XDP headroom adjustment */ wait_event(module_wq, - xenbus_read_driver_state(np->xbdev->otherend) == + xenbus_read_driver_state(np->xbdev, np->xbdev->otherend) == XenbusStateReconfigured); np->netfront_xdp_enabled = true; @@ -1764,9 +1764,9 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) do { xenbus_switch_state(dev, XenbusStateInitialising); err = wait_event_timeout(module_wq, - xenbus_read_driver_state(dev->otherend) != + xenbus_read_driver_state(dev, dev->otherend) != XenbusStateClosed && - xenbus_read_driver_state(dev->otherend) != + xenbus_read_driver_state(dev, dev->otherend) != XenbusStateUnknown, XENNET_TIMEOUT); } while (!err); @@ -2626,31 +2626,31 @@ static void xennet_bus_close(struct xenbus_device *dev) { int ret; - if (xenbus_read_driver_state(dev->otherend) == XenbusStateClosed) + if (xenbus_read_driver_state(dev, dev->otherend) == XenbusStateClosed) return; do { xenbus_switch_state(dev, XenbusStateClosing); ret = wait_event_timeout(module_wq, - xenbus_read_driver_state(dev->otherend) == - XenbusStateClosing || - xenbus_read_driver_state(dev->otherend) == - XenbusStateClosed || - xenbus_read_driver_state(dev->otherend) == - XenbusStateUnknown, - XENNET_TIMEOUT); + xenbus_read_driver_state(dev, dev->otherend) == + XenbusStateClosing || + xenbus_read_driver_state(dev, dev->otherend) == + XenbusStateClosed || + xenbus_read_driver_state(dev, dev->otherend) == + XenbusStateUnknown, + XENNET_TIMEOUT); } while (!ret); - if (xenbus_read_driver_state(dev->otherend) == XenbusStateClosed) + if (xenbus_read_driver_state(dev, dev->otherend) == XenbusStateClosed) return; do { xenbus_switch_state(dev, XenbusStateClosed); ret = wait_event_timeout(module_wq, - xenbus_read_driver_state(dev->otherend) == - XenbusStateClosed || - xenbus_read_driver_state(dev->otherend) == - XenbusStateUnknown, - XENNET_TIMEOUT); + xenbus_read_driver_state(dev, dev->otherend) == + XenbusStateClosed || + xenbus_read_driver_state(dev, dev->otherend) == + XenbusStateUnknown, + XENNET_TIMEOUT); } while (!ret); }
diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index 6a5ce8f..b3d3443 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c
@@ -47,8 +47,8 @@ static int nxp_nci_i2c_set_mode(void *phy_id, { struct nxp_nci_i2c_phy *phy = (struct nxp_nci_i2c_phy *) phy_id; - gpiod_set_value(phy->gpiod_fw, (mode == NXP_NCI_MODE_FW) ? 1 : 0); - gpiod_set_value(phy->gpiod_en, (mode != NXP_NCI_MODE_COLD) ? 1 : 0); + gpiod_set_value_cansleep(phy->gpiod_fw, (mode == NXP_NCI_MODE_FW) ? 1 : 0); + gpiod_set_value_cansleep(phy->gpiod_en, (mode != NXP_NCI_MODE_COLD) ? 1 : 0); usleep_range(10000, 15000); if (mode == NXP_NCI_MODE_COLD)
diff --git a/drivers/nfc/pn533/uart.c b/drivers/nfc/pn533/uart.c index 6d2f520..1b82b7b 100644 --- a/drivers/nfc/pn533/uart.c +++ b/drivers/nfc/pn533/uart.c
@@ -211,6 +211,9 @@ static size_t pn532_receive_buf(struct serdev_device *serdev, timer_delete(&dev->cmd_timeout); for (i = 0; i < count; i++) { + if (unlikely(!skb_tailroom(dev->recv_skb))) + skb_trim(dev->recv_skb, 0); + skb_put_u8(dev->recv_skb, *data++); if (!pn532_uart_rx_is_frame(dev->recv_skb)) continue;
diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index 018a806..0f12f86 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c
@@ -628,6 +628,7 @@ static void pn533_usb_disconnect(struct usb_interface *interface) usb_free_urb(phy->out_urb); usb_free_urb(phy->ack_urb); kfree(phy->ack_buffer); + usb_put_dev(phy->udev); nfc_info(&interface->dev, "NXP PN533 NFC device disconnected\n"); }
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index bd9621d..45b7d75 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c
@@ -486,14 +486,15 @@ EXPORT_SYMBOL_GPL(nd_synchronize); static void nd_async_device_register(void *d, async_cookie_t cookie) { struct device *dev = d; + struct device *parent = dev->parent; if (device_add(dev) != 0) { dev_err(dev, "%s: failed\n", __func__); put_device(dev); } put_device(dev); - if (dev->parent) - put_device(dev->parent); + if (parent) + put_device(parent); } static void nd_async_device_unregister(void *d, async_cookie_t cookie)
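The nvdimm fix above is a use-after-free avoidance pattern: the second put_device(dev) may drop the last reference and free dev, so dev->parent has to be read into a local before any reference is dropped. Sketched in isolation (illustrative, mirroring the corrected flow):

	struct device *parent = dev->parent;	/* read before dropping references */
	put_device(dev);			/* dev may be freed here */
	if (parent)
		put_device(parent);		/* no longer touches dev */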
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f5ebcaa..766e9cc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c
@@ -2046,14 +2046,10 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns, if (id->nabspf) boundary = (le16_to_cpu(id->nabspf) + 1) * bs; } else { - /* - * Use the controller wide atomic write unit. This sucks - * because the limit is defined in terms of logical blocks while - * namespaces can have different formats, and because there is - * no clear language in the specification prohibiting different - * values for different controllers in the subsystem. - */ - atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; + if (ns->ctrl->awupf) + dev_info_once(ns->ctrl->device, + "AWUPF ignored, only NAWUPF accepted\n"); + atomic_bs = bs; } lim->atomic_write_hw_max = atomic_bs; @@ -3222,7 +3218,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) memcpy(subsys->model, id->mn, sizeof(subsys->model)); subsys->vendor_id = le16_to_cpu(id->vid); subsys->cmic = id->cmic; - subsys->awupf = le16_to_cpu(id->awupf); /* Versions prior to 1.4 don't necessarily report a valid type */ if (id->cntrltype == NVME_CTRL_DISC || @@ -3655,6 +3650,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) dev_pm_qos_expose_latency_tolerance(ctrl->device); else if (!ctrl->apst_enabled && prev_apst_enabled) dev_pm_qos_hide_latency_tolerance(ctrl->device); + ctrl->awupf = le16_to_cpu(id->awupf); out_free: kfree(id); return ret; @@ -4186,13 +4182,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) nvme_mpath_add_disk(ns, info->anagrpid); nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name); - /* - * Set ns->disk->device->driver_data to ns so we can access - * ns->head->passthru_err_log_enabled in - * nvme_io_passthru_err_log_enabled_[store | show](). - */ - dev_set_drvdata(disk_to_dev(ns->disk), ns); - return; out_cleanup_ns_from_list: @@ -4845,7 +4834,6 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event); int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, const struct blk_mq_ops *ops, unsigned int cmd_size) { - struct queue_limits lim = {}; int ret; memset(set, 0, sizeof(*set)); @@ -4865,7 +4853,14 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, if (ret) return ret; - ctrl->admin_q = blk_mq_alloc_queue(set, &lim, NULL); + /* + * If a previous admin queue exists (e.g., from before a reset), + * put it now before allocating a new one to avoid orphaning it. + */ + if (ctrl->admin_q) + blk_put_queue(ctrl->admin_q); + + ctrl->admin_q = blk_mq_alloc_queue(set, NULL, NULL); if (IS_ERR(ctrl->admin_q)) { ret = PTR_ERR(ctrl->admin_q); goto out_free_tagset;
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5fe09e3..ac3d4f4 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c
@@ -1290,8 +1290,8 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts) kfree(opts->subsysnqn); kfree(opts->host_traddr); kfree(opts->host_iface); - kfree(opts->dhchap_secret); - kfree(opts->dhchap_ctrl_secret); + kfree_sensitive(opts->dhchap_secret); + kfree_sensitive(opts->dhchap_ctrl_secret); kfree(opts); } EXPORT_SYMBOL_GPL(nvmf_free_options);
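kfree_sensitive(), used above for the DH-CHAP secrets, zeroes the allocation (internally via memzero_explicit()) before handing it back to the allocator, so key material does not linger in freed heap memory the way it would after a plain kfree(). A short sketch, with a hypothetical secret buffer (names are illustrative):

	static int store_secret_example(const char *user_secret)
	{
		char *secret = kstrdup(user_secret, GFP_KERNEL);

		if (!secret)
			return -ENOMEM;
		/* ... use the secret for authentication ... */
		kfree_sensitive(secret);	/* memzero_explicit() + kfree() */
		return 0;
	}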
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 174027d..fc6800a 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c
@@ -1300,7 +1300,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) mutex_lock(&head->subsys->lock); /* * We are called when all paths have been removed, and at that point - * head->list is expected to be empty. However, nvme_remove_ns() and + * head->list is expected to be empty. However, nvme_ns_remove() and * nvme_init_ns_head() can run concurrently and so if head->delayed_ * removal_secs is configured, it is possible that by the time we reach * this point, head->list may no longer be empty. Therefore, we recheck @@ -1310,13 +1310,11 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) if (!list_empty(&head->list)) goto out; - if (head->delayed_removal_secs) { - /* - * Ensure that no one could remove this module while the head - * remove work is pending. - */ - if (!try_module_get(THIS_MODULE)) - goto out; + /* + * Ensure that no one could remove this module while the head + * remove work is pending. + */ + if (head->delayed_removal_secs && try_module_get(THIS_MODULE)) { mod_delayed_work(nvme_wq, &head->remove_work, head->delayed_removal_secs * HZ); } else {
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9a5f28c..9971045 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h
@@ -180,6 +180,60 @@ enum nvme_quirks { NVME_QUIRK_DMAPOOL_ALIGN_512 = (1 << 22), }; +static inline char *nvme_quirk_name(enum nvme_quirks q) +{ + switch (q) { + case NVME_QUIRK_STRIPE_SIZE: + return "stripe_size"; + case NVME_QUIRK_IDENTIFY_CNS: + return "identify_cns"; + case NVME_QUIRK_DEALLOCATE_ZEROES: + return "deallocate_zeroes"; + case NVME_QUIRK_DELAY_BEFORE_CHK_RDY: + return "delay_before_chk_rdy"; + case NVME_QUIRK_NO_APST: + return "no_apst"; + case NVME_QUIRK_NO_DEEPEST_PS: + return "no_deepest_ps"; + case NVME_QUIRK_QDEPTH_ONE: + return "qdepth_one"; + case NVME_QUIRK_MEDIUM_PRIO_SQ: + return "medium_prio_sq"; + case NVME_QUIRK_IGNORE_DEV_SUBNQN: + return "ignore_dev_subnqn"; + case NVME_QUIRK_DISABLE_WRITE_ZEROES: + return "disable_write_zeroes"; + case NVME_QUIRK_SIMPLE_SUSPEND: + return "simple_suspend"; + case NVME_QUIRK_SINGLE_VECTOR: + return "single_vector"; + case NVME_QUIRK_128_BYTES_SQES: + return "128_bytes_sqes"; + case NVME_QUIRK_SHARED_TAGS: + return "shared_tags"; + case NVME_QUIRK_NO_TEMP_THRESH_CHANGE: + return "no_temp_thresh_change"; + case NVME_QUIRK_NO_NS_DESC_LIST: + return "no_ns_desc_list"; + case NVME_QUIRK_DMA_ADDRESS_BITS_48: + return "dma_address_bits_48"; + case NVME_QUIRK_SKIP_CID_GEN: + return "skip_cid_gen"; + case NVME_QUIRK_BOGUS_NID: + return "bogus_nid"; + case NVME_QUIRK_NO_SECONDARY_TEMP_THRESH: + return "no_secondary_temp_thresh"; + case NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND: + return "force_no_simple_suspend"; + case NVME_QUIRK_BROKEN_MSI: + return "broken_msi"; + case NVME_QUIRK_DMAPOOL_ALIGN_512: + return "dmapool_align_512"; + } + + return "unknown"; +} + /* * Common request structure for NVMe passthrough. All drivers must have * this structure as the first member of their request-private data. @@ -410,6 +464,8 @@ struct nvme_ctrl { enum nvme_ctrl_type cntrltype; enum nvme_dctype dctype; + + u16 awupf; /* 0's based value. */ }; static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl) @@ -442,7 +498,6 @@ struct nvme_subsystem { u8 cmic; enum nvme_subsys_type subtype; u16 vendor_id; - u16 awupf; /* 0's based value. */ struct ida ns_ida; #ifdef CONFIG_NVME_MULTIPATH enum nvme_iopolicy iopolicy;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2f0c057..b78ba23 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c
@@ -72,6 +72,13 @@ static_assert(MAX_PRP_RANGE / NVME_CTRL_PAGE_SIZE <= (1 /* prp1 */ + NVME_MAX_NR_DESCRIPTORS * PRPS_PER_PAGE)); +struct quirk_entry { + u16 vendor_id; + u16 dev_id; + u32 enabled_quirks; + u32 disabled_quirks; +}; + static int use_threaded_interrupts; module_param(use_threaded_interrupts, int, 0444); @@ -102,6 +109,143 @@ static unsigned int io_queue_depth = 1024; module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2 and < 4096"); +static struct quirk_entry *nvme_pci_quirk_list; +static unsigned int nvme_pci_quirk_count; + +/* Helper to parse individual quirk names */ +static int nvme_parse_quirk_names(char *quirk_str, struct quirk_entry *entry) +{ + int i; + size_t field_len; + bool disabled, found; + char *p = quirk_str, *field; + + while ((field = strsep(&p, ",")) && *field) { + disabled = false; + found = false; + + if (*field == '^') { + /* Skip the '^' character */ + disabled = true; + field++; + } + + field_len = strlen(field); + for (i = 0; i < 32; i++) { + unsigned int bit = 1U << i; + char *q_name = nvme_quirk_name(bit); + size_t q_len = strlen(q_name); + + if (!strcmp(q_name, "unknown")) + break; + + if (!strcmp(q_name, field) && + q_len == field_len) { + if (disabled) + entry->disabled_quirks |= bit; + else + entry->enabled_quirks |= bit; + found = true; + break; + } + } + + if (!found) { + pr_err("nvme: unrecognized quirk %s\n", field); + return -EINVAL; + } + } + return 0; +} + +/* Helper to parse a single VID:DID:quirk_names entry */ +static int nvme_parse_quirk_entry(char *s, struct quirk_entry *entry) +{ + char *field; + + field = strsep(&s, ":"); + if (!field || kstrtou16(field, 16, &entry->vendor_id)) + return -EINVAL; + + field = strsep(&s, ":"); + if (!field || kstrtou16(field, 16, &entry->dev_id)) + return -EINVAL; + + field = strsep(&s, ":"); + if (!field) + return -EINVAL; + + return nvme_parse_quirk_names(field, entry); +} + +static int quirks_param_set(const char *value, const struct kernel_param *kp) +{ + int count, err, i; + struct quirk_entry *qlist; + char *field, *val, *sep_ptr; + + err = param_set_copystring(value, kp); + if (err) + return err; + + val = kstrdup(value, GFP_KERNEL); + if (!val) + return -ENOMEM; + + if (!*val) + goto out_free_val; + + count = 1; + for (i = 0; val[i]; i++) { + if (val[i] == '-') + count++; + } + + qlist = kcalloc(count, sizeof(*qlist), GFP_KERNEL); + if (!qlist) { + err = -ENOMEM; + goto out_free_val; + } + + i = 0; + sep_ptr = val; + while ((field = strsep(&sep_ptr, "-"))) { + if (nvme_parse_quirk_entry(field, &qlist[i])) { + pr_err("nvme: failed to parse quirk string %s\n", + value); + err = -EINVAL; + goto out_free_qlist; + } + + i++; + } + + kfree(nvme_pci_quirk_list); + nvme_pci_quirk_count = count; + nvme_pci_quirk_list = qlist; + goto out_free_val; + +out_free_qlist: + kfree(qlist); +out_free_val: + kfree(val); + return err; +} + +static char quirks_param[128]; +static const struct kernel_param_ops quirks_param_ops = { + .set = quirks_param_set, + .get = param_get_string, +}; + +static struct kparam_string quirks_param_string = { + .maxlen = sizeof(quirks_param), + .string = quirks_param, +}; + +module_param_cb(quirks, &quirks_param_ops, &quirks_param_string, 0444); +MODULE_PARM_DESC(quirks, "Enable/disable NVMe quirks by specifying " + "quirks=VID:DID:quirk_names"); + static int io_queue_count_set(const char *val, const struct kernel_param *kp) { unsigned int n; @@ -400,7 +544,7 @@ static void nvme_dbbuf_set(struct nvme_dev *dev)
/* Free memory and continue on */ nvme_dbbuf_dma_free(dev); - for (i = 1; i <= dev->online_queues; i++) + for (i = 1; i < dev->online_queues; i++) nvme_dbbuf_free(&dev->queues[i]); } } @@ -1481,14 +1625,16 @@ static irqreturn_t nvme_irq_check(int irq, void *data) static void nvme_poll_irqdisable(struct nvme_queue *nvmeq) { struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev); + int irq; WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags)); - disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); + irq = pci_irq_vector(pdev, nvmeq->cq_vector); + disable_irq(irq); spin_lock(&nvmeq->cq_poll_lock); nvme_poll_cq(nvmeq, NULL); spin_unlock(&nvmeq->cq_poll_lock); - enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); + enable_irq(irq); } static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) @@ -1496,7 +1642,8 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) struct nvme_queue *nvmeq = hctx->driver_data; bool found; - if (!nvme_cqe_pending(nvmeq)) + if (!test_bit(NVMEQ_POLLED, &nvmeq->flags) || + !nvme_cqe_pending(nvmeq)) return 0; spin_lock(&nvmeq->cq_poll_lock); @@ -2774,7 +2921,25 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) dev->nr_write_queues = write_queues; dev->nr_poll_queues = poll_queues; - nr_io_queues = dev->nr_allocated_queues - 1; + if (dev->ctrl.tagset) { + /* + * The set's maps are allocated only once at initialization + * time. We can't add special queues later if their mq_map + * wasn't preallocated. + */ + if (dev->ctrl.tagset->nr_maps < 3) + dev->nr_poll_queues = 0; + if (dev->ctrl.tagset->nr_maps < 2) + dev->nr_write_queues = 0; + } + + /* + * The initial number of allocated queue slots may be too large if the + * user reduced the special queue parameters. Cap the value to the + * number we need for this round. + */ + nr_io_queues = min(nvme_max_io_queues(dev), + dev->nr_allocated_queues - 1); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); if (result < 0) return result; @@ -3458,12 +3623,25 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) return 0; } +static struct quirk_entry *detect_dynamic_quirks(struct pci_dev *pdev) +{ + int i; + + for (i = 0; i < nvme_pci_quirk_count; i++) + if (pdev->vendor == nvme_pci_quirk_list[i].vendor_id && + pdev->device == nvme_pci_quirk_list[i].dev_id) + return &nvme_pci_quirk_list[i]; + + return NULL; +} + static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, const struct pci_device_id *id) { unsigned long quirks = id->driver_data; int node = dev_to_node(&pdev->dev); struct nvme_dev *dev; + struct quirk_entry *qentry; int ret = -ENOMEM; dev = kzalloc_node(struct_size(dev, descriptor_pools, nr_node_ids), @@ -3495,6 +3673,11 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, "platform quirk: setting simple suspend\n"); quirks |= NVME_QUIRK_SIMPLE_SUSPEND; } + qentry = detect_dynamic_quirks(pdev); + if (qentry) { + quirks |= qentry->enabled_quirks; + quirks &= ~qentry->disabled_quirks; + } ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, quirks); if (ret) @@ -4095,6 +4278,7 @@ static int __init nvme_init(void) static void __exit nvme_exit(void) { + kfree(nvme_pci_quirk_list); pci_unregister_driver(&nvme_driver); flush_workqueue(nvme_wq); }
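Going by the parser above, the new nvme "quirks" module parameter takes dash-separated entries, each entry being a colon-separated hex vendor ID, hex device ID, and a comma-separated list of quirk names, where a '^' prefix clears a quirk the device would otherwise get. A hypothetical invocation on the kernel command line (the device IDs are illustrative; the quirk names come from nvme_quirk_name()):

	nvme.quirks=1b36:0010:bogus_nid,^simple_suspend-144d:a804:no_deepest_ps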
diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index ad2ecc2..fe7dbe2 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c
@@ -242,7 +242,7 @@ static int nvme_pr_read_keys(struct block_device *bdev, if (rse_len > U32_MAX) return -EINVAL; - rse = kzalloc(rse_len, GFP_KERNEL); + rse = kvzalloc(rse_len, GFP_KERNEL); if (!rse) return -ENOMEM; @@ -267,7 +267,7 @@ static int nvme_pr_read_keys(struct block_device *bdev, } free_rse: - kfree(rse); + kvfree(rse); return ret; }
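kvzalloc()/kvfree(), substituted above for kzalloc()/kfree(), first attempt a physically contiguous kmalloc() allocation and fall back to vmalloc() when that cannot be satisfied, which suits a controller-reported size that may be large. The one constraint is that both ends must match; a sketch in kernel context (function name illustrative):

	static int read_keys_example(size_t len)
	{
		void *buf = kvzalloc(len, GFP_KERNEL);	/* contiguous if possible, else vmalloc() */

		if (!buf)
			return -ENOMEM;
		/* ... fill and consume buf ... */
		kvfree(buf);				/* handles either backing store */
		return 0;
	}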
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 2943094..16c6fea 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c
@@ -601,6 +601,28 @@ static ssize_t dctype_show(struct device *dev, } static DEVICE_ATTR_RO(dctype); +static ssize_t quirks_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int count = 0, i; + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + unsigned long quirks = ctrl->quirks; + + if (!quirks) + return sysfs_emit(buf, "none\n"); + + for (i = 0; quirks; ++i) { + if (quirks & 1) { + count += sysfs_emit_at(buf, count, "%s\n", + nvme_quirk_name(BIT(i))); + } + quirks >>= 1; + } + + return count; +} +static DEVICE_ATTR_RO(quirks); + #ifdef CONFIG_NVME_HOST_AUTH static ssize_t nvme_ctrl_dhchap_secret_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -742,6 +764,7 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_kato.attr, &dev_attr_cntrltype.attr, &dev_attr_dctype.attr, + &dev_attr_quirks.attr, #ifdef CONFIG_NVME_HOST_AUTH &dev_attr_dhchap_secret.attr, &dev_attr_dhchap_ctrl_secret.attr,
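With the attribute above in place, the active quirk set should be readable per controller from sysfs, one name per line, or "none" when no quirk is set. Expected usage (controller name and output are illustrative):

	$ cat /sys/class/nvme/nvme0/quirks
	bogus_nid
	no_deepest_ps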
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 9ab3f61..243dab8 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c
@@ -25,7 +25,8 @@ struct nvme_tcp_queue; -/* Define the socket priority to use for connections were it is desirable +/* + * Define the socket priority to use for connections where it is desirable * that the NIC consider performing optimized packet processing or filtering. * A non-zero value being sufficient to indicate general consideration of any * possible optimization. Making it a module param allows for alternative @@ -926,7 +927,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, req->curr_bio = req->curr_bio->bi_next; /* - * If we don`t have any bios it means that controller + * If we don't have any bios it means the controller * sent more data than we requested, hence error */ if (!req->curr_bio) {
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 9de93f6..ca5b08c 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c
@@ -1585,7 +1585,7 @@ void nvmet_execute_async_event(struct nvmet_req *req) ctrl->async_event_cmds[ctrl->nr_async_event_cmds++] = req; mutex_unlock(&ctrl->lock); - queue_work(nvmet_wq, &ctrl->async_event_work); + queue_work(nvmet_aen_wq, &ctrl->async_event_work); } void nvmet_execute_keep_alive(struct nvmet_req *req)
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 5e43d0a..9238e13 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c
@@ -27,6 +27,8 @@ static DEFINE_IDA(cntlid_ida); struct workqueue_struct *nvmet_wq; EXPORT_SYMBOL_GPL(nvmet_wq); +struct workqueue_struct *nvmet_aen_wq; +EXPORT_SYMBOL_GPL(nvmet_aen_wq); /* * This read/write semaphore is used to synchronize access to configuration @@ -206,7 +208,7 @@ void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, list_add_tail(&aen->entry, &ctrl->async_events); mutex_unlock(&ctrl->lock); - queue_work(nvmet_wq, &ctrl->async_event_work); + queue_work(nvmet_aen_wq, &ctrl->async_event_work); } static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid) @@ -1956,9 +1958,14 @@ static int __init nvmet_init(void) if (!nvmet_wq) goto out_free_buffered_work_queue; + nvmet_aen_wq = alloc_workqueue("nvmet-aen-wq", + WQ_MEM_RECLAIM | WQ_UNBOUND, 0); + if (!nvmet_aen_wq) + goto out_free_nvmet_work_queue; + error = nvmet_init_debugfs(); if (error) - goto out_free_nvmet_work_queue; + goto out_free_nvmet_aen_work_queue; error = nvmet_init_discovery(); if (error) @@ -1974,6 +1981,8 @@ static int __init nvmet_init(void) nvmet_exit_discovery(); out_exit_debugfs: nvmet_exit_debugfs(); +out_free_nvmet_aen_work_queue: + destroy_workqueue(nvmet_aen_wq); out_free_nvmet_work_queue: destroy_workqueue(nvmet_wq); out_free_buffered_work_queue: @@ -1991,6 +2000,7 @@ static void __exit nvmet_exit(void) nvmet_exit_discovery(); nvmet_exit_debugfs(); ida_destroy(&cntlid_ida); + destroy_workqueue(nvmet_aen_wq); destroy_workqueue(nvmet_wq); destroy_workqueue(buffered_io_wq); destroy_workqueue(zbd_wq);
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 866048f..b63af3b 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c
@@ -491,6 +491,7 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport, struct fcloop_rport *rport = remoteport->private; struct nvmet_fc_target_port *targetport = rport->targetport; struct fcloop_tport *tport; + int ret = 0; if (!targetport) { /* @@ -500,12 +501,18 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport, * We end up here from delete association exchange: * nvmet_fc_xmt_disconnect_assoc sends an async request. * - * Return success because this is what LLDDs do; silently - * drop the response. + * Return success when the remoteport is still online, matching + * what LLDDs do, and silently drop the response. Otherwise + * return an error so the upper layer performs the lsrsp + * resource cleanup. */ - lsrsp->done(lsrsp); + if (remoteport->port_state == FC_OBJSTATE_ONLINE) + lsrsp->done(lsrsp); + else + ret = -ENODEV; + kmem_cache_free(lsreq_cache, tls_req); - return 0; + return ret; } memcpy(lsreq->rspaddr, lsrsp->rspbuf,
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index b664b58..319d6a5 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h
@@ -501,6 +501,7 @@ extern struct kmem_cache *nvmet_bvec_cache; extern struct workqueue_struct *buffered_io_wq; extern struct workqueue_struct *zbd_wq; extern struct workqueue_struct *nvmet_wq; +extern struct workqueue_struct *nvmet_aen_wq; static inline void nvmet_set_result(struct nvmet_req *req, u32 result) {
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 2d6eb89..e6e2c3f 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c
@@ -2087,6 +2087,7 @@ static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data mutex_unlock(&nvmet_rdma_queue_mutex); flush_workqueue(nvmet_wq); + flush_workqueue(nvmet_aen_wq); } static struct ib_client nvmet_rdma_ib_client = {
diff --git a/drivers/nvmem/imx-ocotp-ele.c b/drivers/nvmem/imx-ocotp-ele.c index 7cf7e80..a0d2985 100644 --- a/drivers/nvmem/imx-ocotp-ele.c +++ b/drivers/nvmem/imx-ocotp-ele.c
@@ -131,6 +131,7 @@ static int imx_ocotp_cell_pp(void *context, const char *id, int index, static void imx_ocotp_fixup_dt_cell_info(struct nvmem_device *nvmem, struct nvmem_cell_info *cell) { + cell->raw_len = round_up(cell->bytes, 4); cell->read_post_process = imx_ocotp_cell_pp; }
diff --git a/drivers/nvmem/imx-ocotp.c b/drivers/nvmem/imx-ocotp.c index 7bf7656..108d78d 100644 --- a/drivers/nvmem/imx-ocotp.c +++ b/drivers/nvmem/imx-ocotp.c
@@ -589,6 +589,7 @@ MODULE_DEVICE_TABLE(of, imx_ocotp_dt_ids); static void imx_ocotp_fixup_dt_cell_info(struct nvmem_device *nvmem, struct nvmem_cell_info *cell) { + cell->raw_len = round_up(cell->bytes, 4); cell->read_post_process = imx_ocotp_cell_pp; }
diff --git a/drivers/nvmem/zynqmp_nvmem.c b/drivers/nvmem/zynqmp_nvmem.c index 7da717d..d297ff1 100644 --- a/drivers/nvmem/zynqmp_nvmem.c +++ b/drivers/nvmem/zynqmp_nvmem.c
@@ -66,7 +66,7 @@ static int zynqmp_efuse_access(void *context, unsigned int offset, dma_addr_t dma_buf; size_t words = bytes / WORD_INBYTES; int ret; - int value; + unsigned int value; char *data; if (bytes % WORD_INBYTES != 0) { @@ -80,7 +80,7 @@ static int zynqmp_efuse_access(void *context, unsigned int offset, } if (pufflag == 1 && flag == EFUSE_WRITE) { - memcpy(&value, val, bytes); + memcpy(&value, val, sizeof(value)); if ((offset == EFUSE_PUF_START_OFFSET || offset == EFUSE_PUF_MID_OFFSET) && value & P_USER_0_64_UPPER_MASK) { @@ -100,7 +100,7 @@ static int zynqmp_efuse_access(void *context, unsigned int offset, if (!efuse) return -ENOMEM; - data = dma_alloc_coherent(dev, sizeof(bytes), + data = dma_alloc_coherent(dev, bytes, &dma_buf, GFP_KERNEL); if (!data) { ret = -ENOMEM; @@ -134,7 +134,7 @@ static int zynqmp_efuse_access(void *context, unsigned int offset, if (flag == EFUSE_READ) memcpy(val, data, bytes); efuse_access_err: - dma_free_coherent(dev, sizeof(bytes), + dma_free_coherent(dev, bytes, data, dma_buf); efuse_data_fail: dma_free_coherent(dev, sizeof(struct xilinx_efuse),
diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 295076c..c57ae4d 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -905,6 +905,19 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no, * supported, so we avoid reprogramming the region on every MSI, * specifically unmapping immediately after writel(). */ + if (ep->msi_iatu_mapped && (ep->msi_msg_addr != msg_addr || + ep->msi_map_size != map_size)) { + /* + * The host changed the MSI target address or the required + * mapping size changed. Reprogramming the iATU when there are + * operations in flight is unsafe on this controller. However, + * there is no unified way to check if we have operations in + * flight, thus we don't know if we should WARN() or not. + */ + dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys); + ep->msi_iatu_mapped = false; + } + if (!ep->msi_iatu_mapped) { ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr, @@ -915,15 +928,6 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no, ep->msi_iatu_mapped = true; ep->msi_msg_addr = msg_addr; ep->msi_map_size = map_size; - } else if (WARN_ON_ONCE(ep->msi_msg_addr != msg_addr || - ep->msi_map_size != map_size)) { - /* - * The host changed the MSI target address or the required - * mapping size changed. Reprogramming the iATU at runtime is - * unsafe on this controller, so bail out instead of trying to - * update the existing region. - */ - return -EINVAL; } writel(msg_data | (interrupt_num - 1), ep->msi_mem + offset); @@ -1010,6 +1014,9 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no, writel(msg_data, ep->msi_mem + offset); + /* flush posted write before unmap */ + readl(ep->msi_mem + offset); + dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys); return 0;
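The readl() added before the unmap above is the usual idiom for flushing a posted MMIO write: writes toward PCI(e) may be buffered along the path, but a read from the same region cannot complete until all earlier posted writes have, so the read-back guarantees the MSI-X message has actually landed before the window is torn down. In generic form (helper name illustrative, not from this driver):

	static void mmio_write_flushed(void __iomem *addr, u32 val)
	{
		writel(val, addr);	/* posted: may still sit in a buffer */
		readl(addr);		/* non-posted read forces completion */
	}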
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 2c7a406..49c0a2d 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c
@@ -2485,6 +2485,14 @@ static void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus) if (!hv_dev) continue; + /* + * If the Hyper-V host doesn't provide a NUMA node for the + * device, default to node 0. With NUMA_NO_NODE the kernel + * may spread work across NUMA nodes, which degrades + * performance on Hyper-V. + */ + set_dev_node(&dev->dev, 0); + if (hv_dev->desc.flags & HV_PCI_DEVICE_FLAG_NUMA_AFFINITY && hv_dev->desc.virtual_numa_node < num_possible_nodes()) /* @@ -3778,7 +3786,7 @@ static int hv_pci_probe(struct hv_device *hdev, hbus->bridge->domain_nr); if (!hbus->wq) { ret = -ENOMEM; - goto free_dom; + goto free_bus; } hdev->channel->next_request_id_callback = vmbus_next_request_id; @@ -3874,8 +3882,6 @@ static int hv_pci_probe(struct hv_device *hdev, vmbus_close(hdev->channel); destroy_wq: destroy_workqueue(hbus->wq); -free_dom: - pci_bus_release_emul_domain_nr(hbus->bridge->domain_nr); free_bus: kfree(hbus); return ret;
diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index 582938b..3354893 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c
@@ -894,6 +894,11 @@ static void pci_epf_test_bar_subrange_setup(struct pci_epf_test *epf_test, dev_err(&epf->dev, "pci_epc_set_bar() failed: %d\n", ret); bar->submap = old_submap; bar->num_submap = old_nsub; + ret = pci_epc_set_bar(epc, epf->func_no, epf->vfunc_no, bar); + if (ret) + dev_warn(&epf->dev, "Failed to restore the original BAR mapping: %d\n", + ret); + kfree(submap); goto err; }
diff --git a/drivers/pci/pwrctrl/core.c b/drivers/pci/pwrctrl/core.c index 6f7dea6..97cff5b 100644 --- a/drivers/pci/pwrctrl/core.c +++ b/drivers/pci/pwrctrl/core.c
@@ -268,6 +268,48 @@ int pci_pwrctrl_power_on_devices(struct device *parent) } EXPORT_SYMBOL_GPL(pci_pwrctrl_power_on_devices); +/* + * Check whether the pwrctrl device really needs to be created or not. The + * pwrctrl device will only be created if the node satisfies the requirements + * below: + * + * 1. Presence of a compatible property with a "pci" prefix to match against + * the pwrctrl driver (AND) + * 2. At least one of the power supplies defined in the devicetree node of the + * device (OR) in the remote endpoint parent node to indicate pwrctrl + * requirement. + */ +static bool pci_pwrctrl_is_required(struct device_node *np) +{ + struct device_node *endpoint; + const char *compat; + int ret; + + ret = of_property_read_string(np, "compatible", &compat); + if (ret < 0) + return false; + + if (!strstarts(compat, "pci")) + return false; + + if (of_pci_supply_present(np)) + return true; + + if (of_graph_is_present(np)) { + for_each_endpoint_of_node(np, endpoint) { + struct device_node *remote __free(device_node) = + of_graph_get_remote_port_parent(endpoint); + if (remote) { + if (of_pci_supply_present(remote)) { + of_node_put(endpoint); + return true; + } + } + } + } + + return false; +} + static int pci_pwrctrl_create_device(struct device_node *np, struct device *parent) { @@ -287,19 +329,7 @@ static int pci_pwrctrl_create_device(struct device_node *np, return 0; } - /* - * Sanity check to make sure that the node has the compatible property - * to allow driver binding. - */ - if (!of_property_present(np, "compatible")) - return 0; - - /* - * Check whether the pwrctrl device really needs to be created or not. - * This is decided based on at least one of the power supplies defined - * in the devicetree node of the device or the graph property. - */ - if (!of_pci_supply_present(np) && !of_graph_is_present(np)) { + if (!pci_pwrctrl_is_required(np)) { dev_dbg(parent, "Skipping OF node: %s\n", np->name); return 0; }
diff --git a/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c b/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c index 0d03772..c7e4bee 100644 --- a/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c +++ b/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c
@@ -68,13 +68,6 @@ static int pwrseq_pwrctrl_power_off(struct pci_pwrctrl *pwrctrl) return pwrseq_power_off(pwrseq->pwrseq); } -static void devm_pwrseq_pwrctrl_power_off(void *data) -{ - struct pwrseq_pwrctrl *pwrseq = data; - - pwrseq_pwrctrl_power_off(&pwrseq->pwrctrl); -} - static int pwrseq_pwrctrl_probe(struct platform_device *pdev) { const struct pwrseq_pwrctrl_pdata *pdata; @@ -101,11 +94,6 @@ static int pwrseq_pwrctrl_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(pwrseq->pwrseq), "Failed to get the power sequencer\n"); - ret = devm_add_action_or_reset(dev, devm_pwrseq_pwrctrl_power_off, - pwrseq); - if (ret) - return ret; - pwrseq->pwrctrl.power_on = pwrseq_pwrctrl_power_on; pwrseq->pwrctrl.power_off = pwrseq_pwrctrl_power_off;
diff --git a/drivers/pci/pwrctrl/slot.c b/drivers/pci/pwrctrl/slot.c index 082af81..b876392 100644 --- a/drivers/pci/pwrctrl/slot.c +++ b/drivers/pci/pwrctrl/slot.c
@@ -63,7 +63,6 @@ static void devm_slot_pwrctrl_release(void *data) { struct slot_pwrctrl *slot = data; - slot_pwrctrl_power_off(&slot->pwrctrl); regulator_bulk_free(slot->num_supplies, slot->supplies); }
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index b8aaa29..cffc32d 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c
@@ -856,7 +856,7 @@ static void pcifront_try_connect(struct pcifront_device *pdev) int err; /* Only connect once */ - if (xenbus_read_driver_state(pdev->xdev->nodename) != + if (xenbus_read_driver_state(pdev->xdev, pdev->xdev->nodename) != XenbusStateInitialised) return; @@ -876,7 +876,7 @@ static int pcifront_try_disconnect(struct pcifront_device *pdev) enum xenbus_state prev_state; - prev_state = xenbus_read_driver_state(pdev->xdev->nodename); + prev_state = xenbus_read_driver_state(pdev->xdev, pdev->xdev->nodename); if (prev_state >= XenbusStateClosing) goto out; @@ -895,7 +895,7 @@ static int pcifront_try_disconnect(struct pcifront_device *pdev) static void pcifront_attach_devices(struct pcifront_device *pdev) { - if (xenbus_read_driver_state(pdev->xdev->nodename) == + if (xenbus_read_driver_state(pdev->xdev, pdev->xdev->nodename) == XenbusStateReconfiguring) pcifront_connect(pdev); } @@ -909,7 +909,7 @@ static int pcifront_detach_devices(struct pcifront_device *pdev) struct pci_dev *pci_dev; char str[64]; - state = xenbus_read_driver_state(pdev->xdev->nodename); + state = xenbus_read_driver_state(pdev->xdev, pdev->xdev->nodename); if (state == XenbusStateInitialised) { dev_dbg(&pdev->xdev->dev, "Handle skipped connect.\n"); /* We missed Connected and need to initialize. */
diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig index 02467df..1875d5b 100644 --- a/drivers/phy/Kconfig +++ b/drivers/phy/Kconfig
@@ -6,7 +6,7 @@ menu "PHY Subsystem" config PHY_COMMON_PROPS - bool + bool "PHY common properties" if KUNIT_ALL_TESTS help This parses properties common between generic PHYs and Ethernet PHYs. @@ -16,8 +16,7 @@ config PHY_COMMON_PROPS_TEST tristate "KUnit tests for PHY common props" if !KUNIT_ALL_TESTS - select PHY_COMMON_PROPS - depends on KUNIT + depends on KUNIT && PHY_COMMON_PROPS default KUNIT_ALL_TESTS help This builds KUnit tests for the PHY common property API.
diff --git a/drivers/phy/freescale/phy-fsl-lynx-28g.c b/drivers/phy/freescale/phy-fsl-lynx-28g.c index 2b0fd95..63427fc 100644 --- a/drivers/phy/freescale/phy-fsl-lynx-28g.c +++ b/drivers/phy/freescale/phy-fsl-lynx-28g.c
@@ -1069,6 +1069,8 @@ static void lynx_28g_cdr_lock_check(struct work_struct *work) for (i = 0; i < LYNX_28G_NUM_LANE; i++) { lane = &priv->lane[i]; + if (!lane->phy) + continue; mutex_lock(&lane->phy->mutex);
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index df138a5..771bc7c 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -990,6 +990,7 @@ static const struct qmp_phy_init_tbl sm8650_ufsphy_pcs[] = { QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_MULTI_LANE_CTRL1, 0x02), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_MID_TERM_CTRL1, 0x43), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PCS_CTRL1, 0xc1), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PLL_CNTL, 0x33), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_LARGE_AMP_DRV_LVL, 0x0f), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_SIGDET_CTRL2, 0x68), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_POST_EMP_LVL_S4, 0x0e), @@ -999,13 +1000,11 @@ static const struct qmp_phy_init_tbl sm8650_ufsphy_pcs[] = { }; static const struct qmp_phy_init_tbl sm8650_ufsphy_g4_pcs[] = { - QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PLL_CNTL, 0x13), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_HSGEAR_CAPABILITY, 0x04), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_HSGEAR_CAPABILITY, 0x04), }; static const struct qmp_phy_init_tbl sm8650_ufsphy_g5_pcs[] = { - QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PLL_CNTL, 0x33), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_HSGEAR_CAPABILITY, 0x05), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_HSGEAR_CAPABILITY, 0x05), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_HS_G5_SYNC_LENGTH_CAPABILITY, 0x4d),
diff --git a/drivers/phy/spacemit/phy-k1-usb2.c b/drivers/phy/spacemit/phy-k1-usb2.c index 3420613..9215d0b 100644 --- a/drivers/phy/spacemit/phy-k1-usb2.c +++ b/drivers/phy/spacemit/phy-k1-usb2.c
@@ -48,6 +48,9 @@ #define PHY_CLK_HSTXP_EN BIT(3) /* clock hstxp enable */ #define PHY_HSTXP_MODE BIT(4) /* 0: force en_txp to be 1; 1: no force */ +#define PHY_K1_HS_HOST_DISC 0x40 +#define PHY_K1_HS_HOST_DISC_CLR BIT(0) + #define PHY_PLL_DIV_CFG 0x98 #define PHY_FDIV_FRACT_8_15 GENMASK(7, 0) #define PHY_FDIV_FRACT_16_19 GENMASK(11, 8) @@ -142,9 +145,20 @@ static int spacemit_usb2phy_exit(struct phy *phy) return 0; } +static int spacemit_usb2phy_disconnect(struct phy *phy, int port) +{ + struct spacemit_usb2phy *sphy = phy_get_drvdata(phy); + + regmap_update_bits(sphy->regmap_base, PHY_K1_HS_HOST_DISC, + PHY_K1_HS_HOST_DISC_CLR, PHY_K1_HS_HOST_DISC_CLR); + + return 0; +} + static const struct phy_ops spacemit_usb2phy_ops = { .init = spacemit_usb2phy_init, .exit = spacemit_usb2phy_exit, + .disconnect = spacemit_usb2phy_disconnect, .owner = THIS_MODULE, };
diff --git a/drivers/phy/ti/phy-j721e-wiz.c b/drivers/phy/ti/phy-j721e-wiz.c index 6e9ecb8..6b58470 100644 --- a/drivers/phy/ti/phy-j721e-wiz.c +++ b/drivers/phy/ti/phy-j721e-wiz.c
@@ -1425,6 +1425,7 @@ static int wiz_get_lane_phy_types(struct device *dev, struct wiz *wiz) dev_err(dev, "%s: Reading \"reg\" from \"%s\" failed: %d\n", __func__, subnode->name, ret); + of_node_put(serdes); return ret; } of_property_read_u32(subnode, "cdns,num-lanes", &num_lanes); @@ -1439,6 +1440,7 @@ static int wiz_get_lane_phy_types(struct device *dev, struct wiz *wiz) } } + of_node_put(serdes); return 0; }
diff --git a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c index a8f8210..227c37c 100644 --- a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c +++ b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c
@@ -574,10 +574,9 @@ static int cs42l43_pin_probe(struct platform_device *pdev) if (child) { ret = devm_add_action_or_reset(&pdev->dev, cs42l43_fwnode_put, child); - if (ret) { - fwnode_handle_put(child); + if (ret) return ret; - } + if (!child->dev) child->dev = priv->dev; fwnode = child;
diff --git a/drivers/pinctrl/cix/pinctrl-sky1.c b/drivers/pinctrl/cix/pinctrl-sky1.c index 5d0d8be..9388940 100644 --- a/drivers/pinctrl/cix/pinctrl-sky1.c +++ b/drivers/pinctrl/cix/pinctrl-sky1.c
@@ -522,11 +522,10 @@ static int __maybe_unused sky1_pinctrl_resume(struct device *dev) return pinctrl_force_default(spctl->pctl); } -const struct dev_pm_ops sky1_pinctrl_pm_ops = { +static const struct dev_pm_ops sky1_pinctrl_pm_ops = { SET_LATE_SYSTEM_SLEEP_PM_OPS(sky1_pinctrl_suspend, sky1_pinctrl_resume) }; -EXPORT_SYMBOL_GPL(sky1_pinctrl_pm_ops); static int sky1_pinctrl_probe(struct platform_device *pdev) {
diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index d6a46fe..3f518dc 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
@@ -1135,9 +1135,12 @@ int mtk_pctrl_init(struct platform_device *pdev, goto chip_error; } - ret = mtk_eint_init(pctl, pdev); - if (ret) - goto chip_error; + /* Only initialize EINT if we have EINT pins */ + if (data->eint_hw.ap_num > 0) { + ret = mtk_eint_init(pctl, pdev); + if (ret) + goto chip_error; + } return 0;
diff --git a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c index dfa32b1..e2293a8 100644 --- a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c +++ b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c
@@ -679,7 +679,6 @@ static int aml_dt_node_to_map_pinmux(struct pinctrl_dev *pctldev, unsigned int *num_maps) { struct device *dev = pctldev->dev; - struct device_node *pnode; unsigned long *configs = NULL; unsigned int num_configs = 0; struct property *prop; @@ -693,7 +692,7 @@ static int aml_dt_node_to_map_pinmux(struct pinctrl_dev *pctldev, return -ENOENT; } - pnode = of_get_parent(np); + struct device_node *pnode __free(device_node) = of_get_parent(np); if (!pnode) { dev_info(dev, "Missing function node\n"); return -EINVAL;
diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c index 94b1d05..2b030bd0 100644 --- a/drivers/pinctrl/pinconf-generic.c +++ b/drivers/pinctrl/pinconf-generic.c
@@ -351,13 +351,13 @@ int pinconf_generic_parse_dt_config(struct device_node *np, ret = parse_dt_cfg(np, dt_params, ARRAY_SIZE(dt_params), cfg, &ncfg); if (ret) - return ret; + goto out; if (pctldev && pctldev->desc->num_custom_params && pctldev->desc->custom_params) { ret = parse_dt_cfg(np, pctldev->desc->custom_params, pctldev->desc->num_custom_params, cfg, &ncfg); if (ret) - return ret; + goto out; } /* no configs found at all */
diff --git a/drivers/pinctrl/pinctrl-amdisp.c b/drivers/pinctrl/pinctrl-amdisp.c index efbf40c..e0874cc 100644 --- a/drivers/pinctrl/pinctrl-amdisp.c +++ b/drivers/pinctrl/pinctrl-amdisp.c
@@ -80,7 +80,7 @@ static int amdisp_get_group_pins(struct pinctrl_dev *pctldev, return 0; } -const struct pinctrl_ops amdisp_pinctrl_ops = { +static const struct pinctrl_ops amdisp_pinctrl_ops = { .get_groups_count = amdisp_get_groups_count, .get_group_name = amdisp_get_group_name, .get_group_pins = amdisp_get_group_pins,
diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c index a4b04bf..5c055d3 100644 --- a/drivers/pinctrl/pinctrl-cy8c95x0.c +++ b/drivers/pinctrl/pinctrl-cy8c95x0.c
@@ -627,7 +627,7 @@ static int cy8c95x0_write_regs_mask(struct cy8c95x0_pinctrl *chip, int reg, bitmap_scatter(tmask, mask, chip->map, MAX_LINE); bitmap_scatter(tval, val, chip->map, MAX_LINE); - for_each_set_clump8(offset, bits, tmask, chip->tpin) { + for_each_set_clump8(offset, bits, tmask, chip->nport * BANK_SZ) { unsigned int i = offset / 8; write_val = bitmap_get_value8(tval, offset); @@ -655,7 +655,7 @@ static int cy8c95x0_read_regs_mask(struct cy8c95x0_pinctrl *chip, int reg, bitmap_scatter(tmask, mask, chip->map, MAX_LINE); bitmap_scatter(tval, val, chip->map, MAX_LINE); - for_each_set_clump8(offset, bits, tmask, chip->tpin) { + for_each_set_clump8(offset, bits, tmask, chip->nport * BANK_SZ) { unsigned int i = offset / 8; ret = cy8c95x0_regmap_read_bits(chip, reg, i, bits, &read_val);
diff --git a/drivers/pinctrl/pinctrl-equilibrium.c b/drivers/pinctrl/pinctrl-equilibrium.c index 48b55c5..ba1c867 100644 --- a/drivers/pinctrl/pinctrl-equilibrium.c +++ b/drivers/pinctrl/pinctrl-equilibrium.c
@@ -23,7 +23,7 @@ #define PIN_NAME_LEN 10 #define PAD_REG_OFF 0x100 -static void eqbr_gpio_disable_irq(struct irq_data *d) +static void eqbr_irq_mask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc); @@ -36,7 +36,7 @@ static void eqbr_gpio_disable_irq(struct irq_data *d) gpiochip_disable_irq(gc, offset); } -static void eqbr_gpio_enable_irq(struct irq_data *d) +static void eqbr_irq_unmask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc); @@ -50,7 +50,7 @@ static void eqbr_gpio_enable_irq(struct irq_data *d) raw_spin_unlock_irqrestore(&gctrl->lock, flags); } -static void eqbr_gpio_ack_irq(struct irq_data *d) +static void eqbr_irq_ack(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc); @@ -62,10 +62,17 @@ static void eqbr_gpio_ack_irq(struct irq_data *d) raw_spin_unlock_irqrestore(&gctrl->lock, flags); } -static void eqbr_gpio_mask_ack_irq(struct irq_data *d) +static void eqbr_irq_mask_ack(struct irq_data *d) { - eqbr_gpio_disable_irq(d); - eqbr_gpio_ack_irq(d); + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc); + unsigned int offset = irqd_to_hwirq(d); + unsigned long flags; + + raw_spin_lock_irqsave(&gctrl->lock, flags); + writel(BIT(offset), gctrl->membase + GPIO_IRNENCLR); + writel(BIT(offset), gctrl->membase + GPIO_IRNCR); + raw_spin_unlock_irqrestore(&gctrl->lock, flags); } static inline void eqbr_cfg_bit(void __iomem *addr, @@ -92,7 +99,7 @@ static int eqbr_irq_type_cfg(struct gpio_irq_type *type, return 0; } -static int eqbr_gpio_set_irq_type(struct irq_data *d, unsigned int type) +static int eqbr_irq_set_type(struct irq_data *d, unsigned int type) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct eqbr_gpio_ctrl *gctrl = gpiochip_get_data(gc); @@ -166,11 +173,11 @@ static void eqbr_irq_handler(struct irq_desc *desc) static const struct irq_chip eqbr_irq_chip = { .name = "gpio_irq", - .irq_mask = eqbr_gpio_disable_irq, - .irq_unmask = eqbr_gpio_enable_irq, - .irq_ack = eqbr_gpio_ack_irq, - .irq_mask_ack = eqbr_gpio_mask_ack_irq, - .irq_set_type = eqbr_gpio_set_irq_type, + .irq_ack = eqbr_irq_ack, + .irq_mask = eqbr_irq_mask, + .irq_mask_ack = eqbr_irq_mask_ack, + .irq_unmask = eqbr_irq_unmask, + .irq_set_type = eqbr_irq_set_type, .flags = IRQCHIP_IMMUTABLE, GPIOCHIP_IRQ_RESOURCE_HELPERS, };
diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index d87c0b1..f15b18f 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -3640,14 +3640,10 @@ static int rockchip_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, * or the gpio driver hasn't probed yet. */ scoped_guard(mutex, &bank->deferred_lock) { - if (!gpio || !gpio->direction_output) { - rc = rockchip_pinconf_defer_pin(bank, - pin - bank->pin_base, - param, arg); - if (rc) - return rc; - break; - } + if (!gpio || !gpio->direction_output) + return rockchip_pinconf_defer_pin(bank, + pin - bank->pin_base, + param, arg); } }
diff --git a/drivers/pinctrl/qcom/pinctrl-qcs615.c b/drivers/pinctrl/qcom/pinctrl-qcs615.c index 4dfa820..f1c827d 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs615.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs615.c
@@ -1067,6 +1067,7 @@ static const struct msm_pinctrl_soc_data qcs615_tlmm = { .ntiles = ARRAY_SIZE(qcs615_tiles), .wakeirq_map = qcs615_pdc_map, .nwakeirq_map = ARRAY_SIZE(qcs615_pdc_map), + .wakeirq_dual_edge_errata = true, }; static const struct of_device_id qcs615_tlmm_of_match[] = {
diff --git a/drivers/pinctrl/qcom/pinctrl-sdm660-lpass-lpi.c b/drivers/pinctrl/qcom/pinctrl-sdm660-lpass-lpi.c index d93af5f..65411ab 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdm660-lpass-lpi.c +++ b/drivers/pinctrl/qcom/pinctrl-sdm660-lpass-lpi.c
@@ -76,7 +76,7 @@ static const char * const pdm_clk_groups[] = { "gpio18" }; static const char * const pdm_rx_groups[] = { "gpio21", "gpio23", "gpio25" }; static const char * const pdm_sync_groups[] = { "gpio19" }; -const struct lpi_pingroup sdm660_lpi_pinctrl_groups[] = { +static const struct lpi_pingroup sdm660_lpi_pinctrl_groups[] = { LPI_PINGROUP_OFFSET(0, LPI_NO_SLEW, _, _, _, _, 0x0000), LPI_PINGROUP_OFFSET(1, LPI_NO_SLEW, _, _, _, _, 0x1000), LPI_PINGROUP_OFFSET(2, LPI_NO_SLEW, _, _, _, _, 0x2000), @@ -113,7 +113,7 @@ const struct lpi_pingroup sdm660_lpi_pinctrl_groups[] = { LPI_PINGROUP_OFFSET(31, LPI_NO_SLEW, _, _, _, _, 0xb010), }; -const struct lpi_function sdm660_lpi_pinctrl_functions[] = { +static const struct lpi_function sdm660_lpi_pinctrl_functions[] = { LPI_FUNCTION(comp_rx), LPI_FUNCTION(dmic1_clk), LPI_FUNCTION(dmic1_data),
diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c index 83f940f..d02d425 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
@@ -723,6 +723,21 @@ static const struct pinconf_ops pmic_gpio_pinconf_ops = { .pin_config_group_dbg_show = pmic_gpio_config_dbg_show, }; +static int pmic_gpio_get_direction(struct gpio_chip *chip, unsigned pin) +{ + struct pmic_gpio_state *state = gpiochip_get_data(chip); + struct pmic_gpio_pad *pad; + + pad = state->ctrl->desc->pins[pin].drv_data; + + if (!pad->is_enabled || pad->analog_pass || + (!pad->input_enabled && !pad->output_enabled)) + return -EINVAL; + + /* Make sure the state is aligned on what pmic_gpio_get() returns */ + return pad->input_enabled ? GPIO_LINE_DIRECTION_IN : GPIO_LINE_DIRECTION_OUT; +} + static int pmic_gpio_direction_input(struct gpio_chip *chip, unsigned pin) { struct pmic_gpio_state *state = gpiochip_get_data(chip); @@ -801,6 +816,7 @@ static void pmic_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) } static const struct gpio_chip pmic_gpio_gpio_template = { + .get_direction = pmic_gpio_get_direction, .direction_input = pmic_gpio_direction_input, .direction_output = pmic_gpio_direction_output, .get = pmic_gpio_get,
diff --git a/drivers/pinctrl/renesas/pinctrl-rza1.c b/drivers/pinctrl/renesas/pinctrl-rza1.c index bc8b9b9..d2949e4 100644 --- a/drivers/pinctrl/renesas/pinctrl-rza1.c +++ b/drivers/pinctrl/renesas/pinctrl-rza1.c
@@ -589,7 +589,7 @@ static inline unsigned int rza1_get_bit(struct rza1_port *port, { void __iomem *mem = RZA1_ADDR(port->base, reg, port->id); - return ioread16(mem) & BIT(bit); + return !!(ioread16(mem) & BIT(bit)); } /**
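The rza1 fix matters because a masked register read yields the raw bit value, not a boolean: for bit 9 the expression evaluates to 0x200, which is truthy but not equal to 1, so callers comparing against 1 or storing into a narrower type misbehave. The double negation collapses it to a strict 0/1:

#include <linux/bits.h>

static unsigned int example_bit_is_set(u16 regval, unsigned int bit)
{
	/* regval & BIT(9) is 0x200 when set; '!!' turns that into 1. */
	return !!(regval & BIT(bit));
}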
diff --git a/drivers/pinctrl/renesas/pinctrl-rzt2h.c b/drivers/pinctrl/renesas/pinctrl-rzt2h.c index 9949108..5927744c 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzt2h.c +++ b/drivers/pinctrl/renesas/pinctrl-rzt2h.c
@@ -85,7 +85,7 @@ struct rzt2h_pinctrl { struct gpio_chip gpio_chip; struct pinctrl_gpio_range gpio_range; DECLARE_BITMAP(used_irqs, RZT2H_INTERRUPTS_NUM); - spinlock_t lock; /* lock read/write registers */ + raw_spinlock_t lock; /* lock read/write registers */ struct mutex mutex; /* serialize adding groups and functions */ bool safety_port_enabled; atomic_t wakeup_path; @@ -145,7 +145,7 @@ static void rzt2h_pinctrl_set_pfc_mode(struct rzt2h_pinctrl *pctrl, u64 reg64; u16 reg16; - guard(spinlock_irqsave)(&pctrl->lock); + guard(raw_spinlock_irqsave)(&pctrl->lock); /* Set pin to 'Non-use (Hi-Z input protection)' */ reg16 = rzt2h_pinctrl_readw(pctrl, port, PM(port)); @@ -474,7 +474,7 @@ static int rzt2h_gpio_request(struct gpio_chip *chip, unsigned int offset) if (ret) return ret; - guard(spinlock_irqsave)(&pctrl->lock); + guard(raw_spinlock_irqsave)(&pctrl->lock); /* Select GPIO mode in PMC Register */ rzt2h_pinctrl_set_gpio_en(pctrl, port, bit, true); @@ -487,7 +487,7 @@ static void rzt2h_gpio_set_direction(struct rzt2h_pinctrl *pctrl, u32 port, { u16 reg; - guard(spinlock_irqsave)(&pctrl->lock); + guard(raw_spinlock_irqsave)(&pctrl->lock); reg = rzt2h_pinctrl_readw(pctrl, port, PM(port)); reg &= ~PM_PIN_MASK(bit); @@ -509,7 +509,7 @@ static int rzt2h_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) if (ret) return ret; - guard(spinlock_irqsave)(&pctrl->lock); + guard(raw_spinlock_irqsave)(&pctrl->lock); if (rzt2h_pinctrl_readb(pctrl, port, PMC(port)) & BIT(bit)) { /* @@ -547,7 +547,7 @@ static int rzt2h_gpio_set(struct gpio_chip *chip, unsigned int offset, u8 bit = RZT2H_PIN_ID_TO_PIN(offset); u8 reg; - guard(spinlock_irqsave)(&pctrl->lock); + guard(raw_spinlock_irqsave)(&pctrl->lock); reg = rzt2h_pinctrl_readb(pctrl, port, P(port)); if (value) @@ -833,6 +833,7 @@ static int rzt2h_gpio_register(struct rzt2h_pinctrl *pctrl) if (ret) return dev_err_probe(dev, ret, "Unable to parse gpio-ranges\n"); + of_node_put(of_args.np); if (of_args.args[0] != 0 || of_args.args[1] != 0 || of_args.args[2] != pctrl->data->n_port_pins) return dev_err_probe(dev, -EINVAL, @@ -964,7 +965,7 @@ static int rzt2h_pinctrl_probe(struct platform_device *pdev) if (ret) return ret; - spin_lock_init(&pctrl->lock); + raw_spin_lock_init(&pctrl->lock); mutex_init(&pctrl->mutex); platform_set_drvdata(pdev, pctrl);
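Two things happen in the rzt2h diff: the register lock becomes a raw_spinlock_t, which remains a true spinning lock on PREEMPT_RT (an ordinary spinlock_t turns into a sleeping lock there, which is unsafe in paths that may run with interrupts disabled), and a missing of_node_put() is added after parsing gpio-ranges. A minimal sketch of the guard-based raw-spinlock idiom, with assumed names:

#include <linux/cleanup.h>
#include <linux/io.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_lock);

static void example_rmw(void __iomem *reg, u16 set)
{
	/*
	 * guard() drops the lock and restores the saved IRQ state when
	 * the scope ends, so no explicit unlock is needed.
	 */
	guard(raw_spinlock_irqsave)(&example_lock);

	writew(readw(reg) | set, reg);
}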
diff --git a/drivers/pinctrl/stm32/Kconfig b/drivers/pinctrl/stm32/Kconfig index 5f67e1e..d6a1715 100644 --- a/drivers/pinctrl/stm32/Kconfig +++ b/drivers/pinctrl/stm32/Kconfig
@@ -65,6 +65,7 @@ select PINMUX select GENERIC_PINCONF select GPIOLIB + select GPIO_GENERIC help The Hardware Debug Port allows the observation of internal signals. It uses configurable multiplexer to route signals in a dedicated observation register.
diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 4843429..d3042e0 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
@@ -157,6 +157,7 @@ sunxi_pinctrl_desc_find_function_by_name(struct sunxi_pinctrl *pctl, const char *pin_name, const char *func_name) { + unsigned long variant = pctl->flags & SUNXI_PINCTRL_VARIANT_MASK; int i; for (i = 0; i < pctl->desc->npins; i++) { @@ -168,7 +169,7 @@ sunxi_pinctrl_desc_find_function_by_name(struct sunxi_pinctrl *pctl, while (func->name) { if (!strcmp(func->name, func_name) && (!func->variant || - func->variant & pctl->variant)) + func->variant & variant)) return func; func++; @@ -204,6 +205,34 @@ sunxi_pinctrl_desc_find_function_by_pin(struct sunxi_pinctrl *pctl, return NULL; } +static struct sunxi_desc_function * +sunxi_pinctrl_desc_find_function_by_pin_and_mux(struct sunxi_pinctrl *pctl, + const u16 pin_num, + const u8 muxval) +{ + unsigned long variant = pctl->flags & SUNXI_PINCTRL_VARIANT_MASK; + + for (unsigned int i = 0; i < pctl->desc->npins; i++) { + const struct sunxi_desc_pin *pin = pctl->desc->pins + i; + struct sunxi_desc_function *func = pin->functions; + + if (pin->pin.number != pin_num) + continue; + + if (pin->variant && !(variant & pin->variant)) + continue; + + while (func->name) { + if (func->muxval == muxval) + return func; + + func++; + } + } + + return NULL; +} + static int sunxi_pctrl_get_groups_count(struct pinctrl_dev *pctldev) { struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev); @@ -930,6 +959,30 @@ static const struct pinmux_ops sunxi_pmx_ops = { .strict = true, }; +static int sunxi_pinctrl_gpio_get_direction(struct gpio_chip *chip, + unsigned int offset) +{ + struct sunxi_pinctrl *pctl = gpiochip_get_data(chip); + const struct sunxi_desc_function *func; + u32 pin = offset + chip->base; + u32 reg, shift, mask; + u8 muxval; + + sunxi_mux_reg(pctl, offset, &reg, &shift, &mask); + + muxval = (readl(pctl->membase + reg) & mask) >> shift; + + func = sunxi_pinctrl_desc_find_function_by_pin_and_mux(pctl, pin, muxval); + if (!func) + return -ENODEV; + + if (!strcmp(func->name, "gpio_out")) + return GPIO_LINE_DIRECTION_OUT; + if (!strcmp(func->name, "gpio_in") || !strcmp(func->name, "irq")) + return GPIO_LINE_DIRECTION_IN; + return -EINVAL; +} + static int sunxi_pinctrl_gpio_direction_input(struct gpio_chip *chip, unsigned offset) { @@ -1039,6 +1092,9 @@ static int sunxi_pinctrl_irq_request_resources(struct irq_data *d) { struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d); struct sunxi_desc_function *func; + unsigned int offset; + u32 reg, shift, mask; + u8 disabled_mux, muxval; int ret; func = sunxi_pinctrl_desc_find_function_by_pin(pctl, @@ -1046,8 +1102,21 @@ static int sunxi_pinctrl_irq_request_resources(struct irq_data *d) if (!func) return -EINVAL; - ret = gpiochip_lock_as_irq(pctl->chip, - pctl->irq_array[d->hwirq] - pctl->desc->pin_base); + offset = pctl->irq_array[d->hwirq] - pctl->desc->pin_base; + sunxi_mux_reg(pctl, offset, &reg, &shift, &mask); + muxval = (readl(pctl->membase + reg) & mask) >> shift; + + /* Change muxing to GPIO INPUT mode if at reset value */ + if (pctl->flags & SUNXI_PINCTRL_NEW_REG_LAYOUT) + disabled_mux = SUN4I_FUNC_DISABLED_NEW; + else + disabled_mux = SUN4I_FUNC_DISABLED_OLD; + + if (muxval == disabled_mux) + sunxi_pmx_set(pctl->pctl_dev, pctl->irq_array[d->hwirq], + SUN4I_FUNC_INPUT); + + ret = gpiochip_lock_as_irq(pctl->chip, offset); if (ret) { dev_err(pctl->dev, "unable to lock HW IRQ %lu for IRQ\n", irqd_to_hwirq(d)); @@ -1288,6 +1357,7 @@ static int sunxi_pinctrl_add_function(struct sunxi_pinctrl *pctl, static int sunxi_pinctrl_build_state(struct platform_device *pdev) { struct sunxi_pinctrl
*pctl = platform_get_drvdata(pdev); + unsigned long variant = pctl->flags & SUNXI_PINCTRL_VARIANT_MASK; void *ptr; int i; @@ -1312,7 +1382,7 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev) const struct sunxi_desc_pin *pin = pctl->desc->pins + i; struct sunxi_pinctrl_group *group = pctl->groups + pctl->ngroups; - if (pin->variant && !(pctl->variant & pin->variant)) + if (pin->variant && !(variant & pin->variant)) continue; group->name = pin->pin.name; @@ -1337,11 +1407,11 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev) const struct sunxi_desc_pin *pin = pctl->desc->pins + i; struct sunxi_desc_function *func; - if (pin->variant && !(pctl->variant & pin->variant)) + if (pin->variant && !(variant & pin->variant)) continue; for (func = pin->functions; func->name; func++) { - if (func->variant && !(pctl->variant & func->variant)) + if (func->variant && !(variant & func->variant)) continue; /* Create interrupt mapping while we're at it */ @@ -1369,14 +1439,14 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev) const struct sunxi_desc_pin *pin = pctl->desc->pins + i; struct sunxi_desc_function *func; - if (pin->variant && !(pctl->variant & pin->variant)) + if (pin->variant && !(variant & pin->variant)) continue; for (func = pin->functions; func->name; func++) { struct sunxi_pinctrl_function *func_item; const char **func_grp; - if (func->variant && !(pctl->variant & func->variant)) + if (func->variant && !(variant & func->variant)) continue; func_item = sunxi_pinctrl_find_function_by_name(pctl, @@ -1518,7 +1588,7 @@ int sunxi_pinctrl_init_with_flags(struct platform_device *pdev, pctl->dev = &pdev->dev; pctl->desc = desc; - pctl->variant = flags & SUNXI_PINCTRL_VARIANT_MASK; + pctl->flags = flags; if (flags & SUNXI_PINCTRL_NEW_REG_LAYOUT) { pctl->bank_mem_size = D1_BANK_MEM_SIZE; pctl->pull_regs_offset = D1_PULL_REGS_OFFSET; @@ -1554,8 +1624,9 @@ int sunxi_pinctrl_init_with_flags(struct platform_device *pdev, for (i = 0, pin_idx = 0; i < pctl->desc->npins; i++) { const struct sunxi_desc_pin *pin = pctl->desc->pins + i; + unsigned long variant = pctl->flags & SUNXI_PINCTRL_VARIANT_MASK; - if (pin->variant && !(pctl->variant & pin->variant)) + if (pin->variant && !(variant & pin->variant)) continue; pins[pin_idx++] = pin->pin; @@ -1599,6 +1670,7 @@ int sunxi_pinctrl_init_with_flags(struct platform_device *pdev, pctl->chip->request = gpiochip_generic_request; pctl->chip->free = gpiochip_generic_free; pctl->chip->set_config = gpiochip_generic_config; + pctl->chip->get_direction = sunxi_pinctrl_gpio_get_direction; pctl->chip->direction_input = sunxi_pinctrl_gpio_direction_input; pctl->chip->direction_output = sunxi_pinctrl_gpio_direction_output; pctl->chip->get = sunxi_pinctrl_gpio_get;
diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.h b/drivers/pinctrl/sunxi/pinctrl-sunxi.h index ad26e4d..0daf760 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.h +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.h
@@ -86,6 +86,8 @@ #define SUN4I_FUNC_INPUT 0 #define SUN4I_FUNC_IRQ 6 +#define SUN4I_FUNC_DISABLED_OLD 7 +#define SUN4I_FUNC_DISABLED_NEW 15 #define SUNXI_PINCTRL_VARIANT_MASK GENMASK(7, 0) #define SUNXI_PINCTRL_NEW_REG_LAYOUT BIT(8) @@ -174,7 +176,7 @@ struct sunxi_pinctrl { unsigned *irq_array; raw_spinlock_t lock; struct pinctrl_dev *pctl_dev; - unsigned long variant; + unsigned long flags; u32 bank_mem_size; u32 pull_regs_offset; u32 dlevel_field_width;
diff --git a/drivers/platform/olpc/olpc-xo175-ec.c b/drivers/platform/olpc/olpc-xo175-ec.c index fa7b3bd..bee271a 100644 --- a/drivers/platform/olpc/olpc-xo175-ec.c +++ b/drivers/platform/olpc/olpc-xo175-ec.c
@@ -482,7 +482,7 @@ static int olpc_xo175_ec_cmd(u8 cmd, u8 *inbuf, size_t inlen, u8 *resp, dev_dbg(dev, "CMD %x, %zd bytes expected\n", cmd, resp_len); if (inlen > 5) { - dev_err(dev, "command len %zd too big!\n", resp_len); + dev_err(dev, "command len %zd too big!\n", inlen); return -EOVERFLOW; }
diff --git a/drivers/platform/x86/amd/hsmp/hsmp.c b/drivers/platform/x86/amd/hsmp/hsmp.c index 19f82c1..631ffc0 100644 --- a/drivers/platform/x86/amd/hsmp/hsmp.c +++ b/drivers/platform/x86/amd/hsmp/hsmp.c
@@ -117,7 +117,7 @@ static int __hsmp_send_message(struct hsmp_socket *sock, struct hsmp_message *ms } if (unlikely(mbox_status == HSMP_STATUS_NOT_READY)) { - dev_err(sock->dev, "Message ID 0x%X failure : SMU tmeout (status = 0x%X)\n", + dev_err(sock->dev, "Message ID 0x%X failure : SMU timeout (status = 0x%X)\n", msg->msg_id, mbox_status); return -ETIMEDOUT; } else if (unlikely(mbox_status == HSMP_ERR_INVALID_MSG)) {
diff --git a/drivers/platform/x86/asus-armoury.h b/drivers/platform/x86/asus-armoury.h index 6e9703b..5697437 100644 --- a/drivers/platform/x86/asus-armoury.h +++ b/drivers/platform/x86/asus-armoury.h
@@ -348,6 +348,35 @@ struct power_data { static const struct dmi_system_id power_limits[] = { { .matches = { + DMI_MATCH(DMI_BOARD_NAME, "FA401UM"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 15, + .ppt_pl1_spl_max = 80, + .ppt_pl2_sppt_min = 35, + .ppt_pl2_sppt_max = 80, + .ppt_pl3_fppt_min = 35, + .ppt_pl3_fppt_max = 80, + .nv_dynamic_boost_min = 5, + .nv_dynamic_boost_max = 15, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + }, + .dc_data = &(struct power_limits) { + .ppt_pl1_spl_min = 25, + .ppt_pl1_spl_max = 35, + .ppt_pl2_sppt_min = 31, + .ppt_pl2_sppt_max = 44, + .ppt_pl3_fppt_min = 45, + .ppt_pl3_fppt_max = 65, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + }, + }, + }, + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "FA401UV"), }, .driver_data = &(struct power_data) { @@ -1053,6 +1082,20 @@ static const struct dmi_system_id power_limits[] = { }, { .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GA503QM"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 15, + .ppt_pl1_spl_def = 35, + .ppt_pl1_spl_max = 80, + .ppt_pl2_sppt_min = 65, + .ppt_pl2_sppt_max = 80, + }, + }, + }, + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "GA503QR"), }, .driver_data = &(struct power_data) { @@ -1459,6 +1502,67 @@ static const struct dmi_system_id power_limits[] = { }, { .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GX650RX"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 28, + .ppt_pl1_spl_def = 70, + .ppt_pl1_spl_max = 90, + .ppt_pl2_sppt_min = 28, + .ppt_pl2_sppt_def = 70, + .ppt_pl2_sppt_max = 100, + .ppt_pl3_fppt_min = 28, + .ppt_pl3_fppt_def = 110, + .ppt_pl3_fppt_max = 125, + .nv_dynamic_boost_min = 5, + .nv_dynamic_boost_max = 25, + .nv_temp_target_min = 76, + .nv_temp_target_max = 87, + }, + .dc_data = &(struct power_limits) { + .ppt_pl1_spl_min = 28, + .ppt_pl1_spl_max = 50, + .ppt_pl2_sppt_min = 28, + .ppt_pl2_sppt_max = 50, + .ppt_pl3_fppt_min = 28, + .ppt_pl3_fppt_max = 65, + .nv_temp_target_min = 76, + .nv_temp_target_max = 87, + }, + }, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GZ302EA"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 28, + .ppt_pl1_spl_def = 60, + .ppt_pl1_spl_max = 80, + .ppt_pl2_sppt_min = 32, + .ppt_pl2_sppt_def = 75, + .ppt_pl2_sppt_max = 92, + .ppt_pl3_fppt_min = 45, + .ppt_pl3_fppt_def = 86, + .ppt_pl3_fppt_max = 93, + }, + .dc_data = &(struct power_limits) { + .ppt_pl1_spl_min = 28, + .ppt_pl1_spl_def = 45, + .ppt_pl1_spl_max = 80, + .ppt_pl2_sppt_min = 32, + .ppt_pl2_sppt_def = 52, + .ppt_pl2_sppt_max = 92, + .ppt_pl3_fppt_min = 45, + .ppt_pl3_fppt_def = 71, + .ppt_pl3_fppt_max = 93, + }, + }, + }, + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "G513I"), }, .driver_data = &(struct power_data) { @@ -1537,6 +1641,40 @@ static const struct dmi_system_id power_limits[] = { }, { .matches = { + DMI_MATCH(DMI_BOARD_NAME, "G614FP"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 30, + .ppt_pl1_spl_max = 120, + .ppt_pl2_sppt_min = 65, + .ppt_pl2_sppt_def = 140, + .ppt_pl2_sppt_max = 165, + .ppt_pl3_fppt_min = 65, + .ppt_pl3_fppt_def = 140, + .ppt_pl3_fppt_max = 165, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + .nv_dynamic_boost_min = 5, + .nv_dynamic_boost_max = 15, + .nv_tgp_min = 50, + .nv_tgp_max = 100, + }, + .dc_data = &(struct power_limits) { + 
.ppt_pl1_spl_min = 25, + .ppt_pl1_spl_max = 65, + .ppt_pl2_sppt_min = 25, + .ppt_pl2_sppt_max = 65, + .ppt_pl3_fppt_min = 35, + .ppt_pl3_fppt_max = 75, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + }, + .requires_fan_curve = true, + }, + }, + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "G614J"), }, .driver_data = &(struct power_data) { @@ -1710,6 +1848,20 @@ static const struct dmi_system_id power_limits[] = { }, { .matches = { + DMI_MATCH(DMI_BOARD_NAME, "G733QS"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 15, + .ppt_pl1_spl_max = 80, + .ppt_pl2_sppt_min = 15, + .ppt_pl2_sppt_max = 80, + }, + .requires_fan_curve = false, + }, + }, + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "G814J"), }, .driver_data = &(struct power_data) {
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index a38a65f..b4677c5 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -548,7 +548,7 @@ static const struct dmi_system_id asus_quirks[] = { .callback = dmi_matched, .ident = "ASUS ROG Z13", .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_SYS_VENDOR, "ASUS"), DMI_MATCH(DMI_PRODUCT_NAME, "ROG Flow Z13"), }, .driver_data = &quirk_asus_z13,
diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index e69b501..d1b4df9 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c
@@ -175,7 +175,7 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m18"), }, - .driver_data = &generic_quirks, + .driver_data = &g_series_quirks, }, { .ident = "Alienware x15",
diff --git a/drivers/platform/x86/dell/dell-wmi-base.c b/drivers/platform/x86/dell/dell-wmi-base.c index 4eefbad..e7a411ae 100644 --- a/drivers/platform/x86/dell/dell-wmi-base.c +++ b/drivers/platform/x86/dell/dell-wmi-base.c
@@ -80,6 +80,12 @@ static const struct dmi_system_id dell_wmi_smbios_list[] __initconst = { static const struct key_entry dell_wmi_keymap_type_0000[] = { { KE_IGNORE, 0x003a, { KEY_CAPSLOCK } }, + /* Audio mute toggle */ + { KE_KEY, 0x0109, { KEY_MUTE } }, + + /* Mic mute toggle */ + { KE_KEY, 0x0150, { KEY_MICMUTE } }, + /* Meta key lock */ { KE_IGNORE, 0xe000, { KEY_RIGHTMETA } },
diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c b/drivers/platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c index 86ec962..e586f79 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c
@@ -93,7 +93,6 @@ int set_new_password(const char *password_type, const char *new) if (ret < 0) goto out; - print_hex_dump_bytes("set new password data: ", DUMP_PREFIX_NONE, buffer, buffer_size); ret = call_password_interface(wmi_priv.password_attr_wdev, buffer, buffer_size); /* on success copy the new password to current password */ if (!ret)
diff --git a/drivers/platform/x86/hp/hp-bioscfg/enum-attributes.c b/drivers/platform/x86/hp/hp-bioscfg/enum-attributes.c index 470b9f4..af4d192 100644 --- a/drivers/platform/x86/hp/hp-bioscfg/enum-attributes.c +++ b/drivers/platform/x86/hp/hp-bioscfg/enum-attributes.c
@@ -94,8 +94,11 @@ int hp_alloc_enumeration_data(void) bioscfg_drv.enumeration_instances_count = hp_get_instance_count(HP_WMI_BIOS_ENUMERATION_GUID); - bioscfg_drv.enumeration_data = kzalloc_objs(*bioscfg_drv.enumeration_data, - bioscfg_drv.enumeration_instances_count); + if (!bioscfg_drv.enumeration_instances_count) + return -EINVAL; + bioscfg_drv.enumeration_data = kvcalloc(bioscfg_drv.enumeration_instances_count, + sizeof(*bioscfg_drv.enumeration_data), GFP_KERNEL); + if (!bioscfg_drv.enumeration_data) { bioscfg_drv.enumeration_instances_count = 0; return -ENOMEM; @@ -444,6 +447,6 @@ void hp_exit_enumeration_attributes(void) } bioscfg_drv.enumeration_instances_count = 0; - kfree(bioscfg_drv.enumeration_data); + kvfree(bioscfg_drv.enumeration_data); bioscfg_drv.enumeration_data = NULL; }
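The hp-bioscfg change swaps the plain allocation for kvcalloc(), which transparently falls back to vmalloc for large instance counts, and rejects a zero count up front. kvcalloc() memory must be released with kvfree(), as the second hunk does. A small sketch of the pairing, with illustrative names:

#include <linux/err.h>
#include <linux/slab.h>

struct example_item {
	int value;
};

static struct example_item *example_alloc(size_t count)
{
	if (!count)
		return ERR_PTR(-EINVAL);

	/* May be kmalloc- or vmalloc-backed depending on size. */
	return kvcalloc(count, sizeof(struct example_item), GFP_KERNEL);
}

static void example_free(struct example_item *items)
{
	kvfree(items);	/* handles both backing allocators */
}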
diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index 304d9ac..988a0ac 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c
@@ -120,6 +120,13 @@ static const struct thermal_profile_params omen_v1_thermal_params = { .ec_tp_offset = HP_VICTUS_S_EC_THERMAL_PROFILE_OFFSET, }; +static const struct thermal_profile_params omen_v1_legacy_thermal_params = { + .performance = HP_OMEN_V1_THERMAL_PROFILE_PERFORMANCE, + .balanced = HP_OMEN_V1_THERMAL_PROFILE_DEFAULT, + .low_power = HP_OMEN_V1_THERMAL_PROFILE_DEFAULT, + .ec_tp_offset = HP_OMEN_EC_THERMAL_PROFILE_OFFSET, +}; + /* * A generic pointer for the currently-active board's thermal profile * parameters. @@ -146,6 +153,7 @@ static const char * const omen_thermal_profile_boards[] = { "8900", "8901", "8902", "8912", "8917", "8918", "8949", "894A", "89EB", "8A15", "8A42", "8BAD", + "8E41", }; /* DMI Board names of Omen laptops that are specifically set to be thermal @@ -166,18 +174,35 @@ static const char * const omen_timed_thermal_profile_boards[] = { "8BAD", }; -/* DMI Board names of Victus 16-d1xxx laptops */ +/* DMI Board names of Victus 16-d laptops */ static const char * const victus_thermal_profile_boards[] = { + "88F8", "8A25", }; /* DMI Board names of Victus 16-r and Victus 16-s laptops */ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst = { { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "8A4D") }, + .driver_data = (void *)&omen_v1_legacy_thermal_params, + }, + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BAB") }, + .driver_data = (void *)&omen_v1_thermal_params, + }, + { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BBE") }, .driver_data = (void *)&victus_s_thermal_params, }, { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BCA") }, + .driver_data = (void *)&omen_v1_thermal_params, + }, + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BCD") }, + .driver_data = (void *)&omen_v1_thermal_params, + }, + { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BD4") }, .driver_data = (void *)&victus_s_thermal_params, }, @@ -186,6 +211,10 @@ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst .driver_data = (void *)&victus_s_thermal_params, }, { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "8C76") }, + .driver_data = (void *)&omen_v1_thermal_params, + }, + { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8C78") }, .driver_data = (void *)&omen_v1_thermal_params, },
diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 0f8684f..2ddd8af 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c
@@ -136,6 +136,13 @@ static const struct dmi_system_id button_array_table[] = { }, }, { + .ident = "Lenovo ThinkPad X1 Fold 16 Gen 1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_FAMILY, "ThinkPad X1 Fold 16 Gen 1"), + }, + }, + { .ident = "Microsoft Surface Go 3", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), @@ -189,6 +196,18 @@ static const struct dmi_system_id dmi_vgbs_allow_list[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Dell Pro Rugged 12 Tablet RA02260"), }, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell 14 Plus 2-in-1 DB04250"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell 16 Plus 2-in-1 DB06250"), + }, + }, { } }; @@ -419,6 +438,14 @@ static int intel_hid_pl_suspend_handler(struct device *device) return 0; } +static int intel_hid_pl_freeze_handler(struct device *device) +{ + struct intel_hid_priv *priv = dev_get_drvdata(device); + + priv->wakeup_mode = false; + return intel_hid_pl_suspend_handler(device); +} + static int intel_hid_pl_resume_handler(struct device *device) { intel_hid_pm_complete(device); @@ -433,7 +460,7 @@ static int intel_hid_pl_resume_handler(struct device *device) static const struct dev_pm_ops intel_hid_pl_pm_ops = { .prepare = intel_hid_pm_prepare, .complete = intel_hid_pm_complete, - .freeze = intel_hid_pl_suspend_handler, + .freeze = intel_hid_pl_freeze_handler, .thaw = intel_hid_pl_resume_handler, .restore = intel_hid_pl_resume_handler, .suspend = intel_hid_pl_suspend_handler,
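The intel-hid fix gives hibernation its own .freeze handler: unlike suspend, the freeze phase must not leave the driver armed for wakeup, so the new handler clears the flag before reusing the suspend path. The resulting dev_pm_ops wiring looks roughly like this (names illustrative):

#include <linux/device.h>
#include <linux/pm.h>

struct example_priv {
	bool wakeup_mode;
};

static int example_suspend(struct device *dev)
{
	/* Normal suspend path; may arm wakeup depending on state. */
	return 0;
}

static int example_freeze(struct device *dev)
{
	struct example_priv *priv = dev_get_drvdata(dev);

	priv->wakeup_mode = false;	/* hibernation: never arm wakeup */
	return example_suspend(dev);
}

static int example_resume(struct device *dev)
{
	return 0;
}

static const struct dev_pm_ops example_pm_ops = {
	.suspend = example_suspend,
	.freeze  = example_freeze,
	.thaw    = example_resume,
	.restore = example_resume,
};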
diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c index 1455d9a..1c65ce8 100644 --- a/drivers/platform/x86/intel/int3472/discrete.c +++ b/drivers/platform/x86/intel/int3472/discrete.c
@@ -223,6 +223,10 @@ static void int3472_get_con_id_and_polarity(struct int3472_discrete_device *int3 *con_id = "avdd"; *gpio_flags = GPIO_ACTIVE_HIGH; break; + case INT3472_GPIO_TYPE_DOVDD: + *con_id = "dovdd"; + *gpio_flags = GPIO_ACTIVE_HIGH; + break; case INT3472_GPIO_TYPE_HANDSHAKE: *con_id = "dvdd"; *gpio_flags = GPIO_ACTIVE_HIGH; @@ -251,6 +255,7 @@ static void int3472_get_con_id_and_polarity(struct int3472_discrete_device *int3 * 0x0b Power enable * 0x0c Clock enable * 0x0d Privacy LED + * 0x10 DOVDD (digital I/O voltage) * 0x13 Hotplug detect * * There are some known platform specific quirks where that does not quite @@ -332,6 +337,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares, case INT3472_GPIO_TYPE_CLK_ENABLE: case INT3472_GPIO_TYPE_PRIVACY_LED: case INT3472_GPIO_TYPE_POWER_ENABLE: + case INT3472_GPIO_TYPE_DOVDD: case INT3472_GPIO_TYPE_HANDSHAKE: gpio = skl_int3472_gpiod_get_from_temp_lookup(int3472, agpio, con_id, gpio_flags); if (IS_ERR(gpio)) { @@ -356,6 +362,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares, case INT3472_GPIO_TYPE_POWER_ENABLE: second_sensor = int3472->quirks.avdd_second_sensor; fallthrough; + case INT3472_GPIO_TYPE_DOVDD: case INT3472_GPIO_TYPE_HANDSHAKE: ret = skl_int3472_register_regulator(int3472, gpio, enable_time_us, con_id, second_sensor);
diff --git a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c index b8cdaa2..e238c31 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c
@@ -558,6 +558,9 @@ static bool disable_dynamic_sst_features(void) { u64 value; + if (!static_cpu_has(X86_FEATURE_HWP)) + return true; + rdmsrq(MSR_PM_ENABLE, value); return !(value & 0x1); } @@ -869,7 +872,7 @@ static int isst_if_get_perf_level(void __user *argp) _read_pp_info("current_level", perf_level.current_level, SST_PP_STATUS_OFFSET, SST_PP_LEVEL_START, SST_PP_LEVEL_WIDTH, SST_MUL_FACTOR_NONE) _read_pp_info("locked", perf_level.locked, SST_PP_STATUS_OFFSET, - SST_PP_LOCK_START, SST_PP_LEVEL_WIDTH, SST_MUL_FACTOR_NONE) + SST_PP_LOCK_START, SST_PP_LOCK_WIDTH, SST_MUL_FACTOR_NONE) _read_pp_info("feature_state", perf_level.feature_state, SST_PP_STATUS_OFFSET, SST_PP_FEATURE_STATE_START, SST_PP_FEATURE_STATE_WIDTH, SST_MUL_FACTOR_NONE) perf_level.enabled = !!(power_domain_info->sst_header.cap_mask & BIT(1));
diff --git a/drivers/platform/x86/lenovo/thinkpad_acpi.c b/drivers/platform/x86/lenovo/thinkpad_acpi.c index f9c7367..8982d92 100644 --- a/drivers/platform/x86/lenovo/thinkpad_acpi.c +++ b/drivers/platform/x86/lenovo/thinkpad_acpi.c
@@ -9525,14 +9525,16 @@ static int tpacpi_battery_get(int what, int battery, int *ret) { switch (what) { case THRESHOLD_START: - if ACPI_FAILURE(tpacpi_battery_acpi_eval(GET_START, ret, battery)) + if (!battery_info.batteries[battery].start_support || + ACPI_FAILURE(tpacpi_battery_acpi_eval(GET_START, ret, battery))) return -ENODEV; /* The value is in the low 8 bits of the response */ *ret = *ret & 0xFF; return 0; case THRESHOLD_STOP: - if ACPI_FAILURE(tpacpi_battery_acpi_eval(GET_STOP, ret, battery)) + if (!battery_info.batteries[battery].stop_support || + ACPI_FAILURE(tpacpi_battery_acpi_eval(GET_STOP, ret, battery))) return -ENODEV; /* Value is in lower 8 bits */ *ret = *ret & 0xFF;
diff --git a/drivers/platform/x86/lenovo/wmi-gamezone.c b/drivers/platform/x86/lenovo/wmi-gamezone.c index 381836d..c7fe7e3 100644 --- a/drivers/platform/x86/lenovo/wmi-gamezone.c +++ b/drivers/platform/x86/lenovo/wmi-gamezone.c
@@ -31,8 +31,6 @@ #define LWMI_GZ_METHOD_ID_SMARTFAN_SET 44 #define LWMI_GZ_METHOD_ID_SMARTFAN_GET 45 -static BLOCKING_NOTIFIER_HEAD(gz_chain_head); - struct lwmi_gz_priv { enum thermal_mode current_mode; struct notifier_block event_nb;
diff --git a/drivers/platform/x86/oxpec.c b/drivers/platform/x86/oxpec.c index 144a454..6d4a53a 100644 --- a/drivers/platform/x86/oxpec.c +++ b/drivers/platform/x86/oxpec.c
@@ -11,7 +11,7 @@ * * Copyright (C) 2022 Joaquín I. Aramendía <samsagax@gmail.com> * Copyright (C) 2024 Derek J. Clark <derekjohn.clark@gmail.com> - * Copyright (C) 2025 Antheas Kapenekakis <lkml@antheas.dev> + * Copyright (C) 2025-2026 Antheas Kapenekakis <lkml@antheas.dev> */ #include <linux/acpi.h> @@ -117,6 +117,13 @@ static const struct dmi_system_id dmi_table[] = { { .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "AOKZOE"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "AOKZOE A2 Pro"), + }, + .driver_data = (void *)aok_zoe_a1, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "AOKZOE"), DMI_EXACT_MATCH(DMI_BOARD_NAME, "AOKZOE A1X"), }, .driver_data = (void *)oxp_fly, @@ -145,6 +152,13 @@ static const struct dmi_system_id dmi_table[] = { { .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "ONE-NETBOOK"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "ONEXPLAYER APEX"), + }, + .driver_data = (void *)oxp_fly, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ONE-NETBOOK"), DMI_EXACT_MATCH(DMI_BOARD_NAME, "ONEXPLAYER F1"), }, .driver_data = (void *)oxp_fly, @@ -215,6 +229,13 @@ static const struct dmi_system_id dmi_table[] = { { .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "ONE-NETBOOK"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "ONEXPLAYER X1z"), + }, + .driver_data = (void *)oxp_x1, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ONE-NETBOOK"), DMI_EXACT_MATCH(DMI_BOARD_NAME, "ONEXPLAYER X1 A"), }, .driver_data = (void *)oxp_x1, @@ -229,6 +250,13 @@ static const struct dmi_system_id dmi_table[] = { { .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "ONE-NETBOOK"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "ONEXPLAYER X1Air"), + }, + .driver_data = (void *)oxp_x1, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ONE-NETBOOK"), DMI_EXACT_MATCH(DMI_BOARD_NAME, "ONEXPLAYER X1 mini"), }, .driver_data = (void *)oxp_x1,
diff --git a/drivers/platform/x86/redmi-wmi.c b/drivers/platform/x86/redmi-wmi.c index 949236b..e5cb348 100644 --- a/drivers/platform/x86/redmi-wmi.c +++ b/drivers/platform/x86/redmi-wmi.c
@@ -20,7 +20,10 @@ static const struct key_entry redmi_wmi_keymap[] = { {KE_KEY, 0x00000201, {KEY_SELECTIVE_SCREENSHOT}}, {KE_KEY, 0x00000301, {KEY_ALL_APPLICATIONS}}, - {KE_KEY, 0x00001b01, {KEY_SETUP}}, + {KE_KEY, 0x00001b01, {KEY_CONFIG}}, + {KE_KEY, 0x00011b01, {KEY_CONFIG}}, + {KE_KEY, 0x00010101, {KEY_SWITCHVIDEOMODE}}, + {KE_KEY, 0x00001a01, {KEY_REFRESH_RATE_TOGGLE}}, /* AI button has code for each position */ {KE_KEY, 0x00011801, {KEY_ASSISTANT}}, @@ -32,6 +35,26 @@ static const struct key_entry redmi_wmi_keymap[] = { {KE_IGNORE, 0x00050501, {}}, {KE_IGNORE, 0x000a0501, {}}, + /* Xiaomi G Command Center */ + {KE_KEY, 0x00010a01, {KEY_VENDOR}}, + + /* OEM preset power mode */ + {KE_IGNORE, 0x00011601, {}}, + {KE_IGNORE, 0x00021601, {}}, + {KE_IGNORE, 0x00031601, {}}, + {KE_IGNORE, 0x00041601, {}}, + + /* Fn Lock state */ + {KE_IGNORE, 0x00000701, {}}, + {KE_IGNORE, 0x00010701, {}}, + + /* Fn+`/1/2/3/4 */ + {KE_KEY, 0x00011101, {KEY_F13}}, + {KE_KEY, 0x00011201, {KEY_F14}}, + {KE_KEY, 0x00011301, {KEY_F15}}, + {KE_KEY, 0x00011401, {KEY_F16}}, + {KE_KEY, 0x00011501, {KEY_F17}}, + {KE_END} };
diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index bdc19cd..d83c387 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -410,6 +410,16 @@ static const struct ts_dmi_data gdix1002_upside_down_data = { .properties = gdix1001_upside_down_props, }; +static const struct property_entry gdix1001_y_inverted_props[] = { + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + { } +}; + +static const struct ts_dmi_data gdix1001_y_inverted_data = { + .acpi_name = "GDIX1001", + .properties = gdix1001_y_inverted_props, +}; + static const struct property_entry gp_electronic_t701_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 960), PROPERTY_ENTRY_U32("touchscreen-size-y", 640), @@ -1659,6 +1669,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, }, { + /* SUPI S10 */ + .driver_data = (void *)&gdix1001_y_inverted_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SUPI"), + DMI_MATCH(DMI_PRODUCT_NAME, "S10"), + }, + }, + { /* Techbite Arc 11.6 */ .driver_data = (void *)&techbite_arc_11_6_data, .matches = {
diff --git a/drivers/platform/x86/uniwill/uniwill-acpi.c b/drivers/platform/x86/uniwill/uniwill-acpi.c index fee9353..6341dca 100644 --- a/drivers/platform/x86/uniwill/uniwill-acpi.c +++ b/drivers/platform/x86/uniwill/uniwill-acpi.c
@@ -314,8 +314,8 @@ #define LED_CHANNELS 3 #define LED_MAX_BRIGHTNESS 200 -#define UNIWILL_FEATURE_FN_LOCK_TOGGLE BIT(0) -#define UNIWILL_FEATURE_SUPER_KEY_TOGGLE BIT(1) +#define UNIWILL_FEATURE_FN_LOCK BIT(0) +#define UNIWILL_FEATURE_SUPER_KEY BIT(1) #define UNIWILL_FEATURE_TOUCHPAD_TOGGLE BIT(2) #define UNIWILL_FEATURE_LIGHTBAR BIT(3) #define UNIWILL_FEATURE_BATTERY BIT(4) @@ -330,6 +330,7 @@ struct uniwill_data { struct acpi_battery_hook hook; unsigned int last_charge_ctrl; struct mutex battery_lock; /* Protects the list of currently registered batteries */ + unsigned int last_status; unsigned int last_switch_status; struct mutex super_key_lock; /* Protects the toggling of the super key lock state */ struct list_head batteries; @@ -377,11 +378,15 @@ static const struct key_entry uniwill_keymap[] = { { KE_IGNORE, UNIWILL_OSD_CAPSLOCK, { KEY_CAPSLOCK }}, { KE_IGNORE, UNIWILL_OSD_NUMLOCK, { KEY_NUMLOCK }}, - /* Reported when the user locks/unlocks the super key */ - { KE_IGNORE, UNIWILL_OSD_SUPER_KEY_LOCK_ENABLE, { KEY_UNKNOWN }}, - { KE_IGNORE, UNIWILL_OSD_SUPER_KEY_LOCK_DISABLE, { KEY_UNKNOWN }}, + /* + * Reported when the user enables/disables the super key. + * Those events might even be reported when the change was done + * using the sysfs attribute! + */ + { KE_IGNORE, UNIWILL_OSD_SUPER_KEY_DISABLE, { KEY_UNKNOWN }}, + { KE_IGNORE, UNIWILL_OSD_SUPER_KEY_ENABLE, { KEY_UNKNOWN }}, /* Optional, might not be reported by all devices */ - { KE_IGNORE, UNIWILL_OSD_SUPER_KEY_LOCK_CHANGED, { KEY_UNKNOWN }}, + { KE_IGNORE, UNIWILL_OSD_SUPER_KEY_STATE_CHANGED, { KEY_UNKNOWN }}, /* Reported in manual mode when toggling the airplane mode status */ { KE_KEY, UNIWILL_OSD_RFKILL, { KEY_RFKILL }}, @@ -401,9 +406,6 @@ static const struct key_entry uniwill_keymap[] = { /* Reported when the user wants to toggle the mute status */ { KE_IGNORE, UNIWILL_OSD_MUTE, { KEY_MUTE }}, - /* Reported when the user locks/unlocks the Fn key */ - { KE_IGNORE, UNIWILL_OSD_FN_LOCK, { KEY_FN_ESC }}, - /* Reported when the user wants to toggle the brightness of the keyboard */ { KE_KEY, UNIWILL_OSD_KBDILLUMTOGGLE, { KEY_KBDILLUMTOGGLE }}, { KE_KEY, UNIWILL_OSD_KB_LED_LEVEL0, { KEY_KBDILLUMTOGGLE }}, @@ -576,6 +578,7 @@ static bool uniwill_volatile_reg(struct device *dev, unsigned int reg) case EC_ADDR_SECOND_FAN_RPM_1: case EC_ADDR_SECOND_FAN_RPM_2: case EC_ADDR_BAT_ALERT: + case EC_ADDR_BIOS_OEM: case EC_ADDR_PWM_1: case EC_ADDR_PWM_2: case EC_ADDR_TRIGGER: @@ -600,8 +603,8 @@ static const struct regmap_config uniwill_ec_config = { .use_single_write = true, }; -static ssize_t fn_lock_toggle_enable_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t fn_lock_store(struct device *dev, struct device_attribute *attr, const char *buf, + size_t count) { struct uniwill_data *data = dev_get_drvdata(dev); unsigned int value; @@ -624,8 +627,7 @@ static ssize_t fn_lock_toggle_enable_store(struct device *dev, struct device_att return count; } -static ssize_t fn_lock_toggle_enable_show(struct device *dev, struct device_attribute *attr, - char *buf) +static ssize_t fn_lock_show(struct device *dev, struct device_attribute *attr, char *buf) { struct uniwill_data *data = dev_get_drvdata(dev); unsigned int value; @@ -638,10 +640,10 @@ static ssize_t fn_lock_toggle_enable_show(struct device *dev, struct device_attr return sysfs_emit(buf, "%d\n", !!(value & FN_LOCK_STATUS)); } -static DEVICE_ATTR_RW(fn_lock_toggle_enable); +static DEVICE_ATTR_RW(fn_lock); -static ssize_t 
super_key_toggle_enable_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t super_key_enable_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct uniwill_data *data = dev_get_drvdata(dev); unsigned int value; @@ -673,8 +675,7 @@ static ssize_t super_key_toggle_enable_store(struct device *dev, struct device_a return count; } -static ssize_t super_key_toggle_enable_show(struct device *dev, struct device_attribute *attr, - char *buf) +static ssize_t super_key_enable_show(struct device *dev, struct device_attribute *attr, char *buf) { struct uniwill_data *data = dev_get_drvdata(dev); unsigned int value; @@ -687,7 +688,7 @@ static ssize_t super_key_toggle_enable_show(struct device *dev, struct device_at return sysfs_emit(buf, "%d\n", !(value & SUPER_KEY_LOCK_STATUS)); } -static DEVICE_ATTR_RW(super_key_toggle_enable); +static DEVICE_ATTR_RW(super_key_enable); static ssize_t touchpad_toggle_enable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -881,8 +882,8 @@ static int uniwill_nvidia_ctgp_init(struct uniwill_data *data) static struct attribute *uniwill_attrs[] = { /* Keyboard-related */ - &dev_attr_fn_lock_toggle_enable.attr, - &dev_attr_super_key_toggle_enable.attr, + &dev_attr_fn_lock.attr, + &dev_attr_super_key_enable.attr, &dev_attr_touchpad_toggle_enable.attr, /* Lightbar-related */ &dev_attr_rainbow_animation.attr, @@ -897,13 +898,13 @@ static umode_t uniwill_attr_is_visible(struct kobject *kobj, struct attribute *a struct device *dev = kobj_to_dev(kobj); struct uniwill_data *data = dev_get_drvdata(dev); - if (attr == &dev_attr_fn_lock_toggle_enable.attr) { - if (uniwill_device_supports(data, UNIWILL_FEATURE_FN_LOCK_TOGGLE)) + if (attr == &dev_attr_fn_lock.attr) { + if (uniwill_device_supports(data, UNIWILL_FEATURE_FN_LOCK)) return attr->mode; } - if (attr == &dev_attr_super_key_toggle_enable.attr) { - if (uniwill_device_supports(data, UNIWILL_FEATURE_SUPER_KEY_TOGGLE)) + if (attr == &dev_attr_super_key_enable.attr) { + if (uniwill_device_supports(data, UNIWILL_FEATURE_SUPER_KEY)) return attr->mode; } @@ -1357,6 +1358,9 @@ static int uniwill_notifier_call(struct notifier_block *nb, unsigned long action switch (action) { case UNIWILL_OSD_BATTERY_ALERT: + if (!uniwill_device_supports(data, UNIWILL_FEATURE_BATTERY)) + return NOTIFY_DONE; + mutex_lock(&data->battery_lock); list_for_each_entry(entry, &data->batteries, head) { power_supply_changed(entry->battery); @@ -1370,6 +1374,13 @@ static int uniwill_notifier_call(struct notifier_block *nb, unsigned long action */ return NOTIFY_OK; + case UNIWILL_OSD_FN_LOCK: + if (!uniwill_device_supports(data, UNIWILL_FEATURE_FN_LOCK)) + return NOTIFY_DONE; + + sysfs_notify(&data->dev->kobj, NULL, "fn_lock"); + + return NOTIFY_OK; default: mutex_lock(&data->input_lock); sparse_keymap_report_event(data->input_device, action, 1, true); @@ -1503,9 +1514,21 @@ static void uniwill_shutdown(struct platform_device *pdev) regmap_clear_bits(data->regmap, EC_ADDR_AP_OEM, ENABLE_MANUAL_CTRL); } -static int uniwill_suspend_keyboard(struct uniwill_data *data) +static int uniwill_suspend_fn_lock(struct uniwill_data *data) { - if (!uniwill_device_supports(data, UNIWILL_FEATURE_SUPER_KEY_TOGGLE)) + if (!uniwill_device_supports(data, UNIWILL_FEATURE_FN_LOCK)) + return 0; + + /* + * The EC_ADDR_BIOS_OEM is marked as volatile, so we have to restore it + * ourselves. 
+ */ + return regmap_read(data->regmap, EC_ADDR_BIOS_OEM, &data->last_status); +} + +static int uniwill_suspend_super_key(struct uniwill_data *data) +{ + if (!uniwill_device_supports(data, UNIWILL_FEATURE_SUPER_KEY)) return 0; /* @@ -1542,7 +1565,11 @@ static int uniwill_suspend(struct device *dev) struct uniwill_data *data = dev_get_drvdata(dev); int ret; - ret = uniwill_suspend_keyboard(data); + ret = uniwill_suspend_fn_lock(data); + if (ret < 0) + return ret; + + ret = uniwill_suspend_super_key(data); if (ret < 0) return ret; @@ -1560,12 +1587,21 @@ static int uniwill_suspend(struct device *dev) return 0; } -static int uniwill_resume_keyboard(struct uniwill_data *data) +static int uniwill_resume_fn_lock(struct uniwill_data *data) +{ + if (!uniwill_device_supports(data, UNIWILL_FEATURE_FN_LOCK)) + return 0; + + return regmap_update_bits(data->regmap, EC_ADDR_BIOS_OEM, FN_LOCK_STATUS, + data->last_status); +} + +static int uniwill_resume_super_key(struct uniwill_data *data) { unsigned int value; int ret; - if (!uniwill_device_supports(data, UNIWILL_FEATURE_SUPER_KEY_TOGGLE)) + if (!uniwill_device_supports(data, UNIWILL_FEATURE_SUPER_KEY)) return 0; ret = regmap_read(data->regmap, EC_ADDR_SWITCH_STATUS, &value); @@ -1608,7 +1644,11 @@ static int uniwill_resume(struct device *dev) if (ret < 0) return ret; - ret = uniwill_resume_keyboard(data); + ret = uniwill_resume_fn_lock(data); + if (ret < 0) + return ret; + + ret = uniwill_resume_super_key(data); if (ret < 0) return ret; @@ -1643,16 +1683,16 @@ static struct platform_driver uniwill_driver = { }; static struct uniwill_device_descriptor lapac71h_descriptor __initdata = { - .features = UNIWILL_FEATURE_FN_LOCK_TOGGLE | - UNIWILL_FEATURE_SUPER_KEY_TOGGLE | + .features = UNIWILL_FEATURE_FN_LOCK | + UNIWILL_FEATURE_SUPER_KEY | UNIWILL_FEATURE_TOUCHPAD_TOGGLE | UNIWILL_FEATURE_BATTERY | UNIWILL_FEATURE_HWMON, }; static struct uniwill_device_descriptor lapkc71f_descriptor __initdata = { - .features = UNIWILL_FEATURE_FN_LOCK_TOGGLE | - UNIWILL_FEATURE_SUPER_KEY_TOGGLE | + .features = UNIWILL_FEATURE_FN_LOCK | + UNIWILL_FEATURE_SUPER_KEY | UNIWILL_FEATURE_TOUCHPAD_TOGGLE | UNIWILL_FEATURE_LIGHTBAR | UNIWILL_FEATURE_BATTERY |
diff --git a/drivers/platform/x86/uniwill/uniwill-wmi.h b/drivers/platform/x86/uniwill/uniwill-wmi.h index 48783b2..fb1910c 100644 --- a/drivers/platform/x86/uniwill/uniwill-wmi.h +++ b/drivers/platform/x86/uniwill/uniwill-wmi.h
@@ -64,8 +64,8 @@ #define UNIWILL_OSD_KB_LED_LEVEL3 0x3E #define UNIWILL_OSD_KB_LED_LEVEL4 0x3F -#define UNIWILL_OSD_SUPER_KEY_LOCK_ENABLE 0x40 -#define UNIWILL_OSD_SUPER_KEY_LOCK_DISABLE 0x41 +#define UNIWILL_OSD_SUPER_KEY_DISABLE 0x40 +#define UNIWILL_OSD_SUPER_KEY_ENABLE 0x41 #define UNIWILL_OSD_MENU_JP 0x42 @@ -74,7 +74,7 @@ #define UNIWILL_OSD_RFKILL 0xA4 -#define UNIWILL_OSD_SUPER_KEY_LOCK_CHANGED 0xA5 +#define UNIWILL_OSD_SUPER_KEY_STATE_CHANGED 0xA5 #define UNIWILL_OSD_LIGHTBAR_STATE_CHANGED 0xA6
diff --git a/drivers/pmdomain/bcm/bcm2835-power.c b/drivers/pmdomain/bcm/bcm2835-power.c index 1d29add..eee87a3 100644 --- a/drivers/pmdomain/bcm/bcm2835-power.c +++ b/drivers/pmdomain/bcm/bcm2835-power.c
@@ -9,6 +9,7 @@ #include <linux/clk.h> #include <linux/delay.h> #include <linux/io.h> +#include <linux/iopoll.h> #include <linux/mfd/bcm2835-pm.h> #include <linux/module.h> #include <linux/platform_device.h> @@ -153,7 +154,6 @@ struct bcm2835_power { static int bcm2835_asb_control(struct bcm2835_power *power, u32 reg, bool enable) { void __iomem *base = power->asb; - u64 start; u32 val; switch (reg) { @@ -166,8 +166,6 @@ static int bcm2835_asb_control(struct bcm2835_power *power, u32 reg, bool enable break; } - start = ktime_get_ns(); - /* Enable the module's async AXI bridges. */ if (enable) { val = readl(base + reg) & ~ASB_REQ_STOP; @@ -176,11 +174,9 @@ static int bcm2835_asb_control(struct bcm2835_power *power, u32 reg, bool enable } writel(PM_PASSWORD | val, base + reg); - while (!!(readl(base + reg) & ASB_ACK) == enable) { - cpu_relax(); - if (ktime_get_ns() - start >= 1000) - return -ETIMEDOUT; - } + if (readl_poll_timeout_atomic(base + reg, val, + !!(val & ASB_ACK) != enable, 0, 5)) + return -ETIMEDOUT; return 0; } @@ -580,11 +576,11 @@ static int bcm2835_reset_status(struct reset_controller_dev *rcdev, switch (id) { case BCM2835_RESET_V3D: - return !PM_READ(PM_GRAFX & PM_V3DRSTN); + return !(PM_READ(PM_GRAFX) & PM_V3DRSTN); case BCM2835_RESET_H264: - return !PM_READ(PM_IMAGE & PM_H264RSTN); + return !(PM_READ(PM_IMAGE) & PM_H264RSTN); case BCM2835_RESET_ISP: - return !PM_READ(PM_IMAGE & PM_ISPRSTN); + return !(PM_READ(PM_IMAGE) & PM_ISPRSTN); default: return -EINVAL; }
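The bcm2835-power hunk replaces an open-coded ktime polling loop with readl_poll_timeout_atomic() from <linux/iopoll.h>, which handles the delay and timeout bookkeeping and returns -ETIMEDOUT on expiry. The idiom, sketched with an assumed ACK bit:

#include <linux/bits.h>
#include <linux/iopoll.h>

#define EX_ACK	BIT(1)	/* illustrative ack bit */

static int example_wait_ack(void __iomem *reg, bool enable)
{
	u32 val;

	/*
	 * Re-read *reg into val until the condition holds; 0 us delay
	 * between reads, give up after 5 us. The atomic variant never
	 * sleeps, so it is safe in non-sleeping context.
	 */
	return readl_poll_timeout_atomic(reg, val,
					 !!(val & EX_ACK) == enable, 0, 5);
}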
diff --git a/drivers/pmdomain/imx/gpcv2.c b/drivers/pmdomain/imx/gpcv2.c index cff738e..a829f8d 100644 --- a/drivers/pmdomain/imx/gpcv2.c +++ b/drivers/pmdomain/imx/gpcv2.c
@@ -1416,7 +1416,9 @@ static int imx_pgc_domain_suspend(struct device *dev) static int imx_pgc_domain_resume(struct device *dev) { - return pm_runtime_put(dev); + pm_runtime_put(dev); + + return 0; } #endif
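The gpcv2 tweak is about return-value semantics: pm_runtime_put() can return a positive value or a harmless error that is not a resume failure, so propagating it from a system-sleep resume callback could wrongly fail resume. The shape of the fix:

#include <linux/pm_runtime.h>

static int example_resume(struct device *dev)
{
	/*
	 * Best effort: drop the usage count, but never fail resume
	 * because of the helper's return code.
	 */
	pm_runtime_put(dev);

	return 0;
}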
diff --git a/drivers/pmdomain/mediatek/mtk-pm-domains.c b/drivers/pmdomain/mediatek/mtk-pm-domains.c index f64f24d..e2800aa 100644 --- a/drivers/pmdomain/mediatek/mtk-pm-domains.c +++ b/drivers/pmdomain/mediatek/mtk-pm-domains.c
@@ -1203,7 +1203,7 @@ static int scpsys_probe(struct platform_device *pdev) scpsys->soc_data = soc; scpsys->pd_data.domains = scpsys->domains; - scpsys->pd_data.num_domains = soc->num_domains; + scpsys->pd_data.num_domains = num_domains; parent = dev->parent; if (!parent) {
diff --git a/drivers/pmdomain/rockchip/pm-domains.c b/drivers/pmdomain/rockchip/pm-domains.c index 997e93c..44d3484 100644 --- a/drivers/pmdomain/rockchip/pm-domains.c +++ b/drivers/pmdomain/rockchip/pm-domains.c
@@ -1311,7 +1311,7 @@ static const struct rockchip_domain_info rk3576_pm_domains[] = { static const struct rockchip_domain_info rk3588_pm_domains[] = { [RK3588_PD_GPU] = DOMAIN_RK3588("gpu", 0x0, BIT(0), 0, 0x0, 0, BIT(1), 0x0, BIT(0), BIT(0), false, true), [RK3588_PD_NPU] = DOMAIN_RK3588("npu", 0x0, BIT(1), BIT(1), 0x0, 0, 0, 0x0, 0, 0, false, true), - [RK3588_PD_VCODEC] = DOMAIN_RK3588("vcodec", 0x0, BIT(2), BIT(2), 0x0, 0, 0, 0x0, 0, 0, false, false), + [RK3588_PD_VCODEC] = DOMAIN_RK3588("vcodec", 0x0, BIT(2), BIT(2), 0x0, 0, 0, 0x0, 0, 0, false, true), [RK3588_PD_NPUTOP] = DOMAIN_RK3588("nputop", 0x0, BIT(3), 0, 0x0, BIT(11), BIT(2), 0x0, BIT(1), BIT(1), false, false), [RK3588_PD_NPU1] = DOMAIN_RK3588("npu1", 0x0, BIT(4), 0, 0x0, BIT(12), BIT(3), 0x0, BIT(2), BIT(2), false, false), [RK3588_PD_NPU2] = DOMAIN_RK3588("npu2", 0x0, BIT(5), 0, 0x0, BIT(13), BIT(4), 0x0, BIT(3), BIT(3), false, false),
diff --git a/drivers/power/sequencing/pwrseq-pcie-m2.c b/drivers/power/sequencing/pwrseq-pcie-m2.c index d31a7dd..dadb4aa 100644 --- a/drivers/power/sequencing/pwrseq-pcie-m2.c +++ b/drivers/power/sequencing/pwrseq-pcie-m2.c
@@ -109,7 +109,7 @@ static int pwrseq_pcie_m2_probe(struct platform_device *pdev) if (!ctx) return -ENOMEM; - ctx->of_node = of_node_get(dev->of_node); + ctx->of_node = dev_of_node(dev); ctx->pdata = device_get_match_data(dev); if (!ctx->pdata) return dev_err_probe(dev, -ENODEV,
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index a708fc6..d10b6f9 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig
@@ -508,7 +508,7 @@ This driver supports the FP9931/JD9930 voltage regulator chip which is used to provide power to Electronic Paper Displays so it is found in E-Book readers. - If HWWON is enabled, it also provides temperature measurement. + If HWMON is enabled, it also provides temperature measurement. config REGULATOR_LM363X tristate "TI LM363X voltage regulators"
diff --git a/drivers/regulator/bq257xx-regulator.c b/drivers/regulator/bq257xx-regulator.c index fc1cced..dab8f1a 100644 --- a/drivers/regulator/bq257xx-regulator.c +++ b/drivers/regulator/bq257xx-regulator.c
@@ -115,11 +115,10 @@ static void bq257xx_reg_dt_parse_gpio(struct platform_device *pdev) return; subchild = of_get_child_by_name(child, pdata->desc.of_match); + of_node_put(child); if (!subchild) return; - of_node_put(child); - pdata->otg_en_gpio = devm_fwnode_gpiod_get_index(&pdev->dev, of_fwnode_handle(subchild), "enable", 0,
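In the bq257xx fix, of_node_put(child) moves up so the reference taken by of_get_child_by_name() is dropped on the early-return path too; only the subchild reference needs to outlive the lookup. The pattern, sketched with generic names:

#include <linux/of.h>

static struct device_node *example_find_subnode(struct device_node *parent,
						const char *name,
						const char *subname)
{
	struct device_node *child, *subchild;

	child = of_get_child_by_name(parent, name);
	if (!child)
		return NULL;

	subchild = of_get_child_by_name(child, subname);
	of_node_put(child);	/* drop the intermediate ref on all paths */

	return subchild;	/* caller is responsible for putting this */
}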
diff --git a/drivers/regulator/fp9931.c b/drivers/regulator/fp9931.c index 7fbcc63..abea3b6 100644 --- a/drivers/regulator/fp9931.c +++ b/drivers/regulator/fp9931.c
@@ -144,13 +144,12 @@ static int fp9931_hwmon_read(struct device *dev, enum hwmon_sensor_types type, return ret; ret = regmap_read(data->regmap, FP9931_REG_TMST_VALUE, &val); - if (ret) - return ret; + if (!ret) + *temp = (s8)val * 1000; pm_runtime_put_autosuspend(data->dev); - *temp = (s8)val * 1000; - return 0; + return ret; } static umode_t fp9931_hwmon_is_visible(const void *data,
diff --git a/drivers/regulator/mt6363-regulator.c b/drivers/regulator/mt6363-regulator.c index 03af5fa..0aebcbd 100644 --- a/drivers/regulator/mt6363-regulator.c +++ b/drivers/regulator/mt6363-regulator.c
@@ -899,10 +899,8 @@ static int mt6363_regulator_probe(struct platform_device *pdev) "Failed to map IRQ%d\n", info->hwirq); ret = devm_add_action_or_reset(dev, mt6363_irq_remove, &info->virq); - if (ret) { - irq_dispose_mapping(info->hwirq); + if (ret) return ret; - } config.driver_data = info; INIT_DELAYED_WORK(&info->oc_work, mt6363_oc_irq_enable_work);
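The mt6363 change relies on a documented property of devm_add_action_or_reset(): when registration fails, the helper immediately runs the action itself before returning the error, so the manual irq_dispose_mapping() amounted to a double dispose. A sketch of the contract, with a hypothetical teardown helper:

#include <linux/device.h>

static void example_teardown(void *data)
{
	/* hypothetical cleanup for the resource behind 'data' */
}

static int example_bind_cleanup(struct device *dev, void *resource)
{
	/*
	 * On failure the action has already been invoked; the caller
	 * must only propagate the error, not clean up again.
	 */
	return devm_add_action_or_reset(dev, example_teardown, resource);
}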
diff --git a/drivers/regulator/pca9450-regulator.c b/drivers/regulator/pca9450-regulator.c index 5fa8682..45d7dc4 100644 --- a/drivers/regulator/pca9450-regulator.c +++ b/drivers/regulator/pca9450-regulator.c
@@ -1293,6 +1293,7 @@ static int pca9450_i2c_probe(struct i2c_client *i2c) struct regulator_dev *ldo5; struct pca9450 *pca9450; unsigned int device_id, i; + const char *type_name; int ret; pca9450 = devm_kzalloc(&i2c->dev, sizeof(struct pca9450), GFP_KERNEL); @@ -1303,15 +1304,22 @@ static int pca9450_i2c_probe(struct i2c_client *i2c) case PCA9450_TYPE_PCA9450A: regulator_desc = pca9450a_regulators; pca9450->rcnt = ARRAY_SIZE(pca9450a_regulators); + type_name = "pca9450a"; break; case PCA9450_TYPE_PCA9450BC: regulator_desc = pca9450bc_regulators; pca9450->rcnt = ARRAY_SIZE(pca9450bc_regulators); + type_name = "pca9450bc"; break; case PCA9450_TYPE_PCA9451A: + regulator_desc = pca9451a_regulators; + pca9450->rcnt = ARRAY_SIZE(pca9451a_regulators); + type_name = "pca9451a"; + break; case PCA9450_TYPE_PCA9452: regulator_desc = pca9451a_regulators; pca9450->rcnt = ARRAY_SIZE(pca9451a_regulators); + type_name = "pca9452"; break; default: dev_err(&i2c->dev, "Unknown device type"); @@ -1369,7 +1377,7 @@ static int pca9450_i2c_probe(struct i2c_client *i2c) if (pca9450->irq) { ret = devm_request_threaded_irq(pca9450->dev, pca9450->irq, NULL, pca9450_irq_handler, - (IRQF_TRIGGER_FALLING | IRQF_ONESHOT), + (IRQF_TRIGGER_LOW | IRQF_ONESHOT), "pca9450-irq", pca9450); if (ret != 0) return dev_err_probe(pca9450->dev, ret, "Failed to request IRQ: %d\n", @@ -1413,9 +1421,7 @@ static int pca9450_i2c_probe(struct i2c_client *i2c) pca9450_i2c_restart_handler, pca9450)) dev_warn(&i2c->dev, "Failed to register restart handler\n"); - dev_info(&i2c->dev, "%s probed.\n", - type == PCA9450_TYPE_PCA9450A ? "pca9450a" : - (type == PCA9450_TYPE_PCA9451A ? "pca9451a" : "pca9450bc")); + dev_info(&i2c->dev, "%s probed.\n", type_name); return 0; }
diff --git a/drivers/regulator/pf9453-regulator.c b/drivers/regulator/pf9453-regulator.c index 779a6fd..eed3055 100644 --- a/drivers/regulator/pf9453-regulator.c +++ b/drivers/regulator/pf9453-regulator.c
@@ -809,7 +809,7 @@ static int pf9453_i2c_probe(struct i2c_client *i2c) } ret = devm_request_threaded_irq(pf9453->dev, pf9453->irq, NULL, pf9453_irq_handler, - (IRQF_TRIGGER_FALLING | IRQF_ONESHOT), + IRQF_ONESHOT, "pf9453-irq", pf9453); if (ret) return dev_err_probe(pf9453->dev, ret, "Failed to request IRQ: %d\n", pf9453->irq);
diff --git a/drivers/regulator/tps65185.c b/drivers/regulator/tps65185.c index 3286c9a..786622d 100644 --- a/drivers/regulator/tps65185.c +++ b/drivers/regulator/tps65185.c
@@ -332,6 +332,9 @@ static int tps65185_probe(struct i2c_client *client) int i; data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + data->regmap = devm_regmap_init_i2c(client, &regmap_config); if (IS_ERR(data->regmap)) return dev_err_probe(&client->dev, PTR_ERR(data->regmap),
diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c index f5f916d..8c8ddbf 100644 --- a/drivers/remoteproc/imx_rproc.c +++ b/drivers/remoteproc/imx_rproc.c
@@ -617,7 +617,7 @@ static int imx_rproc_prepare(struct rproc *rproc) err = of_reserved_mem_region_to_resource(np, i++, &res); if (err) - return 0; + break; /* * Ignore the first memory region which will be used vdev buffer.
diff --git a/drivers/remoteproc/mtk_scp.c b/drivers/remoteproc/mtk_scp.c index 4651311..bb6f6a1 100644 --- a/drivers/remoteproc/mtk_scp.c +++ b/drivers/remoteproc/mtk_scp.c
@@ -1592,12 +1592,51 @@ static const struct of_device_id mtk_scp_of_match[] = { }; MODULE_DEVICE_TABLE(of, mtk_scp_of_match); +static int __maybe_unused scp_suspend(struct device *dev) +{ + struct mtk_scp *scp = dev_get_drvdata(dev); + struct rproc *rproc = scp->rproc; + + /* + * Only unprepare if the SCP is running and holding the clock. + * + * Note: `scp_ops` doesn't implement .attach() callback, hence + * `rproc->state` can never be RPROC_ATTACHED. Otherwise, it + * should also be checked here. + */ + if (rproc->state == RPROC_RUNNING) + clk_unprepare(scp->clk); + return 0; +} + +static int __maybe_unused scp_resume(struct device *dev) +{ + struct mtk_scp *scp = dev_get_drvdata(dev); + struct rproc *rproc = scp->rproc; + + /* + * Only prepare if the SCP was running and holding the clock. + * + * Note: `scp_ops` doesn't implement .attach() callback, hence + * `rproc->state` can never be RPROC_ATTACHED. Otherwise, it + * should also be checked here. + */ + if (rproc->state == RPROC_RUNNING) + return clk_prepare(scp->clk); + return 0; +} + +static const struct dev_pm_ops scp_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(scp_suspend, scp_resume) +}; + static struct platform_driver mtk_scp_driver = { .probe = scp_probe, .remove = scp_remove, .driver = { .name = "mtk-scp", .of_match_table = mtk_scp_of_match, + .pm = &scp_pm_ops, }, };
diff --git a/drivers/remoteproc/qcom_sysmon.c b/drivers/remoteproc/qcom_sysmon.c index cf10e8e..3ceec1f 100644 --- a/drivers/remoteproc/qcom_sysmon.c +++ b/drivers/remoteproc/qcom_sysmon.c
@@ -203,7 +203,7 @@ static const struct qmi_elem_info ssctl_shutdown_resp_ei[] = { }; struct ssctl_subsys_event_req { - u8 subsys_name_len; + u32 subsys_name_len; char subsys_name[SSCTL_SUBSYS_NAME_LENGTH]; u32 event; u8 evt_driven_valid;
diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c index ee18bf2..4add903 100644 --- a/drivers/remoteproc/qcom_wcnss.c +++ b/drivers/remoteproc/qcom_wcnss.c
@@ -537,7 +537,7 @@ static int wcnss_alloc_memory_region(struct qcom_wcnss *wcnss) wcnss->mem_phys = wcnss->mem_reloc = res.start; wcnss->mem_size = resource_size(&res); - wcnss->mem_region = devm_ioremap_resource_wc(wcnss->dev, &res); + wcnss->mem_region = devm_ioremap_wc(wcnss->dev, wcnss->mem_phys, wcnss->mem_size); if (IS_ERR(wcnss->mem_region)) { dev_err(wcnss->dev, "unable to map memory region: %pR\n", &res); return PTR_ERR(wcnss->mem_region);
diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 1eebc26..0666be6 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c
@@ -1428,6 +1428,7 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, static int mpam_restore_mbwu_state(void *_ris) { int i; + u64 val; struct mon_read mwbu_arg; struct mpam_msc_ris *ris = _ris; struct mpam_class *class = ris->vmsc->comp->class; @@ -1437,6 +1438,7 @@ static int mpam_restore_mbwu_state(void *_ris) mwbu_arg.ris = ris; mwbu_arg.ctx = &ris->mbwu_state[i].cfg; mwbu_arg.type = mpam_msmon_choose_counter(class); + mwbu_arg.val = &val; __ris_msmon_read(&mwbu_arg); }
diff --git a/drivers/resctrl/test_mpam_devices.c b/drivers/resctrl/test_mpam_devices.c index 3e8d564..31871f5 100644 --- a/drivers/resctrl/test_mpam_devices.c +++ b/drivers/resctrl/test_mpam_devices.c
@@ -322,9 +322,17 @@ static void test_mpam_enable_merge_features(struct kunit *test) mutex_unlock(&mpam_list_lock); } +static void __test_mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) +{ + /* Avoid warnings when running with CONFIG_DEBUG_PREEMPT */ + guard(preempt)(); + + mpam_reset_msc_bitmap(msc, reg, wd); +} + static void test_mpam_reset_msc_bitmap(struct kunit *test) { - char __iomem *buf = kunit_kzalloc(test, SZ_16K, GFP_KERNEL); + char __iomem *buf = (__force char __iomem *)kunit_kzalloc(test, SZ_16K, GFP_KERNEL); struct mpam_msc fake_msc = {}; u32 *test_result; @@ -339,33 +347,33 @@ static void test_mpam_reset_msc_bitmap(struct kunit *test) mutex_init(&fake_msc.part_sel_lock); mutex_lock(&fake_msc.part_sel_lock); - test_result = (u32 *)(buf + MPAMCFG_CPBM); + test_result = (__force u32 *)(buf + MPAMCFG_CPBM); - mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 0); + __test_mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 0); KUNIT_EXPECT_EQ(test, test_result[0], 0); KUNIT_EXPECT_EQ(test, test_result[1], 0); test_result[0] = 0; test_result[1] = 0; - mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 1); + __test_mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 1); KUNIT_EXPECT_EQ(test, test_result[0], 1); KUNIT_EXPECT_EQ(test, test_result[1], 0); test_result[0] = 0; test_result[1] = 0; - mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 16); + __test_mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 16); KUNIT_EXPECT_EQ(test, test_result[0], 0xffff); KUNIT_EXPECT_EQ(test, test_result[1], 0); test_result[0] = 0; test_result[1] = 0; - mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 32); + __test_mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 32); KUNIT_EXPECT_EQ(test, test_result[0], 0xffffffff); KUNIT_EXPECT_EQ(test, test_result[1], 0); test_result[0] = 0; test_result[1] = 0; - mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 33); + __test_mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 33); KUNIT_EXPECT_EQ(test, test_result[0], 0xffffffff); KUNIT_EXPECT_EQ(test, test_result[1], 1); test_result[0] = 0;
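The test wrapper introduced above uses the scope-based guard machinery from <linux/cleanup.h>: guard(preempt)() disables preemption at the point of declaration and re-enables it automatically on every path out of the enclosing scope, which silences CONFIG_DEBUG_PREEMPT warnings from code that expects to run non-preemptibly. In isolation:

    #include <linux/cleanup.h>
    #include <linux/preempt.h>

    static void run_nonpreemptible(void)
    {
        guard(preempt)();   /* preempt_disable() happens here */

        /* ... work that must not be preempted or migrated ... */
    }                       /* preempt_enable() runs on scope exit */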
diff --git a/drivers/reset/core.c b/drivers/reset/core.c index fceec45..352c236 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c
@@ -856,7 +856,6 @@ static int reset_add_gpio_aux_device(struct device *parent, ret = __auxiliary_device_add(adev, "reset"); if (ret) { auxiliary_device_uninit(adev); - kfree(adev); return ret; }
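The reset/core.c deletion cures a double free: once auxiliary_device_init() has succeeded, the allocation is owned by the device's release callback, and auxiliary_device_uninit() drops the reference that eventually runs it, so the explicit kfree() freed adev a second time. A sketch of the ownership convention, with my_adev_release() as a stand-in name:

    #include <linux/auxiliary_bus.h>
    #include <linux/slab.h>

    static void my_adev_release(struct device *dev)
    {
        /* Sole owner of the allocation once _init() has succeeded. */
        kfree(to_auxiliary_dev(dev));
    }

    static int add_reset_adev(struct device *parent)
    {
        struct auxiliary_device *adev;
        int ret;

        adev = kzalloc(sizeof(*adev), GFP_KERNEL);
        if (!adev)
            return -ENOMEM;

        adev->name = "reset";
        adev->dev.parent = parent;
        adev->dev.release = my_adev_release;

        ret = auxiliary_device_init(adev);
        if (ret) {
            kfree(adev);    /* init failed: release() will never run */
            return ret;
        }

        ret = auxiliary_device_add(adev);
        if (ret)
            auxiliary_device_uninit(adev);  /* release() frees adev */

        return ret;
    }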
diff --git a/drivers/reset/reset-rzg2l-usbphy-ctrl.c b/drivers/reset/reset-rzg2l-usbphy-ctrl.c index 32bc268..fd75d96 100644 --- a/drivers/reset/reset-rzg2l-usbphy-ctrl.c +++ b/drivers/reset/reset-rzg2l-usbphy-ctrl.c
@@ -136,6 +136,9 @@ static int rzg2l_usbphy_ctrl_set_pwrrdy(struct regmap_field *pwrrdy, { u32 val = power_on ? 0 : 1; + if (!pwrrdy) + return 0; + /* The initialization path guarantees that the mask is 1 bit long. */ return regmap_field_update_bits(pwrrdy, 1, val); } @@ -347,4 +350,4 @@ module_platform_driver(rzg2l_usbphy_ctrl_driver); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Renesas RZ/G2L USBPHY Control"); -MODULE_AUTHOR("biju.das.jz@bp.renesas.com>"); +MODULE_AUTHOR("Biju Das <biju.das.jz@bp.renesas.com>");
diff --git a/drivers/reset/spacemit/reset-spacemit-k3.c b/drivers/reset/spacemit/reset-spacemit-k3.c index e9e32e4..9841f5e 100644 --- a/drivers/reset/spacemit/reset-spacemit-k3.c +++ b/drivers/reset/spacemit/reset-spacemit-k3.c
@@ -112,16 +112,21 @@ static const struct ccu_reset_data k3_apmu_resets[] = { [RESET_APMU_SDH0] = RESET_DATA(APMU_SDH0_CLK_RES_CTRL, 0, BIT(1)), [RESET_APMU_SDH1] = RESET_DATA(APMU_SDH1_CLK_RES_CTRL, 0, BIT(1)), [RESET_APMU_SDH2] = RESET_DATA(APMU_SDH2_CLK_RES_CTRL, 0, BIT(1)), - [RESET_APMU_USB2] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, - BIT(1)|BIT(2)|BIT(3)), - [RESET_APMU_USB3_PORTA] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, - BIT(5)|BIT(6)|BIT(7)), - [RESET_APMU_USB3_PORTB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, - BIT(9)|BIT(10)|BIT(11)), - [RESET_APMU_USB3_PORTC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, - BIT(13)|BIT(14)|BIT(15)), - [RESET_APMU_USB3_PORTD] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, - BIT(17)|BIT(18)|BIT(19)), + [RESET_APMU_USB2_AHB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(1)), + [RESET_APMU_USB2_VCC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(2)), + [RESET_APMU_USB2_PHY] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(3)), + [RESET_APMU_USB3_A_AHB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(5)), + [RESET_APMU_USB3_A_VCC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(6)), + [RESET_APMU_USB3_A_PHY] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(7)), + [RESET_APMU_USB3_B_AHB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(9)), + [RESET_APMU_USB3_B_VCC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(10)), + [RESET_APMU_USB3_B_PHY] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(11)), + [RESET_APMU_USB3_C_AHB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(13)), + [RESET_APMU_USB3_C_VCC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(14)), + [RESET_APMU_USB3_C_PHY] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(15)), + [RESET_APMU_USB3_D_AHB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(17)), + [RESET_APMU_USB3_D_VCC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(18)), + [RESET_APMU_USB3_D_PHY] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(19)), [RESET_APMU_QSPI] = RESET_DATA(APMU_QSPI_CLK_RES_CTRL, 0, BIT(1)), [RESET_APMU_QSPI_BUS] = RESET_DATA(APMU_QSPI_CLK_RES_CTRL, 0, BIT(0)), [RESET_APMU_DMA] = RESET_DATA(APMU_DMA_CLK_RES_CTRL, 0, BIT(0)), @@ -151,10 +156,12 @@ static const struct ccu_reset_data k3_apmu_resets[] = { [RESET_APMU_CPU7_SW] = RESET_DATA(APMU_PMU_CC2_AP, BIT(26), 0), [RESET_APMU_C1_MPSUB_SW] = RESET_DATA(APMU_PMU_CC2_AP, BIT(28), 0), [RESET_APMU_MPSUB_DBG] = RESET_DATA(APMU_PMU_CC2_AP, BIT(29), 0), - [RESET_APMU_UCIE] = RESET_DATA(APMU_UCIE_CTRL, - BIT(1) | BIT(2) | BIT(3), 0), - [RESET_APMU_RCPU] = RESET_DATA(APMU_RCPU_CLK_RES_CTRL, 0, - BIT(3) | BIT(2) | BIT(0)), + [RESET_APMU_UCIE_IP] = RESET_DATA(APMU_UCIE_CTRL, BIT(1), 0), + [RESET_APMU_UCIE_HOT] = RESET_DATA(APMU_UCIE_CTRL, BIT(2), 0), + [RESET_APMU_UCIE_MON] = RESET_DATA(APMU_UCIE_CTRL, BIT(3), 0), + [RESET_APMU_RCPU_AUDIO_SYS] = RESET_DATA(APMU_RCPU_CLK_RES_CTRL, 0, BIT(0)), + [RESET_APMU_RCPU_MCU_CORE] = RESET_DATA(APMU_RCPU_CLK_RES_CTRL, 0, BIT(2)), + [RESET_APMU_RCPU_AUDIO_APMU] = RESET_DATA(APMU_RCPU_CLK_RES_CTRL, 0, BIT(3)), [RESET_APMU_DSI4LN2_ESCCLK] = RESET_DATA(APMU_LCD_CLK_RES_CTRL3, 0, BIT(3)), [RESET_APMU_DSI4LN2_LCD_SW] = RESET_DATA(APMU_LCD_CLK_RES_CTRL3, 0, BIT(4)), [RESET_APMU_DSI4LN2_LCD_MCLK] = RESET_DATA(APMU_LCD_CLK_RES_CTRL4, 0, BIT(9)), @@ -164,16 +171,21 @@ static const struct ccu_reset_data k3_apmu_resets[] = { [RESET_APMU_UFS_ACLK] = RESET_DATA(APMU_UFS_CLK_RES_CTRL, 0, BIT(0)), [RESET_APMU_EDP0] = RESET_DATA(APMU_LCD_EDP_CTRL, 0, BIT(0)), [RESET_APMU_EDP1] = RESET_DATA(APMU_LCD_EDP_CTRL, 0, BIT(16)), - [RESET_APMU_PCIE_PORTA] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_A, 0, - BIT(5) | BIT(4) | BIT(3)), - [RESET_APMU_PCIE_PORTB] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_B, 0, - BIT(5) | BIT(4) | BIT(3)), - [RESET_APMU_PCIE_PORTC] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_C, 0, - BIT(5) | BIT(4) | BIT(3)), - [RESET_APMU_PCIE_PORTD] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_D, 0, - BIT(5) | BIT(4) | BIT(3)), - [RESET_APMU_PCIE_PORTE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_E, 0, - BIT(5) | BIT(4) | BIT(3)), + [RESET_APMU_PCIE_A_DBI] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_A, 0, BIT(3)), + [RESET_APMU_PCIE_A_SLAVE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_A, 0, BIT(4)), + [RESET_APMU_PCIE_A_MASTER] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_A, 0, BIT(5)), + [RESET_APMU_PCIE_B_DBI] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_B, 0, BIT(3)), + [RESET_APMU_PCIE_B_SLAVE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_B, 0, BIT(4)), + [RESET_APMU_PCIE_B_MASTER] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_B, 0, BIT(5)), + [RESET_APMU_PCIE_C_DBI] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_C, 0, BIT(3)), + [RESET_APMU_PCIE_C_SLAVE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_C, 0, BIT(4)), + [RESET_APMU_PCIE_C_MASTER] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_C, 0, BIT(5)), + [RESET_APMU_PCIE_D_DBI] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_D, 0, BIT(3)), + [RESET_APMU_PCIE_D_SLAVE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_D, 0, BIT(4)), + [RESET_APMU_PCIE_D_MASTER] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_D, 0, BIT(5)), + [RESET_APMU_PCIE_E_DBI] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_E, 0, BIT(3)), + [RESET_APMU_PCIE_E_SLAVE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_E, 0, BIT(4)), + [RESET_APMU_PCIE_E_MASTER] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_E, 0, BIT(5)), [RESET_APMU_EMAC0] = RESET_DATA(APMU_EMAC0_CLK_RES_CTRL, 0, BIT(1)), [RESET_APMU_EMAC1] = RESET_DATA(APMU_EMAC1_CLK_RES_CTRL, 0, BIT(1)), [RESET_APMU_EMAC2] = RESET_DATA(APMU_EMAC2_CLK_RES_CTRL, 0, BIT(1)),
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index cb068d5..14e58c3 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c
@@ -6135,6 +6135,7 @@ static void copy_pair_set_active(struct dasd_copy_relation *copy, char *new_busi static int dasd_eckd_copy_pair_swap(struct dasd_device *device, char *prim_busid, char *sec_busid) { + struct dasd_eckd_private *prim_priv, *sec_priv; struct dasd_device *primary, *secondary; struct dasd_copy_relation *copy; struct dasd_block *block; @@ -6155,6 +6156,9 @@ static int dasd_eckd_copy_pair_swap(struct dasd_device *device, char *prim_busid if (!secondary) return DASD_COPYPAIRSWAP_SECONDARY; + prim_priv = primary->private; + sec_priv = secondary->private; + /* * usually the device should be quiesced for swap * for paranoia stop device and requeue requests again @@ -6182,6 +6186,18 @@ static int dasd_eckd_copy_pair_swap(struct dasd_device *device, char *prim_busid dev_name(&secondary->cdev->dev), rc); } + if (primary->stopped & DASD_STOPPED_QUIESCE) { + dasd_device_set_stop_bits(secondary, DASD_STOPPED_QUIESCE); + dasd_device_remove_stop_bits(primary, DASD_STOPPED_QUIESCE); + } + + /* + * The secondary device never got through format detection, but since it + * is a copy of the primary device, the format is exactly the same; + * therefore, the detected layout can simply be copied. + */ + sec_priv->uses_cdl = prim_priv->uses_cdl; + /* re-enable device */ dasd_device_remove_stop_bits(primary, DASD_STOPPED_PPRC); dasd_device_remove_stop_bits(secondary, DASD_STOPPED_PPRC);
diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c index 573bad1..37a157a 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.c +++ b/drivers/s390/crypto/zcrypt_ccamisc.c
@@ -1639,11 +1639,13 @@ int cca_get_info(u16 cardnr, u16 domain, struct cca_info *ci, u32 xflags) memset(ci, 0, sizeof(*ci)); - /* get first info from zcrypt device driver about this apqn */ - rc = zcrypt_device_status_ext(cardnr, domain, &devstat); - if (rc) - return rc; - ci->hwtype = devstat.hwtype; + /* if specific domain given, fetch status and hw info for this apqn */ + if (domain != AUTOSEL_DOM) { + rc = zcrypt_device_status_ext(cardnr, domain, &devstat); + if (rc) + return rc; + ci->hwtype = devstat.hwtype; + } /* * Prep memory for rule array and var array use.
diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index e9a9849..e7b0ed2 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c
@@ -85,8 +85,7 @@ static ssize_t cca_serialnr_show(struct device *dev, memset(&ci, 0, sizeof(ci)); - if (ap_domain_index >= 0) - cca_get_info(ac->id, ap_domain_index, &ci, 0); + cca_get_info(ac->id, AUTOSEL_DOM, &ci, 0); return sysfs_emit(buf, "%s\n", ci.serial); }
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index a0dcab5..23a3222 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -953,6 +953,10 @@ static atomic_t zcrypt_step = ATOMIC_INIT(0); /* * The request distributor calls this function if it picked the CEXxC * device to handle a modexpo request. + * This function assumes that ap_msg has been initialized with + * ap_init_apmsg() and thus a valid buffer with the size of + * ap_msg->bufsize is available within ap_msg. Also the caller has + * to make sure ap_release_apmsg() is always called even on failure. * @zq: pointer to zcrypt_queue structure that identifies the * CEXxC device to the request distributor * @mex: pointer to the modexpo request buffer @@ -964,21 +968,17 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq, struct ap_response_type *resp_type = &ap_msg->response; int rc; - ap_msg->msg = (void *)get_zeroed_page(GFP_KERNEL); - if (!ap_msg->msg) - return -ENOMEM; - ap_msg->bufsize = PAGE_SIZE; ap_msg->receive = zcrypt_msgtype6_receive; ap_msg->psmid = (((unsigned long)current->pid) << 32) + atomic_inc_return(&zcrypt_step); rc = icamex_msg_to_type6mex_msgx(zq, ap_msg, mex); if (rc) - goto out_free; + goto out; resp_type->type = CEXXC_RESPONSE_TYPE_ICA; init_completion(&resp_type->work); rc = ap_queue_message(zq->queue, ap_msg); if (rc) - goto out_free; + goto out; rc = wait_for_completion_interruptible(&resp_type->work); if (rc == 0) { rc = ap_msg->rc; @@ -991,15 +991,17 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq, ap_cancel_message(zq->queue, ap_msg); } -out_free: - free_page((unsigned long)ap_msg->msg); - ap_msg->msg = NULL; +out: return rc; } /* * The request distributor calls this function if it picked the CEXxC * device to handle a modexpo_crt request. + * This function assumes that ap_msg has been initialized with + * ap_init_apmsg() and thus a valid buffer with the size of + * ap_msg->bufsize is available within ap_msg. Also the caller has + * to make sure ap_release_apmsg() is always called even on failure. * @zq: pointer to zcrypt_queue structure that identifies the * CEXxC device to the request distributor * @crt: pointer to the modexpoc_crt request buffer @@ -1011,21 +1013,17 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq, struct ap_response_type *resp_type = &ap_msg->response; int rc; - ap_msg->msg = (void *)get_zeroed_page(GFP_KERNEL); - if (!ap_msg->msg) - return -ENOMEM; - ap_msg->bufsize = PAGE_SIZE; ap_msg->receive = zcrypt_msgtype6_receive; ap_msg->psmid = (((unsigned long)current->pid) << 32) + atomic_inc_return(&zcrypt_step); rc = icacrt_msg_to_type6crt_msgx(zq, ap_msg, crt); if (rc) - goto out_free; + goto out; resp_type->type = CEXXC_RESPONSE_TYPE_ICA; init_completion(&resp_type->work); rc = ap_queue_message(zq->queue, ap_msg); if (rc) - goto out_free; + goto out; rc = wait_for_completion_interruptible(&resp_type->work); if (rc == 0) { rc = ap_msg->rc; @@ -1038,9 +1036,7 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq, ap_cancel_message(zq->queue, ap_msg); } -out_free: - free_page((unsigned long)ap_msg->msg); - ap_msg->msg = NULL; +out: return rc; }
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 30a9c66..c2b082f 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -2578,7 +2578,7 @@ int hisi_sas_probe(struct platform_device *pdev, shost->transportt = hisi_sas_stt; shost->max_id = HISI_SAS_MAX_DEVICES; shost->max_lun = ~0; - shost->max_channel = 1; + shost->max_channel = 0; shost->max_cmd_len = HISI_SAS_MAX_CDB_LEN; if (hisi_hba->hw->slot_index_alloc) { shost->can_queue = HISI_SAS_MAX_COMMANDS;
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 2f9e0171..f69efc6 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -4993,7 +4993,7 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) shost->transportt = hisi_sas_stt; shost->max_id = HISI_SAS_MAX_DEVICES; shost->max_lun = ~0; - shost->max_channel = 1; + shost->max_channel = 0; shost->max_cmd_len = HISI_SAS_MAX_CDB_LEN; shost->can_queue = HISI_SAS_UNRESERVED_IPTT; shost->cmd_per_lun = HISI_SAS_UNRESERVED_IPTT;
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index a20fce0..3dd2add 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -4966,7 +4966,8 @@ static void ibmvfc_discover_targets_done(struct ibmvfc_event *evt) switch (mad_status) { case IBMVFC_MAD_SUCCESS: ibmvfc_dbg(vhost, "Discover Targets succeeded\n"); - vhost->num_targets = be32_to_cpu(rsp->num_written); + vhost->num_targets = min_t(u32, be32_to_cpu(rsp->num_written), + max_targets); ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_ALLOC_TGTS); break; case IBMVFC_MAD_FAILED:
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 94ad253..e9d9ac7 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -12025,6 +12025,8 @@ lpfc_sli4_pci_mem_unset(struct lpfc_hba *phba) iounmap(phba->sli4_hba.conf_regs_memmap_p); if (phba->sli4_hba.dpp_regs_memmap_p) iounmap(phba->sli4_hba.dpp_regs_memmap_p); + if (phba->sli4_hba.dpp_regs_memmap_wc_p) + iounmap(phba->sli4_hba.dpp_regs_memmap_wc_p); break; case LPFC_SLI_INTF_IF_TYPE_1: break;
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 1cbfbe4..303523f 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -15977,6 +15977,32 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) return NULL; } +static __maybe_unused void __iomem * +lpfc_dpp_wc_map(struct lpfc_hba *phba, uint8_t dpp_barset) +{ + + /* DPP region is supposed to cover 64-bit BAR2 */ + if (dpp_barset != WQ_PCI_BAR_4_AND_5) { + lpfc_log_msg(phba, KERN_WARNING, LOG_INIT, + "3273 dpp_barset x%x != WQ_PCI_BAR_4_AND_5\n", + dpp_barset); + return NULL; + } + + if (!phba->sli4_hba.dpp_regs_memmap_wc_p) { + void __iomem *dpp_map; + + dpp_map = ioremap_wc(phba->pci_bar2_map, + pci_resource_len(phba->pcidev, + PCI_64BIT_BAR4)); + + if (dpp_map) + phba->sli4_hba.dpp_regs_memmap_wc_p = dpp_map; + } + + return phba->sli4_hba.dpp_regs_memmap_wc_p; +} + /** * lpfc_modify_hba_eq_delay - Modify Delay Multiplier on EQs * @phba: HBA structure that EQs are on. @@ -16940,9 +16966,6 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, uint8_t dpp_barset; uint32_t dpp_offset; uint8_t wq_create_version; -#ifdef CONFIG_X86 - unsigned long pg_addr; -#endif /* sanity check on queue memory */ if (!wq || !cq) @@ -17128,14 +17151,15 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, #ifdef CONFIG_X86 /* Enable combined writes for DPP aperture */ - pg_addr = (unsigned long)(wq->dpp_regaddr) & PAGE_MASK; - rc = set_memory_wc(pg_addr, 1); - if (rc) { + bar_memmap_p = lpfc_dpp_wc_map(phba, dpp_barset); + if (!bar_memmap_p) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3272 Cannot setup Combined " "Write on WQ[%d] - disable DPP\n", wq->queue_id); phba->cfg_enable_dpp = 0; + } else { + wq->dpp_regaddr = bar_memmap_p + dpp_offset; } #else phba->cfg_enable_dpp = 0;
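The lpfc rework drops the per-page set_memory_wc() fixup in favour of one lazily created, cached write-combined mapping of the whole DPP BAR (released in lpfc_sli4_pci_mem_unset(), per the lpfc_init.c hunk above). The map-once shape, reduced to its essentials:

    #include <linux/io.h>
    #include <linux/pci.h>

    static void __iomem *wc_map_once(struct pci_dev *pdev, int bar,
                                     void __iomem **cache)
    {
        /* First caller maps the BAR write-combined; the rest reuse it. */
        if (!*cache)
            *cache = ioremap_wc(pci_resource_start(pdev, bar),
                                pci_resource_len(pdev, bar));

        return *cache;
    }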
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index ee58383..b6d9060 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -785,6 +785,9 @@ struct lpfc_sli4_hba { void __iomem *dpp_regs_memmap_p; /* Kernel memory mapped address for * dpp registers */ + void __iomem *dpp_regs_memmap_wc_p;/* Kernel memory mapped address for + * dpp registers with write combining + */ union { struct { /* IF Type 0, BAR 0 PCI cfg space reg mem map */
diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 81150be..c744210 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -1618,6 +1618,7 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc) ioc_info(mrioc, "successfully transitioned to %s state\n", mpi3mr_iocstate_name(ioc_state)); + mpi3mr_clear_reset_history(mrioc); return 0; } ioc_status = readl(&mrioc->sysif_regs->ioc_status); @@ -1637,6 +1638,15 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc) elapsed_time_sec = jiffies_to_msecs(jiffies - start_time)/1000; } while (elapsed_time_sec < mrioc->ready_timeout); + ioc_state = mpi3mr_get_iocstate(mrioc); + if (ioc_state == MRIOC_STATE_READY) { + ioc_info(mrioc, + "successfully transitioned to %s state after %llu seconds\n", + mpi3mr_iocstate_name(ioc_state), elapsed_time_sec); + mpi3mr_clear_reset_history(mrioc); + return 0; + } + out_failed: elapsed_time_sec = jiffies_to_msecs(jiffies - start_time)/1000; if ((retry < 2) && (elapsed_time_sec < (mrioc->ready_timeout - 60))) { @@ -4807,21 +4817,25 @@ void mpi3mr_memset_buffers(struct mpi3mr_ioc *mrioc) } for (i = 0; i < mrioc->num_queues; i++) { - mrioc->op_reply_qinfo[i].qid = 0; - mrioc->op_reply_qinfo[i].ci = 0; - mrioc->op_reply_qinfo[i].num_replies = 0; - mrioc->op_reply_qinfo[i].ephase = 0; - atomic_set(&mrioc->op_reply_qinfo[i].pend_ios, 0); - atomic_set(&mrioc->op_reply_qinfo[i].in_use, 0); - mpi3mr_memset_op_reply_q_buffers(mrioc, i); + if (mrioc->op_reply_qinfo) { + mrioc->op_reply_qinfo[i].qid = 0; + mrioc->op_reply_qinfo[i].ci = 0; + mrioc->op_reply_qinfo[i].num_replies = 0; + mrioc->op_reply_qinfo[i].ephase = 0; + atomic_set(&mrioc->op_reply_qinfo[i].pend_ios, 0); + atomic_set(&mrioc->op_reply_qinfo[i].in_use, 0); + mpi3mr_memset_op_reply_q_buffers(mrioc, i); + } - mrioc->req_qinfo[i].ci = 0; - mrioc->req_qinfo[i].pi = 0; - mrioc->req_qinfo[i].num_requests = 0; - mrioc->req_qinfo[i].qid = 0; - mrioc->req_qinfo[i].reply_qid = 0; - spin_lock_init(&mrioc->req_qinfo[i].q_lock); - mpi3mr_memset_op_req_q_buffers(mrioc, i); + if (mrioc->req_qinfo) { + mrioc->req_qinfo[i].ci = 0; + mrioc->req_qinfo[i].pi = 0; + mrioc->req_qinfo[i].num_requests = 0; + mrioc->req_qinfo[i].qid = 0; + mrioc->req_qinfo[i].reply_qid = 0; + spin_lock_init(&mrioc->req_qinfo[i].q_lock); + mpi3mr_memset_op_req_q_buffers(mrioc, i); + } } atomic_set(&mrioc->pend_large_data_sz, 0);
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 6a8d35a..645524f 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -525,8 +525,9 @@ int pm8001_queue_command(struct sas_task *task, gfp_t gfp_flags) } else { task->task_done(task); } - rc = -ENODEV; - goto err_out; + spin_unlock_irqrestore(&pm8001_ha->lock, flags); + pm8001_dbg(pm8001_ha, IO, "pm8001_task_exec device gone\n"); + return 0; } ccb = pm8001_ccb_alloc(pm8001_ha, pm8001_dev, task);
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 9038f67..dbe3cd4 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -2751,7 +2751,6 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, if (!elsio->u.els_logo.els_logo_pyld) { /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); - qla2x00_free_fcport(fcport); return QLA_FUNCTION_FAILED; } @@ -2776,7 +2775,6 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, if (rval != QLA_SUCCESS) { /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); - qla2x00_free_fcport(fcport); return QLA_FUNCTION_FAILED; }
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index 0dada89d..68a9924 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c
@@ -190,7 +190,7 @@ static struct { {"IBM", "2076", NULL, BLIST_NO_VPD_SIZE}, {"IBM", "2105", NULL, BLIST_RETRY_HWERROR}, {"iomega", "jaz 1GB", "J.86", BLIST_NOTQ | BLIST_NOLUN}, - {"IOMEGA", "ZIP", NULL, BLIST_NOTQ | BLIST_NOLUN}, + {"IOMEGA", "ZIP", NULL, BLIST_NOTQ | BLIST_NOLUN | BLIST_SKIP_IO_HINTS}, {"IOMEGA", "Io20S *F", NULL, BLIST_KEY}, {"INSITE", "Floptical F*8I", NULL, BLIST_KEY}, {"INSITE", "I325VM", NULL, BLIST_KEY},
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 60c06fa..7b11bc7 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c
@@ -360,11 +360,8 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, * default device queue depth to figure out sbitmap shift * since we use this queue depth most of times. */ - if (scsi_realloc_sdev_budget_map(sdev, depth)) { - put_device(&starget->dev); - kfree(sdev); - goto out; - } + if (scsi_realloc_sdev_budget_map(sdev, depth)) + goto out_device_destroy; scsi_change_queue_depth(sdev, depth);
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 12124f9..1341270 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c
@@ -1734,7 +1734,7 @@ static int sas_user_scan(struct Scsi_Host *shost, uint channel, break; default: - if (channel < shost->max_channel) { + if (channel <= shost->max_channel) { res = scsi_scan_host_selected(shost, channel, id, lun, SCSI_SCAN_MANUAL); } else {
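This hunk pairs with the two hisi_sas hunks above: shost->max_channel is the highest valid channel index, not a channel count, so a single-channel host must report 0, and a manual scan addressed to channel == max_channel has to be accepted. Stated as a predicate:

    #include <scsi/scsi_host.h>

    static bool channel_is_valid(const struct Scsi_Host *shost, uint channel)
    {
        /* max_channel is an inclusive upper bound; 0 means one channel. */
        return channel <= shost->max_channel;
    }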
diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index 35101e9..4c34864 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c
@@ -215,7 +215,7 @@ static unsigned char *ses_get_page2_descriptor(struct enclosure_device *edev, unsigned char *type_ptr = ses_dev->page1_types; unsigned char *desc_ptr = ses_dev->page2 + 8; - if (ses_recv_diag(sdev, 2, ses_dev->page2, ses_dev->page2_len) < 0) + if (ses_recv_diag(sdev, 2, ses_dev->page2, ses_dev->page2_len)) return NULL; for (i = 0; i < ses_dev->page1_num_types; i++, type_ptr += 4) { @@ -528,9 +528,8 @@ struct efd { }; static int ses_enclosure_find_by_addr(struct enclosure_device *edev, - void *data) + struct efd *efd) { - struct efd *efd = data; int i; struct ses_component *scomp; @@ -683,7 +682,7 @@ static void ses_match_to_enclosure(struct enclosure_device *edev, if (efd.addr) { efd.dev = &sdev->sdev_gendev; - enclosure_for_each_device(ses_enclosure_find_by_addr, &efd); + ses_enclosure_find_by_addr(edev, &efd); } }
diff --git a/drivers/scsi/snic/vnic_dev.c b/drivers/scsi/snic/vnic_dev.c index c692de0..ed7771e 100644 --- a/drivers/scsi/snic/vnic_dev.c +++ b/drivers/scsi/snic/vnic_dev.c
@@ -42,8 +42,6 @@ struct vnic_dev { struct vnic_devcmd_notify *notify; struct vnic_devcmd_notify notify_copy; dma_addr_t notify_pa; - u32 *linkstatus; - dma_addr_t linkstatus_pa; struct vnic_stats *stats; dma_addr_t stats_pa; struct vnic_devcmd_fw_info *fw_info; @@ -650,8 +648,6 @@ int svnic_dev_init(struct vnic_dev *vdev, int arg) int svnic_dev_link_status(struct vnic_dev *vdev) { - if (vdev->linkstatus) - return *vdev->linkstatus; if (!vnic_dev_notify_ready(vdev)) return 0; @@ -686,11 +682,6 @@ void svnic_dev_unregister(struct vnic_dev *vdev) sizeof(struct vnic_devcmd_notify), vdev->notify, vdev->notify_pa); - if (vdev->linkstatus) - dma_free_coherent(&vdev->pdev->dev, - sizeof(u32), - vdev->linkstatus, - vdev->linkstatus_pa); if (vdev->stats) dma_free_coherent(&vdev->pdev->dev, sizeof(struct vnic_stats),
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 664ad55..ae1abab 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c
@@ -1856,8 +1856,9 @@ static enum scsi_qc_status storvsc_queuecommand(struct Scsi_Host *host, cmd_request->payload_sz = payload_sz; /* Invokes the vsc to start an IO */ - ret = storvsc_do_io(dev, cmd_request, get_cpu()); - put_cpu(); + migrate_disable(); + ret = storvsc_do_io(dev, cmd_request, smp_processor_id()); + migrate_enable(); if (ret) scsi_dma_unmap(scmnd);
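In the storvsc hunk, get_cpu()/put_cpu() gives way to migrate_disable()/migrate_enable(). Both keep smp_processor_id() stable for the duration, but get_cpu() does it by disabling preemption, turning the section atomic, which is presumably the problem being avoided here (for instance on PREEMPT_RT, where the I/O path may take sleeping locks); migrate_disable() only pins the task to its current CPU and leaves it preemptible. Side by side:

    #include <linux/preempt.h>
    #include <linux/printk.h>
    #include <linux/smp.h>

    static void cpu_stable_sections(void)
    {
        int cpu;

        /* New pattern: CPU-stable, still preemptible. */
        migrate_disable();
        cpu = smp_processor_id();
        pr_debug("pinned on CPU %d, may sleep here\n", cpu);
        migrate_enable();

        /* Old pattern: CPU-stable because preemption is off. */
        cpu = get_cpu();
        pr_debug("atomic on CPU %d, must not sleep\n", cpu);
        put_cpu();
    }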
diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 42cde00..989bcae 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c
@@ -1175,7 +1175,7 @@ static void scsifront_backend_changed(struct xenbus_device *dev, return; } - if (xenbus_read_driver_state(dev->nodename) == + if (xenbus_read_driver_state(dev, dev->nodename) == XenbusStateInitialised) scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index 9aa7218..1ed6be6 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c
@@ -1535,10 +1535,8 @@ static int of_qcom_slim_ngd_register(struct device *parent, ngd->id = id; ngd->pdev->dev.parent = parent; - ret = driver_set_override(&ngd->pdev->dev, - &ngd->pdev->driver_override, - QCOM_SLIM_NGD_DRV_NAME, - strlen(QCOM_SLIM_NGD_DRV_NAME)); + ret = device_set_driver_override(&ngd->pdev->dev, + QCOM_SLIM_NGD_DRV_NAME); if (ret) { platform_device_put(ngd->pdev); kfree(ngd);
diff --git a/drivers/soc/aspeed/aspeed-socinfo.c b/drivers/soc/aspeed/aspeed-socinfo.c index 5e34e01..fb8fde9 100644 --- a/drivers/soc/aspeed/aspeed-socinfo.c +++ b/drivers/soc/aspeed/aspeed-socinfo.c
@@ -39,7 +39,7 @@ static const char *siliconid_to_name(u32 siliconid) unsigned int i; for (i = 0 ; i < ARRAY_SIZE(rev_table) ; ++i) { - if (rev_table[i].id == id) + if ((rev_table[i].id & 0xff00ffff) == id) return rev_table[i].name; }
diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 411381f..9ddafcb 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c
@@ -1827,6 +1827,8 @@ EXPORT_SYMBOL(qman_create_fq); void qman_destroy_fq(struct qman_fq *fq) { + int leaked; + /* * We don't need to lock the FQ as it is a pre-condition that the FQ be * quiesced. Instead, run some checks. @@ -1834,11 +1836,29 @@ void qman_destroy_fq(struct qman_fq *fq) switch (fq->state) { case qman_fq_state_parked: case qman_fq_state_oos: - if (fq_isset(fq, QMAN_FQ_FLAG_DYNAMIC_FQID)) - qman_release_fqid(fq->fqid); + /* + * There's a race condition here between releasing the fqid, + * setting the fq_table entry to NULL, and freeing the fqid. + * To prevent it, this order should be respected: + */ + if (fq_isset(fq, QMAN_FQ_FLAG_DYNAMIC_FQID)) { + leaked = qman_shutdown_fq(fq->fqid); + if (leaked) + pr_debug("FQID %d leaked\n", fq->fqid); + } DPAA_ASSERT(fq_table[fq->idx]); fq_table[fq->idx] = NULL; + + if (fq_isset(fq, QMAN_FQ_FLAG_DYNAMIC_FQID) && !leaked) { + /* + * fq_table[fq->idx] should be set to NULL before + * freeing fq->fqid otherwise it could be allocated by + * qman_alloc_fqid() while still being !NULL + */ + smp_wmb(); + gen_pool_free(qm_fqalloc, fq->fqid | DPAA_GENALLOC_OFF, 1); + } return; default: break;
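The smp_wmb() in the qman hunk enforces a retire-before-recycle order: the NULL store to fq_table[] must be visible before the FQID can be handed out again, or a racing qman_create_fq() on the recycled ID could observe the stale table entry. The two steps in isolation, as a generic sketch with id_free() as a stand-in:

    #include <asm/barrier.h>

    static void retire_then_recycle(void **slot, unsigned long id,
                                    void (*id_free)(unsigned long))
    {
        *slot = NULL;   /* step 1: invalidate the lookup entry */
        smp_wmb();      /* make step 1 visible before step 2 */
        id_free(id);    /* step 2: the ID may now be reallocated */
    }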
diff --git a/drivers/soc/fsl/qe/qmc.c b/drivers/soc/fsl/qe/qmc.c index c4587b3..672adff 100644 --- a/drivers/soc/fsl/qe/qmc.c +++ b/drivers/soc/fsl/qe/qmc.c
@@ -1790,8 +1790,8 @@ static int qmc_qe_init_resources(struct qmc *qmc, struct platform_device *pdev) return -EINVAL; qmc->dpram_offset = res->start - qe_muram_dma(qe_muram_addr(0)); qmc->dpram = devm_ioremap_resource(qmc->dev, res); - if (IS_ERR(qmc->scc_pram)) - return PTR_ERR(qmc->scc_pram); + if (IS_ERR(qmc->dpram)) + return PTR_ERR(qmc->dpram); return 0; }
diff --git a/drivers/soc/microchip/mpfs-control-scb.c b/drivers/soc/microchip/mpfs-control-scb.c index f0b84b1..8dda570 100644 --- a/drivers/soc/microchip/mpfs-control-scb.c +++ b/drivers/soc/microchip/mpfs-control-scb.c
@@ -14,8 +14,10 @@ static int mpfs_control_scb_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - return mfd_add_devices(dev, PLATFORM_DEVID_NONE, mpfs_control_scb_devs, - ARRAY_SIZE(mpfs_control_scb_devs), NULL, 0, NULL); + return devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE, + mpfs_control_scb_devs, + ARRAY_SIZE(mpfs_control_scb_devs), NULL, 0, + NULL); } static const struct of_device_id mpfs_control_scb_of_match[] = {
diff --git a/drivers/soc/microchip/mpfs-mss-top-sysreg.c b/drivers/soc/microchip/mpfs-mss-top-sysreg.c index b2244e4..b0f42b8 100644 --- a/drivers/soc/microchip/mpfs-mss-top-sysreg.c +++ b/drivers/soc/microchip/mpfs-mss-top-sysreg.c
@@ -16,8 +16,10 @@ static int mpfs_mss_top_sysreg_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; int ret; - ret = mfd_add_devices(dev, PLATFORM_DEVID_NONE, mpfs_mss_top_sysreg_devs, - ARRAY_SIZE(mpfs_mss_top_sysreg_devs) , NULL, 0, NULL); + ret = devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE, + mpfs_mss_top_sysreg_devs, + ARRAY_SIZE(mpfs_mss_top_sysreg_devs), NULL, + 0, NULL); if (ret) return ret;
diff --git a/drivers/soc/microchip/mpfs-sys-controller.c b/drivers/soc/microchip/mpfs-sys-controller.c index 8e7ae3cb..10b2fc3 100644 --- a/drivers/soc/microchip/mpfs-sys-controller.c +++ b/drivers/soc/microchip/mpfs-sys-controller.c
@@ -142,8 +142,10 @@ static int mpfs_sys_controller_probe(struct platform_device *pdev) sys_controller->flash = of_get_mtd_device_by_node(np); of_node_put(np); - if (IS_ERR(sys_controller->flash)) - return dev_err_probe(dev, PTR_ERR(sys_controller->flash), "Failed to get flash\n"); + if (IS_ERR(sys_controller->flash)) { + ret = dev_err_probe(dev, PTR_ERR(sys_controller->flash), "Failed to get flash\n"); + goto out_free; + } no_flash: sys_controller->client.dev = dev; @@ -155,8 +157,7 @@ static int mpfs_sys_controller_probe(struct platform_device *pdev) if (IS_ERR(sys_controller->chan)) { ret = dev_err_probe(dev, PTR_ERR(sys_controller->chan), "Failed to get mbox channel\n"); - kfree(sys_controller); - return ret; + goto out_free; } init_completion(&sys_controller->c); @@ -174,6 +175,10 @@ static int mpfs_sys_controller_probe(struct platform_device *pdev) dev_info(&pdev->dev, "Registered MPFS system controller\n"); return 0; + +out_free: + kfree(sys_controller); + return ret; } static void mpfs_sys_controller_remove(struct platform_device *pdev)
diff --git a/drivers/soc/qcom/pdr_internal.h b/drivers/soc/qcom/pdr_internal.h index 039508c..047c016 100644 --- a/drivers/soc/qcom/pdr_internal.h +++ b/drivers/soc/qcom/pdr_internal.h
@@ -84,7 +84,7 @@ struct servreg_set_ack_resp { struct servreg_loc_pfr_req { char service[SERVREG_NAME_LENGTH + 1]; - char reason[257]; + char reason[SERVREG_PFR_LENGTH + 1]; }; struct servreg_loc_pfr_resp {
diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c index d0afdcb..619bad2c 100644 --- a/drivers/soc/qcom/pmic_glink_altmode.c +++ b/drivers/soc/qcom/pmic_glink_altmode.c
@@ -62,6 +62,9 @@ struct usbc_notify { u8 orientation; u8 mux_ctrl; #define MUX_CTRL_STATE_NO_CONN 0 +#define MUX_CTRL_STATE_USB3_ONLY 1 +#define MUX_CTRL_STATE_DP4LN 2 +#define MUX_CTRL_STATE_USB3_DP 3 #define MUX_CTRL_STATE_TUNNELING 4 u8 res; @@ -350,15 +353,20 @@ static void pmic_glink_altmode_worker(struct work_struct *work) typec_switch_set(alt_port->typec_switch, alt_port->orientation); - if (alt_port->mux_ctrl == MUX_CTRL_STATE_NO_CONN) { - pmic_glink_altmode_safe(altmode, alt_port); - } else if (alt_port->svid == USB_TYPEC_TBT_SID) { - pmic_glink_altmode_enable_tbt(altmode, alt_port); - } else if (alt_port->svid == USB_TYPEC_DP_SID) { - pmic_glink_altmode_enable_dp(altmode, alt_port, - alt_port->mode, - alt_port->hpd_state, - alt_port->hpd_irq); + /* + * MUX_CTRL_STATE_DP4LN/USB3_DP may only be set if SVID=DP, but we need + * to special-case the SVID=DP && mux_ctrl=NO_CONN case to deliver a + * HPD notification + */ + if (alt_port->svid == USB_TYPEC_DP_SID) { + if (alt_port->mux_ctrl == MUX_CTRL_STATE_NO_CONN) { + pmic_glink_altmode_safe(altmode, alt_port); + } else { + pmic_glink_altmode_enable_dp(altmode, alt_port, + alt_port->mode, + alt_port->hpd_state, + alt_port->hpd_irq); + } if (alt_port->hpd_state) conn_status = connector_status_connected; @@ -367,9 +375,18 @@ static void pmic_glink_altmode_worker(struct work_struct *work) drm_aux_hpd_bridge_notify(&alt_port->bridge->dev, conn_status); } else if (alt_port->mux_ctrl == MUX_CTRL_STATE_TUNNELING) { - pmic_glink_altmode_enable_usb4(altmode, alt_port); - } else { + if (alt_port->svid == USB_TYPEC_TBT_SID) + pmic_glink_altmode_enable_tbt(altmode, alt_port); + else + pmic_glink_altmode_enable_usb4(altmode, alt_port); + } else if (alt_port->mux_ctrl == MUX_CTRL_STATE_USB3_ONLY) { pmic_glink_altmode_enable_usb(altmode, alt_port); + } else if (alt_port->mux_ctrl == MUX_CTRL_STATE_NO_CONN) { + pmic_glink_altmode_safe(altmode, alt_port); + } else { + dev_err(altmode->dev, "Got unknown mux_ctrl: %u on port %u, forcing safe mode\n", + alt_port->mux_ctrl, alt_port->index); + pmic_glink_altmode_safe(altmode, alt_port); } pmic_glink_altmode_request(altmode, ALTMODE_PAN_ACK, alt_port->index);
diff --git a/drivers/soc/qcom/qcom_pdr_msg.c b/drivers/soc/qcom/qcom_pdr_msg.c index ca98932..02022b1 100644 --- a/drivers/soc/qcom/qcom_pdr_msg.c +++ b/drivers/soc/qcom/qcom_pdr_msg.c
@@ -325,7 +325,7 @@ const struct qmi_elem_info servreg_loc_pfr_req_ei[] = { }, { .data_type = QMI_STRING, - .elem_len = SERVREG_NAME_LENGTH + 1, + .elem_len = SERVREG_PFR_LENGTH + 1, .elem_size = sizeof(char), .array_type = VAR_LEN_ARRAY, .tlv_type = 0x02,
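This hunk and the pdr_internal.h hunk above are two halves of one fix: the QMI decoder writes up to .elem_len bytes into the reason field, so the struct bound and the element table must be derived from the same constant. A sketch with illustrative values (the real constants live in the Qualcomm PDR headers):

    #include <linux/soc/qcom/qmi.h>
    #include <linux/stddef.h>

    #define DEMO_NAME_LEN 64    /* assumed values, for illustration only */
    #define DEMO_PFR_LEN  256

    struct demo_pfr_req {
        char service[DEMO_NAME_LEN + 1];
        char reason[DEMO_PFR_LEN + 1];  /* must match .elem_len below */
    };

    static const struct qmi_elem_info demo_pfr_req_ei[] = {
        {
            .data_type  = QMI_STRING,
            .elem_len   = DEMO_PFR_LEN + 1,  /* the decoder's write bound */
            .elem_size  = sizeof(char),
            .array_type = VAR_LEN_ARRAY,
            .tlv_type   = 0x02,
            .offset     = offsetof(struct demo_pfr_req, reason),
        },
        {}
    };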
diff --git a/drivers/soc/rockchip/grf.c b/drivers/soc/rockchip/grf.c index 04937c4..b459607 100644 --- a/drivers/soc/rockchip/grf.c +++ b/drivers/soc/rockchip/grf.c
@@ -231,6 +231,7 @@ static int __init rockchip_grf_init(void) grf = syscon_node_to_regmap(np); if (IS_ERR(grf)) { pr_err("%s: could not get grf syscon\n", __func__); + of_node_put(np); return PTR_ERR(grf); }
diff --git a/drivers/spi/spi-amlogic-spifc-a4.c b/drivers/spi/spi-amlogic-spifc-a4.c index 2aef528..16346f5 100644 --- a/drivers/spi/spi-amlogic-spifc-a4.c +++ b/drivers/spi/spi-amlogic-spifc-a4.c
@@ -411,7 +411,7 @@ static int aml_sfc_dma_buffer_setup(struct aml_sfc *sfc, void *databuf, ret = dma_mapping_error(sfc->dev, sfc->daddr); if (ret) { dev_err(sfc->dev, "DMA mapping error\n"); - goto out_map_data; + return ret; } cmd = CMD_DATA_ADDRL(sfc->daddr); @@ -429,7 +429,6 @@ static int aml_sfc_dma_buffer_setup(struct aml_sfc *sfc, void *databuf, ret = dma_mapping_error(sfc->dev, sfc->iaddr); if (ret) { dev_err(sfc->dev, "DMA mapping error\n"); - dma_unmap_single(sfc->dev, sfc->daddr, datalen, dir); goto out_map_data; } @@ -448,7 +447,7 @@ static int aml_sfc_dma_buffer_setup(struct aml_sfc *sfc, void *databuf, return 0; out_map_info: - dma_unmap_single(sfc->dev, sfc->iaddr, datalen, dir); + dma_unmap_single(sfc->dev, sfc->iaddr, infolen, dir); out_map_data: dma_unmap_single(sfc->dev, sfc->daddr, datalen, dir); @@ -1067,6 +1066,13 @@ static const struct nand_ecc_engine_ops aml_sfc_ecc_engine_ops = { .finish_io_req = aml_sfc_ecc_finish_io_req, }; +static void aml_sfc_unregister_ecc_engine(void *data) +{ + struct nand_ecc_engine *eng = data; + + nand_ecc_unregister_on_host_hw_engine(eng); +} + static int aml_sfc_clk_init(struct aml_sfc *sfc) { sfc->gate_clk = devm_clk_get_enabled(sfc->dev, "gate"); @@ -1084,14 +1090,6 @@ static int aml_sfc_clk_init(struct aml_sfc *sfc) return clk_set_rate(sfc->core_clk, SFC_BUS_DEFAULT_CLK); } -static int aml_sfc_disable_clk(struct aml_sfc *sfc) -{ - clk_disable_unprepare(sfc->core_clk); - clk_disable_unprepare(sfc->gate_clk); - - return 0; -} - static int aml_sfc_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -1142,16 +1140,12 @@ static int aml_sfc_probe(struct platform_device *pdev) /* Enable Amlogic flash controller spi mode */ ret = regmap_write(sfc->regmap_base, SFC_SPI_CFG, SPI_MODE_EN); - if (ret) { - dev_err(dev, "failed to enable SPI mode\n"); - goto err_out; - } + if (ret) + return dev_err_probe(dev, ret, "failed to enable SPI mode\n"); ret = dma_set_mask(sfc->dev, DMA_BIT_MASK(32)); - if (ret) { - dev_err(sfc->dev, "failed to set dma mask\n"); - goto err_out; - } + if (ret) + return dev_err_probe(sfc->dev, ret, "failed to set dma mask\n"); sfc->ecc_eng.dev = &pdev->dev; sfc->ecc_eng.integration = NAND_ECC_ENGINE_INTEGRATION_PIPELINED; @@ -1159,10 +1153,13 @@ static int aml_sfc_probe(struct platform_device *pdev) sfc->ecc_eng.priv = sfc; ret = nand_ecc_register_on_host_hw_engine(&sfc->ecc_eng); - if (ret) { - dev_err(&pdev->dev, "failed to register Aml host ecc engine.\n"); - goto err_out; - } + if (ret) + return dev_err_probe(&pdev->dev, ret, "failed to register Aml host ecc engine.\n"); + + ret = devm_add_action_or_reset(dev, aml_sfc_unregister_ecc_engine, + &sfc->ecc_eng); + if (ret) + return dev_err_probe(dev, ret, "failed to add ECC unregister action\n"); ret = of_property_read_u32(np, "amlogic,rx-adj", &val); if (!ret) @@ -1178,24 +1175,7 @@ static int aml_sfc_probe(struct platform_device *pdev) ctrl->min_speed_hz = SFC_MIN_FREQUENCY; ctrl->num_chipselect = SFC_MAX_CS_NUM; - ret = devm_spi_register_controller(dev, ctrl); - if (ret) - goto err_out; - - return 0; - -err_out: - aml_sfc_disable_clk(sfc); - - return ret; -} - -static void aml_sfc_remove(struct platform_device *pdev) -{ - struct spi_controller *ctlr = platform_get_drvdata(pdev); - struct aml_sfc *sfc = spi_controller_get_devdata(ctlr); - - aml_sfc_disable_clk(sfc); + return devm_spi_register_controller(dev, ctrl); } static const struct of_device_id aml_sfc_of_match[] = { @@ -1213,7 +1193,6 @@ static struct platform_driver aml_sfc_driver = { .of_match_table = aml_sfc_of_match, }, .probe = aml_sfc_probe, - .remove = aml_sfc_remove, }; module_platform_driver(aml_sfc_driver);
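The aml_sfc conversion above leans on devm_add_action_or_reset() so the ECC engine is unregistered automatically at unbind time, which is what lets the driver shed its remove() callback and the hand-rolled error ladder. The general shape of such a cleanup action, with my_teardown() as a hypothetical stand-in:

    #include <linux/device.h>

    static void my_teardown(void *data)
    {
        /* hypothetical quiesce/unregister of the resource behind 'data' */
    }

    static int register_with_cleanup(struct device *dev, void *res)
    {
        /*
         * On failure the action has already been invoked, so the caller
         * can bail out without any manual unwinding.
         */
        return devm_add_action_or_reset(dev, my_teardown, res);
    }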
diff --git a/drivers/spi/spi-amlogic-spisg.c b/drivers/spi/spi-amlogic-spisg.c index 1509df2..9d568e3 100644 --- a/drivers/spi/spi-amlogic-spisg.c +++ b/drivers/spi/spi-amlogic-spisg.c
@@ -729,9 +729,9 @@ static int aml_spisg_probe(struct platform_device *pdev) }; if (of_property_read_bool(dev->of_node, "spi-slave")) - ctlr = spi_alloc_target(dev, sizeof(*spisg)); + ctlr = devm_spi_alloc_target(dev, sizeof(*spisg)); else - ctlr = spi_alloc_host(dev, sizeof(*spisg)); + ctlr = devm_spi_alloc_host(dev, sizeof(*spisg)); if (!ctlr) return -ENOMEM; @@ -750,10 +750,8 @@ static int aml_spisg_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(spisg->map), "regmap init failed\n"); irq = platform_get_irq(pdev, 0); - if (irq < 0) { - ret = irq; - goto out_controller; - } + if (irq < 0) + return irq; ret = device_reset_optional(dev); if (ret) @@ -817,8 +815,6 @@ static int aml_spisg_probe(struct platform_device *pdev) if (spisg->core) clk_disable_unprepare(spisg->core); clk_disable_unprepare(spisg->pclk); -out_controller: - spi_controller_put(ctlr); return ret; }
diff --git a/drivers/spi/spi-atcspi200.c b/drivers/spi/spi-atcspi200.c index 60a37ff..2665f31a 100644 --- a/drivers/spi/spi-atcspi200.c +++ b/drivers/spi/spi-atcspi200.c
@@ -195,7 +195,15 @@ static void atcspi_set_trans_ctl(struct atcspi_dev *spi, if (op->addr.buswidth > 1) tc |= TRANS_ADDR_FMT; if (op->data.nbytes) { - tc |= TRANS_DUAL_QUAD(ffs(op->data.buswidth) - 1); + unsigned int width_code; + + width_code = ffs(op->data.buswidth) - 1; + if (unlikely(width_code > 3)) { + WARN_ON_ONCE(1); + width_code = 0; + } + tc |= TRANS_DUAL_QUAD(width_code); + if (op->data.dir == SPI_MEM_DATA_IN) { if (op->dummy.nbytes) tc |= TRANS_MODE_DMY_READ | @@ -497,31 +505,17 @@ static int atcspi_init_resources(struct platform_device *pdev, static int atcspi_configure_dma(struct atcspi_dev *spi) { - struct dma_chan *dma_chan; - int ret = 0; + spi->host->dma_rx = devm_dma_request_chan(spi->dev, "rx"); + if (IS_ERR(spi->host->dma_rx)) + return PTR_ERR(spi->host->dma_rx); - dma_chan = devm_dma_request_chan(spi->dev, "rx"); - if (IS_ERR(dma_chan)) { - ret = PTR_ERR(dma_chan); - goto err_exit; - } - spi->host->dma_rx = dma_chan; + spi->host->dma_tx = devm_dma_request_chan(spi->dev, "tx"); + if (IS_ERR(spi->host->dma_tx)) + return PTR_ERR(spi->host->dma_tx); - dma_chan = devm_dma_request_chan(spi->dev, "tx"); - if (IS_ERR(dma_chan)) { - ret = PTR_ERR(dma_chan); - goto free_rx; - } - spi->host->dma_tx = dma_chan; init_completion(&spi->dma_completion); - return ret; - -free_rx: - dma_release_channel(spi->host->dma_rx); - spi->host->dma_rx = NULL; -err_exit: - return ret; + return 0; } static int atcspi_enable_clk(struct atcspi_dev *spi)
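The atcspi clamp protects the ffs()-based encoding of the TRANS_DUAL_QUAD field: bus widths 1, 2, 4 and 8 map to codes 0 through 3, and anything that would encode past the field's range falls back to single-line mode behind a one-time warning. The mapping in isolation:

    #include <linux/bitops.h>

    static unsigned int trans_width_code(unsigned int buswidth)
    {
        /* 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3 (ffs() is 1-based). */
        unsigned int code = ffs(buswidth) - 1;

        return code > 3 ? 0 : code; /* assume the field is 2 bits wide */
    }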
diff --git a/drivers/spi/spi-axiado.c b/drivers/spi/spi-axiado.c index 8cea814..8ddcd27 100644 --- a/drivers/spi/spi-axiado.c +++ b/drivers/spi/spi-axiado.c
@@ -765,30 +765,22 @@ static int ax_spi_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ctlr); xspi->regs = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(xspi->regs)) { - ret = PTR_ERR(xspi->regs); - goto remove_ctlr; - } + if (IS_ERR(xspi->regs)) + return PTR_ERR(xspi->regs); xspi->pclk = devm_clk_get(&pdev->dev, "pclk"); - if (IS_ERR(xspi->pclk)) { - dev_err(&pdev->dev, "pclk clock not found.\n"); - ret = PTR_ERR(xspi->pclk); - goto remove_ctlr; - } + if (IS_ERR(xspi->pclk)) + return dev_err_probe(&pdev->dev, PTR_ERR(xspi->pclk), + "pclk clock not found.\n"); xspi->ref_clk = devm_clk_get(&pdev->dev, "ref"); - if (IS_ERR(xspi->ref_clk)) { - dev_err(&pdev->dev, "ref clock not found.\n"); - ret = PTR_ERR(xspi->ref_clk); - goto remove_ctlr; - } + if (IS_ERR(xspi->ref_clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(xspi->ref_clk), + "ref clock not found.\n"); ret = clk_prepare_enable(xspi->pclk); - if (ret) { - dev_err(&pdev->dev, "Unable to enable APB clock.\n"); - goto remove_ctlr; - } + if (ret) + return dev_err_probe(&pdev->dev, ret, "Unable to enable APB clock.\n"); ret = clk_prepare_enable(xspi->ref_clk); if (ret) { @@ -869,8 +861,7 @@ static int ax_spi_probe(struct platform_device *pdev) clk_disable_unprepare(xspi->ref_clk); clk_dis_apb: clk_disable_unprepare(xspi->pclk); -remove_ctlr: - spi_controller_put(ctlr); + return ret; }
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 649ff55..2ead419 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c
@@ -76,6 +76,11 @@ struct cqspi_flash_pdata { u8 cs; }; +static const struct clk_bulk_data cqspi_clks[CLK_QSPI_NUM] = { + [CLK_QSPI_APB] = { .id = "apb" }, + [CLK_QSPI_AHB] = { .id = "ahb" }, +}; + struct cqspi_st { struct platform_device *pdev; struct spi_controller *host; @@ -1478,14 +1483,6 @@ static int cqspi_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op) if (refcount_read(&cqspi->inflight_ops) == 0) return -ENODEV; - if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) { - ret = pm_runtime_resume_and_get(dev); - if (ret) { - dev_err(&mem->spi->dev, "resume failed with %d\n", ret); - return ret; - } - } - if (!refcount_read(&cqspi->refcount)) return -EBUSY; @@ -1497,6 +1494,14 @@ static int cqspi_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op) return -EBUSY; } + if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) { + ret = pm_runtime_resume_and_get(dev); + if (ret) { + dev_err(&mem->spi->dev, "resume failed with %d\n", ret); + goto dec_inflight_refcount; + } + } + ret = cqspi_mem_process(mem, op); if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) @@ -1505,6 +1510,7 @@ static int cqspi_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op) if (ret) dev_err(&mem->spi->dev, "operation failed with %d\n", ret); +dec_inflight_refcount: if (refcount_read(&cqspi->inflight_ops) > 1) refcount_dec(&cqspi->inflight_ops); @@ -1823,6 +1829,7 @@ static int cqspi_probe(struct platform_device *pdev) } /* Obtain QSPI clocks. */ + memcpy(&cqspi->clks, &cqspi_clks, sizeof(cqspi->clks)); ret = devm_clk_bulk_get_optional(dev, CLK_QSPI_NUM, cqspi->clks); if (ret) return dev_err_probe(dev, ret, "Failed to get clocks\n");
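The cqspi hunk adds a const template for the two bulk clocks and copies it into the per-device array before the lookup. clk_bulk consumers must fill in the .id strings themselves; the core then populates the .clk members, which is why the live array has to stay writable. The pattern, assuming the same two clock names:

    #include <linux/clk.h>
    #include <linux/kernel.h>
    #include <linux/string.h>

    static int get_qspi_clks(struct device *dev, struct clk_bulk_data *clks)
    {
        static const struct clk_bulk_data template[] = {
            { .id = "apb" },
            { .id = "ahb" },
        };

        /* Caller supplies .id; the clk core fills in .clk on success. */
        memcpy(clks, template, sizeof(template));

        return devm_clk_bulk_get_optional(dev, ARRAY_SIZE(template), clks);
    }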
diff --git a/drivers/spi/spi-dw-dma.c b/drivers/spi/spi-dw-dma.c index 65adec7..fe726b9 100644 --- a/drivers/spi/spi-dw-dma.c +++ b/drivers/spi/spi-dw-dma.c
@@ -271,7 +271,7 @@ static int dw_spi_dma_wait(struct dw_spi *dws, unsigned int len, u32 speed) msecs_to_jiffies(ms)); if (ms == 0) { - dev_err(&dws->ctlr->cur_msg->spi->dev, + dev_err(&dws->ctlr->dev, "DMA transaction timed out\n"); return -ETIMEDOUT; }
diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index b361c1b..45390e9 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c
@@ -1009,7 +1009,7 @@ static int fsl_lpspi_probe(struct platform_device *pdev) enable_irq(irq); } - ret = devm_spi_register_controller(&pdev->dev, controller); + ret = spi_register_controller(controller); if (ret < 0) { dev_err_probe(&pdev->dev, ret, "spi_register_controller error\n"); goto free_dma; @@ -1035,6 +1035,7 @@ static void fsl_lpspi_remove(struct platform_device *pdev) struct fsl_lpspi_data *fsl_lpspi = spi_controller_get_devdata(controller); + spi_unregister_controller(controller); fsl_lpspi_dma_exit(controller); pm_runtime_dont_use_autosuspend(fsl_lpspi->dev);
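The fsl-lpspi switch from devm_spi_register_controller() to the plain register/unregister pair is about teardown ordering: a devres-managed unregister only runs after remove() has returned, by which point the DMA resources would already be released while the controller could still be processing transfers. Unregistering explicitly at the top of remove() quiesces the queue first (the spi-rockchip-sfc hunk below makes the same move). Sketched with a hypothetical my_dma_exit():

    #include <linux/platform_device.h>
    #include <linux/spi/spi.h>

    static void my_dma_exit(struct spi_controller *ctlr)
    {
        /* hypothetical: release the DMA channels used by the controller */
    }

    static void my_remove(struct platform_device *pdev)
    {
        struct spi_controller *ctlr = platform_get_drvdata(pdev);

        spi_unregister_controller(ctlr);    /* quiesce the queue first */
        my_dma_exit(ctlr);                  /* only now tear down DMA */
    }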
diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 43ce47f..d5fb0ed 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c
@@ -359,9 +359,9 @@ static int setup_fifo_params(struct spi_device *spi_slv, writel((spi_slv->mode & SPI_LOOP) ? LOOPBACK_ENABLE : 0, se->base + SE_SPI_LOOPBACK); if (cs_changed) writel(chipselect, se->base + SE_SPI_DEMUX_SEL); - if (mode_changed & SE_SPI_CPHA) + if (mode_changed & SPI_CPHA) writel((spi_slv->mode & SPI_CPHA) ? CPHA : 0, se->base + SE_SPI_CPHA); - if (mode_changed & SE_SPI_CPOL) + if (mode_changed & SPI_CPOL) writel((spi_slv->mode & SPI_CPOL) ? CPOL : 0, se->base + SE_SPI_CPOL); if ((mode_changed & SPI_CS_HIGH) || (cs_changed && (spi_slv->mode & SPI_CS_HIGH))) writel((spi_slv->mode & SPI_CS_HIGH) ? BIT(chipselect) : 0, se->base + SE_SPI_DEMUX_OUTPUT_INV); @@ -906,10 +906,13 @@ static irqreturn_t geni_spi_isr(int irq, void *data) struct spi_controller *spi = data; struct spi_geni_master *mas = spi_controller_get_devdata(spi); struct geni_se *se = &mas->se; - u32 m_irq; + u32 m_irq, dma_tx_status, dma_rx_status; m_irq = readl(se->base + SE_GENI_M_IRQ_STATUS); - if (!m_irq) + dma_tx_status = readl_relaxed(se->base + SE_DMA_TX_IRQ_STAT); + dma_rx_status = readl_relaxed(se->base + SE_DMA_RX_IRQ_STAT); + + if (!m_irq && !dma_tx_status && !dma_rx_status) return IRQ_NONE; if (m_irq & (M_CMD_OVERRUN_EN | M_ILLEGAL_CMD_EN | M_CMD_FAILURE_EN | @@ -957,8 +960,6 @@ static irqreturn_t geni_spi_isr(int irq, void *data) } } else if (mas->cur_xfer_mode == GENI_SE_DMA) { const struct spi_transfer *xfer = mas->cur_xfer; - u32 dma_tx_status = readl_relaxed(se->base + SE_DMA_TX_IRQ_STAT); - u32 dma_rx_status = readl_relaxed(se->base + SE_DMA_RX_IRQ_STAT); if (dma_tx_status) writel(dma_tx_status, se->base + SE_DMA_TX_IRQ_CLR);
diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index bce3d14..d8ef8f8 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c
@@ -96,6 +96,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0xa324), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa3a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa823), (unsigned long)&cnl_info }, + { PCI_VDEVICE(INTEL, 0xd323), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xe323), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xe423), (unsigned long)&cnl_info }, { },
diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index a7001b9..57768da 100644 --- a/drivers/spi/spi-meson-spicc.c +++ b/drivers/spi/spi-meson-spicc.c
@@ -1101,8 +1101,6 @@ static void meson_spicc_remove(struct platform_device *pdev) /* Disable SPI */ writel(0, spicc->base + SPICC_CONREG); - - spi_controller_put(spicc->host); } static const struct meson_spicc_data meson_spicc_gx_data = {
diff --git a/drivers/spi/spi-rockchip-sfc.c b/drivers/spi/spi-rockchip-sfc.c index 2990bf8..1749950 100644 --- a/drivers/spi/spi-rockchip-sfc.c +++ b/drivers/spi/spi-rockchip-sfc.c
@@ -711,7 +711,7 @@ static int rockchip_sfc_probe(struct platform_device *pdev) } } - ret = devm_spi_register_controller(dev, host); + ret = spi_register_controller(host); if (ret) goto err_register;
diff --git a/drivers/spi/spi-sn-f-ospi.c b/drivers/spi/spi-sn-f-ospi.c index bfcc140..3c61c79 100644 --- a/drivers/spi/spi-sn-f-ospi.c +++ b/drivers/spi/spi-sn-f-ospi.c
@@ -612,7 +612,7 @@ static int f_ospi_probe(struct platform_device *pdev) u32 num_cs = OSPI_NUM_CS; int ret; - ctlr = spi_alloc_host(dev, sizeof(*ospi)); + ctlr = devm_spi_alloc_host(dev, sizeof(*ospi)); if (!ctlr) return -ENOMEM; @@ -635,43 +635,22 @@ static int f_ospi_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ospi); ospi->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(ospi->base)) { - ret = PTR_ERR(ospi->base); - goto err_put_ctlr; - } + if (IS_ERR(ospi->base)) + return PTR_ERR(ospi->base); ospi->clk = devm_clk_get_enabled(dev, NULL); - if (IS_ERR(ospi->clk)) { - ret = PTR_ERR(ospi->clk); - goto err_put_ctlr; - } + if (IS_ERR(ospi->clk)) + return PTR_ERR(ospi->clk); - mutex_init(&ospi->mlock); + ret = devm_mutex_init(dev, &ospi->mlock); + if (ret) + return ret; ret = f_ospi_init(ospi); if (ret) - goto err_destroy_mutex; + return ret; - ret = devm_spi_register_controller(dev, ctlr); - if (ret) - goto err_destroy_mutex; - - return 0; - -err_destroy_mutex: - mutex_destroy(&ospi->mlock); - -err_put_ctlr: - spi_controller_put(ctlr); - - return ret; -} - -static void f_ospi_remove(struct platform_device *pdev) -{ - struct f_ospi *ospi = platform_get_drvdata(pdev); - - mutex_destroy(&ospi->mlock); + return devm_spi_register_controller(dev, ctlr); } static const struct of_device_id f_ospi_dt_ids[] = { @@ -686,7 +665,6 @@ static struct platform_driver f_ospi_driver = { .of_match_table = f_ospi_dt_ids, }, .probe = f_ospi_probe, - .remove = f_ospi_remove, }; module_platform_driver(f_ospi_driver);
diff --git a/drivers/spi/spi-stm32-ospi.c b/drivers/spi/spi-stm32-ospi.c index c98afe0..38405f8 100644 --- a/drivers/spi/spi-stm32-ospi.c +++ b/drivers/spi/spi-stm32-ospi.c
@@ -928,7 +928,7 @@ static int stm32_ospi_probe(struct platform_device *pdev) dma_cfg.dst_addr = ospi->regs_phys_base + OSPI_DR; ret = stm32_ospi_dma_setup(ospi, &dma_cfg); if (ret) - return ret; + goto err_dma_free; mutex_init(&ospi->lock); @@ -965,19 +965,22 @@ static int stm32_ospi_probe(struct platform_device *pdev) if (ret) { /* Disable ospi */ writel_relaxed(0, ospi->regs_base + OSPI_CR); - goto err_pm_resume; + goto err_reset_control; } pm_runtime_put_autosuspend(ospi->dev); return 0; +err_reset_control: + reset_control_release(ospi->rstc); err_pm_resume: pm_runtime_put_sync_suspend(ospi->dev); err_pm_enable: pm_runtime_force_suspend(ospi->dev); mutex_destroy(&ospi->lock); +err_dma_free: if (ospi->dma_chtx) dma_release_channel(ospi->dma_chtx); if (ospi->dma_chrx) @@ -989,11 +992,8 @@ static int stm32_ospi_probe(struct platform_device *pdev) static void stm32_ospi_remove(struct platform_device *pdev) { struct stm32_ospi *ospi = platform_get_drvdata(pdev); - int ret; - ret = pm_runtime_resume_and_get(ospi->dev); - if (ret < 0) - return; + pm_runtime_resume_and_get(ospi->dev); spi_unregister_controller(ospi->ctrl); /* Disable ospi */
diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index b99de8c..33f211e 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c
@@ -1625,6 +1625,9 @@ static int stm32_spi_prepare_rx_dma_mdma_chaining(struct stm32_spi *spi, return -EINVAL; } + *rx_mdma_desc = _mdma_desc; + *rx_dma_desc = _dma_desc; + return 0; }
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 61f7bde..9b11255 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c
@@ -50,7 +50,6 @@ static void spidev_release(struct device *dev) struct spi_device *spi = to_spi_device(dev); spi_controller_put(spi->controller); - kfree(spi->driver_override); free_percpu(spi->pcpu_statistics); kfree(spi); } @@ -73,10 +72,9 @@ static ssize_t driver_override_store(struct device *dev, struct device_attribute *a, const char *buf, size_t count) { - struct spi_device *spi = to_spi_device(dev); int ret; - ret = driver_set_override(dev, &spi->driver_override, buf, count); + ret = __device_set_driver_override(dev, buf, count); if (ret) return ret; @@ -86,13 +84,8 @@ static ssize_t driver_override_store(struct device *dev, static ssize_t driver_override_show(struct device *dev, struct device_attribute *a, char *buf) { - const struct spi_device *spi = to_spi_device(dev); - ssize_t len; - - device_lock(dev); - len = sysfs_emit(buf, "%s\n", spi->driver_override ? : ""); - device_unlock(dev); - return len; + guard(spinlock)(&dev->driver_override.lock); + return sysfs_emit(buf, "%s\n", dev->driver_override.name ?: ""); } static DEVICE_ATTR_RW(driver_override); @@ -376,10 +369,12 @@ static int spi_match_device(struct device *dev, const struct device_driver *drv) { const struct spi_device *spi = to_spi_device(dev); const struct spi_driver *sdrv = to_spi_driver(drv); + int ret; /* Check override first, and if set, only use the named driver */ - if (spi->driver_override) - return strcmp(spi->driver_override, drv->name) == 0; + ret = device_match_driver_override(dev, drv); + if (ret >= 0) + return ret; /* Attempt an OF style match */ if (of_driver_match_device(dev, drv)) @@ -3049,6 +3044,8 @@ static void spi_controller_release(struct device *dev) struct spi_controller *ctlr; ctlr = container_of(dev, struct spi_controller, dev); + + free_percpu(ctlr->pcpu_statistics); kfree(ctlr); } @@ -3192,6 +3189,12 @@ struct spi_controller *__spi_alloc_controller(struct device *dev, if (!ctlr) return NULL; + ctlr->pcpu_statistics = spi_alloc_pcpu_stats(NULL); + if (!ctlr->pcpu_statistics) { + kfree(ctlr); + return NULL; + } + device_initialize(&ctlr->dev); INIT_LIST_HEAD(&ctlr->queue); spin_lock_init(&ctlr->queue_lock); @@ -3480,17 +3483,8 @@ int spi_register_controller(struct spi_controller *ctlr) dev_info(dev, "controller is unqueued, this is deprecated\n"); } else if (ctlr->transfer_one || ctlr->transfer_one_message) { status = spi_controller_initialize_queue(ctlr); - if (status) { - device_del(&ctlr->dev); - goto free_bus_id; - } - } - /* Add statistics */ - ctlr->pcpu_statistics = spi_alloc_pcpu_stats(dev); - if (!ctlr->pcpu_statistics) { - dev_err(dev, "Error allocating per-cpu statistics\n"); - status = -ENOMEM; - goto destroy_queue; + if (status) + goto del_ctrl; } mutex_lock(&board_lock); @@ -3504,8 +3498,8 @@ int spi_register_controller(struct spi_controller *ctlr) acpi_register_spi_devices(ctlr); return status; -destroy_queue: - spi_destroy_queue(ctlr); +del_ctrl: + device_del(&ctlr->dev); free_bus_id: mutex_lock(&board_lock); idr_remove(&spi_controller_idr, ctlr->bus_num); @@ -3540,8 +3534,19 @@ int devm_spi_register_controller(struct device *dev, if (ret) return ret; - return devm_add_action_or_reset(dev, devm_spi_unregister_controller, ctlr); + /* + * Prevent controller from being freed by spi_unregister_controller() + * if devm_add_action_or_reset() fails for a non-devres allocated + * controller. 
+ */ + spi_controller_get(ctlr); + ret = devm_add_action_or_reset(dev, devm_spi_unregister_controller, ctlr); + + if (ret == 0 || ctlr->devm_allocated) + spi_controller_put(ctlr); + + return ret; } EXPORT_SYMBOL_GPL(devm_spi_register_controller);
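The refcounting dance above relies on a documented property of devm_add_action_or_reset(): if it cannot allocate its devres record, it invokes the action synchronously before returning the error, so the object passed in must survive that call. A minimal sketch of the same discipline, with obj, obj_get(), obj_put() and obj_unregister() as hypothetical kref-style stand-ins for the SPI controller:

static void devm_obj_unregister(void *data)
{
	obj_unregister(data);	/* may drop what would be the final reference */
}

static int devm_obj_register(struct device *dev, struct obj *o)
{
	int ret;

	obj_get(o);	/* pin: the action can run synchronously on failure */
	ret = devm_add_action_or_reset(dev, devm_obj_unregister, o);
	if (ret == 0 || o->devm_allocated)
		obj_put(o);	/* drop the pin once ownership is settled */

	return ret;
}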
diff --git a/drivers/staging/rtl8723bs/core/rtw_ieee80211.c b/drivers/staging/rtl8723bs/core/rtw_ieee80211.c index 6cf217e..3e2b5e6 100644 --- a/drivers/staging/rtl8723bs/core/rtw_ieee80211.c +++ b/drivers/staging/rtl8723bs/core/rtw_ieee80211.c
@@ -186,20 +186,25 @@ u8 *rtw_get_ie_ex(u8 *in_ie, uint in_len, u8 eid, u8 *oui, u8 oui_len, u8 *ie, u cnt = 0; - while (cnt < in_len) { + while (cnt + 2 <= in_len) { + u8 ie_len = in_ie[cnt + 1]; + + if (cnt + 2 + ie_len > in_len) + break; + if (eid == in_ie[cnt] - && (!oui || !memcmp(&in_ie[cnt+2], oui, oui_len))) { + && (!oui || (ie_len >= oui_len && !memcmp(&in_ie[cnt + 2], oui, oui_len)))) { target_ie = &in_ie[cnt]; if (ie) - memcpy(ie, &in_ie[cnt], in_ie[cnt+1]+2); + memcpy(ie, &in_ie[cnt], ie_len + 2); if (ielen) - *ielen = in_ie[cnt+1]+2; + *ielen = ie_len + 2; break; } - cnt += in_ie[cnt+1]+2; /* goto next */ + cnt += ie_len + 2; /* goto next */ } return target_ie;
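The corrected loop is the standard bounds-checked TLV (tag-length-value) walk: first check that the two-byte header fits in the buffer, then that the advertised element length fits, and only then touch the payload. The same pattern in isolation, as a self-contained sketch (find_tlv() is illustrative, not part of the driver):

static const u8 *find_tlv(const u8 *buf, size_t len, u8 id)
{
	size_t pos = 0;

	while (pos + 2 <= len) {		/* room for tag + length bytes */
		u8 elen = buf[pos + 1];

		if (pos + 2 + elen > len)	/* truncated element: stop */
			break;
		if (buf[pos] == id)
			return &buf[pos];
		pos += 2 + elen;		/* step to the next element */
	}

	return NULL;
}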
diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme.c b/drivers/staging/rtl8723bs/core/rtw_mlme.c index 7df6517..1ef48bf6 100644 --- a/drivers/staging/rtl8723bs/core/rtw_mlme.c +++ b/drivers/staging/rtl8723bs/core/rtw_mlme.c
@@ -1988,7 +1988,10 @@ int rtw_restruct_wmm_ie(struct adapter *adapter, u8 *in_ie, u8 *out_ie, uint in_ while (i < in_len) { ielength = initial_out_len; - if (in_ie[i] == 0xDD && in_ie[i + 2] == 0x00 && in_ie[i + 3] == 0x50 && in_ie[i + 4] == 0xF2 && in_ie[i + 5] == 0x02 && i + 5 < in_len) { /* WMM element ID and OUI */ + if (i + 5 < in_len && + in_ie[i] == 0xDD && in_ie[i + 2] == 0x00 && + in_ie[i + 3] == 0x50 && in_ie[i + 4] == 0xF2 && + in_ie[i + 5] == 0x02) { for (j = i; j < i + 9; j++) { out_ie[ielength] = in_ie[j]; ielength++;
diff --git a/drivers/staging/sm750fb/sm750.c b/drivers/staging/sm750fb/sm750.c index dec1f6b..62f6e0c 100644 --- a/drivers/staging/sm750fb/sm750.c +++ b/drivers/staging/sm750fb/sm750.c
@@ -1123,6 +1123,7 @@ static void lynxfb_pci_remove(struct pci_dev *pdev) iounmap(sm750_dev->pvReg); iounmap(sm750_dev->pvMem); + pci_release_region(pdev, 1); kfree(g_settings); }
diff --git a/drivers/staging/sm750fb/sm750_hw.c b/drivers/staging/sm750fb/sm750_hw.c index a29faee..f60b152 100644 --- a/drivers/staging/sm750fb/sm750_hw.c +++ b/drivers/staging/sm750fb/sm750_hw.c
@@ -36,16 +36,11 @@ int hw_sm750_map(struct sm750_dev *sm750_dev, struct pci_dev *pdev) pr_info("mmio phyAddr = %lx\n", sm750_dev->vidreg_start); - /* - * reserve the vidreg space of smi adaptor - * if you do this, you need to add release region code - * in lynxfb_remove, or memory will not be mapped again - * successfully - */ + /* reserve the vidreg space of smi adaptor */ ret = pci_request_region(pdev, 1, "sm750fb"); if (ret) { pr_err("Can not request PCI regions.\n"); - goto exit; + return ret; } /* now map mmio and vidmem */ @@ -54,7 +49,7 @@ int hw_sm750_map(struct sm750_dev *sm750_dev, struct pci_dev *pdev) if (!sm750_dev->pvReg) { pr_err("mmio failed\n"); ret = -EFAULT; - goto exit; + goto err_release_region; } pr_info("mmio virtual addr = %p\n", sm750_dev->pvReg); @@ -79,13 +74,18 @@ int hw_sm750_map(struct sm750_dev *sm750_dev, struct pci_dev *pdev) sm750_dev->pvMem = ioremap_wc(sm750_dev->vidmem_start, sm750_dev->vidmem_size); if (!sm750_dev->pvMem) { - iounmap(sm750_dev->pvReg); pr_err("Map video memory failed\n"); ret = -EFAULT; - goto exit; + goto err_unmap_reg; } pr_info("video memory vaddr = %p\n", sm750_dev->pvMem); -exit: + + return 0; + +err_unmap_reg: + iounmap(sm750_dev->pvReg); +err_release_region: + pci_release_region(pdev, 1); return ret; }
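The rework above replaces a single shared exit label with the conventional kernel unwind ladder, where each label releases exactly what was acquired before the failing step, in reverse order. Reduced to its shape (step_*()/undo_*() are placeholders):

static int setup(void)
{
	int ret;

	ret = step_a();		/* e.g. pci_request_region() */
	if (ret)
		return ret;	/* nothing acquired yet, nothing to undo */

	ret = step_b();		/* e.g. ioremap() */
	if (ret)
		goto err_undo_a;

	ret = step_c();
	if (ret)
		goto err_undo_b;

	return 0;

err_undo_b:
	undo_b();		/* e.g. iounmap() */
err_undo_a:
	undo_a();		/* e.g. pci_release_region() */
	return ret;
}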
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index d668bd1..528883d 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c
@@ -26,6 +26,7 @@ #include <linux/slab.h> #include <linux/types.h> #include <linux/configfs.h> +#include <linux/blk-mq.h> #include <scsi/scsi.h> #include <scsi/scsi_tcq.h> #include <scsi/scsi_host.h> @@ -269,15 +270,27 @@ static int tcm_loop_device_reset(struct scsi_cmnd *sc) return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED; } +static bool tcm_loop_flush_work_iter(struct request *rq, void *data) +{ + struct scsi_cmnd *sc = blk_mq_rq_to_pdu(rq); + struct tcm_loop_cmd *tl_cmd = scsi_cmd_priv(sc); + struct se_cmd *se_cmd = &tl_cmd->tl_se_cmd; + + flush_work(&se_cmd->work); + return true; +} + static int tcm_loop_target_reset(struct scsi_cmnd *sc) { struct tcm_loop_hba *tl_hba; struct tcm_loop_tpg *tl_tpg; + struct Scsi_Host *sh = sc->device->host; + int ret; /* * Locate the tcm_loop_hba_t pointer */ - tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); + tl_hba = *(struct tcm_loop_hba **)shost_priv(sh); if (!tl_hba) { pr_err("Unable to perform device reset without active I_T Nexus\n"); return FAILED; @@ -286,11 +299,38 @@ static int tcm_loop_target_reset(struct scsi_cmnd *sc) * Locate the tl_tpg pointer from TargetID in sc->device->id */ tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; - if (tl_tpg) { - tl_tpg->tl_transport_status = TCM_TRANSPORT_ONLINE; - return SUCCESS; - } - return FAILED; + if (!tl_tpg) + return FAILED; + + /* + * Issue a LUN_RESET to drain all commands that the target core + * knows about. This handles commands not yet marked CMD_T_COMPLETE. + */ + ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun, 0, TMR_LUN_RESET); + if (ret != TMR_FUNCTION_COMPLETE) + return FAILED; + + /* + * Flush any deferred target core completion work that may still be + * queued. Commands that already had CMD_T_COMPLETE set before the TMR + * are skipped by the TMR drain, but their async completion work + * (transport_lun_remove_cmd → percpu_ref_put, release_cmd → scsi_done) + * may still be pending in target_completion_wq. + * + * The SCSI EH will reuse in-flight scsi_cmnd structures for recovery + * commands (e.g. TUR) immediately after this handler returns SUCCESS — + * if deferred work is still pending, the memset in queuecommand would + * zero the se_cmd while the work accesses it, leaking the LUN + * percpu_ref and hanging configfs unlink forever. + * + * Use blk_mq_tagset_busy_iter() to find all started requests and + * flush_work() on each — the same pattern used by mpi3mr, scsi_debug, + * and other SCSI drivers to drain outstanding commands during reset. + */ + blk_mq_tagset_busy_iter(&sh->tag_set, tcm_loop_flush_work_iter, NULL); + + tl_tpg->tl_transport_status = TCM_TRANSPORT_ONLINE; + return SUCCESS; } static const struct scsi_host_template tcm_loop_driver_template = {
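For readers unfamiliar with the iterator: blk_mq_tagset_busy_iter() calls the supplied function once for every started request in the tag set, and the callback's return value controls iteration (true continues, false stops early). A stripped-down sketch of that contract, with rq_to_my_cmd() standing in for whatever per-request lookup a driver uses (tcm_loop goes via scsi_cmd_priv()):

static bool drain_one(struct request *rq, void *priv)
{
	/* rq_to_my_cmd() is hypothetical: map the request to its command */
	flush_work(&rq_to_my_cmd(rq)->work);

	return true;	/* keep iterating over all busy requests */
}

static void drain_all(struct Scsi_Host *sh)
{
	blk_mq_tagset_busy_iter(&sh->tag_set, drain_one, NULL);
}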
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 17608ea..a1c91d4 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c
@@ -108,8 +108,8 @@ static ssize_t target_core_item_dbroot_store(struct config_item *item, const char *page, size_t count) { ssize_t read_bytes; - struct file *fp; ssize_t r = -EINVAL; + struct path path = {}; mutex_lock(&target_devices_lock); if (target_devices) { @@ -131,17 +131,14 @@ static ssize_t target_core_item_dbroot_store(struct config_item *item, db_root_stage[read_bytes - 1] = '\0'; /* validate new db root before accepting it */ - fp = filp_open(db_root_stage, O_RDONLY, 0); - if (IS_ERR(fp)) { + r = kern_path(db_root_stage, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path); + if (r) { pr_err("db_root: cannot open: %s\n", db_root_stage); + if (r == -ENOTDIR) + pr_err("db_root: not a directory: %s\n", db_root_stage); goto unlock; } - if (!S_ISDIR(file_inode(fp)->i_mode)) { - filp_close(fp, NULL); - pr_err("db_root: not a directory: %s\n", db_root_stage); - goto unlock; - } - filp_close(fp, NULL); + path_put(&path); strscpy(db_root, db_root_stage); pr_debug("Target_Core_ConfigFS: db_root set to %s\n", db_root);
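The switch from filp_open() to kern_path() lets the VFS itself enforce the directory check: with LOOKUP_DIRECTORY set, the lookup fails with -ENOTDIR when the final component is not a directory, so no struct file has to be created just to inspect an inode. The usage shape, for reference:

static int validate_dir(const char *name)
{
	struct path path;
	int err;

	err = kern_path(name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
	if (err)
		return err;	/* -ENOENT, -ENOTDIR, ... */

	path_put(&path);	/* drop the vfsmount/dentry references taken */
	return 0;
}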
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 3ae1f71..3d593af 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c
@@ -276,7 +276,7 @@ fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, ssize_t len = 0; int ret = 0, i; - aio_cmd = kmalloc_flex(*aio_cmd, bvecs, sgl_nents); + aio_cmd = kzalloc_flex(*aio_cmd, bvecs, sgl_nents); if (!aio_cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 6c5b9e3..e9ea9f8 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c
@@ -23,29 +23,11 @@ struct tee_shm_dma_mem { struct page *page; }; -static void shm_put_kernel_pages(struct page **pages, size_t page_count) -{ - size_t n; - - for (n = 0; n < page_count; n++) - put_page(pages[n]); -} - -static void shm_get_kernel_pages(struct page **pages, size_t page_count) -{ - size_t n; - - for (n = 0; n < page_count; n++) - get_page(pages[n]); -} - static void release_registered_pages(struct tee_shm *shm) { if (shm->pages) { if (shm->flags & TEE_SHM_USER_MAPPED) unpin_user_pages(shm->pages, shm->num_pages); - else - shm_put_kernel_pages(shm->pages, shm->num_pages); kfree(shm->pages); } @@ -477,13 +459,6 @@ register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags, goto err_put_shm_pages; } - /* - * iov_iter_extract_kvec_pages does not get reference on the pages, - * get a reference on them. - */ - if (iov_iter_is_kvec(iter)) - shm_get_kernel_pages(shm->pages, num_pages); - shm->offset = off; shm->size = len; shm->num_pages = num_pages; @@ -499,8 +474,6 @@ register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags, err_put_shm_pages: if (!iov_iter_is_kvec(iter)) unpin_user_pages(shm->pages, shm->num_pages); - else - shm_put_kernel_pages(shm->pages, shm->num_pages); err_free_shm_pages: kfree(shm->pages); err_free_shm:
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c index 49ff3ba..91f2916 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c
@@ -176,15 +176,21 @@ static inline void write_soc_slider(struct proc_thermal_device *proc_priv, u64 v static void set_soc_power_profile(struct proc_thermal_device *proc_priv, int slider) { + u8 offset; u64 val; val = read_soc_slider(proc_priv); val &= ~SLIDER_MASK; val |= FIELD_PREP(SLIDER_MASK, slider) | BIT(SLIDER_ENABLE_BIT); + if (slider == SOC_SLIDER_VALUE_MINIMUM || slider == SOC_SLIDER_VALUE_MAXIMUM) + offset = 0; + else + offset = slider_offset; + /* Set the slider offset from module params */ val &= ~SLIDER_OFFSET_MASK; - val |= FIELD_PREP(SLIDER_OFFSET_MASK, slider_offset); + val |= FIELD_PREP(SLIDER_OFFSET_MASK, offset); write_soc_slider(proc_priv, val); }
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index b7d706e..d1beee9 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c
@@ -41,6 +41,8 @@ static struct thermal_governor *def_governor; static bool thermal_pm_suspended; +static struct workqueue_struct *thermal_wq __ro_after_init; + /* * Governor section: set of functions to handle thermal governors * @@ -313,7 +315,7 @@ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz, if (delay > HZ) delay = round_jiffies_relative(delay); - mod_delayed_work(system_freezable_power_efficient_wq, &tz->poll_queue, delay); + mod_delayed_work(thermal_wq, &tz->poll_queue, delay); } static void thermal_zone_recheck(struct thermal_zone_device *tz, int error) @@ -1640,6 +1642,7 @@ thermal_zone_device_register_with_trips(const char *type, device_del(&tz->device); release_device: put_device(&tz->device); + wait_for_completion(&tz->removal); remove_id: ida_free(&thermal_tz_ida, id); free_tzp: @@ -1785,6 +1788,10 @@ static void thermal_zone_device_resume(struct work_struct *work) guard(thermal_zone)(tz); + /* If the thermal zone is going away, there's nothing to do. */ + if (tz->state & TZ_STATE_FLAG_EXIT) + return; + tz->state &= ~(TZ_STATE_FLAG_SUSPENDED | TZ_STATE_FLAG_RESUMING); thermal_debug_tz_resume(tz); @@ -1811,6 +1818,9 @@ static void thermal_zone_pm_prepare(struct thermal_zone_device *tz) } tz->state |= TZ_STATE_FLAG_SUSPENDED; + + /* Prevent new work from getting to the workqueue subsequently. */ + cancel_delayed_work(&tz->poll_queue); } static void thermal_pm_notify_prepare(void) @@ -1829,8 +1839,6 @@ static void thermal_zone_pm_complete(struct thermal_zone_device *tz) { guard(thermal_zone)(tz); - cancel_delayed_work(&tz->poll_queue); - reinit_completion(&tz->resume); tz->state |= TZ_STATE_FLAG_RESUMING; @@ -1840,7 +1848,7 @@ static void thermal_zone_pm_complete(struct thermal_zone_device *tz) */ INIT_DELAYED_WORK(&tz->poll_queue, thermal_zone_device_resume); /* Queue up the work without a delay. */ - mod_delayed_work(system_freezable_power_efficient_wq, &tz->poll_queue, 0); + mod_delayed_work(thermal_wq, &tz->poll_queue, 0); } static void thermal_pm_notify_complete(void) @@ -1863,6 +1871,11 @@ static int thermal_pm_notify(struct notifier_block *nb, case PM_RESTORE_PREPARE: case PM_SUSPEND_PREPARE: thermal_pm_notify_prepare(); + /* + * Allow any leftover thermal work items already on the + * workqueue to complete so they don't get in the way later. + */ + flush_workqueue(thermal_wq); break; case PM_POST_HIBERNATION: case PM_POST_RESTORE: @@ -1895,9 +1908,16 @@ static int __init thermal_init(void) if (result) goto error; + thermal_wq = alloc_workqueue("thermal_events", + WQ_FREEZABLE | WQ_POWER_EFFICIENT | WQ_PERCPU, 0); + if (!thermal_wq) { + result = -ENOMEM; + goto unregister_netlink; + } + result = thermal_register_governors(); if (result) - goto unregister_netlink; + goto destroy_workqueue; thermal_class = kzalloc_obj(*thermal_class); if (!thermal_class) { @@ -1924,6 +1944,8 @@ static int __init thermal_init(void) unregister_governors: thermal_unregister_governors(); +destroy_workqueue: + destroy_workqueue(thermal_wq); unregister_netlink: thermal_netlink_exit(); error:
diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index ccce020..2bb2e79 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c
@@ -1020,7 +1020,7 @@ static bool nhi_wake_supported(struct pci_dev *pdev) * If power rails are sustainable for wakeup from S4 this * property is set by the BIOS. */ - if (device_property_read_u8(&pdev->dev, "WAKE_SUPPORTED", &val)) + if (!device_property_read_u8(&pdev->dev, "WAKE_SUPPORTED", &val)) return !!val; return true;
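The one-character fix inverts a return-value test: device_property_read_u8() follows the usual kernel convention of returning 0 on success and a negative errno on failure, so the output parameter is only valid when the call returns zero. The general shape, with "SOME_PROPERTY" as a made-up name:

static bool feature_supported(struct device *dev)
{
	u8 val;

	/* Returns 0 on success; only then has 'val' been written. */
	if (!device_property_read_u8(dev, "SOME_PROPERTY", &val))
		return !!val;	/* property present: honor its value */

	return true;		/* property absent: default to supported */
}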
diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 8caecfc..77fe058 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h
@@ -175,7 +175,9 @@ static unsigned int __maybe_unused serial_icr_read(struct uart_8250_port *up, return value; } +void serial8250_clear_fifos(struct uart_8250_port *p); void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p); +void serial8250_fifo_wait_for_lsr_thre(struct uart_8250_port *up, unsigned int count); void serial8250_rpm_get(struct uart_8250_port *p); void serial8250_rpm_put(struct uart_8250_port *p); @@ -400,6 +402,26 @@ static inline bool serial8250_tx_dma_running(struct uart_8250_port *p) return dma && dma->tx_running; } + +static inline void serial8250_tx_dma_pause(struct uart_8250_port *p) +{ + struct uart_8250_dma *dma = p->dma; + + if (!dma->tx_running) + return; + + dmaengine_pause(dma->txchan); +} + +static inline void serial8250_tx_dma_resume(struct uart_8250_port *p) +{ + struct uart_8250_dma *dma = p->dma; + + if (!dma->tx_running) + return; + + dmaengine_resume(dma->txchan); +} #else static inline int serial8250_tx_dma(struct uart_8250_port *p) { @@ -421,6 +443,9 @@ static inline bool serial8250_tx_dma_running(struct uart_8250_port *p) { return false; } + +static inline void serial8250_tx_dma_pause(struct uart_8250_port *p) { } +static inline void serial8250_tx_dma_resume(struct uart_8250_port *p) { } #endif static inline int ns16550a_goto_highspeed(struct uart_8250_port *up)
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c index bdd26c9..3b6452e 100644 --- a/drivers/tty/serial/8250/8250_dma.c +++ b/drivers/tty/serial/8250/8250_dma.c
@@ -162,7 +162,22 @@ void serial8250_tx_dma_flush(struct uart_8250_port *p) */ dma->tx_size = 0; + /* + * We can't use `dmaengine_terminate_sync` because `uart_flush_buffer` is + * holding the uart port spinlock. + */ dmaengine_terminate_async(dma->txchan); + + /* + * The callback might or might not run. If it doesn't run, we need to ensure + * that `tx_running` is cleared so that we can schedule new transactions. + * If it does run, then the zombie callback will clear `tx_running` again + * and perform a no-op since `tx_size` was cleared above. + * + * In either case, we ASSUME the DMA transaction will terminate before we + * issue a new `serial8250_tx_dma`. + */ + dma->tx_running = 0; } int serial8250_rx_dma(struct uart_8250_port *p)
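The distinction the new comment draws is part of the dmaengine API contract: dmaengine_terminate_sync() may sleep waiting for in-flight callbacks, so it is forbidden under a spinlock, while dmaengine_terminate_async() merely requests termination. Code that terminates from atomic context is expected to call dmaengine_synchronize() later, from a sleepable context; a sketch of the split:

static void stop_dma_split(struct dma_chan *chan, spinlock_t *lock)
{
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	dmaengine_terminate_async(chan);	/* request only, never sleeps */
	spin_unlock_irqrestore(lock, flags);

	/* Later, out of atomic context: wait for callbacks to retire. */
	dmaengine_synchronize(chan);
}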
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index db73b2a..94beadb 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c
@@ -9,10 +9,14 @@ * LCR is written whilst busy. If it is, then a busy detect interrupt is * raised, the LCR needs to be rewritten and the uart status register read. */ +#include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/cleanup.h> #include <linux/clk.h> #include <linux/delay.h> #include <linux/device.h> #include <linux/io.h> +#include <linux/lockdep.h> #include <linux/mod_devicetable.h> #include <linux/module.h> #include <linux/notifier.h> @@ -40,8 +44,12 @@ #define RZN1_UART_RDMACR 0x110 /* DMA Control Register Receive Mode */ /* DesignWare specific register fields */ +#define DW_UART_IIR_IID GENMASK(3, 0) + #define DW_UART_MCR_SIRE BIT(6) +#define DW_UART_USR_BUSY BIT(0) + /* Renesas specific register fields */ #define RZN1_UART_xDMACR_DMA_EN BIT(0) #define RZN1_UART_xDMACR_1_WORD_BURST (0 << 1) @@ -56,6 +64,13 @@ #define DW_UART_QUIRK_IS_DMA_FC BIT(3) #define DW_UART_QUIRK_APMC0D08 BIT(4) #define DW_UART_QUIRK_CPR_VALUE BIT(5) +#define DW_UART_QUIRK_IER_KICK BIT(6) + +/* + * Number of consecutive IIR_NO_INT interrupts required to trigger interrupt + * storm prevention code. + */ +#define DW_UART_QUIRK_IER_KICK_THRES 4 struct dw8250_platform_data { u8 usr_reg; @@ -77,6 +92,9 @@ struct dw8250_data { unsigned int skip_autocfg:1; unsigned int uart_16550_compatible:1; + unsigned int in_idle:1; + + u8 no_int_count; }; static inline struct dw8250_data *to_dw8250_data(struct dw8250_port_data *data) @@ -107,78 +125,167 @@ static inline u32 dw8250_modify_msr(struct uart_port *p, unsigned int offset, u3 return value; } -/* - * This function is being called as part of the uart_port::serial_out() - * routine. Hence, it must not call serial_port_out() or serial_out() - * against the modified registers here, i.e. LCR. - */ -static void dw8250_force_idle(struct uart_port *p) +static void dw8250_idle_exit(struct uart_port *p) { + struct dw8250_data *d = to_dw8250_data(p->private_data); struct uart_8250_port *up = up_to_u8250p(p); - unsigned int lsr; - /* - * The following call currently performs serial_out() - * against the FCR register. Because it differs to LCR - * there will be no infinite loop, but if it ever gets - * modified, we might need a new custom version of it - * that avoids infinite recursion. - */ - serial8250_clear_and_reinit_fifos(up); + if (d->uart_16550_compatible) + return; - /* - * With PSLVERR_RESP_EN parameter set to 1, the device generates an - * error response when an attempt to read an empty RBR with FIFO - * enabled. - */ - if (up->fcr & UART_FCR_ENABLE_FIFO) { - lsr = serial_port_in(p, UART_LSR); - if (!(lsr & UART_LSR_DR)) - return; + if (up->capabilities & UART_CAP_FIFO) + serial_port_out(p, UART_FCR, up->fcr); + serial_port_out(p, UART_MCR, up->mcr); + serial_port_out(p, UART_IER, up->ier); + + /* DMA Rx is restarted by IRQ handler as needed. */ + if (up->dma) + serial8250_tx_dma_resume(up); + + d->in_idle = 0; +} + +/* + * Ensure BUSY is not asserted. If DW UART is configured with + * !uart_16550_compatible, the writes to LCR, DLL, and DLH fail while + * BUSY is asserted. + * + * Context: port's lock must be held + */ +static int dw8250_idle_enter(struct uart_port *p) +{ + struct dw8250_data *d = to_dw8250_data(p->private_data); + unsigned int usr_reg = d->pdata ? 
d->pdata->usr_reg : DW_UART_USR; + struct uart_8250_port *up = up_to_u8250p(p); + int retries; + u32 lsr; + + lockdep_assert_held_once(&p->lock); + + if (d->uart_16550_compatible) + return 0; + + d->in_idle = 1; + + /* Prevent triggering interrupt from RBR filling */ + serial_port_out(p, UART_IER, 0); + + if (up->dma) { + serial8250_rx_dma_flush(up); + if (serial8250_tx_dma_running(up)) + serial8250_tx_dma_pause(up); } - serial_port_in(p, UART_RX); + /* + * Wait until Tx becomes empty + one extra frame time to ensure all bits + * have been sent on the wire. + * + * FIXME: frame_time delay is too long with very low baudrates. + */ + serial8250_fifo_wait_for_lsr_thre(up, p->fifosize); + ndelay(p->frame_time); + + serial_port_out(p, UART_MCR, up->mcr | UART_MCR_LOOP); + + retries = 4; /* Arbitrary limit, 2 was always enough in tests */ + do { + serial8250_clear_fifos(up); + if (!(serial_port_in(p, usr_reg) & DW_UART_USR_BUSY)) + break; + /* FIXME: frame_time delay is too long with very low baudrates. */ + ndelay(p->frame_time); + } while (--retries); + + lsr = serial_lsr_in(up); + if (lsr & UART_LSR_DR) { + serial_port_in(p, UART_RX); + up->lsr_saved_flags = 0; + } + + /* Now guaranteed to have BUSY deasserted? Just sanity check */ + if (serial_port_in(p, usr_reg) & DW_UART_USR_BUSY) { + dw8250_idle_exit(p); + return -EBUSY; + } + + return 0; +} + +static void dw8250_set_divisor(struct uart_port *p, unsigned int baud, + unsigned int quot, unsigned int quot_frac) +{ + struct uart_8250_port *up = up_to_u8250p(p); + int ret; + + ret = dw8250_idle_enter(p); + if (ret < 0) + return; + + serial_port_out(p, UART_LCR, up->lcr | UART_LCR_DLAB); + if (!(serial_port_in(p, UART_LCR) & UART_LCR_DLAB)) + goto idle_failed; + + serial_dl_write(up, quot); + serial_port_out(p, UART_LCR, up->lcr); + +idle_failed: + dw8250_idle_exit(p); } /* * This function is being called as part of the uart_port::serial_out() - * routine. Hence, it must not call serial_port_out() or serial_out() - * against the modified registers here, i.e. LCR. + * routine. Hence, special care must be taken when serial_port_out() or + * serial_out() against the modified registers here, i.e. LCR (d->in_idle is + * used to break recursion loop). */ static void dw8250_check_lcr(struct uart_port *p, unsigned int offset, u32 value) { struct dw8250_data *d = to_dw8250_data(p->private_data); - void __iomem *addr = p->membase + (offset << p->regshift); - int tries = 1000; + u32 lcr; + int ret; if (offset != UART_LCR || d->uart_16550_compatible) return; + lcr = serial_port_in(p, UART_LCR); + /* Make sure LCR write wasn't ignored */ - while (tries--) { - u32 lcr = serial_port_in(p, offset); + if ((value & ~UART_LCR_SPAR) == (lcr & ~UART_LCR_SPAR)) + return; - if ((value & ~UART_LCR_SPAR) == (lcr & ~UART_LCR_SPAR)) - return; + if (d->in_idle) + goto write_err; - dw8250_force_idle(p); + ret = dw8250_idle_enter(p); + if (ret < 0) + goto write_err; -#ifdef CONFIG_64BIT - if (p->type == PORT_OCTEON) - __raw_writeq(value & 0xff, addr); - else -#endif - if (p->iotype == UPIO_MEM32) - writel(value, addr); - else if (p->iotype == UPIO_MEM32BE) - iowrite32be(value, addr); - else - writeb(value, addr); - } + serial_port_out(p, UART_LCR, value); + dw8250_idle_exit(p); + return; + +write_err: /* * FIXME: this deadlocks if port->lock is already held * dev_err(p->dev, "Couldn't set LCR to %d\n", value); */ + return; /* Silences "label at the end of compound statement" */ +} + +/* + * With BUSY, LCR writes can be very expensive (IRQ + complex retry logic). 
+ * If the write does not change the value of the LCR register, skip it entirely. + */ +static bool dw8250_can_skip_reg_write(struct uart_port *p, unsigned int offset, u32 value) +{ + struct dw8250_data *d = to_dw8250_data(p->private_data); + u32 lcr; + + if (offset != UART_LCR || d->uart_16550_compatible) + return false; + + lcr = serial_port_in(p, offset); + return lcr == value; } /* Returns once the transmitter is empty or we run out of retries */ @@ -207,12 +314,18 @@ static void dw8250_tx_wait_empty(struct uart_port *p) static void dw8250_serial_out(struct uart_port *p, unsigned int offset, u32 value) { + if (dw8250_can_skip_reg_write(p, offset, value)) + return; + writeb(value, p->membase + (offset << p->regshift)); dw8250_check_lcr(p, offset, value); } static void dw8250_serial_out38x(struct uart_port *p, unsigned int offset, u32 value) { + if (dw8250_can_skip_reg_write(p, offset, value)) + return; + /* Allow the TX to drain before we reconfigure */ if (offset == UART_LCR) dw8250_tx_wait_empty(p); @@ -237,6 +350,9 @@ static u32 dw8250_serial_inq(struct uart_port *p, unsigned int offset) static void dw8250_serial_outq(struct uart_port *p, unsigned int offset, u32 value) { + if (dw8250_can_skip_reg_write(p, offset, value)) + return; + value &= 0xff; __raw_writeq(value, p->membase + (offset << p->regshift)); /* Read back to ensure register write ordering. */ @@ -248,6 +364,9 @@ static void dw8250_serial_outq(struct uart_port *p, unsigned int offset, u32 val static void dw8250_serial_out32(struct uart_port *p, unsigned int offset, u32 value) { + if (dw8250_can_skip_reg_write(p, offset, value)) + return; + writel(value, p->membase + (offset << p->regshift)); dw8250_check_lcr(p, offset, value); } @@ -261,6 +380,9 @@ static u32 dw8250_serial_in32(struct uart_port *p, unsigned int offset) static void dw8250_serial_out32be(struct uart_port *p, unsigned int offset, u32 value) { + if (dw8250_can_skip_reg_write(p, offset, value)) + return; + iowrite32be(value, p->membase + (offset << p->regshift)); dw8250_check_lcr(p, offset, value); } @@ -272,6 +394,29 @@ static u32 dw8250_serial_in32be(struct uart_port *p, unsigned int offset) return dw8250_modify_msr(p, offset, value); } +/* + * INTC10EE UART can IRQ storm while reporting IIR_NO_INT. Inducing IIR value + * change has been observed to break the storm. + * + * If Tx is empty (THRE asserted), we use here IER_THRI to cause IIR_NO_INT -> + * IIR_THRI transition. 
+ */ +static void dw8250_quirk_ier_kick(struct uart_port *p) +{ + struct uart_8250_port *up = up_to_u8250p(p); + u32 lsr; + + if (up->ier & UART_IER_THRI) + return; + + lsr = serial_lsr_in(up); + if (!(lsr & UART_LSR_THRE)) + return; + + serial_port_out(p, UART_IER, up->ier | UART_IER_THRI); + serial_port_in(p, UART_LCR); /* safe, no side-effects */ + serial_port_out(p, UART_IER, up->ier); +} static int dw8250_handle_irq(struct uart_port *p) { @@ -281,7 +426,31 @@ static int dw8250_handle_irq(struct uart_port *p) bool rx_timeout = (iir & 0x3f) == UART_IIR_RX_TIMEOUT; unsigned int quirks = d->pdata->quirks; unsigned int status; - unsigned long flags; + + guard(uart_port_lock_irqsave)(p); + + switch (FIELD_GET(DW_UART_IIR_IID, iir)) { + case UART_IIR_NO_INT: + if (d->uart_16550_compatible || up->dma) + return 0; + + if (quirks & DW_UART_QUIRK_IER_KICK && + d->no_int_count == (DW_UART_QUIRK_IER_KICK_THRES - 1)) + dw8250_quirk_ier_kick(p); + d->no_int_count = (d->no_int_count + 1) % DW_UART_QUIRK_IER_KICK_THRES; + + return 0; + + case UART_IIR_BUSY: + /* Clear the USR */ + serial_port_in(p, d->pdata->usr_reg); + + d->no_int_count = 0; + + return 1; + } + + d->no_int_count = 0; /* * There are ways to get Designware-based UARTs into a state where @@ -294,20 +463,15 @@ static int dw8250_handle_irq(struct uart_port *p) * so we limit the workaround only to non-DMA mode. */ if (!up->dma && rx_timeout) { - uart_port_lock_irqsave(p, &flags); status = serial_lsr_in(up); if (!(status & (UART_LSR_DR | UART_LSR_BI))) serial_port_in(p, UART_RX); - - uart_port_unlock_irqrestore(p, flags); } /* Manually stop the Rx DMA transfer when acting as flow controller */ if (quirks & DW_UART_QUIRK_IS_DMA_FC && up->dma && up->dma->rx_running && rx_timeout) { - uart_port_lock_irqsave(p, &flags); status = serial_lsr_in(up); - uart_port_unlock_irqrestore(p, flags); if (status & (UART_LSR_DR | UART_LSR_BI)) { dw8250_writel_ext(p, RZN1_UART_RDMACR, 0); @@ -315,17 +479,9 @@ static int dw8250_handle_irq(struct uart_port *p) } } - if (serial8250_handle_irq(p, iir)) - return 1; + serial8250_handle_irq_locked(p, iir); - if ((iir & UART_IIR_BUSY) == UART_IIR_BUSY) { - /* Clear the USR */ - serial_port_in(p, d->pdata->usr_reg); - - return 1; - } - - return 0; + return 1; } static void dw8250_clk_work_cb(struct work_struct *work) @@ -527,6 +683,14 @@ static void dw8250_reset_control_assert(void *data) reset_control_assert(data); } +static void dw8250_shutdown(struct uart_port *port) +{ + struct dw8250_data *d = to_dw8250_data(port->private_data); + + serial8250_do_shutdown(port); + d->no_int_count = 0; +} + static int dw8250_probe(struct platform_device *pdev) { struct uart_8250_port uart = {}, *up = &uart; @@ -545,8 +709,10 @@ static int dw8250_probe(struct platform_device *pdev) p->type = PORT_8250; p->flags = UPF_FIXED_PORT; p->dev = dev; + p->set_ldisc = dw8250_set_ldisc; p->set_termios = dw8250_set_termios; + p->set_divisor = dw8250_set_divisor; data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) @@ -654,10 +820,12 @@ static int dw8250_probe(struct platform_device *pdev) dw8250_quirks(p, data); /* If the Busy Functionality is not implemented, don't handle it */ - if (data->uart_16550_compatible) + if (data->uart_16550_compatible) { p->handle_irq = NULL; - else if (data->pdata) + } else if (data->pdata) { p->handle_irq = dw8250_handle_irq; + p->shutdown = dw8250_shutdown; + } dw8250_setup_dma_filter(p, data); @@ -789,6 +957,11 @@ static const struct dw8250_platform_data dw8250_skip_set_rate_data = { .quirks = 
DW_UART_QUIRK_SKIP_SET_RATE, }; +static const struct dw8250_platform_data dw8250_intc10ee = { + .usr_reg = DW_UART_USR, + .quirks = DW_UART_QUIRK_IER_KICK, +}; + static const struct of_device_id dw8250_of_match[] = { { .compatible = "snps,dw-apb-uart", .data = &dw8250_dw_apb }, { .compatible = "cavium,octeon-3860-uart", .data = &dw8250_octeon_3860_data }, @@ -818,7 +991,7 @@ static const struct acpi_device_id dw8250_acpi_match[] = { { "INT33C5", (kernel_ulong_t)&dw8250_dw_apb }, { "INT3434", (kernel_ulong_t)&dw8250_dw_apb }, { "INT3435", (kernel_ulong_t)&dw8250_dw_apb }, - { "INTC10EE", (kernel_ulong_t)&dw8250_dw_apb }, + { "INTC10EE", (kernel_ulong_t)&dw8250_intc10ee }, { }, }; MODULE_DEVICE_TABLE(acpi, dw8250_acpi_match); @@ -836,6 +1009,7 @@ static struct platform_driver dw8250_platform_driver = { module_platform_driver(dw8250_platform_driver); +MODULE_IMPORT_NS("SERIAL_8250"); MODULE_AUTHOR("Jamie Iles"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Synopsys DesignWare 8250 serial port driver");
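Several hunks above swap explicit lock/unlock pairs for guard() from <linux/cleanup.h>. A guard acquires the named lock class for the remainder of the enclosing scope and releases it on every exit path, which is what allows dw8250_handle_irq() to return straight out of the switch without an unlock. The pattern in isolation (can_proceed() is a placeholder predicate):

static int do_port_work(struct uart_port *p)
{
	guard(uart_port_lock_irqsave)(p);	/* held until function exit */

	if (!can_proceed(p))
		return -EAGAIN;			/* lock dropped automatically */

	/* ... work under the port lock ... */
	return 0;
}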
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index aa1ab4d..6cfd1b2 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c
@@ -137,6 +137,8 @@ struct serial_private { }; #define PCI_DEVICE_ID_HPE_PCI_SERIAL 0x37e +#define PCIE_VENDOR_ID_ASIX 0x125B +#define PCIE_DEVICE_ID_AX99100 0x9100 static const struct pci_device_id pci_use_msi[] = { { PCI_DEVICE_SUB(PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9900, @@ -149,6 +151,8 @@ static const struct pci_device_id pci_use_msi[] = { 0xA000, 0x1000) }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_HP_3PAR, PCI_DEVICE_ID_HPE_PCI_SERIAL, PCI_ANY_ID, PCI_ANY_ID) }, + { PCI_DEVICE_SUB(PCIE_VENDOR_ID_ASIX, PCIE_DEVICE_ID_AX99100, + 0xA000, 0x1000) }, { } }; @@ -920,6 +924,7 @@ static int pci_netmos_init(struct pci_dev *dev) case PCI_DEVICE_ID_NETMOS_9912: case PCI_DEVICE_ID_NETMOS_9922: case PCI_DEVICE_ID_NETMOS_9900: + case PCIE_DEVICE_ID_AX99100: num_serial = pci_netmos_9900_numports(dev); break; @@ -2544,6 +2549,14 @@ static struct pci_serial_quirk pci_serial_quirks[] = { .init = pci_netmos_init, .setup = pci_netmos_9900_setup, }, + { + .vendor = PCIE_VENDOR_ID_ASIX, + .device = PCI_ANY_ID, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .init = pci_netmos_init, + .setup = pci_netmos_9900_setup, + }, /* * EndRun Technologies */ @@ -6065,6 +6078,10 @@ static const struct pci_device_id serial_pci_tbl[] = { 0xA000, 0x3002, 0, 0, pbn_NETMOS9900_2s_115200 }, + { PCIE_VENDOR_ID_ASIX, PCIE_DEVICE_ID_AX99100, + 0xA000, 0x1000, + 0, 0, pbn_b0_1_115200 }, + /* * Best Connectivity and Rosewill PCI Multi I/O cards */
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index cc94af2..328711b 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c
@@ -18,6 +18,7 @@ #include <linux/irq.h> #include <linux/console.h> #include <linux/gpio/consumer.h> +#include <linux/lockdep.h> #include <linux/sysrq.h> #include <linux/delay.h> #include <linux/platform_device.h> @@ -488,7 +489,7 @@ serial_port_out_sync(struct uart_port *p, int offset, int value) /* * FIFO support. */ -static void serial8250_clear_fifos(struct uart_8250_port *p) +void serial8250_clear_fifos(struct uart_8250_port *p) { if (p->capabilities & UART_CAP_FIFO) { serial_out(p, UART_FCR, UART_FCR_ENABLE_FIFO); @@ -497,6 +498,7 @@ static void serial8250_clear_fifos(struct uart_8250_port *p) serial_out(p, UART_FCR, 0); } } +EXPORT_SYMBOL_NS_GPL(serial8250_clear_fifos, "SERIAL_8250"); static enum hrtimer_restart serial8250_em485_handle_start_tx(struct hrtimer *t); static enum hrtimer_restart serial8250_em485_handle_stop_tx(struct hrtimer *t); @@ -1782,20 +1784,16 @@ static bool handle_rx_dma(struct uart_8250_port *up, unsigned int iir) } /* - * This handles the interrupt from one port. + * Context: port's lock must be held by the caller. */ -int serial8250_handle_irq(struct uart_port *port, unsigned int iir) +void serial8250_handle_irq_locked(struct uart_port *port, unsigned int iir) { struct uart_8250_port *up = up_to_u8250p(port); struct tty_port *tport = &port->state->port; bool skip_rx = false; - unsigned long flags; u16 status; - if (iir & UART_IIR_NO_INT) - return 0; - - uart_port_lock_irqsave(port, &flags); + lockdep_assert_held_once(&port->lock); status = serial_lsr_in(up); @@ -1828,8 +1826,19 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) else if (!up->dma->tx_running) __stop_tx(up); } +} +EXPORT_SYMBOL_NS_GPL(serial8250_handle_irq_locked, "SERIAL_8250"); - uart_unlock_and_check_sysrq_irqrestore(port, flags); +/* + * This handles the interrupt from one port. + */ +int serial8250_handle_irq(struct uart_port *port, unsigned int iir) +{ + if (iir & UART_IIR_NO_INT) + return 0; + + guard(uart_port_lock_irqsave)(port); + serial8250_handle_irq_locked(port, iir); return 1; } @@ -2147,8 +2156,7 @@ static void serial8250_THRE_test(struct uart_port *port) if (up->port.flags & UPF_NO_THRE_TEST) return; - if (port->irqflags & IRQF_SHARED) - disable_irq_nosync(port->irq); + disable_irq(port->irq); /* * Test for UARTs that do not reassert THRE when the transmitter is idle and the interrupt @@ -2170,8 +2178,7 @@ static void serial8250_THRE_test(struct uart_port *port) serial_port_out(port, UART_IER, 0); } - if (port->irqflags & IRQF_SHARED) - enable_irq(port->irq); + enable_irq(port->irq); /* * If the interrupt is not reasserted, or we otherwise don't trust the iir, setup a timer to @@ -2350,6 +2357,7 @@ static int serial8250_startup(struct uart_port *port) void serial8250_do_shutdown(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); + u32 lcr; serial8250_rpm_get(up); /* @@ -2376,13 +2384,13 @@ void serial8250_do_shutdown(struct uart_port *port) port->mctrl &= ~TIOCM_OUT2; serial8250_set_mctrl(port, port->mctrl); + + /* Disable break condition */ + lcr = serial_port_in(port, UART_LCR); + lcr &= ~UART_LCR_SBC; + serial_port_out(port, UART_LCR, lcr); } - /* - * Disable break condition and FIFOs - */ - serial_port_out(port, UART_LCR, - serial_port_in(port, UART_LCR) & ~UART_LCR_SBC); serial8250_clear_fifos(up); rsa_disable(up); @@ -2392,6 +2400,12 @@ void serial8250_do_shutdown(struct uart_port *port) * the IRQ chain. */ serial_port_in(port, UART_RX); + /* + * LCR writes on DW UART can trigger late (unmaskable) IRQs. 
+ * Handle them before releasing the handler. + */ + synchronize_irq(port->irq); + serial8250_rpm_put(up); up->ops->release_irq(up); @@ -3185,6 +3199,17 @@ void serial8250_set_defaults(struct uart_8250_port *up) } EXPORT_SYMBOL_GPL(serial8250_set_defaults); +void serial8250_fifo_wait_for_lsr_thre(struct uart_8250_port *up, unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; i++) { + if (wait_for_lsr(up, UART_LSR_THRE)) + return; + } +} +EXPORT_SYMBOL_NS_GPL(serial8250_fifo_wait_for_lsr_thre, "SERIAL_8250"); + #ifdef CONFIG_SERIAL_8250_CONSOLE static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) @@ -3226,16 +3251,6 @@ static void serial8250_console_restore(struct uart_8250_port *up) serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); } -static void fifo_wait_for_lsr(struct uart_8250_port *up, unsigned int count) -{ - unsigned int i; - - for (i = 0; i < count; i++) { - if (wait_for_lsr(up, UART_LSR_THRE)) - return; - } -} - /* * Print a string to the serial port using the device FIFO * @@ -3254,7 +3269,7 @@ static void serial8250_console_fifo_write(struct uart_8250_port *up, while (s != end) { /* Allow timeout for each byte of a possibly full FIFO */ - fifo_wait_for_lsr(up, fifosize); + serial8250_fifo_wait_for_lsr_thre(up, fifosize); for (i = 0; i < fifosize && s != end; ++i) { if (*s == '\n' && !cr_sent) { @@ -3272,7 +3287,7 @@ static void serial8250_console_fifo_write(struct uart_8250_port *up, * Allow timeout for each byte written since the caller will only wait * for UART_LSR_BOTH_EMPTY using the timeout of a single character */ - fifo_wait_for_lsr(up, tx_count); + serial8250_fifo_wait_for_lsr_thre(up, tx_count); } /*
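The SERIAL_8250 annotations in the two files above belong together: EXPORT_SYMBOL_NS_GPL() places a symbol in a named export namespace, and any module using it must declare the matching MODULE_IMPORT_NS() or modpost rejects the module at build time. In miniature, with MY_NAMESPACE invented for the example:

/* Provider module: */
void my_helper(void)
{
}
EXPORT_SYMBOL_NS_GPL(my_helper, "MY_NAMESPACE");

/* Consumer module: */
MODULE_IMPORT_NS("MY_NAMESPACE");	/* required to link against my_helper() */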
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 4877569..89cebdd 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c
@@ -643,7 +643,10 @@ static unsigned int uart_write_room(struct tty_struct *tty) unsigned int ret; port = uart_port_ref_lock(state, &flags); - ret = kfifo_avail(&state->port.xmit_fifo); + if (!state->port.xmit_buf) + ret = 0; + else + ret = kfifo_avail(&state->port.xmit_fifo); uart_port_unlock_deref(port, flags); return ret; }
diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c index 39c1fd1..6240c3d 100644 --- a/drivers/tty/serial/uartlite.c +++ b/drivers/tty/serial/uartlite.c
@@ -878,6 +878,7 @@ static int ulite_probe(struct platform_device *pdev) pm_runtime_use_autosuspend(&pdev->dev); pm_runtime_set_autosuspend_delay(&pdev->dev, UART_AUTOSUSPEND_TIMEOUT); pm_runtime_set_active(&pdev->dev); + pm_runtime_get_noresume(&pdev->dev); pm_runtime_enable(&pdev->dev); ret = ulite_assign(&pdev->dev, id, res->start, irq, pdata);
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index c1f152d..b4b1915 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c
@@ -1339,6 +1339,8 @@ struct vc_data *vc_deallocate(unsigned int currcons) kfree(vc->vc_saved_screen); vc->vc_saved_screen = NULL; } + vc_uniscr_free(vc->vc_saved_uni_lines); + vc->vc_saved_uni_lines = NULL; } return vc; } @@ -1884,6 +1886,8 @@ static void enter_alt_screen(struct vc_data *vc) vc->vc_saved_screen = kmemdup((u16 *)vc->vc_origin, size, GFP_KERNEL); if (vc->vc_saved_screen == NULL) return; + vc->vc_saved_uni_lines = vc->vc_uni_lines; + vc->vc_uni_lines = NULL; vc->vc_saved_rows = vc->vc_rows; vc->vc_saved_cols = vc->vc_cols; save_cur(vc); @@ -1905,6 +1909,26 @@ static void leave_alt_screen(struct vc_data *vc) dest = ((u16 *)vc->vc_origin) + r * vc->vc_cols; memcpy(dest, src, 2 * cols); } + /* + * If the console was resized while in the alternate screen, + * resize the saved unicode buffer to the current dimensions. + * On allocation failure new_uniscr is NULL, causing the old + * buffer to be freed and vc_uni_lines to be lazily rebuilt + * via vc_uniscr_check() when next needed. + */ + if (vc->vc_saved_uni_lines && + (vc->vc_saved_rows != vc->vc_rows || + vc->vc_saved_cols != vc->vc_cols)) { + u32 **new_uniscr = vc_uniscr_alloc(vc->vc_cols, vc->vc_rows); + + if (new_uniscr) + vc_uniscr_copy_area(new_uniscr, vc->vc_cols, vc->vc_rows, + vc->vc_saved_uni_lines, cols, 0, rows); + vc_uniscr_free(vc->vc_saved_uni_lines); + vc->vc_saved_uni_lines = new_uniscr; + } + vc_uniscr_set(vc, vc->vc_saved_uni_lines); + vc->vc_saved_uni_lines = NULL; restore_cur(vc); /* Update the entire screen */ if (con_should_update(vc)) @@ -2227,6 +2251,8 @@ static void reset_terminal(struct vc_data *vc, int do_clear) if (vc->vc_saved_screen != NULL) { kfree(vc->vc_saved_screen); vc->vc_saved_screen = NULL; + vc_uniscr_free(vc->vc_saved_uni_lines); + vc->vc_saved_uni_lines = NULL; vc->vc_saved_rows = 0; vc->vc_saved_cols = 0; }
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 847b557..9ceb6d6 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c
@@ -24,6 +24,7 @@ #include <linux/pm_opp.h> #include <linux/regulator/consumer.h> #include <linux/sched/clock.h> +#include <linux/sizes.h> #include <linux/iopoll.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_dbg.h> @@ -517,8 +518,8 @@ static void ufshcd_add_command_trace(struct ufs_hba *hba, struct scsi_cmnd *cmd, if (hba->mcq_enabled) { struct ufs_hw_queue *hwq = ufshcd_mcq_req_to_hwq(hba, rq); - - hwq_id = hwq->id; + if (hwq) + hwq_id = hwq->id; } else { doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); } @@ -4389,14 +4390,6 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) spin_unlock_irqrestore(hba->host->host_lock, flags); mutex_unlock(&hba->uic_cmd_mutex); - /* - * If the h8 exit fails during the runtime resume process, it becomes - * stuck and cannot be recovered through the error handler. To fix - * this, use link recovery instead of the error handler. - */ - if (ret && hba->pm_op_in_progress) - ret = ufshcd_link_recovery(hba); - return ret; } @@ -5249,6 +5242,25 @@ static void ufshcd_lu_init(struct ufs_hba *hba, struct scsi_device *sdev) hba->dev_info.rpmb_region_size[1] = desc_buf[RPMB_UNIT_DESC_PARAM_REGION1_SIZE]; hba->dev_info.rpmb_region_size[2] = desc_buf[RPMB_UNIT_DESC_PARAM_REGION2_SIZE]; hba->dev_info.rpmb_region_size[3] = desc_buf[RPMB_UNIT_DESC_PARAM_REGION3_SIZE]; + + if (hba->dev_info.wspecversion <= 0x0220) { + /* + * These older spec chips have only one RPMB region, + * sized between 128 kB minimum and 16 MB maximum. + * No per region size fields are provided (respective + * REGIONX_SIZE fields always contain zeros), so get + * it from the logical block count and size fields for + * compatibility. + * + * (See JESD220C-2_2 Section 14.1.4.6 + * RPMB Unit Descriptor, offset 13h, 4 bytes) + */ + hba->dev_info.rpmb_region_size[0] = + (get_unaligned_be64(desc_buf + + RPMB_UNIT_DESC_PARAM_LOGICAL_BLK_COUNT) + << desc_buf[RPMB_UNIT_DESC_PARAM_LOGICAL_BLK_SIZE]) + / SZ_128K; + } } @@ -5963,6 +5975,7 @@ static int ufshcd_disable_auto_bkops(struct ufs_hba *hba) hba->auto_bkops_enabled = false; trace_ufshcd_auto_bkops_state(hba, "Disabled"); + hba->urgent_bkops_lvl = BKOPS_STATUS_PERF_IMPACT; hba->is_urgent_bkops_lvl_checked = false; out: return err; @@ -6066,7 +6079,7 @@ static void ufshcd_bkops_exception_event_handler(struct ufs_hba *hba) * impacted or critical. Handle these devices by determining their urgent * bkops status at runtime.
*/ - if (curr_status < BKOPS_STATUS_PERF_IMPACT) { + if ((curr_status > BKOPS_STATUS_NO_OP) && (curr_status < BKOPS_STATUS_PERF_IMPACT)) { dev_err(hba->dev, "%s: device raised urgent BKOPS exception for bkops status %d\n", __func__, curr_status); /* update the current status as the urgent bkops level */ @@ -7097,7 +7110,7 @@ static irqreturn_t ufshcd_handle_mcq_cq_events(struct ufs_hba *hba) ret = ufshcd_vops_get_outstanding_cqs(hba, &outstanding_cqs); if (ret) - outstanding_cqs = (1U << hba->nr_hw_queues) - 1; + outstanding_cqs = (1ULL << hba->nr_hw_queues) - 1; /* Exclude the poll queues */ nr_queues = hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL]; @@ -10053,6 +10066,7 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) } flush_work(&hba->eeh_work); + cancel_delayed_work_sync(&hba->ufs_rtc_update_work); ret = ufshcd_vops_suspend(hba, pm_op, PRE_CHANGE); if (ret) @@ -10107,7 +10121,6 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) if (ret) goto set_link_active; - cancel_delayed_work_sync(&hba->ufs_rtc_update_work); goto out; set_link_active: @@ -10179,7 +10192,15 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) } else { dev_err(hba->dev, "%s: hibern8 exit failed %d\n", __func__, ret); - goto vendor_suspend; + /* + * If the h8 exit fails during the runtime resume + * process, it becomes stuck and cannot be recovered + * through the error handler. To fix this, use link + * recovery instead of the error handler. + */ + ret = ufshcd_link_recovery(hba); + if (ret) + goto vendor_suspend; } } else if (ufshcd_is_link_off(hba)) { /*
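To make the RPMB fallback computation concrete with invented numbers: a unit descriptor reporting qLogicalBlockCount = 4096 and bLogicalBlockSize = 12 (blocks of 2^12 = 4 KiB) describes 4096 << 12 = 16 MiB of RPMB space, and dividing by SZ_128K expresses that as 128 units of 128 kB, the same granularity the newer per-region size fields use.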
diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index d59a60a..8382231 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c
@@ -2589,6 +2589,9 @@ static int __cdns3_gadget_ep_queue(struct usb_ep *ep, struct cdns3_request *priv_req; int ret = 0; + if (!ep->desc) + return -ESHUTDOWN; + request->actual = 0; request->status = -EINPROGRESS; priv_req = to_cdns3_request(request); @@ -3428,6 +3431,7 @@ static int __cdns3_gadget_init(struct cdns *cdns) ret = cdns3_gadget_start(cdns); if (ret) { pm_runtime_put_sync(cdns->dev); + cdns_drd_gadget_off(cdns); return ret; }
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index ad38c74..cf3c3ee 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c
@@ -1225,6 +1225,12 @@ static int acm_probe(struct usb_interface *intf, if (!data_interface || !control_interface) return -ENODEV; goto skip_normal_probe; + } else if (quirks == NO_UNION_12) { + data_interface = usb_ifnum_to_if(usb_dev, 2); + control_interface = usb_ifnum_to_if(usb_dev, 1); + if (!data_interface || !control_interface) + return -ENODEV; + goto skip_normal_probe; } /* normal probing*/ @@ -1379,6 +1385,8 @@ static int acm_probe(struct usb_interface *intf, acm->ctrl_caps = h.usb_cdc_acm_descriptor->bmCapabilities; if (quirks & NO_CAP_LINE) acm->ctrl_caps &= ~USB_CDC_CAP_LINE; + if (quirks & MISSING_CAP_BRK) + acm->ctrl_caps |= USB_CDC_CAP_BRK; acm->ctrlsize = ctrlsize; acm->readsize = readsize; acm->rx_buflimit = num_rx_buf; @@ -1746,6 +1754,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x045b, 0x024D), /* Renesas R-Car E3 USB Download mode */ .driver_info = DISABLE_ECHO, /* Don't echo banner */ }, + { USB_DEVICE(0x04b8, 0x0d12), /* EPSON HMD Com&Sens */ + .driver_info = NO_UNION_12, /* union descriptor is garbage */ + }, { USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@gmail.com */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, @@ -2002,6 +2013,9 @@ static const struct usb_device_id acm_ids[] = { .driver_info = IGNORE_DEVICE, }, + /* CH343 supports CAP_BRK, but doesn't advertise it */ + { USB_DEVICE(0x1a86, 0x55d3), .driver_info = MISSING_CAP_BRK, }, + /* control interfaces without any protocol set */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_PROTO_NONE) },
diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index 759ac15..25fd532 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h
@@ -113,3 +113,5 @@ struct acm { #define CLEAR_HALT_CONDITIONS BIT(5) #define SEND_ZERO_PACKET BIT(6) #define DISABLE_ECHO BIT(7) +#define MISSING_CAP_BRK BIT(8) +#define NO_UNION_12 BIT(9)
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index f2d94cf..7556c0da 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c
@@ -225,7 +225,8 @@ static void wdm_in_callback(struct urb *urb) /* we may already be in overflow */ if (!test_bit(WDM_OVERFLOW, &desc->flags)) { memmove(desc->ubuf + desc->length, desc->inbuf, length); - desc->length += length; + smp_wmb(); /* against wdm_read() */ + WRITE_ONCE(desc->length, desc->length + length); } } skip_error: @@ -533,6 +534,7 @@ static ssize_t wdm_read return -ERESTARTSYS; cntr = READ_ONCE(desc->length); + smp_rmb(); /* against wdm_in_callback() */ if (cntr == 0) { desc->read = 0; retry:
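The barrier pair added here is the classic publish/consume discipline: the producer must make the payload visible before the length that advertises it, and the consumer must read the length before touching the bytes it guards. Reduced to a skeleton (names invented; the real code works on struct wdm_device fields):

static void publish(u8 *buf, size_t *len, const u8 *src, size_t n)
{
	memcpy(buf + *len, src, n);	/* write the payload first */
	smp_wmb();			/* order the copy before the length store */
	WRITE_ONCE(*len, *len + n);
}

static size_t consume(const u8 *buf, const size_t *len, u8 *dst)
{
	size_t cnt = READ_ONCE(*len);	/* read the length first */

	smp_rmb();			/* order the load before reading buf */
	memcpy(dst, buf, cnt);		/* payload is guaranteed visible */
	return cnt;
}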
diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 2526a0e..bd93478 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c
@@ -254,6 +254,9 @@ static int usbtmc_release(struct inode *inode, struct file *file) list_del(&file_data->file_elem); spin_unlock_irq(&file_data->data->dev_lock); + + /* flush anchored URBs */ + usbtmc_draw_down(file_data); mutex_unlock(&file_data->data->io_mutex); kref_put(&file_data->data->kref, usbtmc_delete); @@ -727,7 +730,7 @@ static int usbtmc488_ioctl_trigger(struct usbtmc_file_data *file_data) buffer[1] = data->bTag; buffer[2] = ~data->bTag; - retval = usb_bulk_msg(data->usb_dev, + retval = usb_bulk_msg_killable(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out), buffer, USBTMC_HEADER_SIZE, @@ -1347,7 +1350,7 @@ static int send_request_dev_dep_msg_in(struct usbtmc_file_data *file_data, buffer[11] = 0; /* Reserved */ /* Send bulk URB */ - retval = usb_bulk_msg(data->usb_dev, + retval = usb_bulk_msg_killable(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out), buffer, USBTMC_HEADER_SIZE, @@ -1419,7 +1422,7 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf, actual = 0; /* Send bulk URB */ - retval = usb_bulk_msg(data->usb_dev, + retval = usb_bulk_msg_killable(data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in), buffer, bufsize, &actual,
diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 56c9bfa..b34fb65 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c
@@ -331,10 +331,9 @@ struct ulpi *ulpi_register_interface(struct device *dev, ulpi->ops = ops; ret = ulpi_register(dev, ulpi); - if (ret) { - kfree(ulpi); + if (ret) return ERR_PTR(ret); - } + return ulpi; }
diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 1cd5fa6..6a1fd96 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c
@@ -927,7 +927,11 @@ int usb_get_configuration(struct usb_device *dev) dev->descriptor.bNumConfigurations = ncfg = USB_MAXCONFIG; } - if (ncfg < 1) { + if (ncfg < 1 && dev->quirks & USB_QUIRK_FORCE_ONE_CONFIG) { + dev_info(ddev, "Device claims zero configurations, forcing to 1\n"); + dev->descriptor.bNumConfigurations = 1; + ncfg = 1; + } else if (ncfg < 1) { dev_err(ddev, "no configurations\n"); return -EINVAL; }
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 2574e65..f630044 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c
@@ -1415,14 +1415,16 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) int status = 0; int i = 0, n = 0; struct usb_interface *intf; + bool offload_active = false; if (udev->state == USB_STATE_NOTATTACHED || udev->state == USB_STATE_SUSPENDED) goto done; + usb_offload_set_pm_locked(udev, true); if (msg.event == PM_EVENT_SUSPEND && usb_offload_check(udev)) { dev_dbg(&udev->dev, "device offloaded, skip suspend.\n"); - udev->offload_at_suspend = 1; + offload_active = true; } /* Suspend all the interfaces and then udev itself */ @@ -1436,8 +1438,7 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) * interrupt urbs, allowing interrupt events to be * handled during system suspend. */ - if (udev->offload_at_suspend && - intf->needs_remote_wakeup) { + if (offload_active && intf->needs_remote_wakeup) { dev_dbg(&intf->dev, "device offloaded, skip suspend.\n"); continue; @@ -1452,7 +1453,7 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) } } if (status == 0) { - if (!udev->offload_at_suspend) + if (!offload_active) status = usb_suspend_device(udev, msg); /* @@ -1498,7 +1499,7 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) */ } else { udev->can_submit = 0; - if (!udev->offload_at_suspend) { + if (!offload_active) { for (i = 0; i < 16; ++i) { usb_hcd_flush_endpoint(udev, udev->ep_out[i]); usb_hcd_flush_endpoint(udev, udev->ep_in[i]); @@ -1507,6 +1508,8 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) } done: + if (status != 0) + usb_offload_set_pm_locked(udev, false); dev_vdbg(&udev->dev, "%s: status %d\n", __func__, status); return status; } @@ -1536,16 +1539,19 @@ static int usb_resume_both(struct usb_device *udev, pm_message_t msg) int status = 0; int i; struct usb_interface *intf; + bool offload_active = false; if (udev->state == USB_STATE_NOTATTACHED) { status = -ENODEV; goto done; } udev->can_submit = 1; + if (msg.event == PM_EVENT_RESUME) + offload_active = usb_offload_check(udev); /* Resume the device */ if (udev->state == USB_STATE_SUSPENDED || udev->reset_resume) { - if (!udev->offload_at_suspend) + if (!offload_active) status = usb_resume_device(udev, msg); else dev_dbg(&udev->dev, @@ -1562,8 +1568,7 @@ static int usb_resume_both(struct usb_device *udev, pm_message_t msg) * pending interrupt urbs, allowing interrupt events * to be handled during system suspend. */ - if (udev->offload_at_suspend && - intf->needs_remote_wakeup) { + if (offload_active && intf->needs_remote_wakeup) { dev_dbg(&intf->dev, "device offloaded, skip resume.\n"); continue; @@ -1572,11 +1577,11 @@ static int usb_resume_both(struct usb_device *udev, pm_message_t msg) udev->reset_resume); } } - udev->offload_at_suspend = 0; usb_mark_last_busy(udev); done: dev_vdbg(&udev->dev, "%s: status %d\n", __func__, status); + usb_offload_set_pm_locked(udev, false); if (!status) udev->reset_resume = 0; return status;
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index dee842e..89221f1 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c
@@ -2403,7 +2403,7 @@ void usb_hcd_resume_root_hub (struct usb_hcd *hcd) if (hcd->rh_registered) { pm_wakeup_event(&hcd->self.root_hub->dev, 0); set_bit(HCD_FLAG_WAKEUP_PENDING, &hcd->flags); - queue_work(pm_wq, &hcd->wakeup_work); + queue_work(system_freezable_wq, &hcd->wakeup_work); } spin_unlock_irqrestore (&hcd_root_hub_lock, flags); }
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index ea970ddf..2ab120c 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c
@@ -42,16 +42,19 @@ static void usb_api_blocking_completion(struct urb *urb) /* - * Starts urb and waits for completion or timeout. Note that this call - * is NOT interruptible. Many device driver i/o requests should be - * interruptible and therefore these drivers should implement their - * own interruptible routines. + * Starts urb and waits for completion or timeout. + * Whether or not the wait is killable depends on the flag passed in. + * For example, compare usb_bulk_msg() and usb_bulk_msg_killable(). + * + * For non-killable waits, we enforce a maximum limit on the timeout value. */ -static int usb_start_wait_urb(struct urb *urb, int timeout, int *actual_length) +static int usb_start_wait_urb(struct urb *urb, int timeout, int *actual_length, + bool killable) { struct api_context ctx; unsigned long expire; int retval; + long rc; init_completion(&ctx.done); urb->context = &ctx; @@ -60,13 +63,24 @@ static int usb_start_wait_urb(struct urb *urb, int timeout, int *actual_length) if (unlikely(retval)) goto out; - expire = timeout ? msecs_to_jiffies(timeout) : MAX_SCHEDULE_TIMEOUT; - if (!wait_for_completion_timeout(&ctx.done, expire)) { + if (!killable && (timeout <= 0 || timeout > USB_MAX_SYNCHRONOUS_TIMEOUT)) + timeout = USB_MAX_SYNCHRONOUS_TIMEOUT; + expire = (timeout > 0) ? msecs_to_jiffies(timeout) : MAX_SCHEDULE_TIMEOUT; + if (killable) + rc = wait_for_completion_killable_timeout(&ctx.done, expire); + else + rc = wait_for_completion_timeout(&ctx.done, expire); + if (rc <= 0) { usb_kill_urb(urb); - retval = (ctx.status == -ENOENT ? -ETIMEDOUT : ctx.status); + if (ctx.status != -ENOENT) + retval = ctx.status; + else if (rc == 0) + retval = -ETIMEDOUT; + else + retval = rc; dev_dbg(&urb->dev->dev, - "%s timed out on ep%d%s len=%u/%u\n", + "%s timed out or killed on ep%d%s len=%u/%u\n", current->comm, usb_endpoint_num(&urb->ep->desc), usb_urb_dir_in(urb) ? "in" : "out", @@ -100,7 +114,7 @@ static int usb_internal_control_msg(struct usb_device *usb_dev, usb_fill_control_urb(urb, usb_dev, pipe, (unsigned char *)cmd, data, len, usb_api_blocking_completion, NULL); - retv = usb_start_wait_urb(urb, timeout, &length); + retv = usb_start_wait_urb(urb, timeout, &length, false); if (retv < 0) return retv; else @@ -117,8 +131,7 @@ static int usb_internal_control_msg(struct usb_device *usb_dev, * @index: USB message index value * @data: pointer to the data to send * @size: length in bytes of the data to send - * @timeout: time in msecs to wait for the message to complete before timing - * out (if 0 the wait is forever) + * @timeout: time in msecs to wait for the message to complete before timing out * * Context: task context, might sleep. 
* @@ -173,8 +186,7 @@ EXPORT_SYMBOL_GPL(usb_control_msg); * @index: USB message index value * @driver_data: pointer to the data to send * @size: length in bytes of the data to send - * @timeout: time in msecs to wait for the message to complete before timing - * out (if 0 the wait is forever) + * @timeout: time in msecs to wait for the message to complete before timing out * @memflags: the flags for memory allocation for buffers * * Context: !in_interrupt () @@ -232,8 +244,7 @@ EXPORT_SYMBOL_GPL(usb_control_msg_send); * @index: USB message index value * @driver_data: pointer to the data to be filled in by the message * @size: length in bytes of the data to be received - * @timeout: time in msecs to wait for the message to complete before timing - * out (if 0 the wait is forever) + * @timeout: time in msecs to wait for the message to complete before timing out * @memflags: the flags for memory allocation for buffers * * Context: !in_interrupt () @@ -304,8 +315,7 @@ EXPORT_SYMBOL_GPL(usb_control_msg_recv); * @len: length in bytes of the data to send * @actual_length: pointer to a location to put the actual length transferred * in bytes - * @timeout: time in msecs to wait for the message to complete before - * timing out (if 0 the wait is forever) + * @timeout: time in msecs to wait for the message to complete before timing out * * Context: task context, might sleep. * @@ -337,8 +347,7 @@ EXPORT_SYMBOL_GPL(usb_interrupt_msg); * @len: length in bytes of the data to send * @actual_length: pointer to a location to put the actual length transferred * in bytes - * @timeout: time in msecs to wait for the message to complete before - * timing out (if 0 the wait is forever) + * @timeout: time in msecs to wait for the message to complete before timing out * * Context: task context, might sleep. * @@ -385,10 +394,59 @@ int usb_bulk_msg(struct usb_device *usb_dev, unsigned int pipe, usb_fill_bulk_urb(urb, usb_dev, pipe, data, len, usb_api_blocking_completion, NULL); - return usb_start_wait_urb(urb, timeout, actual_length); + return usb_start_wait_urb(urb, timeout, actual_length, false); } EXPORT_SYMBOL_GPL(usb_bulk_msg); +/** + * usb_bulk_msg_killable - Builds a bulk urb, sends it off and waits for completion in a killable state + * @usb_dev: pointer to the usb device to send the message to + * @pipe: endpoint "pipe" to send the message to + * @data: pointer to the data to send + * @len: length in bytes of the data to send + * @actual_length: pointer to a location to put the actual length transferred + * in bytes + * @timeout: time in msecs to wait for the message to complete before + * timing out (if <= 0, the wait is as long as possible) + * + * Context: task context, might sleep. + * + * This function is just like usb_bulk_msg(), except that it waits in a + * killable state and there is no limit on the timeout length. + * + * Return: + * If successful, 0. Otherwise a negative error number. The number of actual + * bytes transferred will be stored in the @actual_length parameter.
+ * + */ +int usb_bulk_msg_killable(struct usb_device *usb_dev, unsigned int pipe, + void *data, int len, int *actual_length, int timeout) +{ + struct urb *urb; + struct usb_host_endpoint *ep; + + ep = usb_pipe_endpoint(usb_dev, pipe); + if (!ep || len < 0) + return -EINVAL; + + urb = usb_alloc_urb(0, GFP_KERNEL); + if (!urb) + return -ENOMEM; + + if ((ep->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == + USB_ENDPOINT_XFER_INT) { + pipe = (pipe & ~(3 << 30)) | (PIPE_INTERRUPT << 30); + usb_fill_int_urb(urb, usb_dev, pipe, data, len, + usb_api_blocking_completion, NULL, + ep->desc.bInterval); + } else + usb_fill_bulk_urb(urb, usb_dev, pipe, data, len, + usb_api_blocking_completion, NULL); + + return usb_start_wait_urb(urb, timeout, actual_length, true); +} +EXPORT_SYMBOL_GPL(usb_bulk_msg_killable); + /*-------------------------------------------------------------------*/ static void sg_clean(struct usb_sg_request *io)
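wait_for_completion_killable_timeout() returns a positive remainder on completion, 0 on timeout, and -ERESTARTSYS on a fatal signal, which is the three-way split usb_start_wait_urb() now folds into retval. A hedged caller-side sketch of the new export (the driver function and endpoint address are illustrative, not from the patch):

#include <linux/usb.h>

static int demo_read(struct usb_device *udev, void *buf, int len)
{
	int actual = 0;
	int ret;

	/* timeout <= 0: wait as long as possible, but stay killable */
	ret = usb_bulk_msg_killable(udev, usb_rcvbulkpipe(udev, 0x81),
				    buf, len, &actual, 0);
	if (ret == -ERESTARTSYS)
		return ret;	/* fatal signal: let the task exit promptly */
	if (ret)
		return ret;	/* -ETIMEDOUT or a transfer error */
	return actual;
}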
diff --git a/drivers/usb/core/offload.c b/drivers/usb/core/offload.c index 7c699f1..9db3cfe 100644 --- a/drivers/usb/core/offload.c +++ b/drivers/usb/core/offload.c
@@ -25,33 +25,30 @@ */ int usb_offload_get(struct usb_device *udev) { - int ret; + int ret = 0; - usb_lock_device(udev); - if (udev->state == USB_STATE_NOTATTACHED) { - usb_unlock_device(udev); + if (!usb_get_dev(udev)) return -ENODEV; + + if (pm_runtime_get_if_active(&udev->dev) != 1) { + ret = -EBUSY; + goto err_rpm; } - if (udev->state == USB_STATE_SUSPENDED || - udev->offload_at_suspend) { - usb_unlock_device(udev); - return -EBUSY; - } + spin_lock(&udev->offload_lock); - /* - * offload_usage could only be modified when the device is active, since - * it will alter the suspend flow of the device. - */ - ret = usb_autoresume_device(udev); - if (ret < 0) { - usb_unlock_device(udev); - return ret; + if (udev->offload_pm_locked) { + ret = -EAGAIN; + goto err; } udev->offload_usage++; - usb_autosuspend_device(udev); - usb_unlock_device(udev); + +err: + spin_unlock(&udev->offload_lock); + pm_runtime_put_autosuspend(&udev->dev); +err_rpm: + usb_put_dev(udev); return ret; } @@ -69,35 +66,32 @@ EXPORT_SYMBOL_GPL(usb_offload_get); */ int usb_offload_put(struct usb_device *udev) { - int ret; + int ret = 0; - usb_lock_device(udev); - if (udev->state == USB_STATE_NOTATTACHED) { - usb_unlock_device(udev); + if (!usb_get_dev(udev)) return -ENODEV; + + if (pm_runtime_get_if_active(&udev->dev) != 1) { + ret = -EBUSY; + goto err_rpm; } - if (udev->state == USB_STATE_SUSPENDED || - udev->offload_at_suspend) { - usb_unlock_device(udev); - return -EBUSY; - } + spin_lock(&udev->offload_lock); - /* - * offload_usage could only be modified when the device is active, since - * it will alter the suspend flow of the device. - */ - ret = usb_autoresume_device(udev); - if (ret < 0) { - usb_unlock_device(udev); - return ret; + if (udev->offload_pm_locked) { + ret = -EAGAIN; + goto err; } /* Drop the count when it wasn't 0, ignore the operation otherwise. */ if (udev->offload_usage) udev->offload_usage--; - usb_autosuspend_device(udev); - usb_unlock_device(udev); + +err: + spin_unlock(&udev->offload_lock); + pm_runtime_put_autosuspend(&udev->dev); +err_rpm: + usb_put_dev(udev); return ret; } @@ -112,25 +106,47 @@ EXPORT_SYMBOL_GPL(usb_offload_put); * management. * * The caller must hold @udev's device lock. In addition, the caller should - * ensure downstream usb devices are all either suspended or marked as - * "offload_at_suspend" to ensure the correctness of the return value. + * ensure the device itself and the downstream usb devices are all marked as + * "offload_pm_locked" to ensure the correctness of the return value. * * Returns true on any offload activity, false otherwise. */ bool usb_offload_check(struct usb_device *udev) __must_hold(&udev->dev->mutex) { struct usb_device *child; - bool active; + bool active = false; int port1; + if (udev->offload_usage) + return true; + usb_hub_for_each_child(udev, port1, child) { usb_lock_device(child); active = usb_offload_check(child); usb_unlock_device(child); + if (active) - return true; + break; } - return !!udev->offload_usage; + return active; } EXPORT_SYMBOL_GPL(usb_offload_check); + +/** + * usb_offload_set_pm_locked - set the PM lock state of a USB device + * @udev: the USB device to modify + * @locked: the new lock state + * + * Setting @locked to true prevents offload_usage from being modified. This + * ensures that offload activities cannot be started or stopped during critical + * power management transitions, maintaining a stable state for the duration + * of the transition. 
+ */ +void usb_offload_set_pm_locked(struct usb_device *udev, bool locked) +{ + spin_lock(&udev->offload_lock); + udev->offload_pm_locked = locked; + spin_unlock(&udev->offload_lock); +} +EXPORT_SYMBOL_GPL(usb_offload_set_pm_locked);
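pm_runtime_get_if_active() returns 1 only when it took a reference on an already-runtime-active device, which is what lets the reworked helpers adjust offload_usage under a plain spinlock instead of the device lock. The pattern, condensed from usb_offload_get() above (the usb_get_dev()/usb_put_dev() bracketing and goto labels are trimmed):

static int demo_offload_get(struct usb_device *udev)
{
	int ret = 0;

	if (pm_runtime_get_if_active(&udev->dev) != 1)
		return -EBUSY;		/* device is not runtime-active */

	spin_lock(&udev->offload_lock);
	if (udev->offload_pm_locked)
		ret = -EAGAIN;		/* suspend/resume transition in flight */
	else
		udev->offload_usage++;
	spin_unlock(&udev->offload_lock);

	pm_runtime_put_autosuspend(&udev->dev);
	return ret;
}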
diff --git a/drivers/usb/core/phy.c b/drivers/usb/core/phy.c index faa2005..4d966cc9 100644 --- a/drivers/usb/core/phy.c +++ b/drivers/usb/core/phy.c
@@ -114,7 +114,7 @@ EXPORT_SYMBOL_GPL(usb_phy_roothub_alloc); struct usb_phy_roothub *usb_phy_roothub_alloc_usb3_phy(struct device *dev) { struct usb_phy_roothub *phy_roothub; - int num_phys; + int num_phys, usb2_phy_index; if (!IS_ENABLED(CONFIG_GENERIC_PHY)) return NULL; @@ -124,6 +124,16 @@ struct usb_phy_roothub *usb_phy_roothub_alloc_usb3_phy(struct device *dev) if (num_phys <= 0) return NULL; + /* + * If 'usb2-phy' is not present, usb_phy_roothub_alloc() added + * all PHYs to the primary HCD's phy_roothub already, so skip + * adding 'usb3-phy' here to avoid double use of that. + */ + usb2_phy_index = of_property_match_string(dev->of_node, "phy-names", + "usb2-phy"); + if (usb2_phy_index < 0) + return NULL; + phy_roothub = devm_kzalloc(dev, sizeof(*phy_roothub), GFP_KERNEL); if (!phy_roothub) return ERR_PTR(-ENOMEM); @@ -200,16 +210,10 @@ int usb_phy_roothub_set_mode(struct usb_phy_roothub *phy_roothub, list_for_each_entry(roothub_entry, head, list) { err = phy_set_mode(roothub_entry->phy, mode); if (err) - goto err_out; + return err; } return 0; - -err_out: - list_for_each_entry_continue_reverse(roothub_entry, head, list) - phy_power_off(roothub_entry->phy); - - return err; } EXPORT_SYMBOL_GPL(usb_phy_roothub_set_mode);
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 9e7e497..0ffdaef 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c
@@ -140,6 +140,8 @@ static int quirks_param_set(const char *value, const struct kernel_param *kp) case 'p': flags |= USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT; break; + case 'q': + flags |= USB_QUIRK_FORCE_ONE_CONFIG; /* Ignore unrecognized flag characters */ } } @@ -207,6 +209,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* HP v222w 16GB Mini USB Drive */ { USB_DEVICE(0x03f0, 0x3f40), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Huawei 4G LTE module ME906S */ + { USB_DEVICE(0x03f0, 0xa31d), .driver_info = + USB_QUIRK_DISCONNECT_SUSPEND }, + /* Creative SB Audigy 2 NX */ { USB_DEVICE(0x041e, 0x3020), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -376,6 +382,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* SanDisk Extreme 55AE */ { USB_DEVICE(0x0781, 0x55ae), .driver_info = USB_QUIRK_NO_LPM }, + /* Avermedia Live Gamer Ultra 2.1 (GC553G2) - BOS descriptor fetch hangs at SuperSpeed Plus */ + { USB_DEVICE(0x07ca, 0x2553), .driver_info = USB_QUIRK_NO_BOS }, + /* Realforce 87U Keyboard */ { USB_DEVICE(0x0853, 0x011b), .driver_info = USB_QUIRK_NO_LPM }, @@ -392,6 +401,7 @@ static const struct usb_device_id usb_quirk_list[] = { /* Silicon Motion Flash Drive */ { USB_DEVICE(0x090c, 0x1000), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x090c, 0x2000), .driver_info = USB_QUIRK_DELAY_INIT }, /* Sound Devices USBPre2 */ { USB_DEVICE(0x0926, 0x0202), .driver_info = @@ -436,6 +446,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0b05, 0x17e0), .driver_info = USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + /* ASUS TUF 4K PRO - BOS descriptor fetch hangs at SuperSpeed Plus */ + { USB_DEVICE(0x0b05, 0x1ab9), .driver_info = USB_QUIRK_NO_BOS }, + /* Realtek Semiconductor Corp. Mass Storage Device (Multicard Reader)*/ { USB_DEVICE(0x0bda, 0x0151), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, @@ -480,6 +493,8 @@ static const struct usb_device_id usb_quirk_list[] = { /* Razer - Razer Blade Keyboard */ { USB_DEVICE(0x1532, 0x0116), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Razer - Razer Kiyo Pro Webcam */ + { USB_DEVICE(0x1532, 0x0e05), .driver_info = USB_QUIRK_NO_LPM }, /* Lenovo ThinkPad OneLink+ Dock twin hub controllers (VIA Labs VL812) */ { USB_DEVICE(0x17ef, 0x1018), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -564,6 +579,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x2386, 0x350e), .driver_info = USB_QUIRK_NO_LPM }, + /* UGREEN 35871 - BOS descriptor fetch hangs at SuperSpeed Plus */ + { USB_DEVICE(0x2b89, 0x5871), .driver_info = USB_QUIRK_NO_BOS }, + /* APTIV AUTOMOTIVE HUB */ { USB_DEVICE(0x2c48, 0x0132), .driver_info = USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT }, @@ -574,12 +592,18 @@ static const struct usb_device_id usb_quirk_list[] = { /* Alcor Link AK9563 SC Reader used in 2022 Lenovo ThinkPads */ { USB_DEVICE(0x2ce3, 0x9563), .driver_info = USB_QUIRK_NO_LPM }, + /* ezcap401 - BOS descriptor fetch hangs at SuperSpeed Plus */ + { USB_DEVICE(0x32ed, 0x0401), .driver_info = USB_QUIRK_NO_BOS }, + /* DELL USB GEN2 */ { USB_DEVICE(0x413c, 0xb062), .driver_info = USB_QUIRK_NO_LPM | USB_QUIRK_RESET_RESUME }, /* VCOM device */ { USB_DEVICE(0x4296, 0x7570), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* Noji-MCS SmartCard Reader */ + { USB_DEVICE(0x5131, 0x2007), .driver_info = USB_QUIRK_FORCE_ONE_CONFIG }, + /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
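The new 'q' character extends the usbcore.quirks grammar, so USB_QUIRK_FORCE_ONE_CONFIG can also be applied to devices that are not in the static table. An illustrative kernel command line entry, reusing the Noji-MCS IDs from the table above:

    usbcore.quirks=5131:2007:q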
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index e9a10a3..df166ca 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c
@@ -671,6 +671,7 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent, set_dev_node(&dev->dev, dev_to_node(bus->sysdev)); dev->state = USB_STATE_ATTACHED; dev->lpm_disable_count = 1; + spin_lock_init(&dev->offload_lock); dev->offload_usage = 0; atomic_set(&dev->urbnum, 0);
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index d216e26..c8b02c2 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c
@@ -4607,7 +4607,9 @@ static int dwc2_hsotg_udc_stop(struct usb_gadget *gadget) /* Exit clock gating when driver is stopped. */ if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended && !hsotg->params.no_clock_gating) { + spin_lock_irqsave(&hsotg->lock, flags); dwc2_gadget_exit_clock_gating(hsotg, 0); + spin_unlock_irqrestore(&hsotg->lock, flags); } /* all endpoints should be shutdown */
diff --git a/drivers/usb/dwc3/dwc3-google.c b/drivers/usb/dwc3/dwc3-google.c index 2105c72..4ca567e 100644 --- a/drivers/usb/dwc3/dwc3-google.c +++ b/drivers/usb/dwc3/dwc3-google.c
@@ -385,8 +385,9 @@ static int dwc3_google_probe(struct platform_device *pdev) "google,usb-cfg-csr", ARRAY_SIZE(args), args); if (IS_ERR(google->usb_cfg_regmap)) { - return dev_err_probe(dev, PTR_ERR(google->usb_cfg_regmap), - "invalid usb cfg csr\n"); + ret = dev_err_probe(dev, PTR_ERR(google->usb_cfg_regmap), + "invalid usb cfg csr\n"); + goto err_deinit_pdom; } google->host_cfg_offset = args[0];
diff --git a/drivers/usb/dwc3/dwc3-imx8mp.c b/drivers/usb/dwc3/dwc3-imx8mp.c index b3d7252..1cf9654 100644 --- a/drivers/usb/dwc3/dwc3-imx8mp.c +++ b/drivers/usb/dwc3/dwc3-imx8mp.c
@@ -263,7 +263,7 @@ static int dwc3_imx8mp_probe(struct platform_device *pdev) dwc3 = platform_get_drvdata(dwc3_imx->dwc3_pdev); if (!dwc3) { err = dev_err_probe(dev, -EPROBE_DEFER, "failed to get dwc3 platform data\n"); - goto depopulate; + goto put_dwc3; } dwc3->glue_ops = &dwc3_imx_glue_ops;
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 6ecadc8..6c1cbb7 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -56,6 +56,7 @@ #define PCI_DEVICE_ID_INTEL_CNPH 0xa36e #define PCI_DEVICE_ID_INTEL_CNPV 0xa3b0 #define PCI_DEVICE_ID_INTEL_RPL 0xa70e +#define PCI_DEVICE_ID_INTEL_NVLH 0xd37f #define PCI_DEVICE_ID_INTEL_PTLH 0xe332 #define PCI_DEVICE_ID_INTEL_PTLH_PCH 0xe37e #define PCI_DEVICE_ID_INTEL_PTLU 0xe432 @@ -447,6 +448,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, CNPH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, CNPV, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, RPL, &dwc3_pci_intel_swnode) }, + { PCI_DEVICE_DATA(INTEL, NVLH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, PTLH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, PTLH_PCH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, PTLU, &dwc3_pci_intel_swnode) },
diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c index e0c0212..e495bac 100644 --- a/drivers/usb/gadget/function/f_ecm.c +++ b/drivers/usb/gadget/function/f_ecm.c
@@ -681,6 +681,7 @@ ecm_bind(struct usb_configuration *c, struct usb_function *f) struct usb_ep *ep; struct f_ecm_opts *ecm_opts; + struct net_device *net __free(detach_gadget) = NULL; struct usb_request *request __free(free_usb_request) = NULL; if (!can_support_ecm(cdev->gadget)) @@ -688,18 +689,18 @@ ecm_bind(struct usb_configuration *c, struct usb_function *f) ecm_opts = container_of(f->fi, struct f_ecm_opts, func_inst); - mutex_lock(&ecm_opts->lock); + scoped_guard(mutex, &ecm_opts->lock) + if (ecm_opts->bind_count == 0 && !ecm_opts->bound) { + if (!device_is_registered(&ecm_opts->net->dev)) { + gether_set_gadget(ecm_opts->net, cdev->gadget); + status = gether_register_netdev(ecm_opts->net); + } else + status = gether_attach_gadget(ecm_opts->net, cdev->gadget); - gether_set_gadget(ecm_opts->net, cdev->gadget); - - if (!ecm_opts->bound) { - status = gether_register_netdev(ecm_opts->net); - ecm_opts->bound = true; - } - - mutex_unlock(&ecm_opts->lock); - if (status) - return status; + if (status) + return status; + net = ecm_opts->net; + } ecm_string_defs[1].s = ecm->ethaddr; @@ -790,6 +791,9 @@ ecm_bind(struct usb_configuration *c, struct usb_function *f) ecm->notify_req = no_free_ptr(request); + ecm_opts->bind_count++; + retain_and_null_ptr(net); + DBG(cdev, "CDC Ethernet: IN/%s OUT/%s NOTIFY/%s\n", ecm->port.in_ep->name, ecm->port.out_ep->name, ecm->notify->name); @@ -836,7 +840,7 @@ static void ecm_free_inst(struct usb_function_instance *f) struct f_ecm_opts *opts; opts = container_of(f, struct f_ecm_opts, func_inst); - if (opts->bound) + if (device_is_registered(&opts->net->dev)) gether_cleanup(netdev_priv(opts->net)); else free_netdev(opts->net); @@ -906,9 +910,12 @@ static void ecm_free(struct usb_function *f) static void ecm_unbind(struct usb_configuration *c, struct usb_function *f) { struct f_ecm *ecm = func_to_ecm(f); + struct f_ecm_opts *ecm_opts; DBG(c->cdev, "ecm unbind\n"); + ecm_opts = container_of(f->fi, struct f_ecm_opts, func_inst); + usb_free_all_descriptors(f); if (atomic_read(&ecm->notify_count)) { @@ -918,6 +925,10 @@ static void ecm_unbind(struct usb_configuration *c, struct usb_function *f) kfree(ecm->notify_req->buf); usb_ep_free_request(ecm->notify, ecm->notify_req); + + ecm_opts->bind_count--; + if (ecm_opts->bind_count == 0 && !ecm_opts->bound) + gether_detach_gadget(ecm_opts->net); } static struct usb_function *ecm_alloc(struct usb_function_instance *fi)
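These bind() reworks share one cleanup.h idiom: 'net' is declared with __free(detach_gadget), armed once the gadget is attached, so every early error return automatically detaches again, and retain_and_null_ptr() disarms the guard on the success path. A self-contained sketch of the shape (struct f_demo_opts and demo_alloc_descriptors() are hypothetical placeholders):

#include <linux/cleanup.h>

struct f_demo_opts {			/* mirrors f_ecm_opts, simplified */
	struct mutex lock;
	struct net_device *net;
	int bind_count;
};

static int demo_alloc_descriptors(void) { return 0; }	/* hypothetical */

static int demo_bind(struct f_demo_opts *opts, struct usb_gadget *g)
{
	struct net_device *net __free(detach_gadget) = NULL;
	int status;

	scoped_guard(mutex, &opts->lock)
		if (opts->bind_count == 0) {
			status = gether_attach_gadget(opts->net, g);
			if (status)
				return status;
			net = opts->net;	/* armed: auto-detach on early return */
		}

	if (demo_alloc_descriptors() < 0)
		return -ENODEV;			/* 'net' detaches itself here */

	opts->bind_count++;
	retain_and_null_ptr(net);		/* success: keep the attachment */
	return 0;
}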
diff --git a/drivers/usb/gadget/function/f_eem.c b/drivers/usb/gadget/function/f_eem.c index 0142a0e..ac37d7c 100644 --- a/drivers/usb/gadget/function/f_eem.c +++ b/drivers/usb/gadget/function/f_eem.c
@@ -7,6 +7,7 @@ * Copyright (C) 2009 EF Johnson Technologies */ +#include <linux/cleanup.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/device.h> @@ -251,24 +252,22 @@ static int eem_bind(struct usb_configuration *c, struct usb_function *f) struct usb_ep *ep; struct f_eem_opts *eem_opts; + struct net_device *net __free(detach_gadget) = NULL; eem_opts = container_of(f->fi, struct f_eem_opts, func_inst); - /* - * in drivers/usb/gadget/configfs.c:configfs_composite_bind() - * configurations are bound in sequence with list_for_each_entry, - * in each configuration its functions are bound in sequence - * with list_for_each_entry, so we assume no race condition - * with regard to eem_opts->bound access - */ - if (!eem_opts->bound) { - mutex_lock(&eem_opts->lock); - gether_set_gadget(eem_opts->net, cdev->gadget); - status = gether_register_netdev(eem_opts->net); - mutex_unlock(&eem_opts->lock); - if (status) - return status; - eem_opts->bound = true; - } + + scoped_guard(mutex, &eem_opts->lock) + if (eem_opts->bind_count == 0 && !eem_opts->bound) { + if (!device_is_registered(&eem_opts->net->dev)) { + gether_set_gadget(eem_opts->net, cdev->gadget); + status = gether_register_netdev(eem_opts->net); + } else + status = gether_attach_gadget(eem_opts->net, cdev->gadget); + + if (status) + return status; + net = eem_opts->net; + } us = usb_gstrings_attach(cdev, eem_strings, ARRAY_SIZE(eem_string_defs)); @@ -279,21 +278,19 @@ static int eem_bind(struct usb_configuration *c, struct usb_function *f) /* allocate instance-specific interface IDs */ status = usb_interface_id(c, f); if (status < 0) - goto fail; + return status; eem->ctrl_id = status; eem_intf.bInterfaceNumber = status; - status = -ENODEV; - /* allocate instance-specific endpoints */ ep = usb_ep_autoconfig(cdev->gadget, &eem_fs_in_desc); if (!ep) - goto fail; + return -ENODEV; eem->port.in_ep = ep; ep = usb_ep_autoconfig(cdev->gadget, &eem_fs_out_desc); if (!ep) - goto fail; + return -ENODEV; eem->port.out_ep = ep; /* support all relevant hardware speeds... we expect that when @@ -309,16 +306,14 @@ static int eem_bind(struct usb_configuration *c, struct usb_function *f) status = usb_assign_descriptors(f, eem_fs_function, eem_hs_function, eem_ss_function, eem_ss_function); if (status) - goto fail; + return status; + + eem_opts->bind_count++; + retain_and_null_ptr(net); DBG(cdev, "CDC Ethernet (EEM): IN/%s OUT/%s\n", eem->port.in_ep->name, eem->port.out_ep->name); return 0; - -fail: - ERROR(cdev, "%s: can't bind, err %d\n", f->name, status); - - return status; } static void eem_cmd_complete(struct usb_ep *ep, struct usb_request *req) @@ -597,7 +592,7 @@ static void eem_free_inst(struct usb_function_instance *f) struct f_eem_opts *opts; opts = container_of(f, struct f_eem_opts, func_inst); - if (opts->bound) + if (device_is_registered(&opts->net->dev)) gether_cleanup(netdev_priv(opts->net)); else free_netdev(opts->net); @@ -640,9 +635,17 @@ static void eem_free(struct usb_function *f) static void eem_unbind(struct usb_configuration *c, struct usb_function *f) { + struct f_eem_opts *opts; + DBG(c->cdev, "eem unbind\n"); + opts = container_of(f->fi, struct f_eem_opts, func_inst); + usb_free_all_descriptors(f); + + opts->bind_count--; + if (opts->bind_count == 0 && !opts->bound) + gether_detach_gadget(opts->net); } static struct usb_function *eem_alloc(struct usb_function_instance *fi)
diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 8c855c0..e5ccaec 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c
@@ -1207,9 +1207,11 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) if (!hidg->interval_user_set) { hidg_fs_in_ep_desc.bInterval = 10; hidg_hs_in_ep_desc.bInterval = 4; + hidg_ss_in_ep_desc.bInterval = 4; } else { hidg_fs_in_ep_desc.bInterval = hidg->interval; hidg_hs_in_ep_desc.bInterval = hidg->interval; + hidg_ss_in_ep_desc.bInterval = hidg->interval; } hidg_ss_out_comp_desc.wBytesPerInterval = @@ -1239,9 +1241,11 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) if (!hidg->interval_user_set) { hidg_fs_out_ep_desc.bInterval = 10; hidg_hs_out_ep_desc.bInterval = 4; + hidg_ss_out_ep_desc.bInterval = 4; } else { hidg_fs_out_ep_desc.bInterval = hidg->interval; hidg_hs_out_ep_desc.bInterval = hidg->interval; + hidg_ss_out_ep_desc.bInterval = hidg->interval; } status = usb_assign_descriptors(f, hidg_fs_descriptors_intout, @@ -1258,17 +1262,8 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) if (status) goto fail; - spin_lock_init(&hidg->write_spinlock); hidg->write_pending = 1; hidg->req = NULL; - spin_lock_init(&hidg->read_spinlock); - spin_lock_init(&hidg->get_report_spinlock); - init_waitqueue_head(&hidg->write_queue); - init_waitqueue_head(&hidg->read_queue); - init_waitqueue_head(&hidg->get_queue); - init_waitqueue_head(&hidg->get_id_queue); - INIT_LIST_HEAD(&hidg->completed_out_req); - INIT_LIST_HEAD(&hidg->report_list); INIT_WORK(&hidg->work, get_report_workqueue_handler); hidg->workqueue = alloc_workqueue("report_work", @@ -1604,6 +1599,16 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) mutex_lock(&opts->lock); + spin_lock_init(&hidg->write_spinlock); + spin_lock_init(&hidg->read_spinlock); + spin_lock_init(&hidg->get_report_spinlock); + init_waitqueue_head(&hidg->write_queue); + init_waitqueue_head(&hidg->read_queue); + init_waitqueue_head(&hidg->get_queue); + init_waitqueue_head(&hidg->get_id_queue); + INIT_LIST_HEAD(&hidg->completed_out_req); + INIT_LIST_HEAD(&hidg->report_list); + device_initialize(&hidg->dev); hidg->dev.release = hidg_release; hidg->dev.class = &hidg_class;
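Moving the spin_lock_init()/init_waitqueue_head()/INIT_LIST_HEAD() calls from hidg_bind() to hidg_alloc() makes the primitives valid for the object's whole lifetime instead of only after a successful bind. The general shape of that rule (struct demo_hid is illustrative):

#include <linux/slab.h>
#include <linux/wait.h>

struct demo_hid {
	spinlock_t lock;
	wait_queue_head_t read_queue;
	struct list_head completed;
};

static struct demo_hid *demo_hid_alloc(void)
{
	struct demo_hid *d = kzalloc(sizeof(*d), GFP_KERNEL);

	if (!d)
		return NULL;
	/* initialized before anything else can reach the object */
	spin_lock_init(&d->lock);
	init_waitqueue_head(&d->read_queue);
	INIT_LIST_HEAD(&d->completed);
	return d;
}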
diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 6af96e2..b7b06cb 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -180,6 +180,7 @@ #include <linux/kthread.h> #include <linux/sched/signal.h> #include <linux/limits.h> +#include <linux/overflow.h> #include <linux/pagemap.h> #include <linux/rwsem.h> #include <linux/slab.h> @@ -1853,8 +1854,15 @@ static int check_command_size_in_blocks(struct fsg_common *common, int cmnd_size, enum data_direction data_dir, unsigned int mask, int needs_medium, const char *name) { - if (common->curlun) - common->data_size_from_cmnd <<= common->curlun->blkbits; + if (common->curlun) { + if (check_shl_overflow(common->data_size_from_cmnd, + common->curlun->blkbits, + &common->data_size_from_cmnd)) { + common->phase_error = 1; + return -EINVAL; + } + } + return check_command(common, cmnd_size, data_dir, mask, needs_medium, name); }
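check_shl_overflow() evaluates the shift at the destination type's width and returns true when bits were lost, so a host-supplied transfer size that cannot be represented now raises a phase error instead of silently wrapping. A minimal usage sketch with illustrative names:

#include <linux/overflow.h>

static int demo_blocks_to_bytes(u32 nblocks, unsigned int blkbits, u32 *bytes)
{
	/* true return: (nblocks << blkbits) does not fit in *bytes */
	if (check_shl_overflow(nblocks, blkbits, bytes))
		return -EINVAL;
	return 0;
}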
diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index 14fc7dc..a6fa5ed 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c
@@ -83,11 +83,6 @@ static inline struct f_ncm *func_to_ncm(struct usb_function *f) return container_of(f, struct f_ncm, port.func); } -static inline struct f_ncm_opts *func_to_ncm_opts(struct usb_function *f) -{ - return container_of(f->fi, struct f_ncm_opts, func_inst); -} - /*-------------------------------------------------------------------------*/ /* @@ -864,7 +859,6 @@ static int ncm_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl) static int ncm_set_alt(struct usb_function *f, unsigned intf, unsigned alt) { struct f_ncm *ncm = func_to_ncm(f); - struct f_ncm_opts *opts = func_to_ncm_opts(f); struct usb_composite_dev *cdev = f->config->cdev; /* Control interface has only altsetting 0 */ @@ -887,13 +881,12 @@ static int ncm_set_alt(struct usb_function *f, unsigned intf, unsigned alt) if (alt > 1) goto fail; - scoped_guard(mutex, &opts->lock) - if (opts->net) { - DBG(cdev, "reset ncm\n"); - opts->net = NULL; - gether_disconnect(&ncm->port); - ncm_reset_values(ncm); - } + if (ncm->netdev) { + DBG(cdev, "reset ncm\n"); + ncm->netdev = NULL; + gether_disconnect(&ncm->port); + ncm_reset_values(ncm); + } /* * CDC Network only sends data in non-default altsettings. @@ -926,8 +919,7 @@ static int ncm_set_alt(struct usb_function *f, unsigned intf, unsigned alt) net = gether_connect(&ncm->port); if (IS_ERR(net)) return PTR_ERR(net); - scoped_guard(mutex, &opts->lock) - opts->net = net; + ncm->netdev = net; } spin_lock(&ncm->lock); @@ -1374,16 +1366,14 @@ static int ncm_unwrap_ntb(struct gether *port, static void ncm_disable(struct usb_function *f) { struct f_ncm *ncm = func_to_ncm(f); - struct f_ncm_opts *opts = func_to_ncm_opts(f); struct usb_composite_dev *cdev = f->config->cdev; DBG(cdev, "ncm deactivated\n"); - scoped_guard(mutex, &opts->lock) - if (opts->net) { - opts->net = NULL; - gether_disconnect(&ncm->port); - } + if (ncm->netdev) { + ncm->netdev = NULL; + gether_disconnect(&ncm->port); + } if (ncm->notify->enabled) { usb_ep_disable(ncm->notify); @@ -1443,44 +1433,41 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f) { struct usb_composite_dev *cdev = c->cdev; struct f_ncm *ncm = func_to_ncm(f); - struct f_ncm_opts *ncm_opts = func_to_ncm_opts(f); struct usb_string *us; int status = 0; struct usb_ep *ep; + struct f_ncm_opts *ncm_opts; struct usb_os_desc_table *os_desc_table __free(kfree) = NULL; - struct net_device *netdev __free(free_gether_netdev) = NULL; + struct net_device *net __free(detach_gadget) = NULL; struct usb_request *request __free(free_usb_request) = NULL; if (!can_support_ecm(cdev->gadget)) return -EINVAL; + ncm_opts = container_of(f->fi, struct f_ncm_opts, func_inst); + if (cdev->use_os_string) { os_desc_table = kzalloc(sizeof(*os_desc_table), GFP_KERNEL); if (!os_desc_table) return -ENOMEM; } - netdev = gether_setup_default(); - if (IS_ERR(netdev)) - return -ENOMEM; + scoped_guard(mutex, &ncm_opts->lock) + if (ncm_opts->bind_count == 0) { + if (!device_is_registered(&ncm_opts->net->dev)) { + ncm_opts->net->mtu = (ncm_opts->max_segment_size - ETH_HLEN); + gether_set_gadget(ncm_opts->net, cdev->gadget); + status = gether_register_netdev(ncm_opts->net); + } else + status = gether_attach_gadget(ncm_opts->net, cdev->gadget); - scoped_guard(mutex, &ncm_opts->lock) { - gether_apply_opts(netdev, &ncm_opts->net_opts); - netdev->mtu = ncm_opts->max_segment_size - ETH_HLEN; - } + if (status) + return status; + net = ncm_opts->net; + } - gether_set_gadget(netdev, cdev->gadget); - status = gether_register_netdev(netdev); - if (status) - 
return status; - - /* export host's Ethernet address in CDC format */ - status = gether_get_host_addr_cdc(netdev, ncm->ethaddr, - sizeof(ncm->ethaddr)); - if (status < 12) - return -EINVAL; - ncm_string_defs[STRING_MAC_IDX].s = ncm->ethaddr; + ncm_string_defs[1].s = ncm->ethaddr; us = usb_gstrings_attach(cdev, ncm_strings, ARRAY_SIZE(ncm_string_defs)); @@ -1578,8 +1565,9 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f) f->os_desc_n = 1; } ncm->notify_req = no_free_ptr(request); - ncm->netdev = no_free_ptr(netdev); - ncm->port.ioport = netdev_priv(ncm->netdev); + + ncm_opts->bind_count++; + retain_and_null_ptr(net); DBG(cdev, "CDC Network: IN/%s OUT/%s NOTIFY/%s\n", ncm->port.in_ep->name, ncm->port.out_ep->name, @@ -1594,19 +1582,19 @@ static inline struct f_ncm_opts *to_f_ncm_opts(struct config_item *item) } /* f_ncm_item_ops */ -USB_ETHER_OPTS_ITEM(ncm); +USB_ETHERNET_CONFIGFS_ITEM(ncm); /* f_ncm_opts_dev_addr */ -USB_ETHER_OPTS_ATTR_DEV_ADDR(ncm); +USB_ETHERNET_CONFIGFS_ITEM_ATTR_DEV_ADDR(ncm); /* f_ncm_opts_host_addr */ -USB_ETHER_OPTS_ATTR_HOST_ADDR(ncm); +USB_ETHERNET_CONFIGFS_ITEM_ATTR_HOST_ADDR(ncm); /* f_ncm_opts_qmult */ -USB_ETHER_OPTS_ATTR_QMULT(ncm); +USB_ETHERNET_CONFIGFS_ITEM_ATTR_QMULT(ncm); /* f_ncm_opts_ifname */ -USB_ETHER_OPTS_ATTR_IFNAME(ncm); +USB_ETHERNET_CONFIGFS_ITEM_ATTR_IFNAME(ncm); static ssize_t ncm_opts_max_segment_size_show(struct config_item *item, char *page) @@ -1672,27 +1660,34 @@ static void ncm_free_inst(struct usb_function_instance *f) struct f_ncm_opts *opts; opts = container_of(f, struct f_ncm_opts, func_inst); + if (device_is_registered(&opts->net->dev)) + gether_cleanup(netdev_priv(opts->net)); + else + free_netdev(opts->net); kfree(opts->ncm_interf_group); kfree(opts); } static struct usb_function_instance *ncm_alloc_inst(void) { - struct usb_function_instance *ret; + struct f_ncm_opts *opts; struct usb_os_desc *descs[1]; char *names[1]; struct config_group *ncm_interf_group; - struct f_ncm_opts *opts __free(kfree) = kzalloc_obj(*opts); + opts = kzalloc_obj(*opts); if (!opts) return ERR_PTR(-ENOMEM); - - opts->net = NULL; opts->ncm_os_desc.ext_compat_id = opts->ncm_ext_compat_id; - gether_setup_opts_default(&opts->net_opts, "usb"); mutex_init(&opts->lock); opts->func_inst.free_func_inst = ncm_free_inst; + opts->net = gether_setup_default(); + if (IS_ERR(opts->net)) { + struct net_device *net = opts->net; + kfree(opts); + return ERR_CAST(net); + } opts->max_segment_size = ETH_FRAME_LEN; INIT_LIST_HEAD(&opts->ncm_os_desc.ext_prop); @@ -1703,30 +1698,37 @@ static struct usb_function_instance *ncm_alloc_inst(void) ncm_interf_group = usb_os_desc_prepare_interf_dir(&opts->func_inst.group, 1, descs, names, THIS_MODULE); - if (IS_ERR(ncm_interf_group)) + if (IS_ERR(ncm_interf_group)) { + ncm_free_inst(&opts->func_inst); return ERR_CAST(ncm_interf_group); + } opts->ncm_interf_group = ncm_interf_group; - ret = &opts->func_inst; - retain_and_null_ptr(opts); - return ret; + return &opts->func_inst; } static void ncm_free(struct usb_function *f) { - struct f_ncm_opts *opts = func_to_ncm_opts(f); + struct f_ncm *ncm; + struct f_ncm_opts *opts; - scoped_guard(mutex, &opts->lock) - opts->refcnt--; - kfree(func_to_ncm(f)); + ncm = func_to_ncm(f); + opts = container_of(f->fi, struct f_ncm_opts, func_inst); + kfree(ncm); + mutex_lock(&opts->lock); + opts->refcnt--; + mutex_unlock(&opts->lock); } static void ncm_unbind(struct usb_configuration *c, struct usb_function *f) { struct f_ncm *ncm = func_to_ncm(f); + struct f_ncm_opts *ncm_opts; 
DBG(c->cdev, "ncm unbind\n"); + ncm_opts = container_of(f->fi, struct f_ncm_opts, func_inst); + hrtimer_cancel(&ncm->task_timer); kfree(f->os_desc_table); @@ -1743,14 +1745,16 @@ static void ncm_unbind(struct usb_configuration *c, struct usb_function *f) kfree(ncm->notify_req->buf); usb_ep_free_request(ncm->notify, ncm->notify_req); - ncm->port.ioport = NULL; - gether_cleanup(netdev_priv(ncm->netdev)); + ncm_opts->bind_count--; + if (ncm_opts->bind_count == 0) + gether_detach_gadget(ncm_opts->net); } static struct usb_function *ncm_alloc(struct usb_function_instance *fi) { struct f_ncm *ncm; struct f_ncm_opts *opts; + int status; /* allocate and initialize one new instance */ ncm = kzalloc(sizeof(*ncm), GFP_KERNEL); @@ -1758,12 +1762,22 @@ static struct usb_function *ncm_alloc(struct usb_function_instance *fi) return ERR_PTR(-ENOMEM); opts = container_of(fi, struct f_ncm_opts, func_inst); + mutex_lock(&opts->lock); + opts->refcnt++; - scoped_guard(mutex, &opts->lock) - opts->refcnt++; + /* export host's Ethernet address in CDC format */ + status = gether_get_host_addr_cdc(opts->net, ncm->ethaddr, + sizeof(ncm->ethaddr)); + if (status < 12) { /* strlen("01234567890a") */ + kfree(ncm); + mutex_unlock(&opts->lock); + return ERR_PTR(-EINVAL); + } spin_lock_init(&ncm->lock); ncm_reset_values(ncm); + ncm->port.ioport = netdev_priv(opts->net); + mutex_unlock(&opts->lock); ncm->port.is_fixed = true; ncm->port.supports_multi_frame = true;
diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c index 8b11d8d..7de1c5f 100644 --- a/drivers/usb/gadget/function/f_rndis.c +++ b/drivers/usb/gadget/function/f_rndis.c
@@ -11,6 +11,7 @@ /* #define VERBOSE_DEBUG */ +#include <linux/cleanup.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/module.h> @@ -665,6 +666,7 @@ rndis_bind(struct usb_configuration *c, struct usb_function *f) struct f_rndis_opts *rndis_opts; struct usb_os_desc_table *os_desc_table __free(kfree) = NULL; + struct net_device *net __free(detach_gadget) = NULL; struct usb_request *request __free(free_usb_request) = NULL; if (!can_support_rndis(c)) @@ -678,23 +680,22 @@ rndis_bind(struct usb_configuration *c, struct usb_function *f) return -ENOMEM; } - rndis_iad_descriptor.bFunctionClass = rndis_opts->class; - rndis_iad_descriptor.bFunctionSubClass = rndis_opts->subclass; - rndis_iad_descriptor.bFunctionProtocol = rndis_opts->protocol; + scoped_guard(mutex, &rndis_opts->lock) { + rndis_iad_descriptor.bFunctionClass = rndis_opts->class; + rndis_iad_descriptor.bFunctionSubClass = rndis_opts->subclass; + rndis_iad_descriptor.bFunctionProtocol = rndis_opts->protocol; - /* - * in drivers/usb/gadget/configfs.c:configfs_composite_bind() - * configurations are bound in sequence with list_for_each_entry, - * in each configuration its functions are bound in sequence - * with list_for_each_entry, so we assume no race condition - * with regard to rndis_opts->bound access - */ - if (!rndis_opts->bound) { - gether_set_gadget(rndis_opts->net, cdev->gadget); - status = gether_register_netdev(rndis_opts->net); - if (status) - return status; - rndis_opts->bound = true; + if (rndis_opts->bind_count == 0 && !rndis_opts->borrowed_net) { + if (!device_is_registered(&rndis_opts->net->dev)) { + gether_set_gadget(rndis_opts->net, cdev->gadget); + status = gether_register_netdev(rndis_opts->net); + } else + status = gether_attach_gadget(rndis_opts->net, cdev->gadget); + + if (status) + return status; + net = rndis_opts->net; + } } us = usb_gstrings_attach(cdev, rndis_strings, @@ -793,6 +794,9 @@ rndis_bind(struct usb_configuration *c, struct usb_function *f) } rndis->notify_req = no_free_ptr(request); + rndis_opts->bind_count++; + retain_and_null_ptr(net); + /* NOTE: all that is done without knowing or caring about * the network link ... which is unavailable to this code * until we're activated via set_alt(). 
@@ -809,11 +813,11 @@ void rndis_borrow_net(struct usb_function_instance *f, struct net_device *net) struct f_rndis_opts *opts; opts = container_of(f, struct f_rndis_opts, func_inst); - if (opts->bound) + if (device_is_registered(&opts->net->dev)) gether_cleanup(netdev_priv(opts->net)); else free_netdev(opts->net); - opts->borrowed_net = opts->bound = true; + opts->borrowed_net = true; opts->net = net; } EXPORT_SYMBOL_GPL(rndis_borrow_net); @@ -871,7 +875,7 @@ static void rndis_free_inst(struct usb_function_instance *f) opts = container_of(f, struct f_rndis_opts, func_inst); if (!opts->borrowed_net) { - if (opts->bound) + if (device_is_registered(&opts->net->dev)) gether_cleanup(netdev_priv(opts->net)); else free_netdev(opts->net); @@ -940,6 +944,9 @@ static void rndis_free(struct usb_function *f) static void rndis_unbind(struct usb_configuration *c, struct usb_function *f) { struct f_rndis *rndis = func_to_rndis(f); + struct f_rndis_opts *rndis_opts; + + rndis_opts = container_of(f->fi, struct f_rndis_opts, func_inst); kfree(f->os_desc_table); f->os_desc_n = 0; @@ -947,6 +954,10 @@ static void rndis_unbind(struct usb_configuration *c, struct usb_function *f) kfree(rndis->notify_req->buf); usb_ep_free_request(rndis->notify, rndis->notify_req); + + rndis_opts->bind_count--; + if (rndis_opts->bind_count == 0 && !rndis_opts->borrowed_net) + gether_detach_gadget(rndis_opts->net); } static struct usb_function *rndis_alloc(struct usb_function_instance *fi)
diff --git a/drivers/usb/gadget/function/f_subset.c b/drivers/usb/gadget/function/f_subset.c index 0760723..6e3265b 100644 --- a/drivers/usb/gadget/function/f_subset.c +++ b/drivers/usb/gadget/function/f_subset.c
@@ -6,6 +6,7 @@ * Copyright (C) 2008 Nokia Corporation */ +#include <linux/cleanup.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/module.h> @@ -298,25 +299,22 @@ geth_bind(struct usb_configuration *c, struct usb_function *f) struct usb_ep *ep; struct f_gether_opts *gether_opts; + struct net_device *net __free(detach_gadget) = NULL; gether_opts = container_of(f->fi, struct f_gether_opts, func_inst); - /* - * in drivers/usb/gadget/configfs.c:configfs_composite_bind() - * configurations are bound in sequence with list_for_each_entry, - * in each configuration its functions are bound in sequence - * with list_for_each_entry, so we assume no race condition - * with regard to gether_opts->bound access - */ - if (!gether_opts->bound) { - mutex_lock(&gether_opts->lock); - gether_set_gadget(gether_opts->net, cdev->gadget); - status = gether_register_netdev(gether_opts->net); - mutex_unlock(&gether_opts->lock); - if (status) - return status; - gether_opts->bound = true; - } + scoped_guard(mutex, &gether_opts->lock) + if (gether_opts->bind_count == 0 && !gether_opts->bound) { + if (!device_is_registered(&gether_opts->net->dev)) { + gether_set_gadget(gether_opts->net, cdev->gadget); + status = gether_register_netdev(gether_opts->net); + } else + status = gether_attach_gadget(gether_opts->net, cdev->gadget); + + if (status) + return status; + net = gether_opts->net; + } us = usb_gstrings_attach(cdev, geth_strings, ARRAY_SIZE(geth_string_defs)); @@ -329,20 +327,18 @@ geth_bind(struct usb_configuration *c, struct usb_function *f) /* allocate instance-specific interface IDs */ status = usb_interface_id(c, f); if (status < 0) - goto fail; + return status; subset_data_intf.bInterfaceNumber = status; - status = -ENODEV; - /* allocate instance-specific endpoints */ ep = usb_ep_autoconfig(cdev->gadget, &fs_subset_in_desc); if (!ep) - goto fail; + return -ENODEV; geth->port.in_ep = ep; ep = usb_ep_autoconfig(cdev->gadget, &fs_subset_out_desc); if (!ep) - goto fail; + return -ENODEV; geth->port.out_ep = ep; /* support all relevant hardware speeds... we expect that when @@ -360,21 +356,19 @@ geth_bind(struct usb_configuration *c, struct usb_function *f) status = usb_assign_descriptors(f, fs_eth_function, hs_eth_function, ss_eth_function, ss_eth_function); if (status) - goto fail; + return status; /* NOTE: all that is done without knowing or caring about * the network link ... which is unavailable to this code * until we're activated via set_alt(). 
*/ + gether_opts->bind_count++; + retain_and_null_ptr(net); + DBG(cdev, "CDC Subset: IN/%s OUT/%s\n", geth->port.in_ep->name, geth->port.out_ep->name); return 0; - -fail: - ERROR(cdev, "%s: can't bind, err %d\n", f->name, status); - - return status; } static inline struct f_gether_opts *to_f_gether_opts(struct config_item *item) @@ -417,7 +411,7 @@ static void geth_free_inst(struct usb_function_instance *f) struct f_gether_opts *opts; opts = container_of(f, struct f_gether_opts, func_inst); - if (opts->bound) + if (device_is_registered(&opts->net->dev)) gether_cleanup(netdev_priv(opts->net)); else free_netdev(opts->net); @@ -449,15 +443,28 @@ static struct usb_function_instance *geth_alloc_inst(void) static void geth_free(struct usb_function *f) { struct f_gether *eth; + struct f_gether_opts *opts; + + opts = container_of(f->fi, struct f_gether_opts, func_inst); eth = func_to_geth(f); + scoped_guard(mutex, &opts->lock) + opts->refcnt--; kfree(eth); } static void geth_unbind(struct usb_configuration *c, struct usb_function *f) { + struct f_gether_opts *opts; + + opts = container_of(f->fi, struct f_gether_opts, func_inst); + geth_string_defs[0].id = 0; usb_free_all_descriptors(f); + + opts->bind_count--; + if (opts->bind_count == 0 && !opts->bound) + gether_detach_gadget(opts->net); } static struct usb_function *geth_alloc(struct usb_function_instance *fi)
diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c index ec050d8..a7853dc 100644 --- a/drivers/usb/gadget/function/f_tcm.c +++ b/drivers/usb/gadget/function/f_tcm.c
@@ -1222,6 +1222,13 @@ static void usbg_submit_cmd(struct usbg_cmd *cmd) se_cmd = &cmd->se_cmd; tpg = cmd->fu->tpg; tv_nexus = tpg->tpg_nexus; + if (!tv_nexus) { + struct usb_gadget *gadget = fuas_to_gadget(cmd->fu); + + dev_err(&gadget->dev, "Missing nexus, ignoring command\n"); + return; + } + dir = get_cmd_dir(cmd->cmd_buf); if (dir < 0) goto out; @@ -1483,6 +1490,13 @@ static void bot_cmd_work(struct work_struct *work) se_cmd = &cmd->se_cmd; tpg = cmd->fu->tpg; tv_nexus = tpg->tpg_nexus; + if (!tv_nexus) { + struct usb_gadget *gadget = fuas_to_gadget(cmd->fu); + + dev_err(&gadget->dev, "Missing nexus, ignoring command\n"); + return; + } + dir = get_cmd_dir(cmd->cmd_buf); if (dir < 0) goto out;
diff --git a/drivers/usb/gadget/function/f_uac1_legacy.c b/drivers/usb/gadget/function/f_uac1_legacy.c index a0c953a..5d201a2 100644 --- a/drivers/usb/gadget/function/f_uac1_legacy.c +++ b/drivers/usb/gadget/function/f_uac1_legacy.c
@@ -360,19 +360,46 @@ static int f_audio_out_ep_complete(struct usb_ep *ep, struct usb_request *req) static void f_audio_complete(struct usb_ep *ep, struct usb_request *req) { struct f_audio *audio = req->context; - int status = req->status; - u32 data = 0; struct usb_ep *out_ep = audio->out_ep; - switch (status) { - - case 0: /* normal completion? */ - if (ep == out_ep) + switch (req->status) { + case 0: + if (ep == out_ep) { f_audio_out_ep_complete(ep, req); - else if (audio->set_con) { - memcpy(&data, req->buf, req->length); - audio->set_con->set(audio->set_con, audio->set_cmd, - le16_to_cpu(data)); + } else if (audio->set_con) { + struct usb_audio_control *con = audio->set_con; + u8 type = con->type; + u32 data; + bool valid_request = false; + + switch (type) { + case UAC_FU_MUTE: { + u8 value; + + if (req->actual == sizeof(value)) { + memcpy(&value, req->buf, sizeof(value)); + data = value; + valid_request = true; + } + break; + } + case UAC_FU_VOLUME: { + __le16 value; + + if (req->actual == sizeof(value)) { + memcpy(&value, req->buf, sizeof(value)); + data = le16_to_cpu(value); + valid_request = true; + } + break; + } + } + + if (valid_request) + con->set(con, audio->set_cmd, data); + else + usb_ep_set_halt(ep); + audio->set_con = NULL; } break;
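The completion handler now sizes its copy by the control type and only trusts payloads whose req->actual matches exactly, halting the endpoint for anything else rather than reading stale buffer contents. The validation reduced to a generic helper (the name is hypothetical):

static bool demo_copy_le16_exact(const struct usb_request *req, u16 *out)
{
	__le16 raw;

	if (req->actual != sizeof(raw))
		return false;		/* short or overlong transfer: reject */
	memcpy(&raw, req->buf, sizeof(raw));
	*out = le16_to_cpu(raw);
	return true;
}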
diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 494fdbc..8d404d8 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c
@@ -413,6 +413,12 @@ uvc_function_disconnect(struct uvc_device *uvc) { int ret; + guard(mutex)(&uvc->lock); + if (uvc->func_unbound) { + dev_dbg(&uvc->vdev.dev, "skipping function deactivate (unbound)\n"); + return; + } + if ((ret = usb_function_deactivate(&uvc->func)) < 0) uvcg_info(&uvc->func, "UVC disconnect failed with %d\n", ret); } @@ -431,6 +437,15 @@ static ssize_t function_name_show(struct device *dev, static DEVICE_ATTR_RO(function_name); +static void uvc_vdev_release(struct video_device *vdev) +{ + struct uvc_device *uvc = video_get_drvdata(vdev); + + /* Signal uvc_function_unbind() that the video device has been released */ + if (uvc->vdev_release_done) + complete(uvc->vdev_release_done); +} + static int uvc_register_video(struct uvc_device *uvc) { @@ -443,7 +458,7 @@ uvc_register_video(struct uvc_device *uvc) uvc->vdev.v4l2_dev->dev = &cdev->gadget->dev; uvc->vdev.fops = &uvc_v4l2_fops; uvc->vdev.ioctl_ops = &uvc_v4l2_ioctl_ops; - uvc->vdev.release = video_device_release_empty; + uvc->vdev.release = uvc_vdev_release; uvc->vdev.vfl_dir = VFL_DIR_TX; uvc->vdev.lock = &uvc->video.mutex; uvc->vdev.device_caps = V4L2_CAP_VIDEO_OUTPUT | V4L2_CAP_STREAMING; @@ -659,6 +674,8 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) int ret = -EINVAL; uvcg_info(f, "%s()\n", __func__); + scoped_guard(mutex, &uvc->lock) + uvc->func_unbound = false; opts = fi_to_f_uvc_opts(f->fi); /* Sanity check the streaming endpoint module parameters. */ @@ -988,12 +1005,19 @@ static void uvc_free(struct usb_function *f) static void uvc_function_unbind(struct usb_configuration *c, struct usb_function *f) { + DECLARE_COMPLETION_ONSTACK(vdev_release_done); struct usb_composite_dev *cdev = c->cdev; struct uvc_device *uvc = to_uvc(f); struct uvc_video *video = &uvc->video; long wait_ret = 1; + bool connected; uvcg_info(f, "%s()\n", __func__); + scoped_guard(mutex, &uvc->lock) { + uvc->func_unbound = true; + uvc->vdev_release_done = &vdev_release_done; + connected = uvc->func_connected; + } kthread_cancel_work_sync(&video->hw_submit); @@ -1006,7 +1030,7 @@ static void uvc_function_unbind(struct usb_configuration *c, * though the video device removal uevent. Allow some time for the * application to close out before things get deleted. 
*/ - if (uvc->func_connected) { + if (connected) { uvcg_dbg(f, "waiting for clean disconnect\n"); wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue, uvc->func_connected == false, msecs_to_jiffies(500)); @@ -1017,7 +1041,10 @@ static void uvc_function_unbind(struct usb_configuration *c, video_unregister_device(&uvc->vdev); v4l2_device_unregister(&uvc->v4l2_dev); - if (uvc->func_connected) { + scoped_guard(mutex, &uvc->lock) + connected = uvc->func_connected; + + if (connected) { /* * Wait for the release to occur to ensure there are no longer any * pending operations that may cause panics when resources are cleaned @@ -1029,6 +1056,10 @@ static void uvc_function_unbind(struct usb_configuration *c, uvcg_dbg(f, "done waiting for release with ret: %ld\n", wait_ret); } + /* Wait for the video device to be released */ + wait_for_completion(&vdev_release_done); + uvc->vdev_release_done = NULL; + usb_ep_free_request(cdev->gadget->ep0, uvc->control_req); kfree(uvc->control_buf); @@ -1047,6 +1078,8 @@ static struct usb_function *uvc_alloc(struct usb_function_instance *fi) return ERR_PTR(-ENOMEM); mutex_init(&uvc->video.mutex); + mutex_init(&uvc->lock); + uvc->func_unbound = true; uvc->state = UVC_STATE_DISCONNECTED; init_waitqueue_head(&uvc->func_connected_queue); opts = fi_to_f_uvc_opts(fi);
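The on-stack completion above guarantees that the V4L2 core has dropped its final reference to the video_device before uvc_function_unbind() returns and the surrounding memory can be freed. Reduced to its essentials (struct demo_owner is a hypothetical stand-in for struct uvc_device):

#include <media/v4l2-dev.h>

struct demo_owner {
	struct video_device vdev;
	struct completion *release_done;
};

static void demo_release(struct video_device *vdev)
{
	struct demo_owner *o = video_get_drvdata(vdev);

	if (o->release_done)
		complete(o->release_done);	/* last V4L2 reference dropped */
}

static void demo_unbind(struct demo_owner *o)
{
	DECLARE_COMPLETION_ONSTACK(done);

	o->release_done = &done;	/* publish before unregistering */
	video_unregister_device(&o->vdev);
	wait_for_completion(&done);	/* now safe to tear down 'o' */
	o->release_done = NULL;
}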
diff --git a/drivers/usb/gadget/function/u_ecm.h b/drivers/usb/gadget/function/u_ecm.h index 77cfb89..7f666b9 100644 --- a/drivers/usb/gadget/function/u_ecm.h +++ b/drivers/usb/gadget/function/u_ecm.h
@@ -15,17 +15,26 @@ #include <linux/usb/composite.h> +/** + * struct f_ecm_opts - ECM function options + * @func_inst: USB function instance. + * @net: The net_device associated with the ECM function. + * @bound: True if the net_device is shared and pre-registered during the + * legacy composite driver's bind phase (e.g., multi.c). If false, + * the ECM function will register the net_device during its own + * bind phase. + * @bind_count: Tracks the number of configurations the ECM function is + * bound to, preventing double-registration of the @net device. + * @lock: Protects the data from concurrent access by configfs read/write + * and create symlink/remove symlink operations. + * @refcnt: Reference counter for the function instance. + */ struct f_ecm_opts { struct usb_function_instance func_inst; struct net_device *net; bool bound; + int bind_count; - /* - * Read/write access to configfs attributes is handled by configfs. - * - * This is to protect the data from concurrent access by read/write - * and create symlink/remove symlink. - */ struct mutex lock; int refcnt; };
diff --git a/drivers/usb/gadget/function/u_eem.h b/drivers/usb/gadget/function/u_eem.h index 3bd85df..78ef558 100644 --- a/drivers/usb/gadget/function/u_eem.h +++ b/drivers/usb/gadget/function/u_eem.h
@@ -15,17 +15,26 @@ #include <linux/usb/composite.h> +/** + * struct f_eem_opts - EEM function options + * @func_inst: USB function instance. + * @net: The net_device associated with the EEM function. + * @bound: True if the net_device is shared and pre-registered during the + * legacy composite driver's bind phase (e.g., multi.c). If false, + * the EEM function will register the net_device during its own + * bind phase. + * @bind_count: Tracks the number of configurations the EEM function is + * bound to, preventing double-registration of the @net device. + * @lock: Protects the data from concurrent access by configfs read/write + * and create symlink/remove symlink operations. + * @refcnt: Reference counter for the function instance. + */ struct f_eem_opts { struct usb_function_instance func_inst; struct net_device *net; bool bound; + int bind_count; - /* - * Read/write access to configfs attributes is handled by configfs. - * - * This is to protect the data from concurrent access by read/write - * and create symlink/remove symlink. - */ struct mutex lock; int refcnt; };
diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 338f6e2..59d85d6a 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c
@@ -113,8 +113,10 @@ static void eth_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *p) strscpy(p->driver, "g_ether", sizeof(p->driver)); strscpy(p->version, UETH__VERSION, sizeof(p->version)); - strscpy(p->fw_version, dev->gadget->name, sizeof(p->fw_version)); - strscpy(p->bus_info, dev_name(&dev->gadget->dev), sizeof(p->bus_info)); + if (dev->gadget) { + strscpy(p->fw_version, dev->gadget->name, sizeof(p->fw_version)); + strscpy(p->bus_info, dev_name(&dev->gadget->dev), sizeof(p->bus_info)); + } } /* REVISIT can also support: @@ -897,6 +899,28 @@ void gether_set_gadget(struct net_device *net, struct usb_gadget *g) } EXPORT_SYMBOL_GPL(gether_set_gadget); +int gether_attach_gadget(struct net_device *net, struct usb_gadget *g) +{ + int ret; + + ret = device_move(&net->dev, &g->dev, DPM_ORDER_DEV_AFTER_PARENT); + if (ret) + return ret; + + gether_set_gadget(net, g); + return 0; +} +EXPORT_SYMBOL_GPL(gether_attach_gadget); + +void gether_detach_gadget(struct net_device *net) +{ + struct eth_dev *dev = netdev_priv(net); + + device_move(&net->dev, NULL, DPM_ORDER_NONE); + dev->gadget = NULL; +} +EXPORT_SYMBOL_GPL(gether_detach_gadget); + int gether_set_dev_addr(struct net_device *net, const char *dev_addr) { struct eth_dev *dev; @@ -1040,36 +1064,6 @@ int gether_set_ifname(struct net_device *net, const char *name, int len) } EXPORT_SYMBOL_GPL(gether_set_ifname); -void gether_setup_opts_default(struct gether_opts *opts, const char *name) -{ - opts->qmult = QMULT_DEFAULT; - snprintf(opts->name, sizeof(opts->name), "%s%%d", name); - eth_random_addr(opts->dev_mac); - opts->addr_assign_type = NET_ADDR_RANDOM; - eth_random_addr(opts->host_mac); -} -EXPORT_SYMBOL_GPL(gether_setup_opts_default); - -void gether_apply_opts(struct net_device *net, struct gether_opts *opts) -{ - struct eth_dev *dev = netdev_priv(net); - - dev->qmult = opts->qmult; - - if (opts->ifname_set) { - strscpy(net->name, opts->name, sizeof(net->name)); - dev->ifname_set = true; - } - - memcpy(dev->host_mac, opts->host_mac, sizeof(dev->host_mac)); - - if (opts->addr_assign_type == NET_ADDR_SET) { - memcpy(dev->dev_mac, opts->dev_mac, sizeof(dev->dev_mac)); - net->addr_assign_type = opts->addr_assign_type; - } -} -EXPORT_SYMBOL_GPL(gether_apply_opts); - void gether_suspend(struct gether *link) { struct eth_dev *dev = link->ioport; @@ -1126,21 +1120,6 @@ void gether_cleanup(struct eth_dev *dev) } EXPORT_SYMBOL_GPL(gether_cleanup); -void gether_unregister_free_netdev(struct net_device *net) -{ - if (!net) - return; - - struct eth_dev *dev = netdev_priv(net); - - if (net->reg_state == NETREG_REGISTERED) { - unregister_netdev(net); - flush_work(&dev->work); - } - free_netdev(net); -} -EXPORT_SYMBOL_GPL(gether_unregister_free_netdev); - /** * gether_connect - notify network layer that USB link is active * @link: the USB link, set up with endpoints, descriptors matching @@ -1246,6 +1225,11 @@ void gether_disconnect(struct gether *link) DBG(dev, "%s\n", __func__); + spin_lock(&dev->lock); + dev->port_usb = NULL; + link->is_suspend = false; + spin_unlock(&dev->lock); + netif_stop_queue(dev->net); netif_carrier_off(dev->net); @@ -1283,11 +1267,6 @@ void gether_disconnect(struct gether *link) dev->header_len = 0; dev->unwrap = NULL; dev->wrap = NULL; - - spin_lock(&dev->lock); - dev->port_usb = NULL; - link->is_suspend = false; - spin_unlock(&dev->lock); } EXPORT_SYMBOL_GPL(gether_disconnect);
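Publishing dev->port_usb = NULL under dev->lock at the top of gether_disconnect(), rather than at the bottom, fences the transmit path off before the endpoints are disabled. A sketch of the reader side this ordering protects, assuming the xmit path samples port_usb under dev->lock the way u_ether.c's eth_start_xmit() does:

#include <linux/netdevice.h>

static netdev_tx_t demo_xmit(struct sk_buff *skb, struct net_device *net)
{
	struct eth_dev *dev = netdev_priv(net);
	struct gether *link;
	unsigned long flags;

	spin_lock_irqsave(&dev->lock, flags);
	link = dev->port_usb;		/* NULL as soon as disconnect begins */
	spin_unlock_irqrestore(&dev->lock, flags);

	if (!link) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;	/* drop: the USB side is gone */
	}
	/* ... otherwise queue the skb toward link->in_ep ... */
	return NETDEV_TX_OK;
}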
diff --git a/drivers/usb/gadget/function/u_ether.h b/drivers/usb/gadget/function/u_ether.h index a212a8e..c85a1cf 100644 --- a/drivers/usb/gadget/function/u_ether.h +++ b/drivers/usb/gadget/function/u_ether.h
@@ -38,31 +38,6 @@ struct eth_dev; -/** - * struct gether_opts - Options for Ethernet gadget function instances - * @name: Pattern for the network interface name (e.g., "usb%d"). - * Used to generate the net device name. - * @qmult: Queue length multiplier for high/super speed. - * @host_mac: The MAC address to be used by the host side. - * @dev_mac: The MAC address to be used by the device side. - * @ifname_set: True if the interface name pattern has been set by userspace. - * @addr_assign_type: The method used for assigning the device MAC address - * (e.g., NET_ADDR_RANDOM, NET_ADDR_SET). - * - * This structure caches network-related settings provided through configfs - * before the net_device is fully instantiated. This allows for early - * configuration while deferring net_device allocation until the function - * is bound. - */ -struct gether_opts { - char name[IFNAMSIZ]; - unsigned int qmult; - u8 host_mac[ETH_ALEN]; - u8 dev_mac[ETH_ALEN]; - bool ifname_set; - unsigned char addr_assign_type; -}; - /* * This represents the USB side of an "ethernet" link, managed by a USB * function which provides control and (maybe) framing. Two functions @@ -176,6 +151,32 @@ static inline struct net_device *gether_setup_default(void) void gether_set_gadget(struct net_device *net, struct usb_gadget *g); /** + * gether_attach_gadget - Reparent net_device to the gadget device. + * @net: The network device to reparent. + * @g: The target USB gadget device to parent to. + * + * This function moves the network device to be a child of the USB gadget + * device in the device hierarchy. This is typically done when the function + * is bound to a configuration. + * + * Returns 0 on success, or a negative error code on failure. + */ +int gether_attach_gadget(struct net_device *net, struct usb_gadget *g); + +/** + * gether_detach_gadget - Detach net_device from its gadget parent. + * @net: The network device to detach. + * + * This function moves the network device to be a child of the virtual + * device parent, effectively detaching it from the USB gadget device + * hierarchy. This is typically done when the function is unbound + * from a configuration but the instance is not yet freed. + */ +void gether_detach_gadget(struct net_device *net); + +DEFINE_FREE(detach_gadget, struct net_device *, if (_T) gether_detach_gadget(_T)) + +/** * gether_set_dev_addr - initialize an ethernet-over-usb link with eth address * @net: device representing this link * @dev_addr: eth address of this device @@ -283,11 +284,6 @@ int gether_get_ifname(struct net_device *net, char *name, int len); int gether_set_ifname(struct net_device *net, const char *name, int len); void gether_cleanup(struct eth_dev *dev); -void gether_unregister_free_netdev(struct net_device *net); -DEFINE_FREE(free_gether_netdev, struct net_device *, gether_unregister_free_netdev(_T)); - -void gether_setup_opts_default(struct gether_opts *opts, const char *name); -void gether_apply_opts(struct net_device *net, struct gether_opts *opts); void gether_suspend(struct gether *link); void gether_resume(struct gether *link);
diff --git a/drivers/usb/gadget/function/u_ether_configfs.h b/drivers/usb/gadget/function/u_ether_configfs.h index 217990a..51f0d79 100644 --- a/drivers/usb/gadget/function/u_ether_configfs.h +++ b/drivers/usb/gadget/function/u_ether_configfs.h
@@ -13,13 +13,6 @@ #ifndef __U_ETHER_CONFIGFS_H #define __U_ETHER_CONFIGFS_H -#include <linux/cleanup.h> -#include <linux/hex.h> -#include <linux/if_ether.h> -#include <linux/mutex.h> -#include <linux/netdevice.h> -#include <linux/rtnetlink.h> - #define USB_ETHERNET_CONFIGFS_ITEM(_f_) \ static void _f_##_attr_release(struct config_item *item) \ { \ @@ -204,174 +197,4 @@ out: \ \ CONFIGFS_ATTR(_f_##_opts_, _n_) -#define USB_ETHER_OPTS_ITEM(_f_) \ - static void _f_##_attr_release(struct config_item *item) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - \ - usb_put_function_instance(&opts->func_inst); \ - } \ - \ - static struct configfs_item_operations _f_##_item_ops = { \ - .release = _f_##_attr_release, \ - } - -#define USB_ETHER_OPTS_ATTR_DEV_ADDR(_f_) \ - static ssize_t _f_##_opts_dev_addr_show(struct config_item *item, \ - char *page) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - \ - guard(mutex)(&opts->lock); \ - return sysfs_emit(page, "%pM\n", opts->net_opts.dev_mac); \ - } \ - \ - static ssize_t _f_##_opts_dev_addr_store(struct config_item *item, \ - const char *page, size_t len) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - u8 new_addr[ETH_ALEN]; \ - const char *p = page; \ - \ - guard(mutex)(&opts->lock); \ - if (opts->refcnt) \ - return -EBUSY; \ - \ - for (int i = 0; i < ETH_ALEN; i++) { \ - unsigned char num; \ - if ((*p == '.') || (*p == ':')) \ - p++; \ - num = hex_to_bin(*p++) << 4; \ - num |= hex_to_bin(*p++); \ - new_addr[i] = num; \ - } \ - if (!is_valid_ether_addr(new_addr)) \ - return -EINVAL; \ - memcpy(opts->net_opts.dev_mac, new_addr, ETH_ALEN); \ - opts->net_opts.addr_assign_type = NET_ADDR_SET; \ - return len; \ - } \ - \ - CONFIGFS_ATTR(_f_##_opts_, dev_addr) - -#define USB_ETHER_OPTS_ATTR_HOST_ADDR(_f_) \ - static ssize_t _f_##_opts_host_addr_show(struct config_item *item, \ - char *page) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - \ - guard(mutex)(&opts->lock); \ - return sysfs_emit(page, "%pM\n", opts->net_opts.host_mac); \ - } \ - \ - static ssize_t _f_##_opts_host_addr_store(struct config_item *item, \ - const char *page, size_t len) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - u8 new_addr[ETH_ALEN]; \ - const char *p = page; \ - \ - guard(mutex)(&opts->lock); \ - if (opts->refcnt) \ - return -EBUSY; \ - \ - for (int i = 0; i < ETH_ALEN; i++) { \ - unsigned char num; \ - if ((*p == '.') || (*p == ':')) \ - p++; \ - num = hex_to_bin(*p++) << 4; \ - num |= hex_to_bin(*p++); \ - new_addr[i] = num; \ - } \ - if (!is_valid_ether_addr(new_addr)) \ - return -EINVAL; \ - memcpy(opts->net_opts.host_mac, new_addr, ETH_ALEN); \ - return len; \ - } \ - \ - CONFIGFS_ATTR(_f_##_opts_, host_addr) - -#define USB_ETHER_OPTS_ATTR_QMULT(_f_) \ - static ssize_t _f_##_opts_qmult_show(struct config_item *item, \ - char *page) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - \ - guard(mutex)(&opts->lock); \ - return sysfs_emit(page, "%u\n", opts->net_opts.qmult); \ - } \ - \ - static ssize_t _f_##_opts_qmult_store(struct config_item *item, \ - const char *page, size_t len) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - u32 val; \ - int ret; \ - \ - guard(mutex)(&opts->lock); \ - if (opts->refcnt) \ - return -EBUSY; \ - \ - ret = kstrtou32(page, 0, &val); \ - if (ret) \ - return ret; \ - \ - opts->net_opts.qmult = val; \ - return len; \ - } \ - \ - CONFIGFS_ATTR(_f_##_opts_, qmult) - -#define USB_ETHER_OPTS_ATTR_IFNAME(_f_) \ - static ssize_t _f_##_opts_ifname_show(struct config_item *item, \ - char *page) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - const char *name; \ - \ - guard(mutex)(&opts->lock); \ - rtnl_lock(); \ - if (opts->net_opts.ifname_set) \ - name = opts->net_opts.name; \ - else if (opts->net) \ - name = netdev_name(opts->net); \ - else \ - name = "(inactive net_device)"; \ - rtnl_unlock(); \ - return sysfs_emit(page, "%s\n", name); \ - } \ - \ - static ssize_t _f_##_opts_ifname_store(struct config_item *item, \ - const char *page, size_t len) \ - { \ - struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - char tmp[IFNAMSIZ]; \ - const char *p; \ - size_t c_len = len; \ - \ - if (c_len > 0 && page[c_len - 1] == '\n') \ - c_len--; \ - \ - if (c_len >= sizeof(tmp)) \ - return -E2BIG; \ - \ - strscpy(tmp, page, c_len + 1); \ - if (!dev_valid_name(tmp)) \ - return -EINVAL; \ - \ - /* Require exactly one %d */ \ - p = strchr(tmp, '%'); \ - if (!p || p[1] != 'd' || strchr(p + 2, '%')) \ - return -EINVAL; \ - \ - guard(mutex)(&opts->lock); \ - if (opts->refcnt) \ - return -EBUSY; \ - strscpy(opts->net_opts.name, tmp, sizeof(opts->net_opts.name)); \ - opts->net_opts.ifname_set = true; \ - return len; \ - } \ - \ - CONFIGFS_ATTR(_f_##_opts_, ifname) - #endif /* __U_ETHER_CONFIGFS_H */
diff --git a/drivers/usb/gadget/function/u_gether.h b/drivers/usb/gadget/function/u_gether.h index 2f7a373e..e7b6b51 100644 --- a/drivers/usb/gadget/function/u_gether.h +++ b/drivers/usb/gadget/function/u_gether.h
@@ -15,17 +15,25 @@ #include <linux/usb/composite.h> +/** + * struct f_gether_opts - subset function options + * @func_inst: USB function instance. + * @net: The net_device associated with the subset function. + * @bound: True if the net_device is shared and pre-registered during the + * legacy composite driver's bind phase (e.g., multi.c). If false, + * the subset function will register the net_device during its own + * bind phase. + * @bind_count: Tracks the number of configurations the subset function is + * bound to, preventing double-registration of the @net device. + * @lock: Protects the data from concurrent access by configfs read/write + * and create symlink/remove symlink operations. + * @refcnt: Reference counter for the function instance. + */ struct f_gether_opts { struct usb_function_instance func_inst; struct net_device *net; bool bound; - - /* - * Read/write access to configfs attributes is handled by configfs. - * - * This is to protect the data from concurrent access by read/write - * and create symlink/remove symlink. - */ + int bind_count; struct mutex lock; int refcnt; };
diff --git a/drivers/usb/gadget/function/u_ncm.h b/drivers/usb/gadget/function/u_ncm.h index d99330f..ce2f635 100644 --- a/drivers/usb/gadget/function/u_ncm.h +++ b/drivers/usb/gadget/function/u_ncm.h
@@ -15,22 +15,29 @@ #include <linux/usb/composite.h> -#include "u_ether.h" - +/** + * struct f_ncm_opts - NCM function options + * @func_inst: USB function instance. + * @net: The net_device associated with the NCM function. + * @bind_count: Tracks the number of configurations the NCM function is + * bound to, preventing double-registration of the @net device. + * @ncm_interf_group: ConfigFS group for NCM interface. + * @ncm_os_desc: USB OS descriptor for NCM. + * @ncm_ext_compat_id: Extended compatibility ID. + * @lock: Protects the data from concurrent access by configfs read/write + * and create symlink/remove symlink operations. + * @refcnt: Reference counter for the function instance. + * @max_segment_size: Maximum segment size. + */ struct f_ncm_opts { struct usb_function_instance func_inst; struct net_device *net; + int bind_count; - struct gether_opts net_opts; struct config_group *ncm_interf_group; struct usb_os_desc ncm_os_desc; char ncm_ext_compat_id[16]; - /* - * Read/write access to configfs attributes is handled by configfs. - * - * This is to protect the data from concurrent access by read/write - * and create symlink/remove symlink. - */ + struct mutex lock; int refcnt;
diff --git a/drivers/usb/gadget/function/u_rndis.h b/drivers/usb/gadget/function/u_rndis.h index a8c409b..4e64619 100644 --- a/drivers/usb/gadget/function/u_rndis.h +++ b/drivers/usb/gadget/function/u_rndis.h
@@ -15,12 +15,34 @@ #include <linux/usb/composite.h> +/** + * struct f_rndis_opts - RNDIS function options + * @func_inst: USB function instance. + * @vendor_id: Vendor ID. + * @manufacturer: Manufacturer string. + * @net: The net_device associated with the RNDIS function. + * @bind_count: Tracks the number of configurations the RNDIS function is + * bound to, preventing double-registration of the @net device. + * @borrowed_net: True if the net_device is shared and pre-registered during + * the legacy composite driver's bind phase (e.g., multi.c). + * If false, the RNDIS function will register the net_device + * during its own bind phase. + * @rndis_interf_group: ConfigFS group for RNDIS interface. + * @rndis_os_desc: USB OS descriptor for RNDIS. + * @rndis_ext_compat_id: Extended compatibility ID. + * @class: USB class. + * @subclass: USB subclass. + * @protocol: USB protocol. + * @lock: Protects the data from concurrent access by configfs read/write + * and create symlink/remove symlink operations. + * @refcnt: Reference counter for the function instance. + */ struct f_rndis_opts { struct usb_function_instance func_inst; u32 vendor_id; const char *manufacturer; struct net_device *net; - bool bound; + int bind_count; bool borrowed_net; struct config_group *rndis_interf_group; @@ -30,13 +52,6 @@ struct f_rndis_opts { u8 class; u8 subclass; u8 protocol; - - /* - * Read/write access to configfs attributes is handled by configfs. - * - * This is to protect the data from concurrent access by read/write - * and create symlink/remove symlink. - */ struct mutex lock; int refcnt; };
diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h index 676419a..7abfdd5 100644 --- a/drivers/usb/gadget/function/uvc.h +++ b/drivers/usb/gadget/function/uvc.h
@@ -155,6 +155,9 @@ struct uvc_device { enum uvc_state state; struct usb_function func; struct uvc_video video; + struct completion *vdev_release_done; + struct mutex lock; /* protects func_unbound and func_connected */ + bool func_unbound; bool func_connected; wait_queue_head_t func_connected_queue;
diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c index ed48d38..514e593 100644 --- a/drivers/usb/gadget/function/uvc_v4l2.c +++ b/drivers/usb/gadget/function/uvc_v4l2.c
@@ -574,6 +574,8 @@ uvc_v4l2_subscribe_event(struct v4l2_fh *fh, if (sub->type < UVC_EVENT_FIRST || sub->type > UVC_EVENT_LAST) return -EINVAL; + guard(mutex)(&uvc->lock); + if (sub->type == UVC_EVENT_SETUP && uvc->func_connected) return -EBUSY; @@ -595,7 +597,8 @@ static void uvc_v4l2_disable(struct uvc_device *uvc) uvc_function_disconnect(uvc); uvcg_video_disable(&uvc->video); uvcg_free_buffers(&uvc->video.queue); - uvc->func_connected = false; + scoped_guard(mutex, &uvc->lock) + uvc->func_connected = false; wake_up_interruptible(&uvc->func_connected_queue); }
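guard(mutex) and scoped_guard(mutex, ...) are the <linux/cleanup.h> lock guards: the first holds the mutex until the end of the enclosing scope, which suits the multi-return subscribe path, while the second bounds the critical section to a single statement so the wake_up_interruptible() afterwards runs unlocked. A standalone sketch of the two shapes:

	#include <linux/cleanup.h>
	#include <linux/mutex.h>

	static DEFINE_MUTEX(demo_lock);
	static bool demo_connected;

	static int demo_subscribe(void)
	{
		guard(mutex)(&demo_lock);	/* unlocked on every return below */

		if (demo_connected)
			return -EBUSY;

		demo_connected = true;
		return 0;
	}

	static void demo_disable(void)
	{
		scoped_guard(mutex, &demo_lock)	/* held for this statement only */
			demo_connected = false;
		/* lock already dropped before any wakeup happens */
	}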
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index 7cea641..2f9700b 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c
@@ -513,7 +513,7 @@ uvc_video_prep_requests(struct uvc_video *video) return; } - interval_duration = 2 << (video->ep->desc->bInterval - 1); + interval_duration = 1 << (video->ep->desc->bInterval - 1); if (cdev->gadget->speed < USB_SPEED_HIGH) interval_duration *= 10000; else
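For high-speed and SuperSpeed periodic endpoints, bInterval N encodes a period of 2^(N-1) (micro)frames, so the shift base must be 1; the old `2 <<` computed 2^N and overstated the interval by a factor of two. For example, bInterval = 4 at high speed is 1 << 3 = 8 microframes = 8 * 125 us = 1 ms, where the old code yielded 2 ms. A one-liner for the high-speed case:

	/* Period of a high-speed periodic endpoint in microseconds:
	 * bInterval encodes 2^(bInterval - 1) microframes of 125 us each. */
	static inline unsigned int hs_period_us(unsigned int bInterval)
	{
		return (1U << (bInterval - 1)) * 125;
	}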
diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index c9eca90..f094491 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c
@@ -462,8 +462,13 @@ static void set_link_state(struct dummy_hcd *dum_hcd) /* Report reset and disconnect events to the driver */ if (dum->ints_enabled && (disconnect || reset)) { - stop_activity(dum); ++dum->callback_usage; + /* + * stop_activity() can drop dum->lock, so it must + * not come between the dum->ints_enabled test + * and the ++dum->callback_usage. + */ + stop_activity(dum); spin_unlock(&dum->lock); if (reset) usb_gadget_udc_reset(&dum->gadget, dum->driver); @@ -908,21 +913,6 @@ static int dummy_pullup(struct usb_gadget *_gadget, int value) spin_lock_irqsave(&dum->lock, flags); dum->pullup = (value != 0); set_link_state(dum_hcd); - if (value == 0) { - /* - * Emulate synchronize_irq(): wait for callbacks to finish. - * This seems to be the best place to emulate the call to - * synchronize_irq() that's in usb_gadget_remove_driver(). - * Doing it in dummy_udc_stop() would be too late since it - * is called after the unbind callback and unbind shouldn't - * be invoked until all the other callbacks are finished. - */ - while (dum->callback_usage > 0) { - spin_unlock_irqrestore(&dum->lock, flags); - usleep_range(1000, 2000); - spin_lock_irqsave(&dum->lock, flags); - } - } spin_unlock_irqrestore(&dum->lock, flags); usb_hcd_poll_rh_status(dummy_hcd_to_hcd(dum_hcd)); @@ -945,6 +935,20 @@ static void dummy_udc_async_callbacks(struct usb_gadget *_gadget, bool enable) spin_lock_irq(&dum->lock); dum->ints_enabled = enable; + if (!enable) { + /* + * Emulate synchronize_irq(): wait for callbacks to finish. + * This has to happen after emulated interrupts are disabled + * (dum->ints_enabled is clear) and before the unbind callback, + * just like the call to synchronize_irq() in + * gadget/udc/core:gadget_unbind_driver(). + */ + while (dum->callback_usage > 0) { + spin_unlock_irq(&dum->lock); + usleep_range(1000, 2000); + spin_lock_irq(&dum->lock); + } + } spin_unlock_irq(&dum->lock); } @@ -1534,6 +1538,12 @@ static int transfer(struct dummy_hcd *dum_hcd, struct urb *urb, /* rescan to continue with any other queued i/o */ if (rescan) goto top; + + /* request not fully transferred; stop iterating to + * preserve data ordering across queued requests. + */ + if (req->req.actual < req->req.length) + break; } return sent; }
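Both dummy_hcd hunks protect the same invariant: dum->callback_usage counts in-flight emulated-IRQ callbacks, so it must be raised while the ints_enabled test is still valid (stop_activity() can drop dum->lock), and the synchronize_irq() emulation must run only after new callbacks can no longer start, i.e. right after ints_enabled is cleared. A generic sketch of the pattern; struct demo and demo_run_callback() are hypothetical:

	#include <linux/delay.h>
	#include <linux/spinlock.h>

	struct demo {
		spinlock_t lock;
		bool ints_enabled;
		int callback_usage;
	};

	static void demo_run_callback(struct demo *d);	/* hypothetical */

	static void demo_emulated_irq(struct demo *d)
	{
		spin_lock_irq(&d->lock);
		if (d->ints_enabled) {
			d->callback_usage++;	/* claim before the lock can drop */
			spin_unlock_irq(&d->lock);
			demo_run_callback(d);
			spin_lock_irq(&d->lock);
			d->callback_usage--;
		}
		spin_unlock_irq(&d->lock);
	}

	static void demo_synchronize(struct demo *d)
	{
		spin_lock_irq(&d->lock);
		d->ints_enabled = false;	/* no new callbacks after this */
		while (d->callback_usage > 0) {
			spin_unlock_irq(&d->lock);
			usleep_range(1000, 2000);
			spin_lock_irq(&d->lock);
		}
		spin_unlock_irq(&d->lock);
	}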
diff --git a/drivers/usb/host/ehci-brcm.c b/drivers/usb/host/ehci-brcm.c index 888e8f6..5e3156f 100644 --- a/drivers/usb/host/ehci-brcm.c +++ b/drivers/usb/host/ehci-brcm.c
@@ -31,8 +31,8 @@ static inline void ehci_brcm_wait_for_sof(struct ehci_hcd *ehci, u32 delay) int res; /* Wait for next microframe (every 125 usecs) */ - res = readl_relaxed_poll_timeout(&ehci->regs->frame_index, val, - val != frame_idx, 1, 130); + res = readl_relaxed_poll_timeout_atomic(&ehci->regs->frame_index, + val, val != frame_idx, 1, 130); if (res) ehci_err(ehci, "Error waiting for SOF\n"); udelay(delay);
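readl_relaxed_poll_timeout() sleeps between reads (usleep_range()), so it must not be used where the caller may be atomic; the _atomic variant from <linux/iopoll.h> busy-waits with udelay() instead, which is what this SOF wait presumably needs. Sketch of the call shape, with a hypothetical register and ready bit:

	#include <linux/bits.h>
	#include <linux/iopoll.h>

	static int demo_wait_ready(void __iomem *reg)
	{
		u32 val;

		/* poll every 1 us, give up after 130 us; safe in atomic context */
		return readl_relaxed_poll_timeout_atomic(reg, val,
							 val & BIT(0), 1, 130);
	}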
diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c index 890fc5e..ade178a 100644 --- a/drivers/usb/host/xhci-debugfs.c +++ b/drivers/usb/host/xhci-debugfs.c
@@ -386,11 +386,19 @@ static const struct file_operations port_fops = { static int xhci_portli_show(struct seq_file *s, void *unused) { struct xhci_port *port = s->private; - struct xhci_hcd *xhci = hcd_to_xhci(port->rhub->hcd); + struct xhci_hcd *xhci; u32 portli; portli = readl(&port->port_reg->portli); + /* port without protocol capability isn't added to a roothub */ + if (!port->rhub) { + seq_printf(s, "0x%08x\n", portli); + return 0; + } + + xhci = hcd_to_xhci(port->rhub->hcd); + /* PORTLI fields are valid if port is a USB3 or eUSB2V2 port */ if (port->rhub == &xhci->usb3_rhub) seq_printf(s, "0x%08x LEC=%u RLC=%u TLC=%u\n", portli,
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 9315ba1..1cbefee 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c
@@ -3195,6 +3195,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) if (status & STS_HCE) { xhci_warn(xhci, "WARNING: Host Controller Error\n"); + xhci_halt(xhci); goto out; }
diff --git a/drivers/usb/host/xhci-sideband.c b/drivers/usb/host/xhci-sideband.c index abbcc0e..23153e1 100644 --- a/drivers/usb/host/xhci-sideband.c +++ b/drivers/usb/host/xhci-sideband.c
@@ -93,8 +93,6 @@ __xhci_sideband_remove_endpoint(struct xhci_sideband *sb, struct xhci_virt_ep *e static void __xhci_sideband_remove_interrupter(struct xhci_sideband *sb) { - struct usb_device *udev; - lockdep_assert_held(&sb->mutex); if (!sb->ir) @@ -102,10 +100,6 @@ __xhci_sideband_remove_interrupter(struct xhci_sideband *sb) xhci_remove_secondary_interrupter(xhci_to_hcd(sb->xhci), sb->ir); sb->ir = NULL; - udev = sb->vdev->udev; - - if (udev->state != USB_STATE_NOTATTACHED) - usb_offload_put(udev); } /* sideband api functions */ @@ -291,8 +285,8 @@ EXPORT_SYMBOL_GPL(xhci_sideband_get_event_buffer); * Allow other drivers, such as usb controller driver, to check if there are * any sideband activity on the host controller. This information could be used * for power management or other forms of resource management. The caller should - * ensure downstream usb devices are all either suspended or marked as - * "offload_at_suspend" to ensure the correctness of the return value. + * ensure downstream usb devices are all marked as "offload_pm_locked" to ensure + * the correctness of the return value. * * Returns true on any active sideband existence, false otherwise. */ @@ -328,9 +322,6 @@ int xhci_sideband_create_interrupter(struct xhci_sideband *sb, int num_seg, bool ip_autoclear, u32 imod_interval, int intr_num) { - int ret = 0; - struct usb_device *udev; - if (!sb || !sb->xhci) return -ENODEV; @@ -348,12 +339,9 @@ xhci_sideband_create_interrupter(struct xhci_sideband *sb, int num_seg, if (!sb->ir) return -ENOMEM; - udev = sb->vdev->udev; - ret = usb_offload_get(udev); - sb->ir->ip_autoclear = ip_autoclear; - return ret; + return 0; } EXPORT_SYMBOL_GPL(xhci_sideband_create_interrupter);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index c36ab32..ef6d866 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c
@@ -4146,7 +4146,7 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) if (state == 0xffffffff || (xhci->xhc_state & XHCI_STATE_DYING) || (xhci->xhc_state & XHCI_STATE_HALTED)) { spin_unlock_irqrestore(&xhci->lock, flags); - kfree(command); + xhci_free_command(xhci, command); return -ENODEV; } @@ -4154,7 +4154,7 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) slot_id); if (ret) { spin_unlock_irqrestore(&xhci->lock, flags); - kfree(command); + xhci_free_command(xhci, command); return ret; } xhci_ring_cmd_db(xhci);
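A command obtained from xhci_alloc_command() can own more than the struct itself, notably the completion allocated when the second argument is true, so plain kfree() on these error paths leaked that allocation; xhci_free_command() is the matching destructor. The pairing, roughly (a sketch, with the command-queueing step elided):

	static int demo_issue_command(struct xhci_hcd *xhci)
	{
		struct xhci_command *command;

		command = xhci_alloc_command(xhci, true, GFP_KERNEL);
		if (!command)
			return -ENOMEM;

		/* ... queue the command, or bail out on error ... */

		xhci_free_command(xhci, command);	/* not kfree() */
		return 0;
	}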
diff --git a/drivers/usb/image/mdc800.c b/drivers/usb/image/mdc800.c index 8d8e79a..ca287b7 100644 --- a/drivers/usb/image/mdc800.c +++ b/drivers/usb/image/mdc800.c
@@ -707,7 +707,7 @@ static ssize_t mdc800_device_read (struct file *file, char __user *buf, size_t l if (signal_pending (current)) { mutex_unlock(&mdc800->io_lock); - return -EINTR; + return len == left ? -EINTR : len-left; } sts=left > (mdc800->out_count-mdc800->out_ptr)?mdc800->out_count-mdc800->out_ptr:left; @@ -730,9 +730,11 @@ static ssize_t mdc800_device_read (struct file *file, char __user *buf, size_t l mutex_unlock(&mdc800->io_lock); return len-left; } - wait_event_timeout(mdc800->download_wait, + retval = wait_event_timeout(mdc800->download_wait, mdc800->downloaded, msecs_to_jiffies(TO_DOWNLOAD_GET_READY)); + if (!retval) + usb_kill_urb(mdc800->download_urb); mdc800->downloaded = 0; if (mdc800->download_urb->status != 0) {
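Two classic blocking-read rules are at work here: if some bytes have already been copied to userspace, a signal must yield the short count rather than -EINTR (otherwise those bytes are silently lost), and an URB whose wait timed out must be killed before its buffer is reused. The signal rule in its generic form:

	#include <linux/sched/signal.h>

	/* Return value for a blocking read interrupted by a signal:
	 * `copied` is the number of bytes already handed to userspace. */
	static ssize_t demo_read_result(size_t copied)
	{
		if (signal_pending(current))
			return copied ? copied : -EINTR;
		return copied;
	}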
diff --git a/drivers/usb/misc/usbio.c b/drivers/usb/misc/usbio.c index 2e68d48..02d1e07 100644 --- a/drivers/usb/misc/usbio.c +++ b/drivers/usb/misc/usbio.c
@@ -614,8 +614,10 @@ static int usbio_probe(struct usb_interface *intf, const struct usb_device_id *i usb_fill_bulk_urb(usbio->urb, udev, usbio->rx_pipe, usbio->rxbuf, usbio->rxbuf_len, usbio_bulk_recv, usbio); ret = usb_submit_urb(usbio->urb, GFP_KERNEL); - if (ret) - return dev_err_probe(dev, ret, "Submitting usb urb\n"); + if (ret) { + dev_err_probe(dev, ret, "Submitting usb urb\n"); + goto err_free_urb; + } mutex_lock(&usbio->ctrl_mutex); @@ -663,6 +665,7 @@ static int usbio_probe(struct usb_interface *intf, const struct usb_device_id *i err_unlock: mutex_unlock(&usbio->ctrl_mutex); usb_kill_urb(usbio->urb); +err_free_urb: usb_free_urb(usbio->urb); return ret;
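Returning straight out of probe after usb_submit_urb() fails leaked the URB allocated earlier; the fix extends the usual unwind ladder, where each failure jumps to the label that undoes everything allocated so far, in reverse order. The canonical shape, with hypothetical demo_* steps:

	#include <linux/usb.h>

	struct demo {
		struct urb *urb;
	};

	static int demo_submit(struct demo *d);		/* hypothetical */
	static int demo_handshake(struct demo *d);	/* hypothetical */

	static int demo_probe(struct demo *d)
	{
		int ret;

		d->urb = usb_alloc_urb(0, GFP_KERNEL);
		if (!d->urb)
			return -ENOMEM;

		ret = demo_submit(d);
		if (ret)
			goto err_free_urb;

		ret = demo_handshake(d);
		if (ret)
			goto err_kill_urb;

		return 0;

	err_kill_urb:
		usb_kill_urb(d->urb);
	err_free_urb:
		usb_free_urb(d->urb);
		return ret;
	}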
diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index ec8bd968..a8af761 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c
@@ -736,7 +736,7 @@ static int uss720_probe(struct usb_interface *intf, ret = get_1284_register(pp, 0, ®, GFP_KERNEL); dev_dbg(&intf->dev, "reg: %7ph\n", priv->reg); if (ret < 0) - return ret; + goto probe_abort; ret = usb_find_last_int_in_endpoint(interface, &epd); if (!ret) {
diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 9189e4b..7a482cd 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c
@@ -272,6 +272,7 @@ static int yurex_probe(struct usb_interface *interface, const struct usb_device_ dev->int_buffer, YUREX_BUF_SIZE, yurex_interrupt, dev, 1); dev->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; + dev->bbu = -1; if (usb_submit_urb(dev->urb, GFP_KERNEL)) { retval = -EIO; dev_err(&interface->dev, "Could not submitting URB\n"); @@ -280,7 +281,6 @@ static int yurex_probe(struct usb_interface *interface, const struct usb_device_ /* save our data pointer in this interface device */ usb_set_intfdata(interface, dev); - dev->bbu = -1; /* we can register the device now, as it is ready */ retval = usb_register_dev(interface, &yurex_class);
diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index cf4a036..8c93bde 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c
@@ -815,6 +815,15 @@ static void usbhs_remove(struct platform_device *pdev) usbhs_platform_call(priv, hardware_exit, pdev); reset_control_assert(priv->rsts); + + /* + * Explicitly free the IRQ to ensure the interrupt handler is + * disabled and synchronized before freeing resources. + * devm_free_irq() calls free_irq() which waits for any running + * ISR to complete, preventing UAF. + */ + devm_free_irq(&pdev->dev, priv->irq, priv); + usbhs_mod_remove(priv); usbhs_fifo_remove(priv); usbhs_pipe_remove(priv);
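devm-managed resources are released only after remove() returns, so a devm-requested IRQ would still be deliverable while remove() frees the data its handler touches. devm_free_irq() releases it early and, like free_irq(), waits for a running handler to finish, closing the use-after-free window. An ordering sketch with hypothetical names:

	#include <linux/interrupt.h>
	#include <linux/platform_device.h>

	struct demo_priv {
		int irq;
	};

	static void demo_teardown(struct demo_priv *priv);	/* hypothetical */

	static void demo_remove(struct platform_device *pdev)
	{
		struct demo_priv *priv = platform_get_drvdata(pdev);

		/* free_irq() semantics: waits for a running handler */
		devm_free_irq(&pdev->dev, priv->irq, priv);

		demo_teardown(priv);	/* now safe: the ISR can no longer run */
	}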
diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index b8e28ce..edec139 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c
@@ -139,9 +139,14 @@ static void *usb_role_switch_match(const struct fwnode_handle *fwnode, const cha static struct usb_role_switch * usb_role_switch_is_parent(struct fwnode_handle *fwnode) { - struct fwnode_handle *parent = fwnode_get_parent(fwnode); + struct fwnode_handle *parent; struct device *dev; + if (!fwnode_device_is_compatible(fwnode, "usb-b-connector")) + return NULL; + + parent = fwnode_get_parent(fwnode); + if (!fwnode_property_present(parent, "usb-role-switch")) { fwnode_handle_put(parent); return NULL;
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index 58694b8..3f58891 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c
@@ -73,6 +73,7 @@ static const struct usb_device_id edgeport_4port_id_table[] = { { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_22I) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_412_4) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_COMPATIBLE) }, + { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_BLACKBOX_IC135A) }, { } }; @@ -121,6 +122,7 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_8R) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_8RR) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_412_8) }, + { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_BLACKBOX_IC135A) }, { USB_DEVICE(USB_VENDOR_ID_NCR, NCR_DEVICE_ID_EPIC_0202) }, { USB_DEVICE(USB_VENDOR_ID_NCR, NCR_DEVICE_ID_EPIC_0203) }, { USB_DEVICE(USB_VENDOR_ID_NCR, NCR_DEVICE_ID_EPIC_0310) }, @@ -470,6 +472,7 @@ static void get_product_info(struct edgeport_serial *edge_serial) case ION_DEVICE_ID_EDGEPORT_2_DIN: case ION_DEVICE_ID_EDGEPORT_4_DIN: case ION_DEVICE_ID_EDGEPORT_16_DUAL_CPU: + case ION_DEVICE_ID_BLACKBOX_IC135A: product_info->IsRS232 = 1; break;
diff --git a/drivers/usb/serial/io_usbvend.h b/drivers/usb/serial/io_usbvend.h index 9a6f742..c82a275 100644 --- a/drivers/usb/serial/io_usbvend.h +++ b/drivers/usb/serial/io_usbvend.h
@@ -211,6 +211,7 @@ // // Definitions for other product IDs +#define ION_DEVICE_ID_BLACKBOX_IC135A 0x0801 // OEM device (rebranded Edgeport/4) #define ION_DEVICE_ID_MT4X56USB 0x1403 // OEM device #define ION_DEVICE_ID_E5805A 0x1A01 // OEM device (rebranded Edgeport/4)
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index e349ed6..3136121 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c
@@ -2441,6 +2441,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d22, 0xff, 0xff, 0x30) }, /* MeiG Smart SRM815 and SRM825L */ { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d22, 0xff, 0xff, 0x40) }, /* MeiG Smart SRM825L */ { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d22, 0xff, 0xff, 0x60) }, /* MeiG Smart SRM825L */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d38, 0xff, 0xff, 0x30) }, /* MeiG Smart SRM825WN (Diag) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d38, 0xff, 0xff, 0x40) }, /* MeiG Smart SRM825WN (AT) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d38, 0xff, 0xff, 0x60) }, /* MeiG Smart SRM825WN (NMEA) */ { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ @@ -2461,6 +2464,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0302, 0xff) }, /* Rolling RW101R-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0802, 0xff), /* Rolling RW350-GL (laptop MBIM) */ .driver_info = RSVD(5) }, + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x1003, 0xff) }, /* Rolling RW135R-GL (laptop MBIM) */ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0100, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for Global */ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0100, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0100, 0xff, 0xff, 0x40) },
diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index d185688..35d9c30 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c
@@ -100,9 +100,14 @@ static int dp_altmode_configure(struct dp_altmode *dp, u8 con) { u8 pin_assign = 0; u32 conf; + u32 signal; /* DP Signalling */ - conf = (dp->data.conf & DP_CONF_SIGNALLING_MASK) >> DP_CONF_SIGNALLING_SHIFT; + signal = DP_CAP_DP_SIGNALLING(dp->port->vdo) & DP_CAP_DP_SIGNALLING(dp->alt->vdo); + if (dp->plug_prime) + signal &= DP_CAP_DP_SIGNALLING(dp->plug_prime->vdo); + + conf = signal << DP_CONF_SIGNALLING_SHIFT; switch (con) { case DP_STATUS_CON_DISABLED:
diff --git a/drivers/usb/typec/altmodes/thunderbolt.c b/drivers/usb/typec/altmodes/thunderbolt.c index c4c5da6..32250b9 100644 --- a/drivers/usb/typec/altmodes/thunderbolt.c +++ b/drivers/usb/typec/altmodes/thunderbolt.c
@@ -39,28 +39,7 @@ static bool tbt_ready(struct typec_altmode *alt); static int tbt_enter_mode(struct tbt_altmode *tbt) { - struct typec_altmode *plug = tbt->plug[TYPEC_PLUG_SOP_P]; - u32 vdo; - - vdo = tbt->alt->vdo & (TBT_VENDOR_SPECIFIC_B0 | TBT_VENDOR_SPECIFIC_B1); - vdo |= tbt->alt->vdo & TBT_INTEL_SPECIFIC_B0; - vdo |= TBT_MODE; - - if (plug) { - if (typec_cable_is_active(tbt->cable)) - vdo |= TBT_ENTER_MODE_ACTIVE_CABLE; - - vdo |= TBT_ENTER_MODE_CABLE_SPEED(TBT_CABLE_SPEED(plug->vdo)); - vdo |= plug->vdo & TBT_CABLE_ROUNDED; - vdo |= plug->vdo & TBT_CABLE_OPTICAL; - vdo |= plug->vdo & TBT_CABLE_RETIMER; - vdo |= plug->vdo & TBT_CABLE_LINK_TRAINING; - } else { - vdo |= TBT_ENTER_MODE_CABLE_SPEED(TBT_CABLE_USB3_PASSIVE); - } - - tbt->enter_vdo = vdo; - return typec_altmode_enter(tbt->alt, &vdo); + return typec_altmode_enter(tbt->alt, &tbt->enter_vdo); } static void tbt_altmode_work(struct work_struct *work) @@ -337,6 +316,7 @@ static bool tbt_ready(struct typec_altmode *alt) { struct tbt_altmode *tbt = typec_altmode_get_drvdata(alt); struct typec_altmode *plug; + u32 vdo; if (tbt->cable) return true; @@ -364,6 +344,26 @@ static bool tbt_ready(struct typec_altmode *alt) tbt->plug[i] = plug; } + vdo = tbt->alt->vdo & (TBT_VENDOR_SPECIFIC_B0 | TBT_VENDOR_SPECIFIC_B1); + vdo |= tbt->alt->vdo & TBT_INTEL_SPECIFIC_B0; + vdo |= TBT_MODE; + plug = tbt->plug[TYPEC_PLUG_SOP_P]; + + if (plug) { + if (typec_cable_is_active(tbt->cable)) + vdo |= TBT_ENTER_MODE_ACTIVE_CABLE; + + vdo |= TBT_ENTER_MODE_CABLE_SPEED(TBT_CABLE_SPEED(plug->vdo)); + vdo |= plug->vdo & TBT_CABLE_ROUNDED; + vdo |= plug->vdo & TBT_CABLE_OPTICAL; + vdo |= plug->vdo & TBT_CABLE_RETIMER; + vdo |= plug->vdo & TBT_CABLE_LINK_TRAINING; + } else { + vdo |= TBT_ENTER_MODE_CABLE_SPEED(TBT_CABLE_USB3_PASSIVE); + } + + tbt->enter_vdo = vdo; + return true; }
diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 8314309..0977581 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c
@@ -686,10 +686,6 @@ typec_register_altmode(struct device *parent, alt->adev.dev.bus = &typec_bus; - /* Plug alt modes need a class to generate udev events. */ - if (is_typec_plug(parent)) - alt->adev.dev.class = &typec_class; - ret = device_register(&alt->adev.dev); if (ret) { dev_err(parent, "failed to register alternate mode (%d)\n",
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 1d2f3af..8e0e14a 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -7890,7 +7890,7 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) port->partner_desc.identity = &port->partner_ident; port->role_sw = fwnode_usb_role_switch_get(tcpc->fwnode); - if (IS_ERR_OR_NULL(port->role_sw)) + if (!port->role_sw) port->role_sw = usb_role_switch_get(port->dev); if (IS_ERR(port->role_sw)) { err = PTR_ERR(port->role_sw);
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index f38a4d7..8333bda 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -43,8 +43,13 @@ void ucsi_notify_common(struct ucsi *ucsi, u32 cci) if (cci & UCSI_CCI_BUSY) return; - if (UCSI_CCI_CONNECTOR(cci)) - ucsi_connector_change(ucsi, UCSI_CCI_CONNECTOR(cci)); + if (UCSI_CCI_CONNECTOR(cci)) { + if (UCSI_CCI_CONNECTOR(cci) <= ucsi->cap.num_connectors) + ucsi_connector_change(ucsi, UCSI_CCI_CONNECTOR(cci)); + else + dev_err(ucsi->dev, "bogus connector number in CCI: %lu\n", + UCSI_CCI_CONNECTOR(cci)); + } if (cci & UCSI_CCI_ACK_COMPLETE && test_and_clear_bit(ACK_PENDING, &ucsi->flags))
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c index 478beaf..b1d658b 100644 --- a/drivers/vfio/pci/vfio_pci_dmabuf.c +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -301,11 +301,10 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags, */ ret = dma_buf_fd(priv->dmabuf, get_dma_buf.open_flags); if (ret < 0) - goto err_dma_buf; + dma_buf_put(priv->dmabuf); + return ret; -err_dma_buf: - dma_buf_put(priv->dmabuf); err_dev_put: vfio_device_put_registration(&vdev->vdev); err_free_phys:
diff --git a/drivers/video/fbdev/au1100fb.c b/drivers/video/fbdev/au1100fb.c index 1a04154..c54cfcd 100644 --- a/drivers/video/fbdev/au1100fb.c +++ b/drivers/video/fbdev/au1100fb.c
@@ -380,8 +380,12 @@ static struct au1100fb_panel known_lcd_panels[] = #define panel_is_color(panel) (panel->control_base & LCD_CONTROL_PC) #define panel_swap_rgb(panel) (panel->control_base & LCD_CONTROL_CCO) -#if defined(CONFIG_COMPILE_TEST) && !defined(CONFIG_MIPS) -/* This is only defined to be able to compile this driver on non-mips platforms */ +#if defined(CONFIG_COMPILE_TEST) && (!defined(CONFIG_MIPS) || defined(CONFIG_64BIT)) +/* + * KSEG1ADDR() is defined in arch/mips/include/asm/addrspace.h + * for 32 bit configurations. Provide a stub for compile testing + * on other platforms. + */ #define KSEG1ADDR(x) (x) #endif
diff --git a/drivers/virt/coco/tdx-guest/tdx-guest.c b/drivers/virt/coco/tdx-guest/tdx-guest.c index 4252b14..7cee975 100644 --- a/drivers/virt/coco/tdx-guest/tdx-guest.c +++ b/drivers/virt/coco/tdx-guest/tdx-guest.c
@@ -171,6 +171,8 @@ static void tdx_mr_deinit(const struct attribute_group *mr_grp) #define GET_QUOTE_SUCCESS 0 #define GET_QUOTE_IN_FLIGHT 0xffffffffffffffff +#define TDX_QUOTE_MAX_LEN (GET_QUOTE_BUF_SIZE - sizeof(struct tdx_quote_buf)) + /* struct tdx_quote_buf: Format of Quote request buffer. * @version: Quote format version, filled by TD. * @status: Status code of Quote request, filled by VMM. @@ -269,6 +271,7 @@ static int tdx_report_new_locked(struct tsm_report *report, void *data) u8 *buf; struct tdx_quote_buf *quote_buf = quote_data; struct tsm_report_desc *desc = &report->desc; + u32 out_len; int ret; u64 err; @@ -306,12 +309,17 @@ static int tdx_report_new_locked(struct tsm_report *report, void *data) return ret; } - buf = kvmemdup(quote_buf->data, quote_buf->out_len, GFP_KERNEL); + out_len = READ_ONCE(quote_buf->out_len); + + if (out_len > TDX_QUOTE_MAX_LEN) + return -EFBIG; + + buf = kvmemdup(quote_buf->data, out_len, GFP_KERNEL); if (!buf) return -ENOMEM; report->outblob = buf; - report->outblob_len = quote_buf->out_len; + report->outblob_len = out_len; /* * TODO: parse the PEM-formatted cert chain out of the quote buffer when
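quote_buf->out_len lives in memory the VMM writes, so reading it twice (once to size the copy, once to record the length) opens a TOCTOU window, and an unbounded value would let kvmemdup() read past the shared buffer. READ_ONCE() takes a single snapshot and the TDX_QUOTE_MAX_LEN bound keeps the copy inside it. The generic shape of the fix, with a hypothetical shared-buffer layout:

	#include <linux/string.h>
	#include <linux/types.h>

	struct shared_buf {	/* hypothetical VMM-writable layout */
		u32 len;
		u8 data[];
	};

	static int demo_copy_out(struct shared_buf *shared, size_t max_payload,
				 void **out, u32 *out_len)
	{
		u32 len = READ_ONCE(shared->len);	/* snapshot once */

		if (len > max_payload)
			return -EFBIG;

		*out = kvmemdup(shared->data, len, GFP_KERNEL);
		if (!*out)
			return -ENOMEM;

		*out_len = len;		/* use the snapshot, never re-read */
		return 0;
	}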
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 335692d..fbca7ce 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c
@@ -2912,10 +2912,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); * @data: the token identifying the buffer. * @gfp: how to do memory allocations (if necessary). * - * Same as virtqueue_add_inbuf but passes DMA_ATTR_CPU_CACHE_CLEAN to indicate - * that the CPU will not dirty any cacheline overlapping this buffer while it - * is available, and to suppress overlapping cacheline warnings in DMA debug - * builds. + * Same as virtqueue_add_inbuf but passes DMA_ATTR_DEBUGGING_IGNORE_CACHELINES + * to indicate that the CPU will not dirty any cacheline overlapping this buffer + * while it is available, and to suppress overlapping cacheline warnings in DMA + * debug builds. * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). @@ -2928,7 +2928,7 @@ int virtqueue_add_inbuf_cache_clean(struct virtqueue *vq, gfp_t gfp) { return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp, - DMA_ATTR_CPU_CACHE_CLEAN); + DMA_ATTR_DEBUGGING_IGNORE_CACHELINES); } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_cache_clean);
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 1759cc1..15ba592 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c
@@ -12,6 +12,7 @@ #include <linux/eventfd.h> #include <linux/file.h> #include <linux/kernel.h> +#include <linux/kstrtox.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/poll.h> @@ -30,7 +31,10 @@ #include <linux/seq_file.h> #include <linux/miscdevice.h> #include <linux/moduleparam.h> +#include <linux/notifier.h> +#include <linux/security.h> #include <linux/virtio_mmio.h> +#include <linux/wait.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> @@ -46,6 +50,7 @@ #include <xen/page.h> #include <xen/xen-ops.h> #include <xen/balloon.h> +#include <xen/xenbus.h> #ifdef CONFIG_XEN_ACPI #include <xen/acpi.h> #endif @@ -68,10 +73,20 @@ module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint, MODULE_PARM_DESC(dm_op_buf_max_size, "Maximum size of a dm_op hypercall buffer"); +static bool unrestricted; +module_param(unrestricted, bool, 0); +MODULE_PARM_DESC(unrestricted, + "Don't restrict hypercalls to target domain if running in a domU"); + struct privcmd_data { domid_t domid; }; +/* DOMID_INVALID implies no restriction */ +static domid_t target_domain = DOMID_INVALID; +static bool restrict_wait; +static DECLARE_WAIT_QUEUE_HEAD(restrict_wait_wq); + static int privcmd_vma_range_is_mapped( struct vm_area_struct *vma, unsigned long addr, @@ -1563,13 +1578,16 @@ static long privcmd_ioctl(struct file *file, static int privcmd_open(struct inode *ino, struct file *file) { - struct privcmd_data *data = kzalloc_obj(*data); + struct privcmd_data *data; + if (wait_event_interruptible(restrict_wait_wq, !restrict_wait) < 0) + return -EINTR; + + data = kzalloc_obj(*data); if (!data) return -ENOMEM; - /* DOMID_INVALID implies no restriction */ - data->domid = DOMID_INVALID; + data->domid = target_domain; file->private_data = data; return 0; @@ -1662,6 +1680,52 @@ static struct miscdevice privcmd_dev = { .fops = &xen_privcmd_fops, }; +static int init_restrict(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + char *target; + unsigned int domid; + + /* Default to an guaranteed unused domain-id. */ + target_domain = DOMID_IDLE; + + target = xenbus_read(XBT_NIL, "target", "", NULL); + if (IS_ERR(target) || kstrtouint(target, 10, &domid)) { + pr_err("No target domain found, blocking all hypercalls\n"); + goto out; + } + + target_domain = domid; + + out: + if (!IS_ERR(target)) + kfree(target); + + restrict_wait = false; + wake_up_all(&restrict_wait_wq); + + return NOTIFY_DONE; +} + +static struct notifier_block xenstore_notifier = { + .notifier_call = init_restrict, +}; + +static void __init restrict_driver(void) +{ + if (unrestricted) { + if (security_locked_down(LOCKDOWN_XEN_USER_ACTIONS)) + pr_warn("Kernel is locked down, parameter \"unrestricted\" ignored\n"); + else + return; + } + + restrict_wait = true; + + register_xenstore_notifier(&xenstore_notifier); +} + static int __init privcmd_init(void) { int err; @@ -1669,6 +1733,9 @@ static int __init privcmd_init(void) if (!xen_domain()) return -ENODEV; + if (!xen_initial_domain()) + restrict_driver(); + err = misc_register(&privcmd_dev); if (err != 0) { pr_err("Could not register Xen privcmd device\n"); @@ -1698,6 +1765,9 @@ static int __init privcmd_init(void) static void __exit privcmd_exit(void) { + if (!xen_initial_domain()) + unregister_xenstore_notifier(&xenstore_notifier); + privcmd_ioeventfd_exit(); privcmd_irqfd_exit(); misc_deregister(&privcmd_dev);
diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 31903bf..897ae2a 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c
@@ -378,11 +378,8 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv) acpi_psd[acpi_id].domain); } - status = acpi_evaluate_object(handle, "_CST", NULL, &buffer); - if (ACPI_FAILURE(status)) { - if (!pblk) - return AE_OK; - } + if (!pblk && !acpi_has_method(handle, "_CST")) + return AE_OK; /* .. and it has a C-state */ __set_bit(acpi_id, acpi_id_cst_present);
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 22ff9cf..b34785b 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c
@@ -149,12 +149,12 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev) mutex_lock(&pdev->dev_lock); /* Make sure we only do this setup once */ - if (xenbus_read_driver_state(pdev->xdev->nodename) != + if (xenbus_read_driver_state(pdev->xdev, pdev->xdev->nodename) != XenbusStateInitialised) goto out; /* Wait for frontend to state that it has published the configuration */ - if (xenbus_read_driver_state(pdev->xdev->otherend) != + if (xenbus_read_driver_state(pdev->xdev, pdev->xdev->otherend) != XenbusStateInitialised) goto out; @@ -374,7 +374,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev, dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); mutex_lock(&pdev->dev_lock); - if (xenbus_read_driver_state(pdev->xdev->nodename) != state) + if (xenbus_read_driver_state(pdev->xdev, pdev->xdev->nodename) != state) goto out; err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", @@ -572,7 +572,7 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev) /* It's possible we could get the call to setup twice, so make sure * we're not already connected. */ - if (xenbus_read_driver_state(pdev->xdev->nodename) != + if (xenbus_read_driver_state(pdev->xdev, pdev->xdev->nodename) != XenbusStateInitWait) goto out; @@ -662,7 +662,7 @@ static void xen_pcibk_be_watch(struct xenbus_watch *watch, struct xen_pcibk_device *pdev = container_of(watch, struct xen_pcibk_device, be_watch); - switch (xenbus_read_driver_state(pdev->xdev->nodename)) { + switch (xenbus_read_driver_state(pdev->xdev, pdev->xdev->nodename)) { case XenbusStateInitWait: xen_pcibk_setup_backend(pdev); break;
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 0ab1329..27682cb 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c
@@ -226,8 +226,9 @@ __xenbus_switch_state(struct xenbus_device *dev, struct xenbus_transaction xbt; int current_state; int err, abort; + bool vanished = false; - if (state == dev->state) + if (state == dev->state || dev->vanished) return 0; again: @@ -242,6 +243,10 @@ __xenbus_switch_state(struct xenbus_device *dev, err = xenbus_scanf(xbt, dev->nodename, "state", "%d", ¤t_state); if (err != 1) goto abort; + if (current_state != dev->state && current_state == XenbusStateInitialising) { + vanished = true; + goto abort; + } err = xenbus_printf(xbt, dev->nodename, "state", "%d", state); if (err) { @@ -256,7 +261,7 @@ __xenbus_switch_state(struct xenbus_device *dev, if (err == -EAGAIN && !abort) goto again; xenbus_switch_fatal(dev, depth, err, "ending transaction"); - } else + } else if (!vanished) dev->state = state; return 0; @@ -931,14 +936,20 @@ static int xenbus_unmap_ring_hvm(struct xenbus_device *dev, void *vaddr) /** * xenbus_read_driver_state - read state from a store path + * @dev: xenbus device pointer * @path: path for driver * * Returns: the state of the driver rooted at the given store path, or * XenbusStateUnknown if no state can be read. */ -enum xenbus_state xenbus_read_driver_state(const char *path) +enum xenbus_state xenbus_read_driver_state(const struct xenbus_device *dev, + const char *path) { enum xenbus_state result; + + if (dev && dev->vanished) + return XenbusStateUnknown; + int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL); if (err) result = XenbusStateUnknown;
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 9f9011c..eb260ece 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -191,7 +191,7 @@ void xenbus_otherend_changed(struct xenbus_watch *watch, return; } - state = xenbus_read_driver_state(dev->otherend); + state = xenbus_read_driver_state(dev, dev->otherend); dev_dbg(&dev->dev, "state is %d, (%s), %s, %s\n", state, xenbus_strstate(state), dev->otherend_watch.node, path); @@ -364,7 +364,7 @@ void xenbus_dev_remove(struct device *_dev) * closed. */ if (!drv->allow_rebind || - xenbus_read_driver_state(dev->nodename) == XenbusStateClosing) + xenbus_read_driver_state(dev, dev->nodename) == XenbusStateClosing) xenbus_switch_state(dev, XenbusStateClosed); } EXPORT_SYMBOL_GPL(xenbus_dev_remove); @@ -444,6 +444,9 @@ static void xenbus_cleanup_devices(const char *path, struct bus_type *bus) info.dev = NULL; bus_for_each_dev(bus, NULL, &info, cleanup_dev); if (info.dev) { + dev_warn(&info.dev->dev, + "device forcefully removed from xenstore\n"); + info.dev->vanished = true; device_unregister(&info.dev->dev); put_device(&info.dev->dev); } @@ -514,7 +517,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, size_t stringlen; char *tmpstring; - enum xenbus_state state = xenbus_read_driver_state(nodename); + enum xenbus_state state = xenbus_read_driver_state(NULL, nodename); if (state != XenbusStateInitialising) { /* Device is not new, so ignore it. This can happen if a @@ -659,6 +662,39 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) return; dev = xenbus_device_find(root, &bus->bus); + /* + * Backend domain crash results in not coordinated frontend removal, + * without going through XenbusStateClosing. If this is a new instance + * of the same device Xen tools will have reset the state to + * XenbusStateInitializing. + * It might be that the backend crashed early during the init phase of + * device setup, in which case the known state would have been + * XenbusStateInitializing. So test the backend domid to match the + * saved one. In case the new backend happens to have the same domid as + * the old one, we can just carry on, as there is no inconsistency + * resulting in this case. + */ + if (dev && !strcmp(bus->root, "device")) { + enum xenbus_state state = xenbus_read_driver_state(dev, dev->nodename); + unsigned int backend = xenbus_read_unsigned(root, "backend-id", + dev->otherend_id); + + if (state == XenbusStateInitialising && + (state != dev->state || backend != dev->otherend_id)) { + /* + * State has been reset, assume the old one vanished + * and new one needs to be probed. + */ + dev_warn(&dev->dev, + "state reset occurred, reconnecting\n"); + dev->vanished = true; + } + if (dev->vanished) { + device_unregister(&dev->dev); + put_device(&dev->dev); + dev = NULL; + } + } if (!dev) xenbus_probe_node(bus, type, root); else
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index f04707d..ca04609 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -253,7 +253,7 @@ static int print_device_status(struct device *dev, void *data) } else if (xendev->state < XenbusStateConnected) { enum xenbus_state rstate = XenbusStateUnknown; if (xendev->otherend) - rstate = xenbus_read_driver_state(xendev->otherend); + rstate = xenbus_read_driver_state(xendev, xendev->otherend); pr_warn("Timeout connecting to device: %s (local state %d, remote state %d)\n", xendev->nodename, xendev->state, rstate); }
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index a936f9e..63bf096 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c
@@ -298,8 +298,8 @@ int afs_merge_fs_addr4(struct afs_net *net, struct afs_addr_list *alist, srx.transport.sin.sin_addr.s_addr = xdr; peer = rxrpc_kernel_lookup_peer(net->socket, &srx, GFP_KERNEL); - if (!peer) - return -ENOMEM; + if (IS_ERR(peer)) + return PTR_ERR(peer); for (i = 0; i < alist->nr_ipv4; i++) { if (peer == alist->addrs[i].peer) { @@ -342,8 +342,8 @@ int afs_merge_fs_addr6(struct afs_net *net, struct afs_addr_list *alist, memcpy(&srx.transport.sin6.sin6_addr, xdr, 16); peer = rxrpc_kernel_lookup_peer(net->socket, &srx, GFP_KERNEL); - if (!peer) - return -ENOMEM; + if (IS_ERR(peer)) + return PTR_ERR(peer); for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { if (peer == alist->addrs[i].peer) {
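rxrpc_kernel_lookup_peer() reports failure as an ERR_PTR()-encoded pointer, not NULL, so the old !peer test never fired and every failure was misreported as -ENOMEM. The kernel's error-pointer convention in miniature, with a hypothetical lookup helper:

	#include <linux/err.h>

	static struct rxrpc_peer *demo_lookup(void);	/* hypothetical, returns ERR_PTR() on failure */

	static int demo_use_peer(void)
	{
		struct rxrpc_peer *peer = demo_lookup();

		if (IS_ERR(peer))
			return PTR_ERR(peer);	/* the encoded errno, e.g. -ENOMEM */

		/* peer is a valid pointer from here on */
		return 0;
	}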
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 8e89cc5..fb857fa 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c
@@ -47,6 +47,7 @@ #include <linux/dax.h> #include <linux/uaccess.h> #include <uapi/linux/rseq.h> +#include <linux/rseq.h> #include <asm/param.h> #include <asm/page.h> @@ -286,7 +287,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, } #ifdef CONFIG_RSEQ NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end)); - NEW_AUX_ENT(AT_RSEQ_ALIGN, __alignof__(struct rseq)); + NEW_AUX_ENT(AT_RSEQ_ALIGN, rseq_alloc_align()); #endif #undef NEW_AUX_ENT /* AT_NULL is zero; clear the rest too */
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 95b1d08..95b65aa 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c
@@ -595,6 +595,12 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, #ifdef ELF_HWCAP2 nitems++; #endif +#ifdef ELF_HWCAP3 + nitems++; +#endif +#ifdef ELF_HWCAP4 + nitems++; +#endif csp = sp; sp -= nitems * 2 * sizeof(unsigned long);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index af98421..0428557 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c
@@ -1393,6 +1393,13 @@ static int find_parent_nodes(struct btrfs_backref_walk_ctx *ctx, .indirect_missing_keys = PREFTREE_INIT }; + if (unlikely(!root)) { + btrfs_err(ctx->fs_info, + "missing extent root for extent at bytenr %llu", + ctx->bytenr); + return -EUCLEAN; + } + /* Roots ulist is not needed when using a sharedness check context. */ if (sc) ASSERT(ctx->roots == NULL); @@ -2204,6 +2211,13 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, struct btrfs_extent_item *ei; struct btrfs_key key; + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", + logical); + return -EUCLEAN; + } + key.objectid = logical; if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) key.type = BTRFS_METADATA_ITEM_KEY; @@ -2851,6 +2865,13 @@ int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr) struct btrfs_key key; int ret; + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", + bytenr); + return -EUCLEAN; + } + key.objectid = bytenr; key.type = BTRFS_METADATA_ITEM_KEY; key.offset = (u64)-1; @@ -2987,6 +3008,13 @@ int btrfs_backref_iter_next(struct btrfs_backref_iter *iter) /* We're at keyed items, there is no inline item, go to the next one */ extent_root = btrfs_extent_root(iter->fs_info, iter->bytenr); + if (unlikely(!extent_root)) { + btrfs_err(iter->fs_info, + "missing extent root for extent at bytenr %llu", + iter->bytenr); + return -EUCLEAN; + } + ret = btrfs_next_item(extent_root, iter->path); if (ret) return ret;
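The same guard is stamped into every caller here and in the block-group, disk-io and extent-tree hunks that follow: on a corrupted filesystem btrfs_extent_root() (or btrfs_block_group_root()) can return NULL, and dereferencing it would oops, so each caller now fails with -EUCLEAN, the "filesystem is corrupted" errno. The shape of the guard, as a fragment:

	struct btrfs_root *root = btrfs_extent_root(fs_info, bytenr);

	if (unlikely(!root)) {
		btrfs_err(fs_info,
			  "missing extent root for extent at bytenr %llu",
			  bytenr);
		return -EUCLEAN;
	}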
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index c284f48..c0d17a3 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c
@@ -739,6 +739,12 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl) last = max_t(u64, block_group->start, BTRFS_SUPER_INFO_OFFSET); extent_root = btrfs_extent_root(fs_info, last); + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for block group at offset %llu", + block_group->start); + return -EUCLEAN; + } #ifdef CONFIG_BTRFS_DEBUG /* @@ -1061,6 +1067,11 @@ static int remove_block_group_item(struct btrfs_trans_handle *trans, int ret; root = btrfs_block_group_root(fs_info); + if (unlikely(!root)) { + btrfs_err(fs_info, "missing block group root"); + return -EUCLEAN; + } + key.objectid = block_group->start; key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; key.offset = block_group->length; @@ -1349,6 +1360,11 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( struct btrfs_chunk_map *map; unsigned int num_items; + if (unlikely(!root)) { + btrfs_err(fs_info, "missing block group root"); + return ERR_PTR(-EUCLEAN); + } + map = btrfs_find_chunk_map(fs_info, chunk_offset, 1); ASSERT(map != NULL); ASSERT(map->start == chunk_offset); @@ -2140,6 +2156,11 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info, int ret; struct btrfs_key found_key; + if (unlikely(!root)) { + btrfs_err(fs_info, "missing block group root"); + return -EUCLEAN; + } + btrfs_for_each_slot(root, key, &found_key, path, ret) { if (found_key.objectid >= key->objectid && found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { @@ -2713,6 +2734,11 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans, size_t size; int ret; + if (unlikely(!root)) { + btrfs_err(fs_info, "missing block group root"); + return -EUCLEAN; + } + spin_lock(&block_group->lock); btrfs_set_stack_block_group_v2_used(&bgi, block_group->used); btrfs_set_stack_block_group_v2_chunk_objectid(&bgi, block_group->global_root_id); @@ -3048,6 +3074,11 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache, int ret; bool dirty_bg_running; + if (unlikely(!root)) { + btrfs_err(fs_info, "missing block group root"); + return -EUCLEAN; + } + /* * This can only happen when we are doing read-only scrub on read-only * mount. @@ -3192,6 +3223,11 @@ static int update_block_group_item(struct btrfs_trans_handle *trans, u64 used, remap_bytes; u32 identity_remap_count; + if (unlikely(!root)) { + btrfs_err(fs_info, "missing block group root"); + return -EUCLEAN; + } + /* * Block group items update can be triggered out of commit transaction * critical section, thus we need a consistent view of used bytes. @@ -3340,7 +3376,6 @@ static int cache_save_setup(struct btrfs_block_group *block_group, btrfs_abort_transaction(trans, ret); goto out_put; } - WARN_ON(ret); /* We've already setup this transaction, go ahead and exit */ if (block_group->cache_generation == trans->transid && @@ -4548,7 +4583,7 @@ static void check_removing_space_info(struct btrfs_space_info *space_info) for (int i = 0; i < BTRFS_SPACE_INFO_SUB_GROUP_MAX; i++) { if (space_info->sub_group[i]) { check_removing_space_info(space_info->sub_group[i]); - kfree(space_info->sub_group[i]); + btrfs_sysfs_remove_space_info(space_info->sub_group[i]); space_info->sub_group[i] = NULL; } }
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 790518a..8519994 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c
@@ -320,10 +320,16 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered, ASSERT(IS_ALIGNED(ordered->file_offset, fs_info->sectorsize)); ASSERT(IS_ALIGNED(ordered->num_bytes, fs_info->sectorsize)); - ASSERT(cb->writeback); + /* + * This flag determines if we should clear the writeback flag from the + * page cache. But this function is only utilized by encoded writes, it + * never goes through the page cache. + */ + ASSERT(!cb->writeback); cb->start = ordered->file_offset; cb->len = ordered->num_bytes; + ASSERT(cb->bbio.bio.bi_iter.bi_size == ordered->disk_num_bytes); cb->compressed_len = ordered->disk_num_bytes; cb->bbio.bio.bi_iter.bi_sector = ordered->disk_bytenr >> SECTOR_SHIFT; cb->bbio.ordered = ordered; @@ -345,8 +351,7 @@ struct compressed_bio *btrfs_alloc_compressed_write(struct btrfs_inode *inode, cb = alloc_compressed_bio(inode, start, REQ_OP_WRITE, end_bbio_compressed_write); cb->start = start; cb->len = len; - cb->writeback = true; - + cb->writeback = false; return cb; }
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index d97bbbd..56ff8af 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c
@@ -1657,7 +1657,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, if (unlikely(ret)) { btrfs_err(trans->fs_info, "failed to add delayed dir index item, root: %llu, inode: %llu, index: %llu, error: %d", - index, btrfs_root_id(node->root), node->inode_id, ret); + btrfs_root_id(node->root), node->inode_id, index, ret); btrfs_delayed_item_release_metadata(dir->root, item); btrfs_release_delayed_item(item); }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f6fa15a..1b0eb24 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c
@@ -1591,7 +1591,7 @@ static int find_newest_super_backup(struct btrfs_fs_info *info) * this will bump the backup pointer by one when it is * done */ -static void backup_super_roots(struct btrfs_fs_info *info) +static int backup_super_roots(struct btrfs_fs_info *info) { const int next_backup = info->backup_root_index; struct btrfs_root_backup *root_backup; @@ -1623,6 +1623,15 @@ static void backup_super_roots(struct btrfs_fs_info *info) struct btrfs_root *extent_root = btrfs_extent_root(info, 0); struct btrfs_root *csum_root = btrfs_csum_root(info, 0); + if (unlikely(!extent_root)) { + btrfs_err(info, "missing extent root for extent at bytenr 0"); + return -EUCLEAN; + } + if (unlikely(!csum_root)) { + btrfs_err(info, "missing csum root for extent at bytenr 0"); + return -EUCLEAN; + } + btrfs_set_backup_extent_root(root_backup, extent_root->node->start); btrfs_set_backup_extent_root_gen(root_backup, @@ -1670,6 +1679,8 @@ static void backup_super_roots(struct btrfs_fs_info *info) memcpy(&info->super_copy->super_roots, &info->super_for_commit->super_roots, sizeof(*root_backup) * BTRFS_NUM_BACKUP_ROOTS); + + return 0; } /* @@ -1994,7 +2005,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, int level = btrfs_super_log_root_level(disk_super); if (unlikely(fs_devices->rw_devices == 0)) { - btrfs_warn(fs_info, "log replay required on RO media"); + btrfs_err(fs_info, "log replay required on RO media"); return -EIO; } @@ -2008,9 +2019,9 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, check.owner_root = BTRFS_TREE_LOG_OBJECTID; log_tree_root->node = read_tree_block(fs_info, bytenr, &check); if (IS_ERR(log_tree_root->node)) { - btrfs_warn(fs_info, "failed to read log tree"); ret = PTR_ERR(log_tree_root->node); log_tree_root->node = NULL; + btrfs_err(fs_info, "failed to read log tree with error: %d", ret); btrfs_put_root(log_tree_root); return ret; } @@ -2023,9 +2034,9 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, /* returns with log_tree_root freed on success */ ret = btrfs_recover_log_trees(log_tree_root); btrfs_put_root(log_tree_root); - if (ret) { - btrfs_handle_fs_error(fs_info, ret, - "Failed to recover log tree"); + if (unlikely(ret)) { + ASSERT(BTRFS_FS_ERROR(fs_info) != 0); + btrfs_err(fs_info, "failed to recover log trees with error: %d", ret); return ret; } @@ -2520,8 +2531,8 @@ int btrfs_validate_super(const struct btrfs_fs_info *fs_info, if (mirror_num >= 0 && btrfs_super_bytenr(sb) != btrfs_sb_offset(mirror_num)) { - btrfs_err(fs_info, "super offset mismatch %llu != %u", - btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET); + btrfs_err(fs_info, "super offset mismatch %llu != %llu", + btrfs_super_bytenr(sb), btrfs_sb_offset(mirror_num)); ret = -EINVAL; } @@ -2972,7 +2983,6 @@ static int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info) task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid"); if (IS_ERR(task)) { /* fs_info->update_uuid_tree_gen remains 0 in all error case */ - btrfs_warn(fs_info, "failed to start uuid_rescan task"); up(&fs_info->uuid_tree_rescan_sem); return PTR_ERR(task); } @@ -3188,7 +3198,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount) if (incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP) { btrfs_err(fs_info, "cannot mount because of unknown incompat features (0x%llx)", - incompat); + incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP); return -EINVAL; } @@ -3220,7 +3230,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount) if (compat_ro_unsupp && is_rw_mount) { 
btrfs_err(fs_info, "cannot mount read-write because of unknown compat_ro features (0x%llx)", - compat_ro); + compat_ro_unsupp); return -EINVAL; } @@ -3233,7 +3243,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount) !btrfs_test_opt(fs_info, NOLOGREPLAY)) { btrfs_err(fs_info, "cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay", - compat_ro); + compat_ro_unsupp); return -EINVAL; } @@ -3595,7 +3605,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device } } - btrfs_zoned_reserve_data_reloc_bg(fs_info); btrfs_free_zone_cache(fs_info); btrfs_check_active_zone_reservation(fs_info); @@ -3623,6 +3632,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_cleaner; } + /* + * Starts a transaction, must be called after the transaction kthread + * is initialized. + */ + btrfs_zoned_reserve_data_reloc_bg(fs_info); + ret = btrfs_read_qgroup_config(fs_info); if (ret) goto fail_trans_kthread; @@ -3642,7 +3657,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true); if (IS_ERR(fs_info->fs_root)) { ret = PTR_ERR(fs_info->fs_root); - btrfs_warn(fs_info, "failed to read fs tree: %d", ret); + btrfs_err(fs_info, "failed to read fs tree: %d", ret); fs_info->fs_root = NULL; goto fail_qgroup; } @@ -3663,8 +3678,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device btrfs_info(fs_info, "checking UUID tree"); ret = btrfs_check_uuid_tree(fs_info); if (ret) { - btrfs_warn(fs_info, - "failed to check the UUID tree: %d", ret); + btrfs_err(fs_info, "failed to check the UUID tree: %d", ret); close_ctree(fs_info); return ret; } @@ -4048,8 +4062,11 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) * not from fsync where the tree roots in fs_info have not * been consistent on disk. */ - if (max_mirrors == 0) - backup_super_roots(fs_info); + if (max_mirrors == 0) { + ret = backup_super_roots(fs_info); + if (ret < 0) + return ret; + } sb = fs_info->super_for_commit; dev_item = &sb->dev_item; @@ -4399,9 +4416,17 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) */ btrfs_flush_workqueue(fs_info->delayed_workers); - ret = btrfs_commit_super(fs_info); - if (ret) - btrfs_err(fs_info, "commit super ret %d", ret); + /* + * If the filesystem is shutdown, then an attempt to commit the + * super block (or any write) will just fail. Since we freeze + * the filesystem before shutting it down, the filesystem is in + * a consistent state and we don't need to commit super blocks. + */ + if (!btrfs_is_shutdown(fs_info)) { + ret = btrfs_commit_super(fs_info); + if (ret) + btrfs_err(fs_info, "commit super block returned %d", ret); + } } kthread_stop(fs_info->transaction_kthread);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 03cf9f2..098e641 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c
@@ -75,6 +75,12 @@ int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len) struct btrfs_key key; BTRFS_PATH_AUTO_FREE(path); + if (unlikely(!root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", start); + return -EUCLEAN; + } + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -131,6 +137,12 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, key.offset = offset; extent_root = btrfs_extent_root(fs_info, bytenr); + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", bytenr); + return -EUCLEAN; + } + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) return ret; @@ -436,6 +448,12 @@ static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, int recow; int ret; + if (unlikely(!root)) { + btrfs_err(trans->fs_info, + "missing extent root for extent at bytenr %llu", bytenr); + return -EUCLEAN; + } + key.objectid = bytenr; if (parent) { key.type = BTRFS_SHARED_DATA_REF_KEY; @@ -477,7 +495,7 @@ static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); if (key.objectid != bytenr || key.type != BTRFS_EXTENT_DATA_REF_KEY) - return ret; + return -ENOENT; ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_data_ref); @@ -510,6 +528,12 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, u32 num_refs; int ret; + if (unlikely(!root)) { + btrfs_err(trans->fs_info, + "missing extent root for extent at bytenr %llu", bytenr); + return -EUCLEAN; + } + key.objectid = bytenr; if (node->parent) { key.type = BTRFS_SHARED_DATA_REF_KEY; @@ -668,6 +692,12 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, struct btrfs_key key; int ret; + if (unlikely(!root)) { + btrfs_err(trans->fs_info, + "missing extent root for extent at bytenr %llu", bytenr); + return -EUCLEAN; + } + key.objectid = bytenr; if (parent) { key.type = BTRFS_SHARED_BLOCK_REF_KEY; @@ -692,6 +722,12 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, struct btrfs_key key; int ret; + if (unlikely(!root)) { + btrfs_err(trans->fs_info, + "missing extent root for extent at bytenr %llu", bytenr); + return -EUCLEAN; + } + key.objectid = bytenr; if (node->parent) { key.type = BTRFS_SHARED_BLOCK_REF_KEY; @@ -782,6 +818,12 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA); int needed; + if (unlikely(!root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", bytenr); + return -EUCLEAN; + } + key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = num_bytes; @@ -1680,6 +1722,12 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, } root = btrfs_extent_root(fs_info, key.objectid); + if (unlikely(!root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", + key.objectid); + return -EUCLEAN; + } again: ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret < 0) { @@ -1926,8 +1974,15 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, struct btrfs_root *csum_root; csum_root = btrfs_csum_root(fs_info, head->bytenr); - ret = btrfs_del_csums(trans, csum_root, head->bytenr, - head->num_bytes); + if (unlikely(!csum_root)) { + btrfs_err(fs_info, + "missing csum root for extent at bytenr %llu", + head->bytenr); + ret = -EUCLEAN; + } else { + ret = btrfs_del_csums(trans, 
csum_root, head->bytenr, + head->num_bytes); + } } } @@ -2379,6 +2434,12 @@ static noinline int check_committed_ref(struct btrfs_inode *inode, int type; int ret; + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", bytenr); + return -EUCLEAN; + } + key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = (u64)-1; @@ -2933,9 +2994,15 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) while (!TRANS_ABORTED(trans) && cached_state) { struct extent_state *next_state; - if (btrfs_test_opt(fs_info, DISCARD_SYNC)) + if (btrfs_test_opt(fs_info, DISCARD_SYNC)) { ret = btrfs_discard_extent(fs_info, start, end + 1 - start, NULL, true); + if (ret) { + btrfs_warn(fs_info, + "discard failed for extent [%llu, %llu]: errno=%d %s", + start, end, ret, btrfs_decode_error(ret)); + } + } next_state = btrfs_next_extent_state(unpin, cached_state); btrfs_clear_extent_dirty(unpin, start, end, &cached_state); @@ -3087,6 +3154,15 @@ static int do_free_extent_accounting(struct btrfs_trans_handle *trans, struct btrfs_root *csum_root; csum_root = btrfs_csum_root(trans->fs_info, bytenr); + if (unlikely(!csum_root)) { + ret = -EUCLEAN; + btrfs_abort_transaction(trans, ret); + btrfs_err(trans->fs_info, + "missing csum root for extent at bytenr %llu", + bytenr); + return ret; + } + ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes); if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); @@ -3216,7 +3292,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, u64 delayed_ref_root = href->owning_root; extent_root = btrfs_extent_root(info, bytenr); - ASSERT(extent_root); + if (unlikely(!extent_root)) { + btrfs_err(info, + "missing extent root for extent at bytenr %llu", bytenr); + return -EUCLEAN; + } path = btrfs_alloc_path(); if (!path) @@ -4933,11 +5013,18 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, size += btrfs_extent_inline_ref_size(BTRFS_EXTENT_OWNER_REF_KEY); size += btrfs_extent_inline_ref_size(type); + extent_root = btrfs_extent_root(fs_info, ins->objectid); + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", + ins->objectid); + return -EUCLEAN; + } + path = btrfs_alloc_path(); if (!path) return -ENOMEM; - extent_root = btrfs_extent_root(fs_info, ins->objectid); ret = btrfs_insert_empty_item(trans, extent_root, path, ins, size); if (ret) { btrfs_free_path(path); @@ -5013,11 +5100,18 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, size += sizeof(*block_info); } + extent_root = btrfs_extent_root(fs_info, extent_key.objectid); + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", + extent_key.objectid); + return -EUCLEAN; + } + path = btrfs_alloc_path(); if (!path) return -ENOMEM; - extent_root = btrfs_extent_root(fs_info, extent_key.objectid); ret = btrfs_insert_empty_item(trans, extent_root, path, &extent_key, size); if (ret) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 744a1ff..5f97a3d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c
@@ -4507,6 +4507,7 @@ static int try_release_subpage_extent_buffer(struct folio *folio) */ if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) { spin_unlock(&eb->refs_lock); + rcu_read_lock(); break; }
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 927324a..ed8ecf4 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c
@@ -308,6 +308,13 @@ static int search_csum_tree(struct btrfs_fs_info *fs_info, /* Current item doesn't contain the desired range, search again */ btrfs_release_path(path); csum_root = btrfs_csum_root(fs_info, disk_bytenr); + if (unlikely(!csum_root)) { + btrfs_err(fs_info, + "missing csum root for extent at bytenr %llu", + disk_bytenr); + return -EUCLEAN; + } + item = btrfs_lookup_csum(NULL, csum_root, path, disk_bytenr, 0); if (IS_ERR(item)) { ret = PTR_ERR(item);
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index ecddfca..9efd1ec 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c
@@ -1073,6 +1073,14 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, if (ret) return ret; + extent_root = btrfs_extent_root(trans->fs_info, block_group->start); + if (unlikely(!extent_root)) { + btrfs_err(trans->fs_info, + "missing extent root for block group at offset %llu", + block_group->start); + return -EUCLEAN; + } + mutex_lock(&block_group->free_space_lock); /* @@ -1086,7 +1094,6 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = 0; - extent_root = btrfs_extent_root(trans->fs_info, key.objectid); ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0); if (ret < 0) goto out_locked;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6efb543..f643a05 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c
@@ -1392,10 +1392,25 @@ static int cow_one_range(struct btrfs_inode *inode, struct folio *locked_folio, return ret; free_reserved: + /* + * If we have reserved an extent for the current range and failed to + * create the respective extent map or ordered extent, it means that + * when we reserved the extent we decremented the extent's size from + * the data space_info's bytes_may_use counter and + * incremented the space_info's bytes_reserved counter by the same + * amount. + * + * We must make sure extent_clear_unlock_delalloc() does not try + * to decrement again the data space_info's bytes_may_use counter, which + * will be handled by btrfs_free_reserved_extent(). + * + * Therefore we do not pass it the flag EXTENT_CLEAR_DATA_RESV, but only + * EXTENT_CLEAR_META_RESV. + */ extent_clear_unlock_delalloc(inode, file_offset, cur_end, locked_folio, cached, EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | - EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, + EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); btrfs_qgroup_free_data(inode, NULL, file_offset, cur_len, NULL); @@ -1997,6 +2012,13 @@ static int can_nocow_file_extent(struct btrfs_path *path, */ csum_root = btrfs_csum_root(root->fs_info, io_start); + if (unlikely(!csum_root)) { + btrfs_err(root->fs_info, + "missing csum root for extent at bytenr %llu", io_start); + ret = -EUCLEAN; + goto out; + } + ret = btrfs_lookup_csums_list(csum_root, io_start, io_start + args->file_extent.num_bytes - 1, NULL, nowait); @@ -2734,10 +2756,17 @@ static int add_pending_csums(struct btrfs_trans_handle *trans, int ret; list_for_each_entry(sum, list, list) { - trans->adding_csums = true; - if (!csum_root) + if (!csum_root) { csum_root = btrfs_csum_root(trans->fs_info, sum->logical); + if (unlikely(!csum_root)) { + btrfs_err(trans->fs_info, + "missing csum root for extent at bytenr %llu", + sum->logical); + return -EUCLEAN; + } + } + trans->adding_csums = true; ret = btrfs_csum_file_blocks(trans, csum_root, sum); trans->adding_csums = false; if (ret) @@ -4764,7 +4793,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry) spin_unlock(&dest->root_item_lock); btrfs_warn(fs_info, "attempt to delete subvolume %llu with active swapfile", - btrfs_root_id(root)); + btrfs_root_id(dest)); ret = -EPERM; goto out_up_write; } @@ -6597,6 +6626,25 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, int ret; bool xa_reserved = false; + if (!args->orphan && !args->subvol) { + /* + * Before anything else, check if we can add the name to the + * parent directory. We want to avoid a dir item overflow in + * case we have an existing dir item due to existing name + * hash collisions. We do this check here before we call + * btrfs_add_link() down below so that we can avoid a + * transaction abort (which could be exploited by malicious + * users). + * + * For subvolumes we already do this in btrfs_mksubvol(). 
+ */ + ret = btrfs_check_dir_item_collision(BTRFS_I(dir)->root, + btrfs_ino(BTRFS_I(dir)), + name); + if (ret < 0) + return ret; + } + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -9840,6 +9888,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, int compression; size_t orig_count; const u32 min_folio_size = btrfs_min_folio_size(fs_info); + const u32 blocksize = fs_info->sectorsize; u64 start, end; u64 num_bytes, ram_bytes, disk_num_bytes; struct btrfs_key ins; @@ -9950,9 +9999,9 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, ret = -EFAULT; goto out_cb; } - if (bytes < min_folio_size) - folio_zero_range(folio, bytes, min_folio_size - bytes); - ret = bio_add_folio(&cb->bbio.bio, folio, folio_size(folio), 0); + if (!IS_ALIGNED(bytes, blocksize)) + folio_zero_range(folio, bytes, round_up(bytes, blocksize) - bytes); + ret = bio_add_folio(&cb->bbio.bio, folio, round_up(bytes, blocksize), 0); if (unlikely(!ret)) { folio_put(folio); ret = -EINVAL;
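The encoded-write change at the end of the inode.c diff pads only to the next block boundary and queues round_up(bytes, blocksize) to the bio instead of the whole folio. A small demo of the arithmetic, using illustrative sizes (4KiB blocks, 64KiB folio):

#include <stdint.h>
#include <stdio.h>

#define ROUND_UP(x, a)  ((((x) + (a) - 1) / (a)) * (a))

int main(void)
{
        uint32_t blocksize = 4096, folio_size = 65536;
        uint32_t bytes = 10000; /* payload copied into this folio */
        uint32_t padded = ROUND_UP(bytes, blocksize);

        /* Old behaviour: zero up to folio_size, queue all 65536 bytes. */
        /* New behaviour: zero only [10000, 12288), queue 12288 bytes. */
        printf("zero range [%u, %u), queue %u of %u\n",
               bytes, padded, padded, folio_size);
        return 0;
}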
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ae21732..d75d31b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c
@@ -672,6 +672,13 @@ static noinline int create_subvol(struct mnt_idmap *idmap, goto out; } + /* + * Subvolumes have orphans cleaned on first dentry lookup. A new + * subvolume cannot have any orphans, so we should set the bit before we + * add the subvolume dentry to the dentry cache, so that it is in the + * same state as a subvolume after first lookup. + */ + set_bit(BTRFS_ROOT_ORPHAN_CLEANUP, &new_root->state); d_instantiate_new(dentry, new_inode_args.inode); new_inode_args.inode = NULL; @@ -3610,7 +3617,8 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) } } - trans = btrfs_join_transaction(root); + /* 2 BTRFS_QGROUP_RELATION_KEY items. */ + trans = btrfs_start_transaction(root, 2); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out; @@ -3682,7 +3690,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) goto out; } - trans = btrfs_join_transaction(root); + /* + * 1 BTRFS_QGROUP_INFO_KEY item. + * 1 BTRFS_QGROUP_LIMIT_KEY item. + */ + trans = btrfs_start_transaction(root, 2); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out; @@ -3731,7 +3743,8 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) goto drop_write; } - trans = btrfs_join_transaction(root); + /* 1 BTRFS_QGROUP_LIMIT_KEY item. */ + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out; @@ -3852,6 +3865,25 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file, goto out; } + received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid, + BTRFS_UUID_SIZE); + + /* + * Before we attempt to add the new received uuid, check if we have room + * for it in case there's already an item. If the size of the existing + * item plus this root's ID (u64) exceeds the maximum item size, we can + * return here without the need to abort a transaction. If we don't do + * this check, the btrfs_uuid_tree_add() call below would fail with + * -EOVERFLOW and result in a transaction abort. Malicious users could + * exploit this to turn the fs into RO mode. 
+ */ + if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) { + ret = btrfs_uuid_tree_check_overflow(fs_info, sa->uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL); + if (ret < 0) + goto out; + } + /* * 1 - root item * 2 - uuid items (received uuid + subvol uuid) @@ -3867,15 +3899,12 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file, sa->rtime.sec = ct.tv_sec; sa->rtime.nsec = ct.tv_nsec; - received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid, - BTRFS_UUID_SIZE); if (received_uuid_changed && !btrfs_is_empty_uuid(root_item->received_uuid)) { ret = btrfs_uuid_tree_remove(trans, root_item->received_uuid, BTRFS_UUID_KEY_RECEIVED_SUBVOL, btrfs_root_id(root)); if (unlikely(ret && ret != -ENOENT)) { - btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); goto out; } @@ -3890,7 +3919,8 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file, ret = btrfs_update_root(trans, fs_info->tree_root, &root->root_key, &root->root_item); - if (ret < 0) { + if (unlikely(ret < 0)) { + btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); goto out; } @@ -4581,7 +4611,7 @@ static int btrfs_uring_read_extent(struct kiocb *iocb, struct iov_iter *iter, { struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp)); struct extent_io_tree *io_tree = &inode->io_tree; - struct page **pages; + struct page **pages = NULL; struct btrfs_uring_priv *priv = NULL; unsigned long nr_pages; int ret; @@ -4639,6 +4669,11 @@ static int btrfs_uring_read_extent(struct kiocb *iocb, struct iov_iter *iter, btrfs_unlock_extent(io_tree, start, lockend, &cached_state); btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); kfree(priv); + for (int i = 0; i < nr_pages; i++) { + if (pages[i]) + __free_page(pages[i]); + } + kfree(pages); return ret; }
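The btrfs_uring_read_extent() fix above initializes the pages pointer and, on the error path, frees every page that was actually allocated plus the array itself. A userspace analog of that partial-allocation cleanup discipline (hypothetical names and sizes):

#include <stdlib.h>

/* Allocate n buffers; on failure free whatever was allocated so far. */
static char **alloc_bufs(int n, size_t sz)
{
        char **bufs = calloc(n, sizeof(*bufs)); /* NULL slots are safe to free */
        if (!bufs)
                return NULL;
        for (int i = 0; i < n; i++) {
                bufs[i] = malloc(sz);
                if (!bufs[i])
                        goto err;
        }
        return bufs;
err:
        for (int i = 0; i < n; i++)
                free(bufs[i]); /* free(NULL) is a no-op */
        free(bufs);
        return NULL;
}

int main(void)
{
        char **b = alloc_bufs(16, 4096);
        if (b) {
                for (int i = 0; i < 16; i++)
                        free(b[i]);
                free(b);
        }
        return 0;
}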
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 049a940b..79642e0 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c
@@ -429,7 +429,7 @@ static void copy_compressed_segment(struct compressed_bio *cb, int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb) { struct workspace *workspace = list_entry(ws, struct workspace, list); - const struct btrfs_fs_info *fs_info = cb->bbio.inode->root->fs_info; + struct btrfs_fs_info *fs_info = cb->bbio.inode->root->fs_info; const u32 sectorsize = fs_info->sectorsize; struct folio_iter fi; char *kaddr; @@ -447,7 +447,7 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb) /* There must be a compressed folio and matches the sectorsize. */ if (unlikely(!fi.folio)) return -EINVAL; - ASSERT(folio_size(fi.folio) == sectorsize); + ASSERT(folio_size(fi.folio) == btrfs_min_folio_size(fs_info)); kaddr = kmap_local_folio(fi.folio, 0); len_in = read_compress_length(kaddr); kunmap_local(kaddr);
diff --git a/fs/btrfs/messages.h b/fs/btrfs/messages.h index 943e539..c8e92ef 100644 --- a/fs/btrfs/messages.h +++ b/fs/btrfs/messages.h
@@ -31,9 +31,6 @@ void _btrfs_printk(const struct btrfs_fs_info *fs_info, unsigned int level, cons #define btrfs_printk_in_rcu(fs_info, level, fmt, args...) \ btrfs_no_printk(fs_info, fmt, ##args) -#define btrfs_printk_in_rcu(fs_info, level, fmt, args...) \ - btrfs_no_printk(fs_info, fmt, ##args) - #define btrfs_printk_rl_in_rcu(fs_info, level, fmt, args...) \ btrfs_no_printk(fs_info, fmt, ##args)
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index f189bf0..b7dfe87 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c
@@ -38,6 +38,7 @@ static const struct root_name_map root_map[] = { { BTRFS_BLOCK_GROUP_TREE_OBJECTID, "BLOCK_GROUP_TREE" }, { BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" }, { BTRFS_RAID_STRIPE_TREE_OBJECTID, "RAID_STRIPE_TREE" }, + { BTRFS_REMAP_TREE_OBJECTID, "REMAP_TREE" }, }; const char *btrfs_root_name(const struct btrfs_key *key, char *buf) @@ -415,6 +416,9 @@ static void key_type_string(const struct btrfs_key *key, char *buf, int buf_size [BTRFS_UUID_KEY_SUBVOL] = "UUID_KEY_SUBVOL", [BTRFS_UUID_KEY_RECEIVED_SUBVOL] = "UUID_KEY_RECEIVED_SUBVOL", [BTRFS_RAID_STRIPE_KEY] = "RAID_STRIPE", + [BTRFS_IDENTITY_REMAP_KEY] = "IDENTITY_REMAP", + [BTRFS_REMAP_KEY] = "REMAP", + [BTRFS_REMAP_BACKREF_KEY] = "REMAP_BACKREF", }; if (key->type == 0 && key->objectid == BTRFS_FREE_SPACE_OBJECTID) @@ -435,6 +439,7 @@ void btrfs_print_leaf(const struct extent_buffer *l) struct btrfs_extent_data_ref *dref; struct btrfs_shared_data_ref *sref; struct btrfs_dev_extent *dev_extent; + struct btrfs_remap_item *remap; struct btrfs_key key; if (!l) @@ -569,6 +574,11 @@ void btrfs_print_leaf(const struct extent_buffer *l) print_raid_stripe_key(l, btrfs_item_size(l, i), btrfs_item_ptr(l, i, struct btrfs_stripe_extent)); break; + case BTRFS_REMAP_KEY: + case BTRFS_REMAP_BACKREF_KEY: + remap = btrfs_item_ptr(l, i, struct btrfs_remap_item); + pr_info("\t\taddress %llu\n", btrfs_remap_address(l, remap)); + break; } } }
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 3cdd975..41589ce 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c
@@ -370,7 +370,7 @@ static bool squota_check_parent_usage(struct btrfs_fs_info *fs_info, struct btrf nr_members++; } mismatch = (parent->excl != excl_sum || parent->rfer != rfer_sum || - parent->excl_cmpr != excl_cmpr_sum || parent->rfer_cmpr != excl_cmpr_sum); + parent->excl_cmpr != excl_cmpr_sum || parent->rfer_cmpr != rfer_cmpr_sum); WARN(mismatch, "parent squota qgroup %hu/%llu has mismatched usage from its %d members. " @@ -3739,6 +3739,14 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, mutex_lock(&fs_info->qgroup_rescan_lock); extent_root = btrfs_extent_root(fs_info, fs_info->qgroup_rescan_progress.objectid); + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", + fs_info->qgroup_rescan_progress.objectid); + mutex_unlock(&fs_info->qgroup_rescan_lock); + return -EUCLEAN; + } + ret = btrfs_search_slot_for_read(extent_root, &fs_info->qgroup_rescan_progress, path, 1, 0);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index b4511f5..02105d6 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c
@@ -2297,8 +2297,7 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc, static void fill_data_csums(struct btrfs_raid_bio *rbio) { struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; - struct btrfs_root *csum_root = btrfs_csum_root(fs_info, - rbio->bioc->full_stripe_logical); + struct btrfs_root *csum_root; const u64 start = rbio->bioc->full_stripe_logical; const u32 len = (rbio->nr_data * rbio->stripe_nsectors) << fs_info->sectorsize_bits; @@ -2331,6 +2330,15 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio) goto error; } + csum_root = btrfs_csum_root(fs_info, rbio->bioc->full_stripe_logical); + if (unlikely(!csum_root)) { + btrfs_err(fs_info, + "missing csum root for extent at bytenr %llu", + rbio->bioc->full_stripe_logical); + ret = -EUCLEAN; + goto error; + } + ret = btrfs_lookup_csums_bitmap(csum_root, NULL, start, start + len - 1, rbio->csum_buf, rbio->csum_bitmap); if (ret < 0)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 95db7c4..033f74f 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c
@@ -4185,6 +4185,8 @@ static int move_existing_remap(struct btrfs_fs_info *fs_info, dest_addr = ins.objectid; dest_length = ins.offset; + dest_bg = btrfs_lookup_block_group(fs_info, dest_addr); + if (!is_data && !IS_ALIGNED(dest_length, fs_info->nodesize)) { u64 new_length = ALIGN_DOWN(dest_length, fs_info->nodesize); @@ -4295,15 +4297,12 @@ static int move_existing_remap(struct btrfs_fs_info *fs_info, if (unlikely(ret)) goto end; - dest_bg = btrfs_lookup_block_group(fs_info, dest_addr); - adjust_block_group_remap_bytes(trans, dest_bg, dest_length); mutex_lock(&dest_bg->free_space_lock); bg_needs_free_space = test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &dest_bg->runtime_flags); mutex_unlock(&dest_bg->free_space_lock); - btrfs_put_block_group(dest_bg); if (bg_needs_free_space) { ret = btrfs_add_block_group_free_space(trans, dest_bg); @@ -4333,13 +4332,13 @@ static int move_existing_remap(struct btrfs_fs_info *fs_info, btrfs_end_transaction(trans); } } else { - dest_bg = btrfs_lookup_block_group(fs_info, dest_addr); btrfs_free_reserved_bytes(dest_bg, dest_length, 0); - btrfs_put_block_group(dest_bg); ret = btrfs_commit_transaction(trans); } + btrfs_put_block_group(dest_bg); + return ret; } @@ -4399,6 +4398,8 @@ static int move_existing_remaps(struct btrfs_fs_info *fs_info, leaf = path->nodes[0]; } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); } remap = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_remap_item); @@ -4723,6 +4724,7 @@ int btrfs_last_identity_remap_gone(struct btrfs_chunk_map *chunk_map, ret = btrfs_remove_dev_extents(trans, chunk_map); if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); return ret; } @@ -4732,6 +4734,7 @@ int btrfs_last_identity_remap_gone(struct btrfs_chunk_map *chunk_map, if (unlikely(ret)) { mutex_unlock(&trans->fs_info->chunk_mutex); btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); return ret; } } @@ -4750,6 +4753,7 @@ int btrfs_last_identity_remap_gone(struct btrfs_chunk_map *chunk_map, ret = remove_chunk_stripes(trans, chunk_map, path); if (unlikely(ret)) { btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); return ret; } @@ -4949,6 +4953,12 @@ static int do_remap_reloc_trans(struct btrfs_fs_info *fs_info, struct btrfs_space_info *sinfo = src_bg->space_info; extent_root = btrfs_extent_root(fs_info, src_bg->start); + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for block group at offset %llu", + src_bg->start); + return -EUCLEAN; + } trans = btrfs_start_transaction(extent_root, 0); if (IS_ERR(trans)) @@ -5301,6 +5311,13 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start, int ret; bool bg_is_ro = false; + if (unlikely(!extent_root)) { + btrfs_err(fs_info, + "missing extent root for block group at offset %llu", + group_start); + return -EUCLEAN; + } + /* * This only gets set if we had a half-deleted snapshot on mount. 
We * cannot allow relocation to start while we're still trying to clean up @@ -5531,12 +5548,17 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info) goto out; } + rc->extent_root = btrfs_extent_root(fs_info, 0); + if (unlikely(!rc->extent_root)) { + btrfs_err(fs_info, "missing extent root for extent at bytenr 0"); + ret = -EUCLEAN; + goto out; + } + ret = reloc_chunk_start(fs_info); if (ret < 0) goto out_end; - rc->extent_root = btrfs_extent_root(fs_info, 0); - set_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root); @@ -5631,6 +5653,14 @@ int btrfs_reloc_clone_csums(struct btrfs_ordered_extent *ordered) LIST_HEAD(list); int ret; + if (unlikely(!csum_root)) { + btrfs_mark_ordered_extent_error(ordered); + btrfs_err(fs_info, + "missing csum root for extent at bytenr %llu", + disk_bytenr); + return -EUCLEAN; + } + ret = btrfs_lookup_csums_list(csum_root, disk_bytenr, disk_bytenr + ordered->num_bytes - 1, &list, false); @@ -5982,6 +6012,9 @@ static int remove_range_from_remap_tree(struct btrfs_trans_handle *trans, struct btrfs_block_group *dest_bg; dest_bg = btrfs_lookup_block_group(fs_info, new_addr); + if (unlikely(!dest_bg)) + return -EUCLEAN; + adjust_block_group_remap_bytes(trans, dest_bg, -overlap_length); btrfs_put_block_group(dest_bg); ret = btrfs_add_to_free_space_tree(trans,
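Several of the relocation.c hunks above rearrange move_existing_remap() so the destination block group is looked up once before any work starts and its single reference is dropped once at the end, instead of separate lookup/put pairs on each branch. A toy refcount sketch of that discipline, with all names hypothetical:

#include <stdio.h>

struct bg { int refs; };

static struct bg *bg_get(struct bg *b) { b->refs++; return b; }
static void bg_put(struct bg *b) { b->refs--; }

static int do_remap(struct bg *b, int commit)
{
        struct bg *dest = bg_get(b); /* one lookup, up front */
        int ret = 0;

        if (commit)
                ; /* ... commit path uses dest ... */
        else
                ; /* ... free-reserved path uses dest ... */

        bg_put(dest); /* one put covers every branch */
        return ret;
}

int main(void)
{
        struct bg b = { .refs = 1 };
        do_remap(&b, 1);
        printf("refs=%d\n", b.refs); /* back to 1, no leaked reference */
        return 0;
}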
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 81022d9..bc94bbc 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c
@@ -743,7 +743,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr btrfs_warn_rl(fs_info, "scrub: tree block %llu mirror %u has bad fsid, has %pU want %pU", logical, stripe->mirror_num, - header->fsid, fs_info->fs_devices->fsid); + header->fsid, fs_info->fs_devices->metadata_uuid); return; } if (memcmp(header->chunk_tree_uuid, fs_info->chunk_tree_uuid,
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 52a267a..87cbc05 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c
@@ -2194,8 +2194,11 @@ void btrfs_reclaim_sweep(const struct btrfs_fs_info *fs_info) if (!btrfs_should_periodic_reclaim(space_info)) continue; for (raid = 0; raid < BTRFS_NR_RAID_TYPES; raid++) { - if (do_reclaim_sweep(space_info, raid)) + if (do_reclaim_sweep(space_info, raid)) { + spin_lock(&space_info->lock); btrfs_set_periodic_reclaim_ready(space_info, false); + spin_unlock(&space_info->lock); + } } } }
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7ef8c9b..8dd77c4 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c
@@ -1905,6 +1905,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ret = btrfs_uuid_tree_add(trans, new_root_item->received_uuid, BTRFS_UUID_KEY_RECEIVED_SUBVOL, objectid); + /* + * We are creating a lot of snapshots of the same root that was + * received (has a received UUID) and reached a leaf's limit for + * an item. We can safely ignore this and avoid a transaction + * abort. A deletion of this snapshot will still work since we + * ignore if an item with a BTRFS_UUID_KEY_RECEIVED_SUBVOL key + * is missing (see btrfs_delete_subvolume()). Send/receive will + * work too since it peeks the first root id from the existing + * item (it could peek any), and in case it's missing it + * falls back to search by BTRFS_UUID_KEY_SUBVOL keys. + * Creation of a snapshot does not require CAP_SYS_ADMIN, so + * we don't want users triggering transaction aborts, either + * intentionally or not. + */ + if (ret == -EOVERFLOW) + ret = 0; if (unlikely(ret && ret != -EEXIST)) { btrfs_abort_transaction(trans, ret); goto fail;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 452394b..b4e114e 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c
@@ -1284,10 +1284,27 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key, } if (unlikely(btrfs_root_drop_level(&ri) >= BTRFS_MAX_LEVEL)) { generic_err(leaf, slot, - "invalid root level, have %u expect [0, %u]", + "invalid root drop_level, have %u expect [0, %u]", btrfs_root_drop_level(&ri), BTRFS_MAX_LEVEL - 1); return -EUCLEAN; } + /* + * If drop_progress.objectid is non-zero, a btrfs_drop_snapshot() was + * interrupted and the resume point was recorded in drop_progress and + * drop_level. In that case drop_level must be >= 1: level 0 is the + * leaf level and drop_snapshot never saves a checkpoint there (it + * only records checkpoints at internal node levels in DROP_REFERENCE + * stage). A zero drop_level combined with a non-zero drop_progress + * objectid indicates on-disk corruption and would cause a BUG_ON in + * merge_reloc_root() and btrfs_drop_snapshot() at mount time. + */ + if (unlikely(btrfs_disk_key_objectid(&ri.drop_progress) != 0 && + btrfs_root_drop_level(&ri) == 0)) { + generic_err(leaf, slot, + "invalid root drop_level 0 with non-zero drop_progress objectid %llu", + btrfs_disk_key_objectid(&ri.drop_progress)); + return -EUCLEAN; + } /* Flags check */ if (unlikely(btrfs_root_flags(&ri) & ~valid_root_flags)) { @@ -1740,7 +1757,7 @@ static int check_extent_data_ref(struct extent_buffer *leaf, objectid > BTRFS_LAST_FREE_OBJECTID)) { extent_err(leaf, slot, "invalid extent data backref objectid value %llu", - root); + objectid); return -EUCLEAN; } if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) { @@ -1921,7 +1938,7 @@ static int check_dev_extent_item(const struct extent_buffer *leaf, if (unlikely(prev_key->offset + prev_len > key->offset)) { generic_err(leaf, slot, "dev extent overlap, prev offset %llu len %llu current offset %llu", - prev_key->objectid, prev_len, key->offset); + prev_key->offset, prev_len, key->offset); return -EUCLEAN; } }
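The new tree-checker rule above rejects a root item whose drop_progress objectid is non-zero while drop_level is 0. A tiny standalone validator mirroring that invariant (EUCLEAN is 117 on Linux; names illustrative):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Returns 0 if the (drop_progress objectid, drop_level) pair is sane. */
static int check_drop_state(uint64_t drop_objectid, uint8_t drop_level,
                            uint8_t max_level)
{
        if (drop_level >= max_level)
                return -EUCLEAN; /* level out of range */
        if (drop_objectid != 0 && drop_level == 0)
                return -EUCLEAN; /* checkpoint recorded at leaf level */
        return 0;
}

int main(void)
{
        printf("%d\n", check_drop_state(0, 0, 8));   /* 0: never interrupted */
        printf("%d\n", check_drop_state(256, 1, 8)); /* 0: valid resume point */
        printf("%d\n", check_drop_state(256, 0, 8)); /* -117: corruption */
        return 0;
}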
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 780a06d..ac871ef 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c
@@ -984,6 +984,13 @@ static noinline int replay_one_extent(struct walk_control *wc) sums = list_first_entry(&ordered_sums, struct btrfs_ordered_sum, list); csum_root = btrfs_csum_root(fs_info, sums->logical); + if (unlikely(!csum_root)) { + btrfs_err(fs_info, + "missing csum root for extent at bytenr %llu", + sums->logical); + ret = -EUCLEAN; + } + if (!ret) { ret = btrfs_del_csums(trans, csum_root, sums->logical, sums->len); @@ -4609,21 +4616,32 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, struct inode *inode, bool log_inode_only, u64 logged_isize) { + u64 gen = BTRFS_I(inode)->generation; u64 flags; if (log_inode_only) { - /* set the generation to zero so the recover code - * can tell the difference between an logging - * just to say 'this inode exists' and a logging - * to say 'update this inode with these values' + /* + * Set the generation to zero so the recover code can tell the + * difference between a logging just to say 'this inode exists' + * and a logging to say 'update this inode with these values'. + * But only if the inode was not already logged before. + * We access ->logged_trans directly since it was already set + * up in the call chain by btrfs_log_inode(), and use data_race() + * to avoid false alerts from KCSAN, since it was set already + * and no one can set it to 0, as that only happens on eviction + * and we are holding a ref on the inode. */ - btrfs_set_inode_generation(leaf, item, 0); + ASSERT(data_race(BTRFS_I(inode)->logged_trans) > 0); + if (data_race(BTRFS_I(inode)->logged_trans) < trans->transid) + gen = 0; + btrfs_set_inode_size(leaf, item, logged_isize); } else { - btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); btrfs_set_inode_size(leaf, item, inode->i_size); } + btrfs_set_inode_generation(leaf, item, gen); + btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); btrfs_set_inode_mode(leaf, item, inode->i_mode); @@ -4890,6 +4908,13 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, } csum_root = btrfs_csum_root(trans->fs_info, disk_bytenr); + if (unlikely(!csum_root)) { + btrfs_err(trans->fs_info, + "missing csum root for extent at bytenr %llu", + disk_bytenr); + return -EUCLEAN; + } + disk_bytenr += extent_offset; ret = btrfs_lookup_csums_list(csum_root, disk_bytenr, disk_bytenr + extent_num_bytes - 1, @@ -5086,6 +5111,13 @@ static int log_extent_csums(struct btrfs_trans_handle *trans, /* block start is already adjusted for the file extent offset. 
*/ block_start = btrfs_extent_map_block_start(em); csum_root = btrfs_csum_root(trans->fs_info, block_start); + if (unlikely(!csum_root)) { + btrfs_err(trans->fs_info, + "missing csum root for extent at bytenr %llu", + block_start); + return -EUCLEAN; + } + ret = btrfs_lookup_csums_list(csum_root, block_start + csum_offset, block_start + csum_offset + csum_len - 1, &ordered_sums, false); @@ -5427,42 +5459,63 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, return 0; } -static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode, - struct btrfs_path *path, u64 *size_ret) +static int get_inode_size_to_log(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode, + struct btrfs_path *path, u64 *size_ret) { struct btrfs_key key; + struct btrfs_inode_item *item; int ret; key.objectid = btrfs_ino(inode); key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - ret = btrfs_search_slot(NULL, log, &key, path, 0, 0); - if (ret < 0) { - return ret; - } else if (ret > 0) { - *size_ret = 0; - } else { - struct btrfs_inode_item *item; + /* + * Our caller called inode_logged(), so logged_trans is up to date. + * Use data_race() to silence any warning from KCSAN. Once logged_trans + * is set, it can only be reset to 0 after inode eviction. + */ + if (data_race(inode->logged_trans) == trans->transid) { + ret = btrfs_search_slot(NULL, inode->root->log_root, &key, path, 0, 0); + } else if (inode->generation < trans->transid) { + path->search_commit_root = true; + path->skip_locking = true; + ret = btrfs_search_slot(NULL, inode->root, &key, path, 0, 0); + path->search_commit_root = false; + path->skip_locking = false; - item = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_inode_item); - *size_ret = btrfs_inode_size(path->nodes[0], item); - /* - * If the in-memory inode's i_size is smaller then the inode - * size stored in the btree, return the inode's i_size, so - * that we get a correct inode size after replaying the log - * when before a power failure we had a shrinking truncate - * followed by addition of a new name (rename / new hard link). - * Otherwise return the inode size from the btree, to avoid - * data loss when replaying a log due to previously doing a - * write that expands the inode's size and logging a new name - * immediately after. - */ - if (*size_ret > inode->vfs_inode.i_size) - *size_ret = inode->vfs_inode.i_size; + } else { + *size_ret = 0; + return 0; + } + /* + * If the inode was logged before or is from a past transaction, then + * its inode item must exist in the log root or in the commit root. + */ + ASSERT(ret <= 0); + if (WARN_ON_ONCE(ret > 0)) + ret = -ENOENT; + + if (ret < 0) + return ret; + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + *size_ret = btrfs_inode_size(path->nodes[0], item); + /* + * If the in-memory inode's i_size is smaller than the inode size stored + * in the btree, return the inode's i_size, so that we get a correct + * inode size after replaying the log when before a power failure we had + * a shrinking truncate followed by addition of a new name (rename / new + * hard link). Otherwise return the inode size from the btree, to avoid + * data loss when replaying a log due to previously doing a write that + * expands the inode's size and logging a new name immediately after. 
+ */ + if (*size_ret > inode->vfs_inode.i_size) + *size_ret = inode->vfs_inode.i_size; + btrfs_release_path(path); return 0; } @@ -6195,6 +6248,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_log_ctx *ctx) { + const bool orig_log_new_dentries = ctx->log_new_dentries; int ret = 0; /* @@ -6256,7 +6310,11 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, * dir index key range logged for the directory. So we * must make sure the deletion is recorded. */ + ctx->log_new_dentries = false; ret = btrfs_log_inode(trans, inode, LOG_INODE_ALL, ctx); + if (!ret && ctx->log_new_dentries) + ret = log_new_dir_dentries(trans, inode, ctx); + btrfs_add_delayed_iput(inode); if (ret) break; @@ -6291,6 +6349,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, break; } + ctx->log_new_dentries = orig_log_new_dentries; ctx->logging_conflict_inodes = false; if (ret) free_conflicting_inodes(ctx); @@ -6969,7 +7028,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, ret = drop_inode_items(trans, log, path, inode, BTRFS_XATTR_ITEM_KEY); } else { - if (inode_only == LOG_INODE_EXISTS && ctx->logged_before) { + if (inode_only == LOG_INODE_EXISTS) { /* * Make sure the new inode item we write to the log has * the same isize as the current one (if it exists). @@ -6983,7 +7042,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, * (zeroes), as if an expanding truncate happened, * instead of getting a file of 4Kb only. */ - ret = logged_inode_size(log, inode, path, &logged_isize); + ret = get_inode_size_to_log(trans, inode, path, &logged_isize); if (ret) goto out_unlock; }
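get_inode_size_to_log() ultimately keeps the smaller of the on-disk and in-memory sizes, covering both the shrinking-truncate-then-link and expanding-write-then-link cases described in the comment. Reduced to its core decision (a sketch, not the kernel function):

#include <stdint.h>
#include <stdio.h>

/* Size worth logging for an inode that already has an item on disk. */
static uint64_t size_to_log(uint64_t btree_size, uint64_t in_memory_size)
{
        /*
         * Shrinking truncate followed by a new name: the in-memory size
         * is the truth. Expanding write followed by a new name: the
         * btree size is already correct. Either way, take the minimum.
         */
        return btree_size < in_memory_size ? btree_size : in_memory_size;
}

int main(void)
{
        printf("%llu\n", (unsigned long long)size_to_log(8192, 4096)); /* 4096 */
        printf("%llu\n", (unsigned long long)size_to_log(4096, 8192)); /* 4096 */
        return 0;
}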
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index f24c14b..43c17a1 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c
@@ -199,6 +199,44 @@ int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, const u8 *uuid, u8 return 0; } +/* + * Check if we can add one root ID to a UUID key. + * If the key does not exist yet, we can; otherwise we can only if the extended + * item does not exceed the maximum item size permitted by the leaf size. + * + * Returns 0 on success, negative value on error. + */ +int btrfs_uuid_tree_check_overflow(struct btrfs_fs_info *fs_info, + const u8 *uuid, u8 type) +{ + BTRFS_PATH_AUTO_FREE(path); + int ret; + u32 item_size; + struct btrfs_key key; + + if (WARN_ON_ONCE(!fs_info->uuid_root)) + return -EINVAL; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + btrfs_uuid_to_key(uuid, type, &key); + ret = btrfs_search_slot(NULL, fs_info->uuid_root, &key, path, 0, 0); + if (ret < 0) + return ret; + if (ret > 0) + return 0; + + item_size = btrfs_item_size(path->nodes[0], path->slots[0]); + + if (sizeof(struct btrfs_item) + item_size + sizeof(u64) > + BTRFS_LEAF_DATA_SIZE(fs_info)) + return -EOVERFLOW; + + return 0; +} + static int btrfs_uuid_iter_rem(struct btrfs_root *uuid_root, u8 *uuid, u8 type, u64 subid) {
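btrfs_uuid_tree_check_overflow() refuses to extend a UUID item once item header + payload + one more u64 would no longer fit in the leaf data area. Back-of-the-envelope capacity for the default 16KiB nodesize, assuming the usual on-disk sizes (101-byte btrfs_header, 25-byte btrfs_item); treat those constants as assumptions:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t nodesize = 16384; /* default leaf size */
        uint32_t header = 101;     /* assumed sizeof(struct btrfs_header) */
        uint32_t item = 25;        /* assumed sizeof(struct btrfs_item) */
        uint32_t leaf_data = nodesize - header;

        /* Max subvolume IDs (u64 each) one UUID item can hold. */
        uint32_t max_subids = (leaf_data - item) / sizeof(uint64_t);
        printf("leaf data %u bytes, max %u root IDs per UUID key\n",
               leaf_data, max_subids); /* 16283 bytes, 2032 IDs */
        return 0;
}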
diff --git a/fs/btrfs/uuid-tree.h b/fs/btrfs/uuid-tree.h index c60ad20..02b235a 100644 --- a/fs/btrfs/uuid-tree.h +++ b/fs/btrfs/uuid-tree.h
@@ -12,6 +12,8 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, const u8 *uuid, u8 typ u64 subid); int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, const u8 *uuid, u8 type, u64 subid); +int btrfs_uuid_tree_check_overflow(struct btrfs_fs_info *fs_info, + const u8 *uuid, u8 type); int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info); int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info); int btrfs_uuid_scan_kthread(void *data);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6fb0c4c..6b8e810 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c
@@ -3587,7 +3587,7 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset, bool v /* step one, relocate all the extents inside this chunk */ btrfs_scrub_pause(fs_info); - ret = btrfs_relocate_block_group(fs_info, chunk_offset, true); + ret = btrfs_relocate_block_group(fs_info, chunk_offset, verbose); btrfs_scrub_continue(fs_info); if (ret) { /* @@ -4277,20 +4277,29 @@ static int balance_remap_chunks(struct btrfs_fs_info *fs_info, struct btrfs_path end: while (!list_empty(chunks)) { bool is_unused; + struct btrfs_block_group *bg; rci = list_first_entry(chunks, struct remap_chunk_info, list); - spin_lock(&rci->bg->lock); - is_unused = !btrfs_is_block_group_used(rci->bg); - spin_unlock(&rci->bg->lock); + bg = rci->bg; + if (bg) { + /* + * This is a bit racy and the 'used' status can change + * but this is not a problem as later functions will + * verify it again. + */ + spin_lock(&bg->lock); + is_unused = !btrfs_is_block_group_used(bg); + spin_unlock(&bg->lock); - if (is_unused) - btrfs_mark_bg_unused(rci->bg); + if (is_unused) + btrfs_mark_bg_unused(bg); - if (rci->made_ro) - btrfs_dec_block_group_ro(rci->bg); + if (rci->made_ro) + btrfs_dec_block_group_ro(bg); - btrfs_put_block_group(rci->bg); + btrfs_put_block_group(bg); + } list_del(&rci->list); kfree(rci); @@ -6907,7 +6916,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, ret = btrfs_translate_remap(fs_info, &new_logical, length); if (ret) - return ret; + goto out; if (new_logical != logical) { btrfs_free_chunk_map(map); @@ -6921,8 +6930,10 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, } num_copies = btrfs_chunk_map_num_copies(map); - if (io_geom.mirror_num > num_copies) - return -EINVAL; + if (io_geom.mirror_num > num_copies) { + ret = -EINVAL; + goto out; + } map_offset = logical - map->start; io_geom.raid56_full_stripe_start = (u64)-1; @@ -8088,8 +8099,9 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans) smp_rmb(); ret = update_dev_stat_item(trans, device); - if (!ret) - atomic_sub(stats_cnt, &device->dev_stats_ccnt); + if (ret) + break; + atomic_sub(stats_cnt, &device->dev_stats_ccnt); } mutex_unlock(&fs_devices->device_list_mutex);
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 1b7544a..c676e71 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c
@@ -308,7 +308,9 @@ int zlib_compress_bio(struct list_head *ws, struct compressed_bio *cb) } /* Queue the remaining part of the folio. */ if (workspace->strm.total_out > bio->bi_iter.bi_size) { - u32 cur_len = offset_in_folio(out_folio, workspace->strm.total_out); + const u32 cur_len = workspace->strm.total_out - bio->bi_iter.bi_size; + + ASSERT(cur_len <= folio_size(out_folio)); if (!bio_add_folio(bio, out_folio, cur_len, 0)) { ret = -E2BIG;
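The zlib fix above computes the not-yet-queued tail as total_out minus the bytes already in the bio; the old offset_in_folio() expression returns 0 whenever total_out lands exactly on a folio boundary, dropping the tail. Illustrative numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t folio_size = 65536;
        uint32_t queued = 61440;    /* bytes already added to the bio */
        uint32_t total_out = 65536; /* compressed bytes produced so far */

        uint32_t fixed = total_out - queued;     /* 4096: correct tail */
        uint32_t buggy = total_out % folio_size; /* 0: tail is dropped */
        printf("tail: fixed=%u buggy=%u\n", fixed, buggy);
        return 0;
}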
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 39930d9..0cd7fd3 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c
@@ -337,7 +337,10 @@ int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info) if (!btrfs_fs_incompat(fs_info, ZONED)) return 0; - mutex_lock(&fs_devices->device_list_mutex); + /* + * No need to take the device_list mutex here, we're still in the mount + * path and devices cannot be added to or removed from the list yet. + */ list_for_each_entry(device, &fs_devices->devices, dev_list) { /* We can skip reading of zone info for missing devices */ if (!device->bdev) @@ -347,7 +350,6 @@ int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info) if (ret) break; } - mutex_unlock(&fs_devices->device_list_mutex); return ret; } @@ -1259,6 +1261,13 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache, key.offset = 0; root = btrfs_extent_root(fs_info, key.objectid); + if (unlikely(!root)) { + btrfs_err(fs_info, + "missing extent root for extent at bytenr %llu", + key.objectid); + return -EUCLEAN; + } + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); /* We should not find the exact match */ if (unlikely(!ret))
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index 6f076fa..3e847b9 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c
@@ -600,7 +600,7 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) bio_first_folio(&fi, &cb->bbio.bio, 0); if (unlikely(!fi.folio)) return -EINVAL; - ASSERT(folio_size(fi.folio) == blocksize); + ASSERT(folio_size(fi.folio) == min_folio_size); stream = zstd_init_dstream( ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index e87b3bb..2090fc7 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c
@@ -1326,7 +1326,6 @@ void ceph_process_folio_batch(struct address_space *mapping, continue; } else if (rc == -E2BIG) { folio_unlock(folio); - ceph_wbc->fbatch.folios[i] = NULL; break; }
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index f3fe786..7dc3077 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c
@@ -79,7 +79,7 @@ static int mdsc_show(struct seq_file *s, void *p) if (req->r_inode) { seq_printf(s, " #%llx", ceph_ino(req->r_inode)); } else if (req->r_dentry) { - struct ceph_path_info path_info; + struct ceph_path_info path_info = {0}; path = ceph_mdsc_build_path(mdsc, req->r_dentry, &path_info, 0); if (IS_ERR(path)) path = NULL; @@ -98,7 +98,7 @@ static int mdsc_show(struct seq_file *s, void *p) } if (req->r_old_dentry) { - struct ceph_path_info path_info; + struct ceph_path_info path_info = {0}; path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &path_info, 0); if (IS_ERR(path)) path = NULL;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 86d7aa5..bac9cfb 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c
@@ -1339,6 +1339,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = d_inode(dentry); + struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req; bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS); struct dentry *dn; @@ -1363,7 +1364,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) if (!dn) { try_async = false; } else { - struct ceph_path_info path_info; + struct ceph_path_info path_info = {0}; path = ceph_mdsc_build_path(mdsc, dn, &path_info, 0); if (IS_ERR(path)) { try_async = false; @@ -1424,7 +1425,19 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) * We have enough caps, so we assume that the unlink * will succeed. Fix up the target inode and dcache. */ - drop_nlink(inode); + + /* + * Protect the i_nlink update with i_ceph_lock + * to prevent racing against ceph_fill_inode() + * handling our completion on a worker thread + * and don't decrement if i_nlink has already + * been updated to zero by this completion. + */ + spin_lock(&ci->i_ceph_lock); + if (inode->i_nlink > 0) + drop_nlink(inode); + spin_unlock(&ci->i_ceph_lock); + d_delete(dentry); } else { spin_lock(&fsc->async_unlink_conflict_lock);
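The ceph_unlink() change takes i_ceph_lock and decrements i_nlink only while it is still positive, so a racing reply handler that already zeroed the count cannot drive it negative. The shape of the fix as a userspace sketch, with a pthread mutex standing in for the spinlock:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int nlink = 1;

/* Called from both the unlink path and the reply-completion path. */
static void drop_nlink_once(void)
{
        pthread_mutex_lock(&lock);
        if (nlink > 0) /* skip if the other path got here first */
                nlink--;
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        drop_nlink_once();
        drop_nlink_once(); /* second caller is a no-op, not an underflow */
        printf("nlink=%u\n", nlink);
        return 0;
}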
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 66bbf6d..5e7c73a 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c
@@ -397,7 +397,7 @@ int ceph_open(struct inode *inode, struct file *file) if (!dentry) { do_sync = true; } else { - struct ceph_path_info path_info; + struct ceph_path_info path_info = {0}; path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0); if (IS_ERR(path)) { do_sync = true; @@ -807,7 +807,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, if (!dn) { try_async = false; } else { - struct ceph_path_info path_info; + struct ceph_path_info path_info = {0}; path = ceph_mdsc_build_path(mdsc, dn, &path_info, 0); if (IS_ERR(path)) { try_async = false;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index d76f9a7..d99e12d 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c
@@ -2551,7 +2551,7 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, if (!dentry) { do_sync = true; } else { - struct ceph_path_info path_info; + struct ceph_path_info path_info = {0}; path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0); if (IS_ERR(path)) { do_sync = true;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 23b6d00..b174627 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c
@@ -2768,6 +2768,7 @@ char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, if (ret < 0) { dput(parent); dput(cur); + __putname(path); return ERR_PTR(ret); } @@ -2777,6 +2778,7 @@ char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, if (len < 0) { dput(parent); dput(cur); + __putname(path); return ERR_PTR(len); } } @@ -2813,6 +2815,7 @@ char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, * cannot ever succeed. Creating paths that long is * possible with Ceph, but Linux cannot use them. */ + __putname(path); return ERR_PTR(-ENAMETOOLONG); }
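Each early return in ceph_mdsc_build_path() now releases the name buffer it allocated with __getname(), plugging a leak on every error path. A common way to keep such paths leak-free is a single exit label; a userspace analog, with malloc standing in for __getname():

#include <stdlib.h>
#include <string.h>

static char *build_path(const char *name, int fail)
{
        char *path = malloc(4096); /* stand-in for __getname() */
        if (!path)
                return NULL;

        if (fail) /* any mid-build failure... */
                goto err_free;

        strcpy(path, name);
        return path;

err_free:
        free(path); /* ...must release the buffer before returning */
        return NULL;
}

int main(void)
{
        char *p = build_path("/a/b", 0);
        free(p);
        return 0;
}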
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index a9f645f..97c48eb 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig
@@ -16,22 +16,36 @@ select ZLIB_INFLATE if EROFS_FS_ZIP_DEFLATE select ZSTD_DECOMPRESS if EROFS_FS_ZIP_ZSTD help - EROFS (Enhanced Read-Only File System) is a lightweight read-only - file system with modern designs (e.g. no buffer heads, inline - xattrs/data, chunk-based deduplication, multiple devices, etc.) for - scenarios which need high-performance read-only solutions, e.g. - smartphones with Android OS, LiveCDs and high-density hosts with - numerous containers; + EROFS (Enhanced Read-Only File System) is a modern, lightweight, + secure read-only filesystem for various use cases, such as immutable + system images, container images, application sandboxes, and datasets. - It also provides transparent compression and deduplication support to - improve storage density and maintain relatively high compression - ratios, and it implements in-place decompression to temporarily reuse - page cache for compressed data using proper strategies, which is - quite useful for ensuring guaranteed end-to-end runtime decompression + EROFS uses a flexible, hierarchical on-disk design so that features + can be enabled on demand: the core on-disk format is block-aligned in + order to perform optimally on all kinds of devices, including block + and memory-backed devices; the format is easy to parse and has zero + metadata redundancy, unlike generic filesystems, making it ideal for + filesystem auditing and remote access; inline data, random-access + friendly directory data, inline/shared extended attributes and + chunk-based deduplication ensure space efficiency while maintaining + high performance. + + Optionally, it supports multiple devices to reference external data, + enabling data sharing for container images. + + It also has advanced encoded on-disk layouts, particularly for data + compression and fine-grained deduplication. It utilizes fixed-size + output compression to improve storage density while keeping relatively + high compression ratios. Furthermore, it implements in-place + decompression to reuse file pages to keep compressed data temporarily + with proper strategies, which ensures guaranteed end-to-end runtime performance under extreme memory pressure without extra cost. - See the documentation at <file:Documentation/filesystems/erofs.rst> - and the web pages at <https://erofs.docs.kernel.org> for more details. + For more details, see the web pages at <https://erofs.docs.kernel.org> + and the documentation at <file:Documentation/filesystems/erofs.rst>. + + To compile EROFS filesystem support as a module, choose M here. The + module will be called erofs. If unsure, say N. @@ -105,7 +119,8 @@ depends on EROFS_FS default y help - Enable transparent compression support for EROFS file systems. + Enable EROFS compression layouts so that filesystems containing + compressed files can be parsed by the kernel. If you don't want to enable compression feature, say N.
diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index abe873f..98cdaa1 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c
@@ -25,10 +25,8 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) container_of(iocb, struct erofs_fileio_rq, iocb); struct folio_iter fi; - if (ret >= 0 && ret != rq->bio.bi_iter.bi_size) { - bio_advance(&rq->bio, ret); - zero_fill_bio(&rq->bio); - } + if (ret >= 0 && ret != rq->bio.bi_iter.bi_size) + ret = -EIO; if (!rq->bio.bi_end_io) { bio_for_each_folio_all(fi, &rq->bio) { DBG_BUGON(folio_test_uptodate(fi.folio));
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 4f86169..4b3d214 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c
@@ -222,6 +222,7 @@ static int erofs_read_inode(struct inode *inode) static int erofs_fill_inode(struct inode *inode) { + const struct address_space_operations *aops; int err; trace_erofs_fill_inode(inode); @@ -254,7 +255,11 @@ static int erofs_fill_inode(struct inode *inode) } mapping_set_large_folios(inode->i_mapping); - return erofs_inode_set_aops(inode, inode, false); + aops = erofs_get_aops(inode, false); + if (IS_ERR(aops)) + return PTR_ERR(aops); + inode->i_mapping->a_ops = aops; + return 0; } /*
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index d163445..a4f0a42 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h
@@ -471,26 +471,24 @@ static inline void *erofs_vm_map_ram(struct page **pages, unsigned int count) return NULL; } -static inline int erofs_inode_set_aops(struct inode *inode, - struct inode *realinode, bool no_fscache) +static inline const struct address_space_operations * +erofs_get_aops(struct inode *realinode, bool no_fscache) { if (erofs_inode_is_data_compressed(EROFS_I(realinode)->datalayout)) { if (!IS_ENABLED(CONFIG_EROFS_FS_ZIP)) - return -EOPNOTSUPP; + return ERR_PTR(-EOPNOTSUPP); DO_ONCE_LITE_IF(realinode->i_blkbits != PAGE_SHIFT, erofs_info, realinode->i_sb, "EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!"); - inode->i_mapping->a_ops = &z_erofs_aops; - return 0; + return &z_erofs_aops; } - inode->i_mapping->a_ops = &erofs_aops; if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && !no_fscache && erofs_is_fscache_mode(realinode->i_sb)) - inode->i_mapping->a_ops = &erofs_fscache_access_aops; + return &erofs_fscache_access_aops; if (IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && erofs_is_fileio_mode(EROFS_SB(realinode->i_sb))) - inode->i_mapping->a_ops = &erofs_fileio_aops; - return 0; + return &erofs_fileio_aops; + return &erofs_aops; } int erofs_register_sysfs(struct super_block *sb);
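erofs_get_aops() is now a pure selector: it returns either an address_space_operations pointer or an ERR_PTR and leaves the mapping untouched, which is what makes the later `aops != sharedinode->i_mapping->a_ops` comparison in ishare.c possible. A userspace sketch of the ERR_PTR convention (simplified encoding; -95 stands in for EOPNOTSUPP, and the ops tables are illustrative):

#include <stdio.h>

struct aops { const char *name; };

static const struct aops plain_aops = { "plain" };
static const struct aops zip_aops = { "zip" };

#define MAX_ERRNO 4095
#define ERR_PTR(e) ((void *)(long)(e))
#define IS_ERR(p)  ((unsigned long)(p) >= (unsigned long)-MAX_ERRNO)

/* Pick the ops table for an inode; never touches the mapping itself. */
static const struct aops *get_aops(int compressed, int zip_enabled)
{
        if (compressed) {
                if (!zip_enabled)
                        return ERR_PTR(-95); /* -EOPNOTSUPP */
                return &zip_aops;
        }
        return &plain_aops;
}

int main(void)
{
        const struct aops *a = get_aops(1, 0);
        printf("%s\n", IS_ERR(a) ? "error" : a->name);
        return 0;
}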
diff --git a/fs/erofs/ishare.c b/fs/erofs/ishare.c index ce98032..ec433ba 100644 --- a/fs/erofs/ishare.c +++ b/fs/erofs/ishare.c
@@ -40,10 +40,14 @@ bool erofs_ishare_fill_inode(struct inode *inode) { struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); struct erofs_inode *vi = EROFS_I(inode); + const struct address_space_operations *aops; struct erofs_inode_fingerprint fp; struct inode *sharedinode; unsigned long hash; + aops = erofs_get_aops(inode, true); + if (IS_ERR(aops)) + return false; if (erofs_xattr_fill_inode_fingerprint(&fp, inode, sbi->domain_id)) return false; hash = xxh32(fp.opaque, fp.size, 0); @@ -56,15 +60,15 @@ bool erofs_ishare_fill_inode(struct inode *inode) } if (inode_state_read_once(sharedinode) & I_NEW) { - if (erofs_inode_set_aops(sharedinode, inode, true)) { - iget_failed(sharedinode); - kfree(fp.opaque); - return false; - } + sharedinode->i_mapping->a_ops = aops; sharedinode->i_size = vi->vfs_inode.i_size; unlock_new_inode(sharedinode); } else { kfree(fp.opaque); + if (aops != sharedinode->i_mapping->a_ops) { + iput(sharedinode); + return false; + } if (sharedinode->i_size != vi->vfs_inode.i_size) { _erofs_printk(inode->i_sb, KERN_WARNING "size(%lld:%lld) not matches for the same fingerprint\n", @@ -196,8 +200,19 @@ struct inode *erofs_real_inode(struct inode *inode, bool *need_iput) int __init erofs_init_ishare(void) { - erofs_ishare_mnt = kern_mount(&erofs_anon_fs_type); - return PTR_ERR_OR_ZERO(erofs_ishare_mnt); + struct vfsmount *mnt; + int ret; + + mnt = kern_mount(&erofs_anon_fs_type); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + /* generic_fadvise() doesn't work if s_bdi == &noop_backing_dev_info */ + ret = super_setup_bdi(mnt->mnt_sb); + if (ret) + kern_unmount(mnt); + else + erofs_ishare_mnt = mnt; + return ret; } void erofs_exit_ishare(void)
diff --git a/fs/erofs/super.c b/fs/erofs/super.c index d499568..972a0c8 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c
@@ -424,26 +424,23 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = { static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode) { -#ifdef CONFIG_FS_DAX - struct erofs_sb_info *sbi = fc->s_fs_info; + if (IS_ENABLED(CONFIG_FS_DAX)) { + struct erofs_sb_info *sbi = fc->s_fs_info; - switch (mode) { - case EROFS_MOUNT_DAX_ALWAYS: - set_opt(&sbi->opt, DAX_ALWAYS); - clear_opt(&sbi->opt, DAX_NEVER); - return true; - case EROFS_MOUNT_DAX_NEVER: - set_opt(&sbi->opt, DAX_NEVER); - clear_opt(&sbi->opt, DAX_ALWAYS); - return true; - default: + if (mode == EROFS_MOUNT_DAX_ALWAYS) { + set_opt(&sbi->opt, DAX_ALWAYS); + clear_opt(&sbi->opt, DAX_NEVER); + return true; + } else if (mode == EROFS_MOUNT_DAX_NEVER) { + set_opt(&sbi->opt, DAX_NEVER); + clear_opt(&sbi->opt, DAX_ALWAYS); + return true; + } DBG_BUGON(1); return false; } -#else errorfc(fc, "dax options not supported"); return false; -#endif } static int erofs_fc_parse_param(struct fs_context *fc, @@ -460,31 +457,26 @@ static int erofs_fc_parse_param(struct fs_context *fc, switch (opt) { case Opt_user_xattr: -#ifdef CONFIG_EROFS_FS_XATTR - if (result.boolean) + if (!IS_ENABLED(CONFIG_EROFS_FS_XATTR)) + errorfc(fc, "{,no}user_xattr options not supported"); + else if (result.boolean) set_opt(&sbi->opt, XATTR_USER); else clear_opt(&sbi->opt, XATTR_USER); -#else - errorfc(fc, "{,no}user_xattr options not supported"); -#endif break; case Opt_acl: -#ifdef CONFIG_EROFS_FS_POSIX_ACL - if (result.boolean) + if (!IS_ENABLED(CONFIG_EROFS_FS_POSIX_ACL)) + errorfc(fc, "{,no}acl options not supported"); + else if (result.boolean) set_opt(&sbi->opt, POSIX_ACL); else clear_opt(&sbi->opt, POSIX_ACL); -#else - errorfc(fc, "{,no}acl options not supported"); -#endif break; case Opt_cache_strategy: -#ifdef CONFIG_EROFS_FS_ZIP - sbi->opt.cache_strategy = result.uint_32; -#else - errorfc(fc, "compression not supported, cache_strategy ignored"); -#endif + if (!IS_ENABLED(CONFIG_EROFS_FS_ZIP)) + errorfc(fc, "compression not supported, cache_strategy ignored"); + else + sbi->opt.cache_strategy = result.uint_32; break; case Opt_dax: if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS)) @@ -533,24 +525,21 @@ static int erofs_fc_parse_param(struct fs_context *fc, break; #endif case Opt_directio: -#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE - if (result.boolean) + if (!IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE)) + errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); + else if (result.boolean) set_opt(&sbi->opt, DIRECT_IO); else clear_opt(&sbi->opt, DIRECT_IO); -#else - errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); -#endif break; case Opt_fsoffset: sbi->dif0.fsoff = result.uint_64; break; case Opt_inode_share: -#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE - set_opt(&sbi->opt, INODE_SHARE); -#else - errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); -#endif + if (!IS_ENABLED(CONFIG_EROFS_FS_PAGE_CACHE_SHARE)) + errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); + else + set_opt(&sbi->opt, INODE_SHARE); break; } return 0; @@ -809,8 +798,7 @@ static int erofs_fc_get_tree(struct fs_context *fc) ret = get_tree_bdev_flags(fc, erofs_fc_fill_super, IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) ? 
GET_TREE_BDEV_QUIET_LOOKUP : 0); -#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE - if (ret == -ENOTBLK) { + if (IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && ret == -ENOTBLK) { struct file *file; if (!fc->source) @@ -824,7 +812,6 @@ static int erofs_fc_get_tree(struct fs_context *fc) sbi->dif0.file->f_mapping->a_ops->read_folio) return get_tree_nodev(fc, erofs_fc_fill_super); } -#endif return ret; } @@ -1108,12 +1095,12 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",dax=never"); if (erofs_is_fileio_mode(sbi) && test_opt(opt, DIRECT_IO)) seq_puts(seq, ",directio"); -#ifdef CONFIG_EROFS_FS_ONDEMAND - if (sbi->fsid) - seq_printf(seq, ",fsid=%s", sbi->fsid); - if (sbi->domain_id) - seq_printf(seq, ",domain_id=%s", sbi->domain_id); -#endif + if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND)) { + if (sbi->fsid) + seq_printf(seq, ",fsid=%s", sbi->fsid); + if (sbi->domain_id) + seq_printf(seq, ",domain_id=%s", sbi->domain_id); + } if (sbi->dif0.fsoff) seq_printf(seq, ",fsoffset=%llu", sbi->dif0.fsoff); if (test_opt(opt, INODE_SHARE))
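The super.c conversions above trade #ifdef blocks for IS_ENABLED() checks so that every branch is compiled and type-checked in all configurations, with the disabled branch eliminated as a constant-false condition; the called symbols must still be declared everywhere, which is exactly what catches the bitrot that #ifdef would hide. A generic sketch of the idiom, assuming a hypothetical CONFIG_FOO and do_foo():

#include <linux/kconfig.h>
#include <linux/printk.h>

static void do_foo(void) { }	/* hypothetical feature entry point */

static void handle_foo(void)
{
	/*
	 * IS_ENABLED(CONFIG_FOO) folds to 0 or 1 at compile time, so the
	 * unused arm is discarded by the optimizer -- but unlike #ifdef,
	 * both arms must still parse and type-check.
	 */
	if (IS_ENABLED(CONFIG_FOO))
		do_foo();
	else
		pr_err("foo not supported\n");
}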
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 3977e42..fe8121d 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c
@@ -1445,6 +1445,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, int bios) { struct erofs_sb_info *const sbi = EROFS_SB(io->sb); + int gfp_flag; /* wake up the caller thread for sync decompression */ if (io->sync) { @@ -1477,7 +1478,9 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, sbi->sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON; return; } + gfp_flag = memalloc_noio_save(); z_erofs_decompressqueue_work(&io->u.work); + memalloc_noio_restore(gfp_flag); } static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
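memalloc_noio_save()/memalloc_noio_restore() establish a per-task scope in which every allocation behaves as if GFP_NOIO were passed, so while the queued decompression work runs synchronously here, reclaim cannot recurse back into the I/O path. A minimal sketch of the scope API:

#include <linux/sched/mm.h>
#include <linux/slab.h>

static void *alloc_without_io_recursion(size_t size)
{
	unsigned int flags;
	void *p;

	flags = memalloc_noio_save();	/* open the NOIO scope (nests safely) */
	p = kmalloc(size, GFP_KERNEL);	/* behaves as GFP_NOIO inside scope */
	memalloc_noio_restore(flags);	/* close the scope */
	return p;
}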
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index c8d8e12..3077550 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c
@@ -513,6 +513,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, unsigned int recsz = z_erofs_extent_recsize(vi->z_advise); erofs_off_t pos = round_up(Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize), recsz); + unsigned int bmask = sb->s_blocksize - 1; bool in_mbox = erofs_inode_in_metabox(inode); erofs_off_t lend = inode->i_size; erofs_off_t l, r, mid, pa, la, lstart; @@ -596,17 +597,17 @@ static int z_erofs_map_blocks_ext(struct inode *inode, map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED | EROFS_MAP_ENCODED; fmt = map->m_plen >> Z_EROFS_EXTENT_PLEN_FMT_BIT; + if (map->m_plen & Z_EROFS_EXTENT_PLEN_PARTIAL) + map->m_flags |= EROFS_MAP_PARTIAL_REF; + map->m_plen &= Z_EROFS_EXTENT_PLEN_MASK; if (fmt) map->m_algorithmformat = fmt - 1; - else if (interlaced && !erofs_blkoff(sb, map->m_pa)) + else if (interlaced && !((map->m_pa | map->m_plen) & bmask)) map->m_algorithmformat = Z_EROFS_COMPRESSION_INTERLACED; else map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; - if (map->m_plen & Z_EROFS_EXTENT_PLEN_PARTIAL) - map->m_flags |= EROFS_MAP_PARTIAL_REF; - map->m_plen &= Z_EROFS_EXTENT_PLEN_MASK; } } map->m_llen = lend - map->m_la;
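The new bmask test relies on the block size being a power of two: masking with s_blocksize - 1 extracts the in-block offset, and OR-ing m_pa with m_plen first lets a single mask check verify that both the start and the length are block-aligned. In plain C:

#include <stdbool.h>
#include <stdint.h>

/* True iff pa and plen are both multiples of blocksize (a power of two). */
static bool both_block_aligned(uint64_t pa, uint64_t plen, uint64_t blocksize)
{
	uint64_t bmask = blocksize - 1;

	/* any low (sub-block) bit in either value survives the OR */
	return ((pa | plen) & bmask) == 0;
}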
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index a8c278c..5714e90 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c
@@ -2061,7 +2061,8 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
  * @ep: the &struct eventpoll to be currently checked.
  * @depth: Current depth of the path being checked.
  *
- * Return: depth of the subtree, or INT_MAX if we found a loop or went too deep.
+ * Return: depth of the subtree, or a value greater than EP_MAX_NESTS if we
+ *         found a loop or went too deep.
  */
 static int ep_loop_check_proc(struct eventpoll *ep, int depth)
 {
@@ -2080,7 +2081,7 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
 			struct eventpoll *ep_tovisit;
 			ep_tovisit = epi->ffd.file->private_data;
 			if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS)
-				result = INT_MAX;
+				result = EP_MAX_NESTS + 1;
 			else
 				result = max(result, ep_loop_check_proc(ep_tovisit, depth + 1) + 1);
 			if (result > EP_MAX_NESTS)
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 72206a2..3baee4e 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile
@@ -14,7 +14,8 @@ ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o -ext4-inode-test-objs += inode-test.o -obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-inode-test.o +ext4-test-objs += inode-test.o mballoc-test.o \ + extents-test.o +obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-test.o ext4-$(CONFIG_FS_VERITY) += verity.o ext4-$(CONFIG_FS_ENCRYPTION) += crypto.o
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index cf0a097..f41f320 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c
@@ -163,10 +163,17 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
 	 */
 	if (handle) {
+		/*
+		 * Since the inode is new, it is OK to pass the
+		 * XATTR_CREATE flag. This is necessary so that the
+		 * remaining-journal-credits check in
+		 * ext4_xattr_set_handle() matches the credits
+		 * allocated for the new inode.
+		 */
 		res = ext4_xattr_set_handle(handle, inode,
 				EXT4_XATTR_INDEX_ENCRYPTION,
 				EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
-				ctx, len, 0);
+				ctx, len, XATTR_CREATE);
 		if (!res) {
 			ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
 			ext4_clear_inode_state(inode,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 293f698..7617e2d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h
@@ -1570,6 +1570,7 @@ struct ext4_sb_info { struct proc_dir_entry *s_proc; struct kobject s_kobj; struct completion s_kobj_unregister; + struct mutex s_error_notify_mutex; /* protects sysfs_notify vs kobject_del */ struct super_block *s_sb; struct buffer_head *s_mmp_bh; @@ -3944,6 +3945,11 @@ static inline bool ext4_inode_can_atomic_write(struct inode *inode) extern int ext4_block_write_begin(handle_t *handle, struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block); + +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +#define EXPORT_SYMBOL_FOR_EXT4_TEST(sym) \ + EXPORT_SYMBOL_FOR_MODULES(sym, "ext4-test") +#endif #endif /* __KERNEL__ */ #endif /* _EXT4_H */
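EXPORT_SYMBOL_FOR_MODULES() is the restricted export variant: the symbol becomes importable only by the modules named in the list, here the ext4-test KUnit module built from the Makefile change above. The mballoc.c and extents.c hunks below pair it with thin _test wrappers so the real helpers can stay static; the shape of that pattern, with a hypothetical helper:

/* The real helper keeps internal linkage ... */
static int real_helper(int x)
{
	return x * 2;
}

#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
/* ... and only a shim is exported, visible solely to ext4-test. */
int real_helper_test(int x)
{
	return real_helper(x);
}
EXPORT_SYMBOL_FOR_EXT4_TEST(real_helper_test);
#endif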
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index c484125d..ebaf7cc 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h
@@ -264,5 +264,17 @@ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, 0xffff); } +extern int __ext4_ext_dirty(const char *where, unsigned int line, + handle_t *handle, struct inode *inode, + struct ext4_ext_path *path); +extern int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex); +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +extern int ext4_ext_space_root_idx_test(struct inode *inode, int check); +extern struct ext4_ext_path *ext4_split_convert_extents_test( + handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, + struct ext4_ext_path *path, + int flags, unsigned int *allocated); +#endif #endif /* _EXT4_EXTENTS */
diff --git a/fs/ext4/extents-test.c b/fs/ext4/extents-test.c index 7c4690e..5496b2c 100644 --- a/fs/ext4/extents-test.c +++ b/fs/ext4/extents-test.c
@@ -142,8 +142,10 @@ static struct file_system_type ext_fs_type = { static void extents_kunit_exit(struct kunit *test) { - struct ext4_sb_info *sbi = k_ctx.k_ei->vfs_inode.i_sb->s_fs_info; + struct super_block *sb = k_ctx.k_ei->vfs_inode.i_sb; + struct ext4_sb_info *sbi = sb->s_fs_info; + ext4_es_unregister_shrinker(sbi); kfree(sbi); kfree(k_ctx.k_ei); kfree(k_ctx.k_data); @@ -280,8 +282,8 @@ static int extents_kunit_init(struct kunit *test) eh->eh_depth = 0; eh->eh_entries = cpu_to_le16(1); eh->eh_magic = EXT4_EXT_MAGIC; - eh->eh_max = - cpu_to_le16(ext4_ext_space_root_idx(&k_ctx.k_ei->vfs_inode, 0)); + eh->eh_max = cpu_to_le16(ext4_ext_space_root_idx_test( + &k_ctx.k_ei->vfs_inode, 0)); eh->eh_generation = 0; /* @@ -384,8 +386,8 @@ static void test_split_convert(struct kunit *test) switch (param->type) { case TEST_SPLIT_CONVERT: - path = ext4_split_convert_extents(NULL, inode, &map, path, - param->split_flags, NULL); + path = ext4_split_convert_extents_test(NULL, inode, &map, + path, param->split_flags, NULL); break; case TEST_CREATE_BLOCKS: ext4_map_create_blocks_helper(test, inode, &map, param->split_flags);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ae3804f..8cce147 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c
@@ -184,9 +184,9 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode, * - ENOMEM * - EIO */ -static int __ext4_ext_dirty(const char *where, unsigned int line, - handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) +int __ext4_ext_dirty(const char *where, unsigned int line, + handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) { int err; @@ -1736,6 +1736,13 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path + k); if (err) return err; + if (unlikely(path[k].p_idx > EXT_LAST_INDEX(path[k].p_hdr))) { + EXT4_ERROR_INODE(inode, + "path[%d].p_idx %p > EXT_LAST_INDEX %p", + k, path[k].p_idx, + EXT_LAST_INDEX(path[k].p_hdr)); + return -EFSCORRUPTED; + } path[k].p_idx->ei_block = border; err = ext4_ext_dirty(handle, inode, path + k); if (err) @@ -1748,6 +1755,14 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path + k); if (err) goto clean; + if (unlikely(path[k].p_idx > EXT_LAST_INDEX(path[k].p_hdr))) { + EXT4_ERROR_INODE(inode, + "path[%d].p_idx %p > EXT_LAST_INDEX %p", + k, path[k].p_idx, + EXT_LAST_INDEX(path[k].p_hdr)); + err = -EFSCORRUPTED; + goto clean; + } path[k].p_idx->ei_block = border; err = ext4_ext_dirty(handle, inode, path + k); if (err) @@ -3144,7 +3159,7 @@ static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex) } /* FIXME!! we need to try to merge to left or right after zero-out */ -static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) +int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) { ext4_fsblk_t ee_pblock; unsigned int ee_len; @@ -3239,6 +3254,9 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, insert_err = PTR_ERR(path); err = 0; + if (insert_err != -ENOSPC && insert_err != -EDQUOT && + insert_err != -ENOMEM) + goto out_path; /* * Get a new path to try to zeroout or fix the extent length. @@ -3255,13 +3273,20 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, goto out_path; } + depth = ext_depth(inode); + ex = path[depth].p_ext; + if (!ex) { + EXT4_ERROR_INODE(inode, + "bad extent address lblock: %lu, depth: %d pblock %llu", + (unsigned long)ee_block, depth, path[depth].p_block); + err = -EFSCORRUPTED; + goto out; + } + err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; - depth = ext_depth(inode); - ex = path[depth].p_ext; - fix_extent_len: ex->ee_len = orig_ex.ee_len; err = ext4_ext_dirty(handle, inode, path + path->p_depth); @@ -3363,7 +3388,7 @@ static int ext4_split_extent_zeroout(handle_t *handle, struct inode *inode, ext4_ext_mark_initialized(ex); - ext4_ext_dirty(handle, inode, path + depth); + err = ext4_ext_dirty(handle, inode, path + depth); if (err) return err; @@ -4457,9 +4482,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, path = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (IS_ERR(path)) { err = PTR_ERR(path); - if (allocated_clusters) { + /* + * Gracefully handle out of space conditions. If the filesystem + * is inconsistent, we'll just leak allocated blocks to avoid + * causing even more damage. + */ + if (allocated_clusters && (err == -EDQUOT || err == -ENOSPC)) { int fb_flags = 0; - /* * free data blocks we just allocated. 
* not a good idea to call discard here directly, @@ -6238,6 +6267,33 @@ int ext4_ext_clear_bb(struct inode *inode) return 0; } -#ifdef CONFIG_EXT4_KUNIT_TESTS -#include "extents-test.c" +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +int ext4_ext_space_root_idx_test(struct inode *inode, int check) +{ + return ext4_ext_space_root_idx(inode, check); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_space_root_idx_test); + +struct ext4_ext_path *ext4_split_convert_extents_test(handle_t *handle, + struct inode *inode, struct ext4_map_blocks *map, + struct ext4_ext_path *path, int flags, + unsigned int *allocated) +{ + return ext4_split_convert_extents(handle, inode, map, path, + flags, allocated); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_split_convert_extents_test); + +EXPORT_SYMBOL_FOR_EXT4_TEST(__ext4_ext_dirty); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_zeroout); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_register_shrinker); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_unregister_shrinker); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_map_create_blocks); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_init_tree); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_lookup_extent); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_insert_extent); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_insert_extent); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_find_extent); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_issue_zeroout); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_map_query_blocks); #endif
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index f575751..2f0057e 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c
@@ -975,13 +975,13 @@ static int ext4_fc_flush_data(journal_t *journal) int ret = 0; list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { - ret = jbd2_submit_inode_data(journal, ei->jinode); + ret = jbd2_submit_inode_data(journal, READ_ONCE(ei->jinode)); if (ret) return ret; } list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { - ret = jbd2_wait_inode_data(journal, ei->jinode); + ret = jbd2_wait_inode_data(journal, READ_ONCE(ei->jinode)); if (ret) return ret; } @@ -1613,19 +1613,21 @@ static int ext4_fc_replay_inode(struct super_block *sb, /* Immediately update the inode on disk. */ ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); if (ret) - goto out; + goto out_brelse; ret = sync_dirty_buffer(iloc.bh); if (ret) - goto out; + goto out_brelse; ret = ext4_mark_inode_used(sb, ino); if (ret) - goto out; + goto out_brelse; /* Given that we just wrote the inode on disk, this SHOULD succeed. */ inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); if (IS_ERR(inode)) { ext4_debug("Inode not found."); - return -EFSCORRUPTED; + inode = NULL; + ret = -EFSCORRUPTED; + goto out_brelse; } /* @@ -1642,13 +1644,14 @@ static int ext4_fc_replay_inode(struct super_block *sb, ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode)); ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); sync_dirty_buffer(iloc.bh); +out_brelse: brelse(iloc.bh); out: iput(inode); if (!ret) blkdev_issue_flush(sb->s_bdev); - return 0; + return ret; } /*
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index e476c6d..bd8f230 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c
@@ -83,11 +83,23 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end, int datasync, bool *needs_barrier) { struct inode *inode = file->f_inode; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 0, + }; int ret; ret = generic_buffers_fsync_noflush(file, start, end, datasync); - if (!ret) - ret = ext4_sync_parent(inode); + if (ret) + return ret; + + /* Force writeout of inode table buffer to disk */ + ret = ext4_write_inode(inode, &wbc); + if (ret) + return ret; + + ret = ext4_sync_parent(inode); + if (test_opt(inode->i_sb, BARRIER)) *needs_barrier = true;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b20a1bf..b1bc195 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c
@@ -686,6 +686,12 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino) if (unlikely(!gdp)) return 0; + /* Inode was never used in this filesystem? */ + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT) || + ino >= EXT4_INODES_PER_GROUP(sb) - ext4_itable_unused_count(sb, gdp))) + return 0; + bh = sb_find_get_block(sb, ext4_inode_table(sb, gdp) + (ino / inodes_per_block)); if (!bh || !buffer_uptodate(bh))
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 1f6bc05..408677f 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c
@@ -522,7 +522,15 @@ static int ext4_read_inline_folio(struct inode *inode, struct folio *folio) goto out; len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode)); - BUG_ON(len > PAGE_SIZE); + + if (len > PAGE_SIZE) { + ext4_error_inode(inode, __func__, __LINE__, 0, + "inline size %zu exceeds PAGE_SIZE", len); + ret = -EFSCORRUPTED; + brelse(iloc.bh); + goto out; + } + kaddr = kmap_local_folio(folio, 0); ret = ext4_read_inline_data(inode, kaddr, len, &iloc); kaddr = folio_zero_tail(folio, len, kaddr + len);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 396dc3a..1123d99 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c
@@ -128,6 +128,8 @@ void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
 					      loff_t new_size)
 {
+	struct jbd2_inode *jinode = READ_ONCE(EXT4_I(inode)->jinode);
+
 	trace_ext4_begin_ordered_truncate(inode, new_size);
 	/*
 	 * If jinode is zero, then we never opened the file for
@@ -135,10 +137,10 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
 	 * jbd2_journal_begin_ordered_truncate() since there's no
 	 * outstanding writes we need to flush.
 	 */
-	if (!EXT4_I(inode)->jinode)
+	if (!jinode)
 		return 0;
 	return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode),
-						   EXT4_I(inode)->jinode,
+						   jinode,
 						   new_size);
 }

@@ -184,6 +186,14 @@ void ext4_evict_inode(struct inode *inode)
 	if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
 		ext4_evict_ea_inode(inode);
 	if (inode->i_nlink) {
+		/*
+		 * Any pages still dirty at this point will be lost, so
+		 * users could later see stale data.
+		 */
+		if (unlikely(!ext4_emergency_state(inode->i_sb) &&
+			     mapping_tagged(&inode->i_data, PAGECACHE_TAG_DIRTY)))
+			ext4_warning_inode(inode, "data will be lost");
+
 		truncate_inode_pages_final(&inode->i_data);

 		goto no_delete;
@@ -4451,8 +4461,13 @@ int ext4_inode_attach_jinode(struct inode *inode)
 			spin_unlock(&inode->i_lock);
 			return -ENOMEM;
 		}
-		ei->jinode = jinode;
-		jbd2_journal_init_jbd_inode(ei->jinode, inode);
+		jbd2_journal_init_jbd_inode(jinode, inode);
+		/*
+		 * Publish ->jinode only after it is fully initialized so that
+		 * readers never observe a partially initialized jbd2_inode.
+		 */
+		smp_wmb();
+		WRITE_ONCE(ei->jinode, jinode);
 		jinode = NULL;
 	}
 	spin_unlock(&inode->i_lock);
@@ -5401,18 +5416,36 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
 			inode->i_op = &ext4_encrypted_symlink_inode_operations;
 		} else if (ext4_inode_is_fast_symlink(inode)) {
 			inode->i_op = &ext4_fast_symlink_inode_operations;
-			if (inode->i_size == 0 ||
-			    inode->i_size >= sizeof(ei->i_data) ||
-			    strnlen((char *)ei->i_data, inode->i_size + 1) !=
-								inode->i_size) {
-				ext4_error_inode(inode, function, line, 0,
-					"invalid fast symlink length %llu",
-					(unsigned long long)inode->i_size);
-				ret = -EFSCORRUPTED;
-				goto bad_inode;
+
+			/*
+			 * Orphan cleanup can see inodes with i_size == 0
+			 * and i_data uninitialized. Skip size checks in
+			 * that case. This is safe because the first thing
+			 * ext4_evict_inode() does for fast symlinks is
+			 * clearing of i_data and i_size.
+			 */
+			if ((EXT4_SB(sb)->s_mount_state & EXT4_ORPHAN_FS)) {
+				if (inode->i_nlink != 0) {
+					ext4_error_inode(inode, function, line, 0,
+						"invalid orphan symlink nlink %d",
+						inode->i_nlink);
+					ret = -EFSCORRUPTED;
+					goto bad_inode;
+				}
+			} else {
+				if (inode->i_size == 0 ||
+				    inode->i_size >= sizeof(ei->i_data) ||
+				    strnlen((char *)ei->i_data, inode->i_size + 1) !=
+								inode->i_size) {
+					ext4_error_inode(inode, function, line, 0,
+						"invalid fast symlink length %llu",
+						(unsigned long long)inode->i_size);
+					ret = -EFSCORRUPTED;
+					goto bad_inode;
+				}
+				inode_set_cached_link(inode, (char *)ei->i_data,
+						      inode->i_size);
 			}
-			inode_set_cached_link(inode, (char *)ei->i_data,
-					      inode->i_size);
 		} else {
 			inode->i_op = &ext4_symlink_inode_operations;
 		}
@@ -5849,6 +5882,18 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 	if (attr->ia_size == inode->i_size)
 		inc_ivers = false;

+	/*
+	 * If the file has inline data but the new size exceeds the inline
+	 * capacity, convert it to extent-based storage first to prevent an
+	 * inconsistent state (inline flag set but size exceeding capacity).
+	 */
+	if (ext4_has_inline_data(inode) &&
+	    attr->ia_size > EXT4_I(inode)->i_inline_size) {
+		error = ext4_convert_inline_data(inode);
+		if (error)
+			goto err_out;
+	}
+
 	if (shrink) {
 		if (ext4_should_order_data(inode)) {
 			error = ext4_begin_ordered_truncate(inode,
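The jinode change above is a one-way publication: the object is fully initialized, a write barrier orders those stores, and only then is the pointer made visible, so lockless readers such as the READ_ONCE() sites in ext4_begin_ordered_truncate() above and in fast_commit.c earlier observe either NULL or a complete object. A minimal sketch of the pattern, using a hypothetical struct widget:

#include <linux/compiler.h>
#include <linux/errno.h>
#include <asm/barrier.h>

struct widget {
	int a, b;
};

static struct widget *published;	/* NULL until fully initialized */

static void widget_publish(struct widget *w)
{
	w->a = 1;
	w->b = 2;
	smp_wmb();			/* init stores before the pointer store */
	WRITE_ONCE(published, w);	/* single atomic pointer publish */
}

static int widget_consume(void)
{
	struct widget *w = READ_ONCE(published);

	if (!w)
		return -ENOENT;
	/* the address dependency orders these loads after the pointer load */
	return w->a + w->b;
}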
diff --git a/fs/ext4/mballoc-test.c b/fs/ext4/mballoc-test.c index 9fbdf6a..6f5bfbb 100644 --- a/fs/ext4/mballoc-test.c +++ b/fs/ext4/mballoc-test.c
@@ -8,6 +8,7 @@ #include <linux/random.h> #include "ext4.h" +#include "mballoc.h" struct mbt_grp_ctx { struct buffer_head bitmap_bh; @@ -336,7 +337,7 @@ ext4_mb_mark_context_stub(handle_t *handle, struct super_block *sb, bool state, if (state) mb_set_bits(bitmap_bh->b_data, blkoff, len); else - mb_clear_bits(bitmap_bh->b_data, blkoff, len); + mb_clear_bits_test(bitmap_bh->b_data, blkoff, len); return 0; } @@ -413,14 +414,14 @@ static void test_new_blocks_simple(struct kunit *test) /* get block at goal */ ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_EQ_MSG(test, ar.goal, found, "failed to alloc block at goal, expected %llu found %llu", ar.goal, found); /* get block after goal in goal group */ ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_EQ_MSG(test, ar.goal + EXT4_C2B(sbi, 1), found, "failed to alloc block after goal in goal group, expected %llu found %llu", ar.goal + 1, found); @@ -428,7 +429,7 @@ static void test_new_blocks_simple(struct kunit *test) /* get block after goal group */ mbt_ctx_mark_used(sb, goal_group, 0, EXT4_CLUSTERS_PER_GROUP(sb)); ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_EQ_MSG(test, ext4_group_first_block_no(sb, goal_group + 1), found, "failed to alloc block after goal group, expected %llu found %llu", @@ -438,7 +439,7 @@ static void test_new_blocks_simple(struct kunit *test) for (i = goal_group; i < ext4_get_groups_count(sb); i++) mbt_ctx_mark_used(sb, i, 0, EXT4_CLUSTERS_PER_GROUP(sb)); ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_EQ_MSG(test, ext4_group_first_block_no(sb, 0) + EXT4_C2B(sbi, 1), found, "failed to alloc block before goal group, expected %llu found %llu", @@ -448,7 +449,7 @@ static void test_new_blocks_simple(struct kunit *test) for (i = 0; i < ext4_get_groups_count(sb); i++) mbt_ctx_mark_used(sb, i, 0, EXT4_CLUSTERS_PER_GROUP(sb)); ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_NE_MSG(test, err, 0, "unexpectedly get block when no block is available"); } @@ -492,16 +493,16 @@ validate_free_blocks_simple(struct kunit *test, struct super_block *sb, continue; bitmap = mbt_ctx_bitmap(sb, i); - bit = mb_find_next_zero_bit(bitmap, max, 0); + bit = mb_find_next_zero_bit_test(bitmap, max, 0); KUNIT_ASSERT_EQ_MSG(test, bit, max, "free block on unexpected group %d", i); } bitmap = mbt_ctx_bitmap(sb, goal_group); - bit = mb_find_next_zero_bit(bitmap, max, 0); + bit = mb_find_next_zero_bit_test(bitmap, max, 0); KUNIT_ASSERT_EQ(test, bit, start); - bit = mb_find_next_bit(bitmap, max, bit + 1); + bit = mb_find_next_bit_test(bitmap, max, bit + 1); KUNIT_ASSERT_EQ(test, bit, start + len); } @@ -524,7 +525,7 @@ test_free_blocks_simple_range(struct kunit *test, ext4_group_t goal_group, block = ext4_group_first_block_no(sb, goal_group) + EXT4_C2B(sbi, start); - ext4_free_blocks_simple(inode, block, len); + ext4_free_blocks_simple_test(inode, block, len); validate_free_blocks_simple(test, sb, goal_group, start, len); mbt_ctx_mark_used(sb, goal_group, 0, 
EXT4_CLUSTERS_PER_GROUP(sb)); } @@ -566,15 +567,15 @@ test_mark_diskspace_used_range(struct kunit *test, bitmap = mbt_ctx_bitmap(sb, TEST_GOAL_GROUP); memset(bitmap, 0, sb->s_blocksize); - ret = ext4_mb_mark_diskspace_used(ac, NULL); + ret = ext4_mb_mark_diskspace_used_test(ac, NULL); KUNIT_ASSERT_EQ(test, ret, 0); max = EXT4_CLUSTERS_PER_GROUP(sb); - i = mb_find_next_bit(bitmap, max, 0); + i = mb_find_next_bit_test(bitmap, max, 0); KUNIT_ASSERT_EQ(test, i, start); - i = mb_find_next_zero_bit(bitmap, max, i + 1); + i = mb_find_next_zero_bit_test(bitmap, max, i + 1); KUNIT_ASSERT_EQ(test, i, start + len); - i = mb_find_next_bit(bitmap, max, i + 1); + i = mb_find_next_bit_test(bitmap, max, i + 1); KUNIT_ASSERT_EQ(test, max, i); } @@ -617,54 +618,54 @@ static void mbt_generate_buddy(struct super_block *sb, void *buddy, max = EXT4_CLUSTERS_PER_GROUP(sb); bb_h = buddy + sbi->s_mb_offsets[1]; - off = mb_find_next_zero_bit(bb, max, 0); + off = mb_find_next_zero_bit_test(bb, max, 0); grp->bb_first_free = off; while (off < max) { grp->bb_counters[0]++; grp->bb_free++; - if (!(off & 1) && !mb_test_bit(off + 1, bb)) { + if (!(off & 1) && !mb_test_bit_test(off + 1, bb)) { grp->bb_free++; grp->bb_counters[0]--; - mb_clear_bit(off >> 1, bb_h); + mb_clear_bit_test(off >> 1, bb_h); grp->bb_counters[1]++; grp->bb_largest_free_order = 1; off++; } - off = mb_find_next_zero_bit(bb, max, off + 1); + off = mb_find_next_zero_bit_test(bb, max, off + 1); } for (order = 1; order < MB_NUM_ORDERS(sb) - 1; order++) { bb = buddy + sbi->s_mb_offsets[order]; bb_h = buddy + sbi->s_mb_offsets[order + 1]; max = max >> 1; - off = mb_find_next_zero_bit(bb, max, 0); + off = mb_find_next_zero_bit_test(bb, max, 0); while (off < max) { - if (!(off & 1) && !mb_test_bit(off + 1, bb)) { + if (!(off & 1) && !mb_test_bit_test(off + 1, bb)) { mb_set_bits(bb, off, 2); grp->bb_counters[order] -= 2; - mb_clear_bit(off >> 1, bb_h); + mb_clear_bit_test(off >> 1, bb_h); grp->bb_counters[order + 1]++; grp->bb_largest_free_order = order + 1; off++; } - off = mb_find_next_zero_bit(bb, max, off + 1); + off = mb_find_next_zero_bit_test(bb, max, off + 1); } } max = EXT4_CLUSTERS_PER_GROUP(sb); - off = mb_find_next_zero_bit(bitmap, max, 0); + off = mb_find_next_zero_bit_test(bitmap, max, 0); while (off < max) { grp->bb_fragments++; - off = mb_find_next_bit(bitmap, max, off + 1); + off = mb_find_next_bit_test(bitmap, max, off + 1); if (off + 1 >= max) break; - off = mb_find_next_zero_bit(bitmap, max, off + 1); + off = mb_find_next_zero_bit_test(bitmap, max, off + 1); } } @@ -706,7 +707,7 @@ do_test_generate_buddy(struct kunit *test, struct super_block *sb, void *bitmap, /* needed by validation in ext4_mb_generate_buddy */ ext4_grp->bb_free = mbt_grp->bb_free; memset(ext4_buddy, 0xff, sb->s_blocksize); - ext4_mb_generate_buddy(sb, ext4_buddy, bitmap, TEST_GOAL_GROUP, + ext4_mb_generate_buddy_test(sb, ext4_buddy, bitmap, TEST_GOAL_GROUP, ext4_grp); KUNIT_ASSERT_EQ(test, memcmp(mbt_buddy, ext4_buddy, sb->s_blocksize), @@ -760,7 +761,7 @@ test_mb_mark_used_range(struct kunit *test, struct ext4_buddy *e4b, ex.fe_group = TEST_GOAL_GROUP; ext4_lock_group(sb, TEST_GOAL_GROUP); - mb_mark_used(e4b, &ex); + mb_mark_used_test(e4b, &ex); ext4_unlock_group(sb, TEST_GOAL_GROUP); mb_set_bits(bitmap, start, len); @@ -769,7 +770,7 @@ test_mb_mark_used_range(struct kunit *test, struct ext4_buddy *e4b, memset(buddy, 0xff, sb->s_blocksize); for (i = 0; i < MB_NUM_ORDERS(sb); i++) grp->bb_counters[i] = 0; - ext4_mb_generate_buddy(sb, buddy, bitmap, 0, grp); + 
ext4_mb_generate_buddy_test(sb, buddy, bitmap, 0, grp); KUNIT_ASSERT_EQ(test, memcmp(buddy, e4b->bd_buddy, sb->s_blocksize), 0); @@ -798,7 +799,7 @@ static void test_mb_mark_used(struct kunit *test) bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, grp); - ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b); + ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b); KUNIT_ASSERT_EQ(test, ret, 0); grp->bb_free = EXT4_CLUSTERS_PER_GROUP(sb); @@ -809,7 +810,7 @@ static void test_mb_mark_used(struct kunit *test) test_mb_mark_used_range(test, &e4b, ranges[i].start, ranges[i].len, bitmap, buddy, grp); - ext4_mb_unload_buddy(&e4b); + ext4_mb_unload_buddy_test(&e4b); } static void @@ -825,16 +826,16 @@ test_mb_free_blocks_range(struct kunit *test, struct ext4_buddy *e4b, return; ext4_lock_group(sb, e4b->bd_group); - mb_free_blocks(NULL, e4b, start, len); + mb_free_blocks_test(NULL, e4b, start, len); ext4_unlock_group(sb, e4b->bd_group); - mb_clear_bits(bitmap, start, len); + mb_clear_bits_test(bitmap, start, len); /* bypass bb_free validatoin in ext4_mb_generate_buddy */ grp->bb_free += len; memset(buddy, 0xff, sb->s_blocksize); for (i = 0; i < MB_NUM_ORDERS(sb); i++) grp->bb_counters[i] = 0; - ext4_mb_generate_buddy(sb, buddy, bitmap, 0, grp); + ext4_mb_generate_buddy_test(sb, buddy, bitmap, 0, grp); KUNIT_ASSERT_EQ(test, memcmp(buddy, e4b->bd_buddy, sb->s_blocksize), 0); @@ -865,7 +866,7 @@ static void test_mb_free_blocks(struct kunit *test) bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, grp); - ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b); + ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b); KUNIT_ASSERT_EQ(test, ret, 0); ex.fe_start = 0; @@ -873,7 +874,7 @@ static void test_mb_free_blocks(struct kunit *test) ex.fe_group = TEST_GOAL_GROUP; ext4_lock_group(sb, TEST_GOAL_GROUP); - mb_mark_used(&e4b, &ex); + mb_mark_used_test(&e4b, &ex); ext4_unlock_group(sb, TEST_GOAL_GROUP); grp->bb_free = 0; @@ -886,7 +887,7 @@ static void test_mb_free_blocks(struct kunit *test) test_mb_free_blocks_range(test, &e4b, ranges[i].start, ranges[i].len, bitmap, buddy, grp); - ext4_mb_unload_buddy(&e4b); + ext4_mb_unload_buddy_test(&e4b); } #define COUNT_FOR_ESTIMATE 100000 @@ -904,7 +905,7 @@ static void test_mb_mark_used_cost(struct kunit *test) if (sb->s_blocksize > PAGE_SIZE) kunit_skip(test, "blocksize exceeds pagesize"); - ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b); + ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b); KUNIT_ASSERT_EQ(test, ret, 0); ex.fe_group = TEST_GOAL_GROUP; @@ -918,7 +919,7 @@ static void test_mb_mark_used_cost(struct kunit *test) ex.fe_start = ranges[i].start; ex.fe_len = ranges[i].len; ext4_lock_group(sb, TEST_GOAL_GROUP); - mb_mark_used(&e4b, &ex); + mb_mark_used_test(&e4b, &ex); ext4_unlock_group(sb, TEST_GOAL_GROUP); } end = jiffies; @@ -929,14 +930,14 @@ static void test_mb_mark_used_cost(struct kunit *test) continue; ext4_lock_group(sb, TEST_GOAL_GROUP); - mb_free_blocks(NULL, &e4b, ranges[i].start, + mb_free_blocks_test(NULL, &e4b, ranges[i].start, ranges[i].len); ext4_unlock_group(sb, TEST_GOAL_GROUP); } } kunit_info(test, "costed jiffies %lu\n", all); - ext4_mb_unload_buddy(&e4b); + ext4_mb_unload_buddy_test(&e4b); } static const struct mbt_ext4_block_layout mbt_test_layouts[] = {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 20e9fda..bb58eaf 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c
@@ -1199,6 +1199,8 @@ static int ext4_mb_scan_groups(struct ext4_allocation_context *ac) /* searching for the right group start from the goal value specified */ start = ac->ac_g_ex.fe_group; + if (start >= ngroups) + start = 0; ac->ac_prefetch_grp = start; ac->ac_prefetch_nr = 0; @@ -2443,8 +2445,12 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, return 0; err = ext4_mb_load_buddy(ac->ac_sb, group, e4b); - if (err) + if (err) { + if (EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info) && + !(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) + return 0; return err; + } ext4_lock_group(ac->ac_sb, group); if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) @@ -3580,9 +3586,7 @@ static int ext4_mb_init_backend(struct super_block *sb) rcu_read_unlock(); iput(sbi->s_buddy_cache); err_freesgi: - rcu_read_lock(); - kvfree(rcu_dereference(sbi->s_group_info)); - rcu_read_unlock(); + kvfree(rcu_access_pointer(sbi->s_group_info)); return -ENOMEM; } @@ -3889,15 +3893,14 @@ void ext4_mb_release(struct super_block *sb) struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); int count; - if (test_opt(sb, DISCARD)) { - /* - * wait the discard work to drain all of ext4_free_data - */ - flush_work(&sbi->s_discard_work); - WARN_ON_ONCE(!list_empty(&sbi->s_discard_list)); - } + /* + * wait the discard work to drain all of ext4_free_data + */ + flush_work(&sbi->s_discard_work); + WARN_ON_ONCE(!list_empty(&sbi->s_discard_list)); - if (sbi->s_group_info) { + group_info = rcu_access_pointer(sbi->s_group_info); + if (group_info) { for (i = 0; i < ngroups; i++) { cond_resched(); grinfo = ext4_get_group_info(sb, i); @@ -3915,12 +3918,9 @@ void ext4_mb_release(struct super_block *sb) num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); - rcu_read_lock(); - group_info = rcu_dereference(sbi->s_group_info); for (i = 0; i < num_meta_group_infos; i++) kfree(group_info[i]); kvfree(group_info); - rcu_read_unlock(); } ext4_mb_avg_fragment_size_destroy(sbi); ext4_mb_largest_free_orders_destroy(sbi); @@ -4084,7 +4084,7 @@ void ext4_exit_mballoc(void) #define EXT4_MB_BITMAP_MARKED_CHECK 0x0001 #define EXT4_MB_SYNC_UPDATE 0x0002 -static int +int ext4_mb_mark_context(handle_t *handle, struct super_block *sb, bool state, ext4_group_t group, ext4_grpblk_t blkoff, ext4_grpblk_t len, int flags, ext4_grpblk_t *ret_changed) @@ -7188,6 +7188,102 @@ ext4_mballoc_query_range( return error; } -#ifdef CONFIG_EXT4_KUNIT_TESTS -#include "mballoc-test.c" +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +void mb_clear_bits_test(void *bm, int cur, int len) +{ + mb_clear_bits(bm, cur, len); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_clear_bits_test); + +ext4_fsblk_t +ext4_mb_new_blocks_simple_test(struct ext4_allocation_request *ar, + int *errp) +{ + return ext4_mb_new_blocks_simple(ar, errp); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_new_blocks_simple_test); + +int mb_find_next_zero_bit_test(void *addr, int max, int start) +{ + return mb_find_next_zero_bit(addr, max, start); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_find_next_zero_bit_test); + +int mb_find_next_bit_test(void *addr, int max, int start) +{ + return mb_find_next_bit(addr, max, start); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_find_next_bit_test); + +void mb_clear_bit_test(int bit, void *addr) +{ + mb_clear_bit(bit, addr); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_clear_bit_test); + +int mb_test_bit_test(int bit, void *addr) +{ + return mb_test_bit(bit, addr); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_test_bit_test); + +int ext4_mb_mark_diskspace_used_test(struct 
ext4_allocation_context *ac, + handle_t *handle) +{ + return ext4_mb_mark_diskspace_used(ac, handle); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_mark_diskspace_used_test); + +int mb_mark_used_test(struct ext4_buddy *e4b, struct ext4_free_extent *ex) +{ + return mb_mark_used(e4b, ex); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_mark_used_test); + +void ext4_mb_generate_buddy_test(struct super_block *sb, void *buddy, + void *bitmap, ext4_group_t group, + struct ext4_group_info *grp) +{ + ext4_mb_generate_buddy(sb, buddy, bitmap, group, grp); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_generate_buddy_test); + +int ext4_mb_load_buddy_test(struct super_block *sb, ext4_group_t group, + struct ext4_buddy *e4b) +{ + return ext4_mb_load_buddy(sb, group, e4b); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_load_buddy_test); + +void ext4_mb_unload_buddy_test(struct ext4_buddy *e4b) +{ + ext4_mb_unload_buddy(e4b); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_unload_buddy_test); + +void mb_free_blocks_test(struct inode *inode, struct ext4_buddy *e4b, + int first, int count) +{ + mb_free_blocks(inode, e4b, first, count); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_free_blocks_test); + +void ext4_free_blocks_simple_test(struct inode *inode, ext4_fsblk_t block, + unsigned long count) +{ + return ext4_free_blocks_simple(inode, block, count); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_free_blocks_simple_test); + +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_wait_block_bitmap); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_init); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_get_group_desc); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_count_free_clusters); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_get_group_info); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_free_group_clusters_set); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_release); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_read_block_bitmap_nowait); +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_set_bits); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_fc_init_inode); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_mark_context); #endif
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 15a049f..39333ce 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h
@@ -270,4 +270,34 @@ ext4_mballoc_query_range( ext4_mballoc_query_range_fn formatter, void *priv); +extern int ext4_mb_mark_context(handle_t *handle, + struct super_block *sb, bool state, + ext4_group_t group, ext4_grpblk_t blkoff, + ext4_grpblk_t len, int flags, + ext4_grpblk_t *ret_changed); +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +extern void mb_clear_bits_test(void *bm, int cur, int len); +extern ext4_fsblk_t +ext4_mb_new_blocks_simple_test(struct ext4_allocation_request *ar, + int *errp); +extern int mb_find_next_zero_bit_test(void *addr, int max, int start); +extern int mb_find_next_bit_test(void *addr, int max, int start); +extern void mb_clear_bit_test(int bit, void *addr); +extern int mb_test_bit_test(int bit, void *addr); +extern int +ext4_mb_mark_diskspace_used_test(struct ext4_allocation_context *ac, + handle_t *handle); +extern int mb_mark_used_test(struct ext4_buddy *e4b, + struct ext4_free_extent *ex); +extern void ext4_mb_generate_buddy_test(struct super_block *sb, + void *buddy, void *bitmap, ext4_group_t group, + struct ext4_group_info *grp); +extern int ext4_mb_load_buddy_test(struct super_block *sb, + ext4_group_t group, struct ext4_buddy *e4b); +extern void ext4_mb_unload_buddy_test(struct ext4_buddy *e4b); +extern void mb_free_blocks_test(struct inode *inode, + struct ext4_buddy *e4b, int first, int count); +extern void ext4_free_blocks_simple_test(struct inode *inode, + ext4_fsblk_t block, unsigned long count); +#endif #endif
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index a8c95ee..39fe50b 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c
@@ -524,9 +524,15 @@ int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio, nr_to_submit++; } while ((bh = bh->b_this_page) != head); - /* Nothing to submit? Just unlock the folio... */ - if (!nr_to_submit) + if (!nr_to_submit) { + /* + * We have nothing to submit. Just cycle the folio through + * writeback state to properly update xarray tags. + */ + __folio_start_writeback(folio, keep_towrite); + folio_end_writeback(folio); return 0; + } bh = head = folio_buffers(folio);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 43f680c..a34efb4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c
@@ -1254,12 +1254,10 @@ static void ext4_group_desc_free(struct ext4_sb_info *sbi)
 	struct buffer_head **group_desc;
 	int i;

-	rcu_read_lock();
-	group_desc = rcu_dereference(sbi->s_group_desc);
+	group_desc = rcu_access_pointer(sbi->s_group_desc);
 	for (i = 0; i < sbi->s_gdb_count; i++)
 		brelse(group_desc[i]);
 	kvfree(group_desc);
-	rcu_read_unlock();
 }

 static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
@@ -1267,14 +1265,12 @@ static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
 	struct flex_groups **flex_groups;
 	int i;

-	rcu_read_lock();
-	flex_groups = rcu_dereference(sbi->s_flex_groups);
+	flex_groups = rcu_access_pointer(sbi->s_flex_groups);
 	if (flex_groups) {
 		for (i = 0; i < sbi->s_flex_groups_allocated; i++)
 			kvfree(flex_groups[i]);
 		kvfree(flex_groups);
 	}
-	rcu_read_unlock();
 }

 static void ext4_put_super(struct super_block *sb)
@@ -1527,6 +1523,27 @@ void ext4_clear_inode(struct inode *inode)
 	invalidate_inode_buffers(inode);
 	clear_inode(inode);
 	ext4_discard_preallocations(inode);
+	/*
+	 * We must remove the inode from the hash before ext4_free_inode()
+	 * clears the bit in the inode bitmap: otherwise a process reusing the
+	 * inode number would block in insert_inode_hash(), waiting with a
+	 * transaction handle held open for the inode eviction to complete,
+	 * while ext4_evict_inode() could itself block waiting for a
+	 * transaction commit if the inode is marked IS_SYNC => deadlock.
+	 *
+	 * Removing the inode from the hash here is safe. There are two cases
+	 * to consider:
+	 * 1) The inode still has references to it (i_nlink > 0). In that case
+	 * we are keeping the inode and, once we remove it from the hash,
+	 * iget() can create a new inode structure for the same inode number;
+	 * we are fine with that as all IO on behalf of the inode is
+	 * finished.
+	 * 2) We are deleting the inode (i_nlink == 0). In that case the inode
+	 * number cannot be reused until ext4_free_inode() clears the bit in
+	 * the inode bitmap, at which point all IO is done and reuse is fine
+	 * again.
+	 */
+	remove_inode_hash(inode);
 	ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
 	dquot_drop(inode);
 	if (EXT4_I(inode)->jinode) {
@@ -3633,6 +3650,13 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly)
 			   "extents feature\n");
 		return 0;
 	}
+	if (ext4_has_feature_bigalloc(sb) &&
+	    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
+		ext4_msg(sb, KERN_WARNING,
+			 "bad geometry: bigalloc file system with non-zero "
+			 "first_data_block\n");
+		return 0;
+	}

 #if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
 	if (!readonly && (ext4_has_feature_quota(sb) ||
@@ -5403,6 +5427,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)

 	timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
 	spin_lock_init(&sbi->s_error_lock);
+	mutex_init(&sbi->s_error_notify_mutex);
 	INIT_WORK(&sbi->s_sb_upd_work, update_super_work);

 	err = ext4_group_desc_init(sb, es, logical_sb_block,
 				   &first_not_zeroed);
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index b87d7bd..923b375 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c
@@ -597,7 +597,10 @@ static const struct kobj_type ext4_feat_ktype = { void ext4_notify_error_sysfs(struct ext4_sb_info *sbi) { - sysfs_notify(&sbi->s_kobj, NULL, "errors_count"); + mutex_lock(&sbi->s_error_notify_mutex); + if (sbi->s_kobj.state_in_sysfs) + sysfs_notify(&sbi->s_kobj, NULL, "errors_count"); + mutex_unlock(&sbi->s_error_notify_mutex); } static struct kobject *ext4_root; @@ -610,8 +613,10 @@ int ext4_register_sysfs(struct super_block *sb) int err; init_completion(&sbi->s_kobj_unregister); + mutex_lock(&sbi->s_error_notify_mutex); err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, ext4_root, "%s", sb->s_id); + mutex_unlock(&sbi->s_error_notify_mutex); if (err) { kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); @@ -644,7 +649,10 @@ void ext4_unregister_sysfs(struct super_block *sb) if (sbi->s_proc) remove_proc_subtree(sb->s_id, ext4_proc_root); + + mutex_lock(&sbi->s_error_notify_mutex); kobject_del(&sbi->s_kobj); + mutex_unlock(&sbi->s_error_notify_mutex); } int __init ext4_init_sysfs(void)
diff --git a/fs/file_attr.c b/fs/file_attr.c index 6d2a298..da983e1 100644 --- a/fs/file_attr.c +++ b/fs/file_attr.c
@@ -378,7 +378,7 @@ SYSCALL_DEFINE5(file_getattr, int, dfd, const char __user *, filename, struct path filepath __free(path_put) = {}; unsigned int lookup_flags = 0; struct file_attr fattr; - struct file_kattr fa; + struct file_kattr fa = { .flags_valid = true }; /* hint only */ int error; BUILD_BUG_ON(sizeof(struct file_attr) < FILE_ATTR_SIZE_VER0);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8f8069f..3c75ee0 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c
@@ -198,10 +198,11 @@ static void wb_queue_work(struct bdi_writeback *wb, static bool wb_wait_for_completion_cb(struct wb_completion *done) { + unsigned long timeout = sysctl_hung_task_timeout_secs; unsigned long waited_secs = (jiffies - done->wait_start) / HZ; done->progress_stamp = jiffies; - if (waited_secs > sysctl_hung_task_timeout_secs) + if (timeout && (waited_secs > timeout)) pr_info("INFO: The task %s:%d has been waiting for writeback " "completion for more than %lu seconds.", current->comm, current->pid, waited_secs); @@ -1710,6 +1711,19 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, } } +static bool __sync_lazytime(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (!(inode_state_read(inode) & I_DIRTY_TIME)) { + spin_unlock(&inode->i_lock); + return false; + } + inode_state_clear(inode, I_DIRTY_TIME); + spin_unlock(&inode->i_lock); + inode->i_op->sync_lazytime(inode); + return true; +} + bool sync_lazytime(struct inode *inode) { if (!(inode_state_read_once(inode) & I_DIRTY_TIME)) @@ -1717,9 +1731,8 @@ bool sync_lazytime(struct inode *inode) trace_writeback_lazytime(inode); if (inode->i_op->sync_lazytime) - inode->i_op->sync_lazytime(inode); - else - mark_inode_dirty_sync(inode); + return __sync_lazytime(inode); + mark_inode_dirty_sync(inode); return true; } @@ -1954,6 +1967,7 @@ static long writeback_sb_inodes(struct super_block *sb, .range_end = LLONG_MAX, }; unsigned long start_time = jiffies; + unsigned long timeout = sysctl_hung_task_timeout_secs; long write_chunk; long total_wrote = 0; /* count both pages and inodes */ unsigned long dirtied_before = jiffies; @@ -2040,9 +2054,8 @@ static long writeback_sb_inodes(struct super_block *sb, __writeback_single_inode(inode, &wbc); /* Report progress to inform the hung task detector of the progress. */ - if (work->done && work->done->progress_stamp && - (jiffies - work->done->progress_stamp) > HZ * - sysctl_hung_task_timeout_secs / 2) + if (work->done && work->done->progress_stamp && timeout && + (jiffies - work->done->progress_stamp) > HZ * timeout / 2) wake_up_all(work->done->waitq); wbc_detach_inode(&wbc); @@ -2774,13 +2787,8 @@ static void wait_sb_inodes(struct super_block *sb) * The mapping can appear untagged while still on-list since we * do not have the mapping lock. Skip it here, wb completion * will remove it. - * - * If the mapping does not have data integrity semantics, - * there's no need to wait for the writeout to complete, as the - * mapping cannot guarantee that data is persistently stored. */ - if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK) || - mapping_no_data_integrity(mapping)) + if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) continue; spin_unlock_irq(&sb->s_inode_wblist_lock); @@ -2915,6 +2923,17 @@ void sync_inodes_sb(struct super_block *sb) */ if (bdi == &noop_backing_dev_info) return; + + /* + * If the superblock has SB_I_NO_DATA_INTEGRITY set, there's no need to + * wait for the writeout to complete, as the filesystem cannot guarantee + * data persistence on sync. Just kick off writeback and return. + */ + if (sb->s_iflags & SB_I_NO_DATA_INTEGRITY) { + wakeup_flusher_threads_bdi(bdi, WB_REASON_SYNC); + return; + } + WARN_ON(!rwsem_is_locked(&sb->s_umount)); /* protect against inode wb switch, see inode_switch_wbs_work_fn() */
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b1bb715..676fd98 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c
@@ -3201,10 +3201,8 @@ void fuse_init_file_inode(struct inode *inode, unsigned int flags) inode->i_fop = &fuse_file_operations; inode->i_data.a_ops = &fuse_file_aops; - if (fc->writeback_cache) { + if (fc->writeback_cache) mapping_set_writeback_may_deadlock_on_reclaim(&inode->i_data); - mapping_set_no_data_integrity(&inode->i_data); - } INIT_LIST_HEAD(&fi->write_files); INIT_LIST_HEAD(&fi->queued_writes);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e57b8af..c795abe 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c
@@ -1709,6 +1709,7 @@ static void fuse_sb_defaults(struct super_block *sb) sb->s_export_op = &fuse_export_operations; sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE; sb->s_iflags |= SB_I_NOIDMAP; + sb->s_iflags |= SB_I_NO_DATA_INTEGRITY; if (sb->s_user_ns != &init_user_ns) sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
diff --git a/fs/iomap/bio.c b/fs/iomap/bio.c index fc045f2..edd9081 100644 --- a/fs/iomap/bio.c +++ b/fs/iomap/bio.c
@@ -8,7 +8,10 @@ #include "internal.h" #include "trace.h" -static void iomap_read_end_io(struct bio *bio) +static DEFINE_SPINLOCK(failed_read_lock); +static struct bio_list failed_read_list = BIO_EMPTY_LIST; + +static void __iomap_read_end_io(struct bio *bio) { int error = blk_status_to_errno(bio->bi_status); struct folio_iter fi; @@ -18,6 +21,52 @@ static void iomap_read_end_io(struct bio *bio) bio_put(bio); } +static void +iomap_fail_reads( + struct work_struct *work) +{ + struct bio *bio; + struct bio_list tmp = BIO_EMPTY_LIST; + unsigned long flags; + + spin_lock_irqsave(&failed_read_lock, flags); + bio_list_merge_init(&tmp, &failed_read_list); + spin_unlock_irqrestore(&failed_read_lock, flags); + + while ((bio = bio_list_pop(&tmp)) != NULL) { + __iomap_read_end_io(bio); + cond_resched(); + } +} + +static DECLARE_WORK(failed_read_work, iomap_fail_reads); + +static void iomap_fail_buffered_read(struct bio *bio) +{ + unsigned long flags; + + /* + * Bounce I/O errors to a workqueue to avoid nested i_lock acquisitions + * in the fserror code. The caller no longer owns the bio reference + * after the spinlock drops. + */ + spin_lock_irqsave(&failed_read_lock, flags); + if (bio_list_empty(&failed_read_list)) + WARN_ON_ONCE(!schedule_work(&failed_read_work)); + bio_list_add(&failed_read_list, bio); + spin_unlock_irqrestore(&failed_read_lock, flags); +} + +static void iomap_read_end_io(struct bio *bio) +{ + if (bio->bi_status) { + iomap_fail_buffered_read(bio); + return; + } + + __iomap_read_end_io(bio); +} + static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx) { struct bio *bio = ctx->read_ctx;
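The fs/iomap/bio.c hunk above and the fs/iomap/ioend.c hunk below share the same deferral shape: completions arriving in interrupt context are parked on a spinlock-protected list, and the work item is scheduled only on the empty-to-non-empty transition, so a single schedule_work() covers a whole batch. A generic sketch of the pattern (hypothetical deferred_item type, not the iomap code):

#include <linux/bug.h>
#include <linux/list.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

static DEFINE_SPINLOCK(deferred_lock);
static LIST_HEAD(deferred_list);

struct deferred_item {
	struct list_head list;
};

static void process_deferred(struct work_struct *work)
{
	struct deferred_item *item;
	LIST_HEAD(tmp);

	spin_lock_irq(&deferred_lock);
	list_splice_init(&deferred_list, &tmp);	/* grab the whole batch */
	spin_unlock_irq(&deferred_lock);

	while ((item = list_first_entry_or_null(&tmp, struct deferred_item,
						list))) {
		list_del_init(&item->list);
		/* ... heavy, sleepable completion handling goes here ... */
		cond_resched();
	}
}

static DECLARE_WORK(deferred_work, process_deferred);

/* Callable from hard-irq completion context. */
static void defer_item(struct deferred_item *item)
{
	unsigned long flags;

	spin_lock_irqsave(&deferred_lock, flags);
	if (list_empty(&deferred_list))		/* first item arms the work */
		WARN_ON_ONCE(!schedule_work(&deferred_work));
	list_add_tail(&item->list, &deferred_list);
	spin_unlock_irqrestore(&deferred_lock, flags);
}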
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index a0c46aa..92a831cf 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c
@@ -80,18 +80,27 @@ static void iomap_set_range_uptodate(struct folio *folio, size_t off, { struct iomap_folio_state *ifs = folio->private; unsigned long flags; - bool uptodate = true; + bool mark_uptodate = true; if (folio_test_uptodate(folio)) return; if (ifs) { spin_lock_irqsave(&ifs->state_lock, flags); - uptodate = ifs_set_range_uptodate(folio, ifs, off, len); + /* + * If a read with bytes pending is in progress, we must not call + * folio_mark_uptodate(). The read completion path + * (iomap_read_end()) will call folio_end_read(), which uses XOR + * semantics to set the uptodate bit. If we set it here, the XOR + * in folio_end_read() will clear it, leaving the folio not + * uptodate. + */ + mark_uptodate = ifs_set_range_uptodate(folio, ifs, off, len) && + !ifs->read_bytes_pending; spin_unlock_irqrestore(&ifs->state_lock, flags); } - if (uptodate) + if (mark_uptodate) folio_mark_uptodate(folio); } @@ -505,6 +514,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, loff_t length = iomap_length(iter); struct folio *folio = ctx->cur_folio; size_t folio_len = folio_size(folio); + struct iomap_folio_state *ifs; size_t poff, plen; loff_t pos_diff; int ret; @@ -516,7 +526,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, return iomap_iter_advance(iter, length); } - ifs_alloc(iter->inode, folio, iter->flags); + ifs = ifs_alloc(iter->inode, folio, iter->flags); length = min_t(loff_t, length, folio_len - offset_in_folio(folio, pos)); while (length) { @@ -551,11 +561,15 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, *bytes_submitted += plen; /* - * If the entire folio has been read in by the IO - * helper, then the helper owns the folio and will end - * the read on it. + * Hand off folio ownership to the IO helper when: + * 1) The entire folio has been submitted for IO, or + * 2) There is no ifs attached to the folio + * + * Case (2) occurs when 1 << i_blkbits matches the folio + * size but the underlying filesystem or block device + * uses a smaller granularity for IO. */ - if (*bytes_submitted == folio_len) + if (*bytes_submitted == folio_len || !ifs) ctx->cur_folio = NULL; } @@ -624,6 +638,7 @@ static int iomap_readahead_iter(struct iomap_iter *iter, * iomap_readahead - Attempt to read pages from a file. * @ops: The operations vector for the filesystem. * @ctx: The ctx used for issuing readahead. + * @private: The filesystem-specific information for issuing iomap_iter. * * This function is for filesystems to call to implement their readahead * address_space operation.
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 95254aa..e911dae 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c
@@ -87,6 +87,19 @@ static inline enum fserror_type iomap_dio_err_type(const struct iomap_dio *dio) return FSERR_DIRECTIO_READ; } +static inline bool should_report_dio_fserror(const struct iomap_dio *dio) +{ + switch (dio->error) { + case 0: + case -EAGAIN: + case -ENOTBLK: + /* don't send fsnotify for success or magic retry codes */ + return false; + default: + return true; + } +} + ssize_t iomap_dio_complete(struct iomap_dio *dio) { const struct iomap_dio_ops *dops = dio->dops; @@ -96,7 +109,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) if (dops && dops->end_io) ret = dops->end_io(iocb, dio->size, ret, dio->flags); - if (dio->error) + if (should_report_dio_fserror(dio)) fserror_report_io(file_inode(iocb->ki_filp), iomap_dio_err_type(dio), offset, dio->size, dio->error, GFP_NOFS);
diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c index e4d57cb..60546fa 100644 --- a/fs/iomap/ioend.c +++ b/fs/iomap/ioend.c
@@ -69,11 +69,57 @@ static u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend) return folio_count; } +static DEFINE_SPINLOCK(failed_ioend_lock); +static LIST_HEAD(failed_ioend_list); + +static void +iomap_fail_ioends( + struct work_struct *work) +{ + struct iomap_ioend *ioend; + struct list_head tmp; + unsigned long flags; + + spin_lock_irqsave(&failed_ioend_lock, flags); + list_replace_init(&failed_ioend_list, &tmp); + spin_unlock_irqrestore(&failed_ioend_lock, flags); + + while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend, + io_list))) { + list_del_init(&ioend->io_list); + iomap_finish_ioend_buffered(ioend); + cond_resched(); + } +} + +static DECLARE_WORK(failed_ioend_work, iomap_fail_ioends); + +static void iomap_fail_ioend_buffered(struct iomap_ioend *ioend) +{ + unsigned long flags; + + /* + * Bounce I/O errors to a workqueue to avoid nested i_lock acquisitions + * in the fserror code. The caller no longer owns the ioend reference + * after the spinlock drops. + */ + spin_lock_irqsave(&failed_ioend_lock, flags); + if (list_empty(&failed_ioend_list)) + WARN_ON_ONCE(!schedule_work(&failed_ioend_work)); + list_add_tail(&ioend->io_list, &failed_ioend_list); + spin_unlock_irqrestore(&failed_ioend_lock, flags); +} + static void ioend_writeback_end_bio(struct bio *bio) { struct iomap_ioend *ioend = iomap_ioend_from_bio(bio); ioend->io_error = blk_status_to_errno(bio->bi_status); + if (ioend->io_error) { + iomap_fail_ioend_buffered(ioend); + return; + } + iomap_finish_ioend_buffered(ioend); } @@ -169,17 +215,18 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, WARN_ON_ONCE(!folio->private && map_len < dirty_len); switch (wpc->iomap.type) { - case IOMAP_INLINE: - WARN_ON_ONCE(1); - return -EIO; + case IOMAP_UNWRITTEN: + ioend_flags |= IOMAP_IOEND_UNWRITTEN; + break; + case IOMAP_MAPPED: + break; case IOMAP_HOLE: return map_len; default: - break; + WARN_ON_ONCE(1); + return -EIO; } - if (wpc->iomap.type == IOMAP_UNWRITTEN) - ioend_flags |= IOMAP_IOEND_UNWRITTEN; if (wpc->iomap.flags & IOMAP_F_SHARED) ioend_flags |= IOMAP_IOEND_SHARED; if (folio_test_dropbehind(folio))
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index de89c5b..1508e2f 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c
@@ -267,7 +267,15 @@ int jbd2_log_do_checkpoint(journal_t *journal) */ BUFFER_TRACE(bh, "queue"); get_bh(bh); - J_ASSERT_BH(bh, !buffer_jwrite(bh)); + if (WARN_ON_ONCE(buffer_jwrite(bh))) { + put_bh(bh); /* drop the ref we just took */ + spin_unlock(&journal->j_list_lock); + /* Clean up any previously batched buffers */ + if (batch_count) + __flush_batch(journal, &batch_count); + jbd2_journal_abort(journal, -EFSCORRUPTED); + return -EFSCORRUPTED; + } journal->j_chkpt_bhs[batch_count++] = bh; transaction->t_chp_stats.cs_written++; transaction->t_checkpoint_list = jh->b_cpnext; @@ -325,7 +333,10 @@ int jbd2_cleanup_journal_tail(journal_t *journal) if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr)) return 1; - J_ASSERT(blocknr != 0); + if (WARN_ON_ONCE(blocknr == 0)) { + jbd2_journal_abort(journal, -EFSCORRUPTED); + return -EFSCORRUPTED; + } /* * We need to make sure that any blocks that were recently written out
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 7da66ca..abec438 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c
@@ -247,7 +247,7 @@ struct inode *minix_new_inode(const struct inode *dir, umode_t mode) j += i * bits_per_zone; if (!j || j > sbi->s_ninodes) { iput(inode); - return ERR_PTR(-ENOSPC); + return ERR_PTR(-EFSCORRUPTED); } inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_ino = j;
diff --git a/fs/mpage.c b/fs/mpage.c index 7dae5af..b3d9f23 100644 --- a/fs/mpage.c +++ b/fs/mpage.c
@@ -646,17 +646,24 @@ static int mpage_write_folio(struct writeback_control *wbc, struct folio *folio, } /** - * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them + * __mpage_writepages - walk the list of dirty pages of the given address space + * & writepage() all of them * @mapping: address space structure to write * @wbc: subtract the number of written pages from *@wbc->nr_to_write * @get_block: the filesystem's block mapper function. + * @write_folio: handler to call for each folio before calling + * mpage_write_folio() * * This is a library function, which implements the writepages() - * address_space_operation. + * address_space_operation. It calls the @write_folio handler for each folio. + * If the handler returns a value greater than 0, mpage_write_folio() is + * called to write the folio back. */ int -mpage_writepages(struct address_space *mapping, - struct writeback_control *wbc, get_block_t get_block) +__mpage_writepages(struct address_space *mapping, + struct writeback_control *wbc, get_block_t get_block, + int (*write_folio)(struct folio *folio, + struct writeback_control *wbc)) { struct mpage_data mpd = { .get_block = get_block, @@ -666,11 +673,22 @@ mpage_writepages(struct address_space *mapping, int error; blk_start_plug(&plug); - while ((folio = writeback_iter(mapping, wbc, folio, &error))) + while ((folio = writeback_iter(mapping, wbc, folio, &error))) { + if (write_folio) { + error = write_folio(folio, wbc); + /* + * == 0 means folio is handled, < 0 means error. In + * both cases hand back control to writeback_iter() + */ + if (error <= 0) + continue; + /* Let mpage_write_folio() handle the folio. */ + } error = mpage_write_folio(wbc, folio, &mpd); + } if (mpd.bio) mpage_bio_submit_write(mpd.bio); blk_finish_plug(&plug); return error; } -EXPORT_SYMBOL(mpage_writepages); +EXPORT_SYMBOL(__mpage_writepages);
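Under the split above, a legacy caller presumably becomes __mpage_writepages(mapping, wbc, get_block, NULL), while a filesystem that wants first crack at each folio supplies a handler. A sketch with invented myfs_* names:

    /* 0 = handled here, <0 = error, >0 = let mpage_write_folio() take it */
    static int myfs_write_folio(struct folio *folio,
                                struct writeback_control *wbc)
    {
            if (myfs_folio_is_special(folio))   /* hypothetical predicate */
                    return myfs_write_special(folio, wbc);
            return 1;
    }

    static int myfs_writepages(struct address_space *mapping,
                               struct writeback_control *wbc)
    {
            return __mpage_writepages(mapping, wbc, myfs_get_block,
                                      myfs_write_folio);
    }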
diff --git a/fs/namei.c b/fs/namei.c index 58f715f..9e5500d 100644 --- a/fs/namei.c +++ b/fs/namei.c
@@ -2437,8 +2437,14 @@ u64 hashlen_string(const void *salt, const char *name) EXPORT_SYMBOL(hashlen_string); /* - * Calculate the length and hash of the path component, and - * return the length as the result. + * hash_name - Calculate the length and hash of the path component + * @nd: the path resolution state + * @name: the pathname to read the component from + * @lastword: set to LAST_WORD_IS_DOT or LAST_WORD_IS_DOTDOT if the + * component is '.' or '..' respectively, to some other value if the + * component fits in a single word but is neither, and to 0 otherwise. + * + * Returns: a pointer to the terminating '/' or NUL character in @name. */ static inline const char *hash_name(struct nameidata *nd, const char *name,
diff --git a/fs/namespace.c b/fs/namespace.c index ebe19de..854f4fc 100644 --- a/fs/namespace.c +++ b/fs/namespace.c
@@ -1531,23 +1531,33 @@ static struct mount *mnt_find_id_at_reverse(struct mnt_namespace *ns, u64 mnt_id static void *m_start(struct seq_file *m, loff_t *pos) { struct proc_mounts *p = m->private; + struct mount *mnt; down_read(&namespace_sem); - return mnt_find_id_at(p->ns, *pos); + mnt = mnt_find_id_at(p->ns, *pos); + if (mnt) + *pos = mnt->mnt_id_unique; + return mnt; } static void *m_next(struct seq_file *m, void *v, loff_t *pos) { - struct mount *next = NULL, *mnt = v; + struct mount *mnt = v; struct rb_node *node = rb_next(&mnt->mnt_node); - ++*pos; if (node) { - next = node_to_mount(node); + struct mount *next = node_to_mount(node); *pos = next->mnt_id_unique; + return next; } - return next; + + /* + * No more mounts. Set pos past current mount's ID so that if + * iteration restarts, mnt_find_id_at() returns NULL. + */ + *pos = mnt->mnt_id_unique + 1; + return NULL; } static void m_stop(struct seq_file *m, void *v) @@ -2791,7 +2801,8 @@ static inline void unlock_mount(struct pinned_mountpoint *m) } static void lock_mount_exact(const struct path *path, - struct pinned_mountpoint *mp); + struct pinned_mountpoint *mp, bool copy_mount, + unsigned int copy_flags); #define LOCK_MOUNT_MAYBE_BENEATH(mp, path, beneath) \ struct pinned_mountpoint mp __cleanup(unlock_mount) = {}; \ @@ -2799,7 +2810,10 @@ static void lock_mount_exact(const struct path *path, #define LOCK_MOUNT(mp, path) LOCK_MOUNT_MAYBE_BENEATH(mp, (path), false) #define LOCK_MOUNT_EXACT(mp, path) \ struct pinned_mountpoint mp __cleanup(unlock_mount) = {}; \ - lock_mount_exact((path), &mp) + lock_mount_exact((path), &mp, false, 0) +#define LOCK_MOUNT_EXACT_COPY(mp, path, copy_flags) \ + struct pinned_mountpoint mp __cleanup(unlock_mount) = {}; \ + lock_mount_exact((path), &mp, true, (copy_flags)) static int graft_tree(struct mount *mnt, const struct pinned_mountpoint *mp) { @@ -3073,16 +3087,13 @@ static struct file *open_detached_copy(struct path *path, unsigned int flags) return file; } -DEFINE_FREE(put_empty_mnt_ns, struct mnt_namespace *, - if (!IS_ERR_OR_NULL(_T)) free_mnt_ns(_T)) - static struct mnt_namespace *create_new_namespace(struct path *path, unsigned int flags) { - struct mnt_namespace *new_ns __free(put_empty_mnt_ns) = NULL; - struct path to_path __free(path_put) = {}; struct mnt_namespace *ns = current->nsproxy->mnt_ns; struct user_namespace *user_ns = current_user_ns(); - struct mount *new_ns_root; + struct mnt_namespace *new_ns; + struct mount *new_ns_root, *old_ns_root; + struct path to_path; struct mount *mnt; unsigned int copy_flags = 0; bool locked = false; @@ -3094,71 +3105,63 @@ static struct mnt_namespace *create_new_namespace(struct path *path, unsigned in if (IS_ERR(new_ns)) return ERR_CAST(new_ns); - scoped_guard(namespace_excl) { - new_ns_root = clone_mnt(ns->root, ns->root->mnt.mnt_root, copy_flags); - if (IS_ERR(new_ns_root)) - return ERR_CAST(new_ns_root); + old_ns_root = ns->root; + to_path.mnt = &old_ns_root->mnt; + to_path.dentry = old_ns_root->mnt.mnt_root; - /* - * If the real rootfs had a locked mount on top of it somewhere - * in the stack, lock the new mount tree as well so it can't be - * exposed. 
- */ - mnt = ns->root; - while (mnt->overmount) { - mnt = mnt->overmount; - if (mnt->mnt.mnt_flags & MNT_LOCKED) - locked = true; - } + VFS_WARN_ON_ONCE(old_ns_root->mnt.mnt_sb->s_type != &nullfs_fs_type); + + LOCK_MOUNT_EXACT_COPY(mp, &to_path, copy_flags); + if (IS_ERR(mp.parent)) { + free_mnt_ns(new_ns); + return ERR_CAST(mp.parent); + } + new_ns_root = mp.parent; + + /* + * If the real rootfs had a locked mount on top of it somewhere + * in the stack, lock the new mount tree as well so it can't be + * exposed. + */ + mnt = old_ns_root; + while (mnt->overmount) { + mnt = mnt->overmount; + if (mnt->mnt.mnt_flags & MNT_LOCKED) + locked = true; } /* - * We dropped the namespace semaphore so we can actually lock - * the copy for mounting. The copied mount isn't attached to any - * mount namespace and it is thus excluded from any propagation. - * So realistically we're isolated and the mount can't be - * overmounted. - */ - - /* Borrow the reference from clone_mnt(). */ - to_path.mnt = &new_ns_root->mnt; - to_path.dentry = dget(new_ns_root->mnt.mnt_root); - - /* Now lock for actual mounting. */ - LOCK_MOUNT_EXACT(mp, &to_path); - if (unlikely(IS_ERR(mp.parent))) - return ERR_CAST(mp.parent); - - /* - * We don't emulate unshare()ing a mount namespace. We stick to the - * restrictions of creating detached bind-mounts. It has a lot - * saner and simpler semantics. + * We don't emulate unshare()ing a mount namespace. We stick + * to the restrictions of creating detached bind-mounts. It + * has a lot saner and simpler semantics. */ mnt = __do_loopback(path, flags, copy_flags); - if (IS_ERR(mnt)) - return ERR_CAST(mnt); - scoped_guard(mount_writer) { + if (IS_ERR(mnt)) { + emptied_ns = new_ns; + umount_tree(new_ns_root, 0); + return ERR_CAST(mnt); + } + if (locked) mnt->mnt.mnt_flags |= MNT_LOCKED; /* - * Now mount the detached tree on top of the copy of the - * real rootfs we created. + * now mount the detached tree on top of the copy + * of the real rootfs we created. */ attach_mnt(mnt, new_ns_root, mp.mp); if (user_ns != ns->user_ns) lock_mnt_tree(new_ns_root); } - /* Add all mounts to the new namespace. */ - for (struct mount *p = new_ns_root; p; p = next_mnt(p, new_ns_root)) { - mnt_add_to_ns(new_ns, p); + for (mnt = new_ns_root; mnt; mnt = next_mnt(mnt, new_ns_root)) { + mnt_add_to_ns(new_ns, mnt); new_ns->nr_mounts++; } - new_ns->root = real_mount(no_free_ptr(to_path.mnt)); + new_ns->root = new_ns_root; ns_tree_add_raw(new_ns); - return no_free_ptr(new_ns); + return new_ns; } static struct file *open_new_namespace(struct path *path, unsigned int flags) @@ -3840,16 +3843,20 @@ static int do_new_mount(const struct path *path, const char *fstype, } static void lock_mount_exact(const struct path *path, - struct pinned_mountpoint *mp) + struct pinned_mountpoint *mp, bool copy_mount, + unsigned int copy_flags) { struct dentry *dentry = path->dentry; int err; + /* Assert that inode_lock() locked the correct inode. */ + VFS_WARN_ON_ONCE(copy_mount && !path_mounted(path)); + inode_lock(dentry->d_inode); namespace_lock(); if (unlikely(cant_mount(dentry))) err = -ENOENT; - else if (path_overmounted(path)) + else if (!copy_mount && path_overmounted(path)) err = -EBUSY; else err = get_mountpoint(dentry, mp); @@ -3857,9 +3864,15 @@ static void lock_mount_exact(const struct path *path, namespace_unlock(); inode_unlock(dentry->d_inode); mp->parent = ERR_PTR(err); - } else { - mp->parent = real_mount(path->mnt); + return; } + + if (copy_mount) + mp->parent = clone_mnt(real_mount(path->mnt), dentry, copy_flags); + else + mp->parent = real_mount(path->mnt); + if (unlikely(IS_ERR(mp->parent))) + __unlock_mount(mp); } int finish_automount(struct vfsmount *__m, const struct path *path) @@ -5678,6 +5691,8 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, s->mnt = mnt_file->f_path.mnt; ns = real_mount(s->mnt)->mnt_ns; + if (IS_ERR(ns)) + return PTR_ERR(ns); if (!ns) /* * We can't set mount point and mnt_ns_id since we don't have a
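The m_start()/m_next() change at the top of this diff encodes a general seq_file rule: *pos must be a stable key of the element just returned, not a running counter, because the iterator can be torn down and restarted between reads. In outline, with invented item/lookup names:

    struct item { u64 unique_id; /* ... */ };

    static void *ex_start(struct seq_file *m, loff_t *pos)
    {
            struct item *it = find_first_at_or_after(*pos); /* hypothetical */

            if (it)
                    *pos = it->unique_id;   /* resync pos with what we return */
            return it;
    }

    static void *ex_next(struct seq_file *m, void *v, loff_t *pos)
    {
            struct item *it = v, *next = next_item(it);     /* hypothetical */

            if (next) {
                    *pos = next->unique_id;
                    return next;
            }
            /* Park pos past the last key so a restart terminates cleanly. */
            *pos = it->unique_id + 1;
            return NULL;
    }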
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 88a0d80..a8c0d86 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c
@@ -171,9 +171,8 @@ static void netfs_queue_read(struct netfs_io_request *rreq, spin_lock(&rreq->lock); list_add_tail(&subreq->rreq_link, &stream->subrequests); if (list_is_first(&subreq->rreq_link, &stream->subrequests)) { - stream->front = subreq; if (!stream->active) { - stream->collected_to = stream->front->start; + stream->collected_to = subreq->start; /* Store list pointers before active flag */ smp_store_release(&stream->active, true); }
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index a498ee8..f72e6da 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c
@@ -71,9 +71,8 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) spin_lock(&rreq->lock); list_add_tail(&subreq->rreq_link, &stream->subrequests); if (list_is_first(&subreq->rreq_link, &stream->subrequests)) { - stream->front = subreq; if (!stream->active) { - stream->collected_to = stream->front->start; + stream->collected_to = subreq->start; /* Store list pointers before active flag */ smp_store_release(&stream->active, true); }
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index a9d1c3b..f9ab69d 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c
@@ -10,6 +10,209 @@ #include "internal.h" /* + * Perform the cleanup rituals after an unbuffered write is complete. + */ +static void netfs_unbuffered_write_done(struct netfs_io_request *wreq) +{ + struct netfs_inode *ictx = netfs_inode(wreq->inode); + + _enter("R=%x", wreq->debug_id); + + /* Okay, declare that all I/O is complete. */ + trace_netfs_rreq(wreq, netfs_rreq_trace_write_done); + + if (!wreq->error) + netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred); + + if (wreq->origin == NETFS_DIO_WRITE && + wreq->mapping->nrpages) { + /* mmap may have got underfoot and we may now have folios + * locally covering the region we just wrote. Attempt to + * discard the folios, but leave in place any modified locally. + * ->write_iter() is prevented from interfering by the DIO + * counter. + */ + pgoff_t first = wreq->start >> PAGE_SHIFT; + pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT; + + invalidate_inode_pages2_range(wreq->mapping, first, last); + } + + if (wreq->origin == NETFS_DIO_WRITE) + inode_dio_end(wreq->inode); + + _debug("finished"); + netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip); + /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */ + + if (wreq->iocb) { + size_t written = umin(wreq->transferred, wreq->len); + + wreq->iocb->ki_pos += written; + if (wreq->iocb->ki_complete) { + trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete); + wreq->iocb->ki_complete(wreq->iocb, wreq->error ?: written); + } + wreq->iocb = VFS_PTR_POISON; + } + + netfs_clear_subrequests(wreq); +} + +/* + * Collect the subrequest results of unbuffered write subrequests. + */ +static void netfs_unbuffered_write_collect(struct netfs_io_request *wreq, + struct netfs_io_stream *stream, + struct netfs_io_subrequest *subreq) +{ + trace_netfs_collect_sreq(wreq, subreq); + + spin_lock(&wreq->lock); + list_del_init(&subreq->rreq_link); + spin_unlock(&wreq->lock); + + wreq->transferred += subreq->transferred; + iov_iter_advance(&wreq->buffer.iter, subreq->transferred); + + stream->collected_to = subreq->start + subreq->transferred; + wreq->collected_to = stream->collected_to; + netfs_put_subrequest(subreq, netfs_sreq_trace_put_done); + + trace_netfs_collect_stream(wreq, stream); + trace_netfs_collect_state(wreq, wreq->collected_to, 0); +} + +/* + * Write data to the server without going through the pagecache and without + * writing it to the local cache. We dispatch the subrequests serially and + * wait for each to complete before dispatching the next, lest we leave a gap + * in the data written due to a failure such as ENOSPC. We could, however + * attempt to do preparation such as content encryption for the next subreq + * whilst the current is in progress. + */ +static int netfs_unbuffered_write(struct netfs_io_request *wreq) +{ + struct netfs_io_subrequest *subreq = NULL; + struct netfs_io_stream *stream = &wreq->io_streams[0]; + int ret; + + _enter("%llx", wreq->len); + + if (wreq->origin == NETFS_DIO_WRITE) + inode_dio_begin(wreq->inode); + + stream->collected_to = wreq->start; + + for (;;) { + bool retry = false; + + if (!subreq) { + netfs_prepare_write(wreq, stream, wreq->start + wreq->transferred); + subreq = stream->construct; + stream->construct = NULL; + } + + /* Check if (re-)preparation failed. 
*/ + if (unlikely(test_bit(NETFS_SREQ_FAILED, &subreq->flags))) { + netfs_write_subrequest_terminated(subreq, subreq->error); + ret = wreq->error = subreq->error; + break; + } + + iov_iter_truncate(&subreq->io_iter, wreq->len - wreq->transferred); + if (!iov_iter_count(&subreq->io_iter)) { + ret = 0; + break; + } + + subreq->len = netfs_limit_iter(&subreq->io_iter, 0, + stream->sreq_max_len, + stream->sreq_max_segs); + iov_iter_truncate(&subreq->io_iter, subreq->len); + stream->submit_extendable_to = subreq->len; + + trace_netfs_sreq(subreq, netfs_sreq_trace_submit); + stream->issue_write(subreq); + + /* Async, need to wait. */ + netfs_wait_for_in_progress_stream(wreq, stream); + + if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) { + retry = true; + } else if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) { + ret = subreq->error; + wreq->error = ret; + netfs_see_subrequest(subreq, netfs_sreq_trace_see_failed); + subreq = NULL; + break; + } + ret = 0; + + if (!retry) { + netfs_unbuffered_write_collect(wreq, stream, subreq); + subreq = NULL; + if (wreq->transferred >= wreq->len) + break; + if (!wreq->iocb && signal_pending(current)) { + ret = wreq->transferred ? -EINTR : -ERESTARTSYS; + trace_netfs_rreq(wreq, netfs_rreq_trace_intr); + break; + } + continue; + } + + /* We need to retry the last subrequest, so first reset the + * iterator, taking into account what, if anything, we managed + * to transfer. + */ + subreq->error = -EAGAIN; + trace_netfs_sreq(subreq, netfs_sreq_trace_retry); + if (subreq->transferred > 0) + iov_iter_advance(&wreq->buffer.iter, subreq->transferred); + + if (stream->source == NETFS_UPLOAD_TO_SERVER && + wreq->netfs_ops->retry_request) + wreq->netfs_ops->retry_request(wreq, stream); + + __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); + __clear_bit(NETFS_SREQ_BOUNDARY, &subreq->flags); + __clear_bit(NETFS_SREQ_FAILED, &subreq->flags); + subreq->io_iter = wreq->buffer.iter; + subreq->start = wreq->start + wreq->transferred; + subreq->len = wreq->len - wreq->transferred; + subreq->transferred = 0; + subreq->retry_count += 1; + stream->sreq_max_len = UINT_MAX; + stream->sreq_max_segs = INT_MAX; + + netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); + + if (stream->prepare_write) { + stream->prepare_write(subreq); + __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); + netfs_stat(&netfs_n_wh_retry_write_subreq); + } else { + struct iov_iter source; + + netfs_reset_iter(subreq); + source = subreq->io_iter; + netfs_reissue_write(stream, subreq, &source); + } + } + + netfs_unbuffered_write_done(wreq); + _leave(" = %d", ret); + return ret; +} + +static void netfs_unbuffered_write_async(struct work_struct *work) +{ + struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work); + + netfs_unbuffered_write(wreq); + netfs_put_request(wreq, netfs_rreq_trace_put_complete); +} + +/* * Perform an unbuffered write where we may have to do an RMW operation on an * encrypted file. This can also be used for direct I/O writes. */ @@ -70,35 +275,35 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * */ wreq->buffer.iter = *iter; } + + wreq->len = iov_iter_count(&wreq->buffer.iter); } __set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags); - if (async) - __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags); /* Copy the data into the bounce buffer and encrypt it. */ // TODO /* Dispatch the write. */ __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); - if (async) + + if (async) { + INIT_WORK(&wreq->work, netfs_unbuffered_write_async); wreq->iocb = iocb; - wreq->len = iov_iter_count(&wreq->buffer.iter); - ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len); - if (ret < 0) { - _debug("begin = %zd", ret); - goto out; - } - - if (!async) { - ret = netfs_wait_for_write(wreq); - if (ret > 0) - iocb->ki_pos += ret; - } else { + queue_work(system_dfl_wq, &wreq->work); ret = -EIOCBQUEUED; + } else { + ret = netfs_unbuffered_write(wreq); + if (ret < 0) { + _debug("begin = %zd", ret); + } else { + iocb->ki_pos += wreq->transferred; + ret = wreq->transferred ?: wreq->error; + } + + netfs_put_request(wreq, netfs_rreq_trace_put_complete); } -out: netfs_put_request(wreq, netfs_rreq_trace_put_return); return ret;
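Structurally, the new netfs_unbuffered_write() is a serial submit/wait/retry loop: one subrequest in flight at a time, so a failure such as ENOSPC cannot leave a gap behind already-issued later subrequests. Stripped of the netfs bookkeeping (every name below is illustrative, not the real API):

    for (;;) {
            struct sub *s = prepare_next_sub(req);  /* hypothetical */

            submit_sub(s);
            wait_for_sub(s);        /* serial: wait before issuing the next */

            if (sub_needs_retry(s)) {
                    rewind_sub(s, req->transferred); /* reset iterator state */
                    continue;
            }
            if (s->error) {
                    req->error = s->error;
                    break;
            }
            req->transferred += s->transferred;
            if (req->transferred >= req->len)
                    break;
    }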
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index 4319611..d436e20 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h
@@ -198,6 +198,9 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, struct file *file, loff_t start, enum netfs_io_origin origin); +void netfs_prepare_write(struct netfs_io_request *wreq, + struct netfs_io_stream *stream, + loff_t start); void netfs_reissue_write(struct netfs_io_stream *stream, struct netfs_io_subrequest *subreq, struct iov_iter *source); @@ -212,7 +215,6 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c struct folio **writethrough_cache); ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, struct folio *writethrough_cache); -int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len); /* * write_retry.c
diff --git a/fs/netfs/iterator.c b/fs/netfs/iterator.c index 72a435e..154a14bb 100644 --- a/fs/netfs/iterator.c +++ b/fs/netfs/iterator.c
@@ -143,6 +143,47 @@ static size_t netfs_limit_bvec(const struct iov_iter *iter, size_t start_offset, } /* + * Select the span of a kvec iterator we're going to use. Limit it by both + * maximum size and maximum number of segments. Returns the size of the span + * in bytes. + */ +static size_t netfs_limit_kvec(const struct iov_iter *iter, size_t start_offset, + size_t max_size, size_t max_segs) +{ + const struct kvec *kvecs = iter->kvec; + unsigned int nkv = iter->nr_segs, ix = 0, nsegs = 0; + size_t len, span = 0, n = iter->count; + size_t skip = iter->iov_offset + start_offset; + + if (WARN_ON(!iov_iter_is_kvec(iter)) || + WARN_ON(start_offset > n) || + n == 0) + return 0; + + while (n && ix < nkv && skip) { + len = kvecs[ix].iov_len; + if (skip < len) + break; + skip -= len; + n -= len; + ix++; + } + + while (n && ix < nkv) { + len = min3(n, kvecs[ix].iov_len - skip, max_size); + span += len; + nsegs++; + ix++; + if (span >= max_size || nsegs >= max_segs) + break; + skip = 0; + n -= len; + } + + return min(span, max_size); +} + +/* * Select the span of an xarray iterator we're going to use. Limit it by both * maximum size and maximum number of segments. It is assumed that segments * can be larger than a page in size, provided they're physically contiguous. @@ -245,6 +286,8 @@ size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, return netfs_limit_bvec(iter, start_offset, max_size, max_segs); if (iov_iter_is_xarray(iter)) return netfs_limit_xarray(iter, start_offset, max_size, max_segs); + if (iov_iter_is_kvec(iter)) + return netfs_limit_kvec(iter, start_offset, max_size, max_segs); BUG(); } EXPORT_SYMBOL(netfs_limit_iter);
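With the kvec case filled in, netfs_limit_iter() can size a subrequest against an ITER_KVEC buffer just like the bvec and xarray cases. A plausible (hypothetical) call site:

    char hdr[64], payload[4096];
    struct kvec kv[2] = {
            { .iov_base = hdr,     .iov_len = sizeof(hdr) },
            { .iov_base = payload, .iov_len = sizeof(payload) },
    };
    struct iov_iter iter;
    size_t span;

    iov_iter_kvec(&iter, ITER_SOURCE, kv, 2, sizeof(hdr) + sizeof(payload));
    /* Cap the next subrequest at 64KiB and 16 segments. */
    span = netfs_limit_iter(&iter, 0, SZ_64K, 16);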
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 137f0e2..e5f6665 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c
@@ -205,7 +205,8 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) * in progress. The issuer thread may be adding stuff to the tail * whilst we're doing this. */ - front = READ_ONCE(stream->front); + front = list_first_entry_or_null(&stream->subrequests, + struct netfs_io_subrequest, rreq_link); while (front) { size_t transferred; @@ -301,7 +302,6 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) list_del_init(&front->rreq_link); front = list_first_entry_or_null(&stream->subrequests, struct netfs_io_subrequest, rreq_link); - stream->front = front; spin_unlock(&rreq->lock); netfs_put_subrequest(remove, notes & ABANDON_SREQ ?
diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c index 7793ba5..cca9ac4 100644 --- a/fs/netfs/read_retry.c +++ b/fs/netfs/read_retry.c
@@ -93,8 +93,10 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) from->start, from->transferred, from->len); if (test_bit(NETFS_SREQ_FAILED, &from->flags) || - !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags)) + !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags)) { + subreq = from; goto abandon; + } list_for_each_continue(next, &stream->subrequests) { subreq = list_entry(next, struct netfs_io_subrequest, rreq_link); @@ -178,6 +180,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) if (subreq == to) break; } + subreq = NULL; continue; }
diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c index 8e6264f..d0e23bc 100644 --- a/fs/netfs/read_single.c +++ b/fs/netfs/read_single.c
@@ -107,7 +107,6 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq) spin_lock(&rreq->lock); list_add_tail(&subreq->rreq_link, &stream->subrequests); trace_netfs_sreq(subreq, netfs_sreq_trace_added); - stream->front = subreq; /* Store list pointers before active flag */ smp_store_release(&stream->active, true); spin_unlock(&rreq->lock);
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 61eab34..b194447 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c
@@ -228,7 +228,8 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) if (!smp_load_acquire(&stream->active)) continue; - front = stream->front; + front = list_first_entry_or_null(&stream->subrequests, + struct netfs_io_subrequest, rreq_link); while (front) { trace_netfs_collect_sreq(wreq, front); //_debug("sreq [%x] %llx %zx/%zx", @@ -279,7 +280,6 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) list_del_init(&front->rreq_link); front = list_first_entry_or_null(&stream->subrequests, struct netfs_io_subrequest, rreq_link); - stream->front = front; spin_unlock(&wreq->lock); netfs_put_subrequest(remove, notes & SAW_FAILURE ? @@ -399,27 +399,6 @@ bool netfs_write_collection(struct netfs_io_request *wreq) ictx->ops->invalidate_cache(wreq); } - if ((wreq->origin == NETFS_UNBUFFERED_WRITE || - wreq->origin == NETFS_DIO_WRITE) && - !wreq->error) - netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred); - - if (wreq->origin == NETFS_DIO_WRITE && - wreq->mapping->nrpages) { - /* mmap may have got underfoot and we may now have folios - * locally covering the region we just wrote. Attempt to - * discard the folios, but leave in place any modified locally. - * ->write_iter() is prevented from interfering by the DIO - * counter. - */ - pgoff_t first = wreq->start >> PAGE_SHIFT; - pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT; - invalidate_inode_pages2_range(wreq->mapping, first, last); - } - - if (wreq->origin == NETFS_DIO_WRITE) - inode_dio_end(wreq->inode); - _debug("finished"); netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip); /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 34894da..2db688f 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c
@@ -154,9 +154,9 @@ EXPORT_SYMBOL(netfs_prepare_write_failed); * Prepare a write subrequest. We need to allocate a new subrequest * if we don't have one. */ -static void netfs_prepare_write(struct netfs_io_request *wreq, - struct netfs_io_stream *stream, - loff_t start) +void netfs_prepare_write(struct netfs_io_request *wreq, + struct netfs_io_stream *stream, + loff_t start) { struct netfs_io_subrequest *subreq; struct iov_iter *wreq_iter = &wreq->buffer.iter; @@ -206,9 +206,8 @@ static void netfs_prepare_write(struct netfs_io_request *wreq, spin_lock(&wreq->lock); list_add_tail(&subreq->rreq_link, &stream->subrequests); if (list_is_first(&subreq->rreq_link, &stream->subrequests)) { - stream->front = subreq; if (!stream->active) { - stream->collected_to = stream->front->start; + stream->collected_to = subreq->start; /* Write list pointers before active flag */ smp_store_release(&stream->active, true); } @@ -699,41 +698,6 @@ ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_c } /* - * Write data to the server without going through the pagecache and without - * writing it to the local cache. - */ -int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len) -{ - struct netfs_io_stream *upload = &wreq->io_streams[0]; - ssize_t part; - loff_t start = wreq->start; - int error = 0; - - _enter("%zx", len); - - if (wreq->origin == NETFS_DIO_WRITE) - inode_dio_begin(wreq->inode); - - while (len) { - // TODO: Prepare content encryption - - _debug("unbuffered %zx", len); - part = netfs_advance_write(wreq, upload, start, len, false); - start += part; - len -= part; - rolling_buffer_advance(&wreq->buffer, part); - if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) - netfs_wait_for_paused_write(wreq); - if (test_bit(NETFS_RREQ_FAILED, &wreq->flags)) - break; - } - - netfs_end_issue_write(wreq); - _leave(" = %d", error); - return error; -} - -/* * Write some of a pending folio data back to the server and/or the cache. */ static int netfs_write_folio_single(struct netfs_io_request *wreq,
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 12cb0ca..6bb3054 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig
@@ -87,7 +87,7 @@ space programs which can be found in the Linux nfs-utils package, available from http://linux-nfs.org/. - If unsure, say Y. + If unsure, say N. config NFS_SWAP bool "Provide swap over NFS support" @@ -100,6 +100,7 @@ config NFS_V4_0 bool "NFS client support for NFSv4.0" depends on NFS_V4 + default y help This option enables support for minor version 0 of the NFSv4 protocol (RFC 3530) in the kernel's NFS client.
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 3e2de45..be2aebf 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c
@@ -392,8 +392,13 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, if (status != 0) goto out_release_acls; - if (d_alias) + if (d_alias) { + if (d_is_dir(d_alias)) { + status = -EISDIR; + goto out_dput; + } dentry = d_alias; + } /* When we created the file with exclusive semantics, make * sure we set the attributes afterwards. */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8fdbba7..8e8a76a 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c
@@ -36,19 +36,30 @@ * second map contains a reference to the entry in the first map. */ +static struct workqueue_struct *nfsd_export_wq; + #define EXPKEY_HASHBITS 8 #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) -static void expkey_put(struct kref *ref) +static void expkey_release(struct work_struct *work) { - struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); + struct svc_expkey *key = container_of(to_rcu_work(work), + struct svc_expkey, ek_rwork); if (test_bit(CACHE_VALID, &key->h.flags) && !test_bit(CACHE_NEGATIVE, &key->h.flags)) path_put(&key->ek_path); auth_domain_put(key->ek_client); - kfree_rcu(key, ek_rcu); + kfree(key); +} + +static void expkey_put(struct kref *ref) +{ + struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); + + INIT_RCU_WORK(&key->ek_rwork, expkey_release); + queue_rcu_work(nfsd_export_wq, &key->ek_rwork); } static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) @@ -353,11 +364,13 @@ static void export_stats_destroy(struct export_stats *stats) EXP_STATS_COUNTERS_NUM); } -static void svc_export_release(struct rcu_head *rcu_head) +static void svc_export_release(struct work_struct *work) { - struct svc_export *exp = container_of(rcu_head, struct svc_export, - ex_rcu); + struct svc_export *exp = container_of(to_rcu_work(work), + struct svc_export, ex_rwork); + path_put(&exp->ex_path); + auth_domain_put(exp->ex_client); nfsd4_fslocs_free(&exp->ex_fslocs); export_stats_destroy(exp->ex_stats); kfree(exp->ex_stats); @@ -369,9 +382,8 @@ static void svc_export_put(struct kref *ref) { struct svc_export *exp = container_of(ref, struct svc_export, h.ref); - path_put(&exp->ex_path); - auth_domain_put(exp->ex_client); - call_rcu(&exp->ex_rcu, svc_export_release); + INIT_RCU_WORK(&exp->ex_rwork, svc_export_release); + queue_rcu_work(nfsd_export_wq, &exp->ex_rwork); } static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) @@ -1479,6 +1491,36 @@ const struct seq_operations nfs_exports_op = { .show = e_show, }; +/** + * nfsd_export_wq_init - allocate the export release workqueue + * + * Called once at module load. The workqueue runs deferred svc_export and + * svc_expkey release work scheduled by queue_rcu_work() in the cache put + * callbacks. + * + * Return values: + * %0: workqueue allocated + * %-ENOMEM: allocation failed + */ +int nfsd_export_wq_init(void) +{ + nfsd_export_wq = alloc_workqueue("nfsd_export", WQ_UNBOUND, 0); + if (!nfsd_export_wq) + return -ENOMEM; + return 0; +} + +/** + * nfsd_export_wq_shutdown - drain and free the export release workqueue + * + * Called once at module unload. Per-namespace teardown in + * nfsd_export_shutdown() has already drained all deferred work. + */ +void nfsd_export_wq_shutdown(void) +{ + destroy_workqueue(nfsd_export_wq); +} + /* * Initialize the exports module. */ @@ -1540,6 +1582,9 @@ nfsd_export_shutdown(struct net *net) cache_unregister_net(nn->svc_expkey_cache, net); cache_unregister_net(nn->svc_export_cache, net); + /* Drain deferred export and expkey release work. */ + rcu_barrier(); + flush_workqueue(nfsd_export_wq); cache_destroy_net(nn->svc_expkey_cache, net); cache_destroy_net(nn->svc_export_cache, net); svcauth_unix_purge(net);
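queue_rcu_work() is the standard substitute for kfree_rcu()/call_rcu() once the release path may sleep, as path_put() can. The bare pattern, with generic names and system_dfl_wq standing in for the dedicated queue:

    struct obj {
            struct rcu_work rwork;
            /* ... payload ... */
    };

    static void obj_release(struct work_struct *work)
    {
            struct obj *o = container_of(to_rcu_work(work), struct obj, rwork);

            /* Process context, after a full grace period: sleeping is fine. */
            kfree(o);
    }

    static void obj_put_final(struct obj *o)
    {
            INIT_RCU_WORK(&o->rwork, obj_release);
            queue_rcu_work(system_dfl_wq, &o->rwork);
    }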
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index d2b09cd..b053993 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h
@@ -7,6 +7,7 @@ #include <linux/sunrpc/cache.h> #include <linux/percpu_counter.h> +#include <linux/workqueue.h> #include <uapi/linux/nfsd/export.h> #include <linux/nfs4.h> @@ -75,7 +76,7 @@ struct svc_export { u32 ex_layout_types; struct nfsd4_deviceid_map *ex_devid_map; struct cache_detail *cd; - struct rcu_head ex_rcu; + struct rcu_work ex_rwork; unsigned long ex_xprtsec_modes; struct export_stats *ex_stats; }; @@ -92,7 +93,7 @@ struct svc_expkey { u32 ek_fsid[6]; struct path ek_path; - struct rcu_head ek_rcu; + struct rcu_work ek_rwork; }; #define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) @@ -110,6 +111,8 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp, /* * Function declarations */ +int nfsd_export_wq_init(void); +void nfsd_export_wq_shutdown(void); int nfsd_export_init(struct net *); void nfsd_export_shutdown(struct net *); void nfsd_export_flush(struct net *);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 41dfba5..9d23491 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c
@@ -6281,9 +6281,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) int len = xdr->buf->len - (op_status_offset + XDR_UNIT); so->so_replay.rp_status = op->status; - so->so_replay.rp_buflen = len; - read_bytes_from_xdr_buf(xdr->buf, op_status_offset + XDR_UNIT, + if (len <= NFSD4_REPLAY_ISIZE) { + so->so_replay.rp_buflen = len; + read_bytes_from_xdr_buf(xdr->buf, + op_status_offset + XDR_UNIT, so->so_replay.rp_buf, len); + } else { + so->so_replay.rp_buflen = 0; + } } status: op->status = nfsd4_map_status(op->status,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index e9acd2c..71aabda 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c
@@ -149,9 +149,19 @@ static int exports_net_open(struct net *net, struct file *file) seq = file->private_data; seq->private = nn->svc_export_cache; + get_net(net); return 0; } +static int exports_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct cache_detail *cd = seq->private; + + put_net(cd->net); + return seq_release(inode, file); +} + static int exports_nfsd_open(struct inode *inode, struct file *file) { return exports_net_open(inode->i_sb->s_fs_info, file); @@ -161,7 +171,7 @@ static const struct file_operations exports_nfsd_operations = { .open = exports_nfsd_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = exports_release, }; static int export_features_show(struct seq_file *m, void *v) @@ -377,15 +387,15 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) } /* - * write_threads - Start NFSD, or report the current number of running threads + * write_threads - Start NFSD, or report the configured number of threads * * Input: * buf: ignored * size: zero * Output: * On success: passed-in buffer filled with '\n'-terminated C - * string numeric value representing the number of - * running NFSD threads; + * string numeric value representing the configured + * number of NFSD threads; * return code is the size in bytes of the string * On error: return code is zero * @@ -399,8 +409,8 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) * Output: * On success: NFS service is started; * passed-in buffer filled with '\n'-terminated C - * string numeric value representing the number of - * running NFSD threads; + * string numeric value representing the configured + * number of NFSD threads; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ @@ -430,7 +440,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) } /* - * write_pool_threads - Set or report the current number of threads per pool + * write_pool_threads - Set or report the configured number of threads per pool * * Input: * buf: ignored * @@ -447,7 +457,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) * Output: * On success: passed-in buffer filled with '\n'-terminated C * string containing integer values representing the - * number of NFSD threads in each pool; + * configured number of NFSD threads in each pool; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ @@ -1376,7 +1386,7 @@ static const struct proc_ops exports_proc_ops = { .proc_open = exports_proc_open, .proc_read = seq_read, .proc_lseek = seq_lseek, - .proc_release = seq_release, + .proc_release = exports_release, }; static int create_proc_exports_entry(void) @@ -1647,7 +1657,7 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) if (attr) nn->min_threads = nla_get_u32(attr); - ret = nfsd_svc(nrpools, nthreads, net, get_current_cred(), scope); + ret = nfsd_svc(nrpools, nthreads, net, current_cred(), scope); if (ret > 0) ret = 0; out_unlock: @@ -1657,7 +1667,7 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) } /** - * nfsd_nl_threads_get_doit - get the number of running threads + * nfsd_nl_threads_get_doit - get the maximum number of running threads * @skb: reply buffer * @info: netlink metadata and command arguments * * @@ -1700,7 +1710,7 @@ int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info) struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i]; err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, - sp->sp_nrthreads); + sp->sp_nrthrmax); if (err) goto err_unlock; } @@ -2000,7 +2010,7 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) } ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, 0, - get_current_cred()); + current_cred()); /* always save the latest error */ if (ret < 0) err = ret; @@ -2259,9 +2269,12 @@ static int __init init_nfsd(void) if (retval) goto out_free_pnfs; nfsd_lockd_init(); /* lockd->nfsd callbacks */ + retval = nfsd_export_wq_init(); + if (retval) + goto out_free_lockd; retval = register_pernet_subsys(&nfsd_net_ops); if (retval < 0) - goto out_free_lockd; + goto out_free_export_wq; retval = register_cld_notifier(); if (retval) goto out_free_subsys; @@ -2290,6 +2303,8 @@ static int __init init_nfsd(void) unregister_cld_notifier(); out_free_subsys: unregister_pernet_subsys(&nfsd_net_ops); +out_free_export_wq: + nfsd_export_wq_shutdown(); out_free_lockd: nfsd_lockd_shutdown(); nfsd_drc_slab_free(); @@ -2310,6 +2325,7 @@ static void __exit exit_nfsd(void) nfsd4_destroy_laundry_wq(); unregister_cld_notifier(); unregister_pernet_subsys(&nfsd_net_ops); + nfsd_export_wq_shutdown(); nfsd_drc_slab_free(); nfsd_lockd_shutdown(); nfsd4_free_slabs();
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 0887ee6..4a04208 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c
@@ -239,12 +239,13 @@ static void nfsd_net_free(struct percpu_ref *ref) int nfsd_nrthreads(struct net *net) { - int rv = 0; + int i, rv = 0; struct nfsd_net *nn = net_generic(net, nfsd_net_id); mutex_lock(&nfsd_mutex); if (nn->nfsd_serv) - rv = nn->nfsd_serv->sv_nrthreads; + for (i = 0; i < nn->nfsd_serv->sv_nrpools; ++i) + rv += nn->nfsd_serv->sv_pools[i].sp_nrthrmax; mutex_unlock(&nfsd_mutex); return rv; } @@ -659,7 +660,7 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) if (serv) for (i = 0; i < serv->sv_nrpools && i < n; i++) - nthreads[i] = serv->sv_pools[i].sp_nrthreads; + nthreads[i] = serv->sv_pools[i].sp_nrthrmax; return 0; }
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 6fcbf1e..c0ca115 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h
@@ -541,11 +541,18 @@ struct nfs4_client_reclaim { struct xdr_netobj cr_princhash; }; -/* A reasonable value for REPLAY_ISIZE was estimated as follows: - * The OPEN response, typically the largest, requires - * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) + - * 4(deleg. type) + 8(deleg. stateid) + 4(deleg. recall flag) + - * 20(deleg. space limit) + ~32(deleg. ace) = 112 bytes +/* + * REPLAY_ISIZE is sized for an OPEN response with delegation: + * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + + * 8(verifier) + 4(deleg. type) + 8(deleg. stateid) + + * 4(deleg. recall flag) + 20(deleg. space limit) + + * ~32(deleg. ace) = 112 bytes + * + * Some responses can exceed this. A LOCK denial includes the conflicting + * lock owner, which can be up to 1024 bytes (NFS4_OPAQUE_LIMIT). Responses + * larger than REPLAY_ISIZE are not cached in rp_ibuf; only rp_status is + * saved. Enlarging this constant increases the size of every + * nfs4_stateowner. */ #define NFSD4_REPLAY_ISIZE 112
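For what it is worth, the itemized sizes above really do sum to the constant; a trivial standalone check:

    #include <assert.h>

    int main(void)
    {
            int open_resp = 4 /* status */ + 8 /* stateid */ + 20 /* changeinfo */
                          + 4 /* rflags */ + 8 /* verifier */ + 4 /* deleg type */
                          + 8 /* deleg stateid */ + 4 /* recall flag */
                          + 20 /* space limit */ + 32 /* ~deleg ace */;
            assert(open_resp == 112);       /* NFSD4_REPLAY_ISIZE */
            return 0;
    }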
diff --git a/fs/nsfs.c b/fs/nsfs.c index db91de2..c215878 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c
@@ -199,6 +199,16 @@ static bool nsfs_ioctl_valid(unsigned int cmd) return false; } +static bool may_use_nsfs_ioctl(unsigned int cmd) +{ + switch (_IOC_NR(cmd)) { + case _IOC_NR(NS_MNT_GET_NEXT): + case _IOC_NR(NS_MNT_GET_PREV): + return may_see_all_namespaces(); + } + return true; +} + static long ns_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -214,6 +224,8 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl, if (!nsfs_ioctl_valid(ioctl)) return -ENOIOCTLCMD; + if (!may_use_nsfs_ioctl(ioctl)) + return -EPERM; ns = get_proc_ns(file_inode(filp)); switch (ioctl) { @@ -614,7 +626,7 @@ static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh, return ERR_PTR(-EOPNOTSUPP); } - if (owning_ns && !ns_capable(owning_ns, CAP_SYS_ADMIN)) { + if (owning_ns && !may_see_all_namespaces()) { ns->ops->put(ns); return ERR_PTR(-EPERM); }
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 03a5166..a2ccd80 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c
@@ -1505,6 +1505,16 @@ int ocfs2_validate_inode_block(struct super_block *sb, goto bail; } + if (le16_to_cpu(data->id_count) > + ocfs2_max_inline_data_with_xattr(sb, di)) { + rc = ocfs2_error(sb, + "Invalid dinode #%llu: inline data id_count %u exceeds max %d\n", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(data->id_count), + ocfs2_max_inline_data_with_xattr(sb, di)); + goto bail; + } + if (le64_to_cpu(di->i_size) > le16_to_cpu(data->id_count)) { rc = ocfs2_error(sb, "Invalid dinode #%llu: inline data i_size %llu exceeds id_count %u\n",
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 758611e..13cb60b 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c
@@ -1146,15 +1146,15 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, return -EOVERFLOW; /* - * With metacopy disabled, we fsync after final metadata copyup, for + * With "fsync=strict", we fsync after final metadata copyup, for * both regular files and directories to get atomic copyup semantics * on filesystems that do not use strict metadata ordering (e.g. ubifs). * - * With metacopy enabled we want to avoid fsync on all meta copyup + * By default, we want to avoid fsync on all meta copyup, because * that will hurt performance of workloads such as chown -R, so we * only fsync on data copyup as legacy behavior. */ - ctx.metadata_fsync = !OVL_FS(dentry->d_sb)->config.metacopy && + ctx.metadata_fsync = ovl_should_sync_metadata(OVL_FS(dentry->d_sb)) && (S_ISREG(ctx.stat.mode) || S_ISDIR(ctx.stat.mode)); ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index cad2055..63b299b 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h
@@ -99,6 +99,12 @@ enum { OVL_VERITY_REQUIRE, }; +enum { + OVL_FSYNC_VOLATILE, + OVL_FSYNC_AUTO, + OVL_FSYNC_STRICT, +}; + /* * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, * where: @@ -656,6 +662,21 @@ static inline bool ovl_xino_warn(struct ovl_fs *ofs) return ofs->config.xino == OVL_XINO_ON; } +static inline bool ovl_should_sync(struct ovl_fs *ofs) +{ + return ofs->config.fsync_mode != OVL_FSYNC_VOLATILE; +} + +static inline bool ovl_should_sync_metadata(struct ovl_fs *ofs) +{ + return ofs->config.fsync_mode == OVL_FSYNC_STRICT; +} + +static inline bool ovl_is_volatile(struct ovl_config *config) +{ + return config->fsync_mode == OVL_FSYNC_VOLATILE; +} + /* * To avoid regressions in existing setups with overlay lower offline changes, * we allow lower changes only if none of the new features are used.
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 1d4828db..80cad4e 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h
@@ -18,7 +18,7 @@ struct ovl_config { int xino; bool metacopy; bool userxattr; - bool ovl_volatile; + int fsync_mode; }; struct ovl_sb { @@ -120,11 +120,6 @@ static inline struct ovl_fs *OVL_FS(struct super_block *sb) return (struct ovl_fs *)sb->s_fs_info; } -static inline bool ovl_should_sync(struct ovl_fs *ofs) -{ - return !ofs->config.ovl_volatile; -} - static inline unsigned int ovl_numlower(struct ovl_entry *oe) { return oe ? oe->__numlower : 0;
diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index 8111b43..c93fcaa 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c
@@ -58,6 +58,7 @@ enum ovl_opt { Opt_xino, Opt_metacopy, Opt_verity, + Opt_fsync, Opt_volatile, Opt_override_creds, }; @@ -140,6 +141,23 @@ static int ovl_verity_mode_def(void) return OVL_VERITY_OFF; } +static const struct constant_table ovl_parameter_fsync[] = { + { "volatile", OVL_FSYNC_VOLATILE }, + { "auto", OVL_FSYNC_AUTO }, + { "strict", OVL_FSYNC_STRICT }, + {} +}; + +static const char *ovl_fsync_mode(struct ovl_config *config) +{ + return ovl_parameter_fsync[config->fsync_mode].name; +} + +static int ovl_fsync_mode_def(void) +{ + return OVL_FSYNC_AUTO; +} + const struct fs_parameter_spec ovl_parameter_spec[] = { fsparam_string_empty("lowerdir", Opt_lowerdir), fsparam_file_or_string("lowerdir+", Opt_lowerdir_add), @@ -155,6 +173,7 @@ const struct fs_parameter_spec ovl_parameter_spec[] = { fsparam_enum("xino", Opt_xino, ovl_parameter_xino), fsparam_enum("metacopy", Opt_metacopy, ovl_parameter_bool), fsparam_enum("verity", Opt_verity, ovl_parameter_verity), + fsparam_enum("fsync", Opt_fsync, ovl_parameter_fsync), fsparam_flag("volatile", Opt_volatile), fsparam_flag_no("override_creds", Opt_override_creds), {} @@ -665,8 +684,11 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_verity: config->verity_mode = result.uint_32; break; + case Opt_fsync: + config->fsync_mode = result.uint_32; + break; case Opt_volatile: - config->ovl_volatile = true; + config->fsync_mode = OVL_FSYNC_VOLATILE; break; case Opt_userxattr: config->userxattr = true; @@ -800,6 +822,7 @@ int ovl_init_fs_context(struct fs_context *fc) ofs->config.nfs_export = ovl_nfs_export_def; ofs->config.xino = ovl_xino_def(); ofs->config.metacopy = ovl_metacopy_def; + ofs->config.fsync_mode = ovl_fsync_mode_def(); fc->s_fs_info = ofs; fc->fs_private = ctx; @@ -870,9 +893,9 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx, config->index = false; } - if (!config->upperdir && config->ovl_volatile) { + if (!config->upperdir && ovl_is_volatile(config)) { pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n"); - config->ovl_volatile = false; + config->fsync_mode = ovl_fsync_mode_def(); } if (!config->upperdir && config->uuid == OVL_UUID_ON) { @@ -1070,8 +1093,8 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry) seq_printf(m, ",xino=%s", ovl_xino_mode(&ofs->config)); if (ofs->config.metacopy != ovl_metacopy_def) seq_printf(m, ",metacopy=%s", str_on_off(ofs->config.metacopy)); - if (ofs->config.ovl_volatile) - seq_puts(m, ",volatile"); + if (ofs->config.fsync_mode != ovl_fsync_mode_def()) + seq_printf(m, ",fsync=%s", ovl_fsync_mode(&ofs->config)); if (ofs->config.userxattr) seq_puts(m, ",userxattr"); if (ofs->config.verity_mode != ovl_verity_mode_def())
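Assuming the option table above, selecting the strict mode from userspace would look like the following (paths invented); the bare "volatile" flag remains a synonym for fsync=volatile:

    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
            /* fsync=strict: fsync after metadata copyup as well as data copyup */
            if (mount("overlay", "/mnt/merged", "overlay", 0,
                      "lowerdir=/lower,upperdir=/upper,workdir=/work,fsync=strict"))
                    perror("mount");
            return 0;
    }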
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index d4c12fee..0822987 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c
@@ -776,7 +776,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, * For volatile mount, create a incompat/volatile/dirty file to keep * track of it. */ - if (ofs->config.ovl_volatile) { + if (ovl_is_volatile(&ofs->config)) { err = ovl_create_volatile_dirty(ofs); if (err < 0) { pr_err("Failed to create volatile/dirty file.\n");
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 3f1b763..2ea769f 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c
@@ -85,7 +85,10 @@ int ovl_can_decode_fh(struct super_block *sb) if (!exportfs_can_decode_fh(sb->s_export_op)) return 0; - return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN; + if (sb->s_export_op->encode_fh == generic_encode_ino32_fh) + return FILEID_INO32_GEN; + + return -1; } struct dentry *ovl_indexdir(struct super_block *sb)
diff --git a/fs/pidfs.c b/fs/pidfs.c index 3182533..e3825ee 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c
@@ -608,9 +608,8 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct user_namespace *user_ns; user_ns = task_cred_xxx(task, user_ns); - if (!ns_ref_get(user_ns)) - break; - ns_common = to_ns_common(user_ns); + if (ns_ref_get(user_ns)) + ns_common = to_ns_common(user_ns); } #endif break; @@ -620,9 +619,8 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct pid_namespace *pid_ns; pid_ns = task_active_pid_ns(task); - if (!ns_ref_get(pid_ns)) - break; - ns_common = to_ns_common(pid_ns); + if (ns_ref_get(pid_ns)) + ns_common = to_ns_common(pid_ns); } #endif break;
diff --git a/fs/proc/base.c b/fs/proc/base.c index 4eec684..4c863d1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c
@@ -2128,6 +2128,9 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, ino_t ino = 1; child = try_lookup_noperm(&qname, dir); + if (IS_ERR(child)) + goto end_instantiate; + if (!child) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); child = d_alloc_parallel(dir, &qname, &wq);
diff --git a/fs/smb/client/Makefile b/fs/smb/client/Makefile index 3abd357..1a6e1e1 100644 --- a/fs/smb/client/Makefile +++ b/fs/smb/client/Makefile
@@ -48,12 +48,15 @@ # Build the SMB2 error mapping table from smb2status.h # $(obj)/smb2_mapping_table.c: $(src)/../common/smb2status.h \ - $(src)/gen_smb2_mapping - $(call cmd,gen_smb2_mapping) + $(src)/gen_smb2_mapping FORCE + $(call if_changed,gen_smb2_mapping) $(obj)/smb2maperror.o: $(obj)/smb2_mapping_table.c quiet_cmd_gen_smb2_mapping = GEN $@ cmd_gen_smb2_mapping = perl $(src)/gen_smb2_mapping $< $@ -clean-files += smb2_mapping_table.c +obj-$(CONFIG_SMB_KUNIT_TESTS) += smb2maperror_test.o + +# Let Kbuild handle tracking and cleaning +targets += smb2_mapping_table.c
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index c327c24..04bb950 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c
@@ -118,7 +118,7 @@ static const char *path_no_prefix(struct cifs_sb_info *cifs_sb, if (!*path) return path; - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + if ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_USE_PREFIX_PATH) && cifs_sb->prepath) { len = strlen(cifs_sb->prepath) + 1; if (unlikely(len > strlen(path)))
diff --git a/fs/smb/client/cifs_fs_sb.h b/fs/smb/client/cifs_fs_sb.h index 5e8d163..84e7e36 100644 --- a/fs/smb/client/cifs_fs_sb.h +++ b/fs/smb/client/cifs_fs_sb.h
@@ -55,7 +55,7 @@ struct cifs_sb_info { struct nls_table *local_nls; struct smb3_fs_context *ctx; atomic_t active; - unsigned int mnt_cifs_flags; + atomic_t mnt_cifs_flags; struct delayed_work prune_tlinks; struct rcu_head rcu;
diff --git a/fs/smb/client/cifs_ioctl.h b/fs/smb/client/cifs_ioctl.h index b51ce64..147496a 100644 --- a/fs/smb/client/cifs_ioctl.h +++ b/fs/smb/client/cifs_ioctl.h
@@ -122,11 +122,3 @@ struct smb3_notify_info { #define CIFS_GOING_FLAGS_DEFAULT 0x0 /* going down */ #define CIFS_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ #define CIFS_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ - -static inline bool cifs_forced_shutdown(struct cifs_sb_info *sbi) -{ - if (CIFS_MOUNT_SHUTDOWN & sbi->mnt_cifs_flags) - return true; - else - return false; -}
diff --git a/fs/smb/client/cifs_unicode.c b/fs/smb/client/cifs_unicode.c index e7891b4..e2edc20 100644 --- a/fs/smb/client/cifs_unicode.c +++ b/fs/smb/client/cifs_unicode.c
@@ -11,20 +11,6 @@ #include "cifsglob.h" #include "cifs_debug.h" -int cifs_remap(struct cifs_sb_info *cifs_sb) -{ - int map_type; - - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR) - map_type = SFM_MAP_UNI_RSVD; - else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) - map_type = SFU_MAP_UNI_RSVD; - else - map_type = NO_MAP_UNI_RSVD; - - return map_type; -} - /* Convert character using the SFU - "Services for Unix" remapping range */ static bool convert_sfu_char(const __u16 src_char, char *target)
diff --git a/fs/smb/client/cifs_unicode.h b/fs/smb/client/cifs_unicode.h index 9249db3..3e9cd9ac 100644 --- a/fs/smb/client/cifs_unicode.h +++ b/fs/smb/client/cifs_unicode.h
@@ -22,6 +22,7 @@ #include <linux/types.h> #include <linux/nls.h> #include "../../nls/nls_ucs2_utils.h" +#include "cifsglob.h" /* * Macs use an older "SFM" mapping of the symbols above. Fortunately it does @@ -65,10 +66,21 @@ char *cifs_strndup_from_utf16(const char *src, const int maxlen, const struct nls_table *codepage); int cifsConvertToUTF16(__le16 *target, const char *source, int srclen, const struct nls_table *cp, int map_chars); -int cifs_remap(struct cifs_sb_info *cifs_sb); __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len, const struct nls_table *cp, int remap); wchar_t cifs_toupper(wchar_t in); +static inline int cifs_remap(const struct cifs_sb_info *cifs_sb) +{ + unsigned int sbflags = cifs_sb_flags(cifs_sb); + + if (sbflags & CIFS_MOUNT_MAP_SFM_CHR) + return SFM_MAP_UNI_RSVD; + if (sbflags & CIFS_MOUNT_MAP_SPECIAL_CHR) + return SFU_MAP_UNI_RSVD; + + return NO_MAP_UNI_RSVD; +} + #endif /* _CIFS_UNICODE_H */
diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index 6fa12c9..c920039 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c
@@ -356,7 +356,7 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct smb_sid *psid, psid->num_subauth, SID_MAX_SUB_AUTHORITIES); } - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL) || + if ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_UID_FROM_ACL) || (cifs_sb_master_tcon(cifs_sb)->posix_extensions)) { uint32_t unix_id; bool is_group; @@ -1489,7 +1489,7 @@ struct smb_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, struct cifsFileInfo *open_file = NULL; if (inode) - open_file = find_readable_file(CIFS_I(inode), true); + open_file = find_readable_file(CIFS_I(inode), FIND_FSUID_ONLY); if (!open_file) return get_cifs_acl_by_path(cifs_sb, path, pacllen, info); @@ -1612,7 +1612,8 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, struct smb_acl *dacl_ptr = NULL; struct smb_ntsd *pntsd = NULL; /* acl obtained from server */ struct smb_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); + unsigned int sbflags; struct tcon_link *tlink; struct smb_version_operations *ops; bool mode_from_sid, id_from_sid; @@ -1643,15 +1644,9 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, return rc; } - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) - mode_from_sid = true; - else - mode_from_sid = false; - - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL) - id_from_sid = true; - else - id_from_sid = false; + sbflags = cifs_sb_flags(cifs_sb); + mode_from_sid = sbflags & CIFS_MOUNT_MODE_FROM_SID; + id_from_sid = sbflags & CIFS_MOUNT_UID_FROM_ACL; /* Potentially, five new ACEs can be added to the ACL for U,G,O mapping */ if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 99b0423..32d0305 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c
@@ -226,16 +226,18 @@ cifs_sb_deactive(struct super_block *sb) static int cifs_read_super(struct super_block *sb) { - struct inode *inode; struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; + unsigned int sbflags; struct timespec64 ts; + struct inode *inode; int rc = 0; cifs_sb = CIFS_SB(sb); tcon = cifs_sb_master_tcon(cifs_sb); + sbflags = cifs_sb_flags(cifs_sb); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL) + if (sbflags & CIFS_MOUNT_POSIXACL) sb->s_flags |= SB_POSIXACL; if (tcon->snapshot_time) @@ -311,7 +313,7 @@ cifs_read_super(struct super_block *sb) } #ifdef CONFIG_CIFS_NFSD_EXPORT - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { + if (sbflags & CIFS_MOUNT_SERVER_INUM) { cifs_dbg(FYI, "export ops supported\n"); sb->s_export_op = &cifs_export_ops; } @@ -330,10 +332,14 @@ static void cifs_kill_sb(struct super_block *sb) /* * We need to release all dentries for the cached directories - * before we kill the sb. + * and close all deferred file handles before we kill the sb. */ if (cifs_sb->root) { close_all_cached_dirs(cifs_sb); + cifs_close_all_deferred_files_sb(cifs_sb); + + /* Wait for all pending oplock breaks to complete */ + flush_workqueue(cifsoplockd_wq); /* finally release root dentry */ dput(cifs_sb->root); @@ -389,8 +395,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len) { - struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); - struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(CIFS_SB(file)); struct TCP_Server_Info *server = tcon->ses->server; struct inode *inode = file_inode(file); int rc; @@ -418,11 +423,9 @@ static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len) static int cifs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { - struct cifs_sb_info *cifs_sb; + unsigned int sbflags = cifs_sb_flags(CIFS_SB(inode)); - cifs_sb = CIFS_SB(inode->i_sb); - - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { + if (sbflags & CIFS_MOUNT_NO_PERM) { if ((mask & MAY_EXEC) && !execute_ok(inode)) return -EACCES; else @@ -568,15 +571,17 @@ cifs_show_security(struct seq_file *s, struct cifs_ses *ses) static void cifs_show_cache_flavor(struct seq_file *s, struct cifs_sb_info *cifs_sb) { + unsigned int sbflags = cifs_sb_flags(cifs_sb); + seq_puts(s, ",cache="); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) + if (sbflags & CIFS_MOUNT_STRICT_IO) seq_puts(s, "strict"); - else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) + else if (sbflags & CIFS_MOUNT_DIRECT_IO) seq_puts(s, "none"); - else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE) + else if (sbflags & CIFS_MOUNT_RW_CACHE) seq_puts(s, "singleclient"); /* assume only one client access */ - else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) + else if (sbflags & CIFS_MOUNT_RO_CACHE) seq_puts(s, "ro"); /* read only caching assumed */ else seq_puts(s, "loose"); @@ -637,6 +642,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); struct sockaddr *srcaddr; + unsigned int sbflags; + srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; seq_show_option(s, "vers", tcon->ses->server->vals->version_string); @@ -670,16 +677,17 @@ cifs_show_options(struct seq_file *s, struct dentry *root) (int)(srcaddr->sa_family)); } + sbflags = cifs_sb_flags(cifs_sb); seq_printf(s, ",uid=%u", from_kuid_munged(&init_user_ns, 
cifs_sb->ctx->linux_uid)); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) + if (sbflags & CIFS_MOUNT_OVERR_UID) seq_puts(s, ",forceuid"); else seq_puts(s, ",noforceuid"); seq_printf(s, ",gid=%u", from_kgid_munged(&init_user_ns, cifs_sb->ctx->linux_gid)); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) + if (sbflags & CIFS_MOUNT_OVERR_GID) seq_puts(s, ",forcegid"); else seq_puts(s, ",noforcegid"); @@ -722,53 +730,53 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_puts(s, ",unix"); else seq_puts(s, ",nounix"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) + if (sbflags & CIFS_MOUNT_NO_DFS) seq_puts(s, ",nodfs"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) + if (sbflags & CIFS_MOUNT_POSIX_PATHS) seq_puts(s, ",posixpaths"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) + if (sbflags & CIFS_MOUNT_SET_UID) seq_puts(s, ",setuids"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL) + if (sbflags & CIFS_MOUNT_UID_FROM_ACL) seq_puts(s, ",idsfromsid"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) + if (sbflags & CIFS_MOUNT_SERVER_INUM) seq_puts(s, ",serverino"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) + if (sbflags & CIFS_MOUNT_RWPIDFORWARD) seq_puts(s, ",rwpidforward"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) + if (sbflags & CIFS_MOUNT_NOPOSIXBRL) seq_puts(s, ",forcemand"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + if (sbflags & CIFS_MOUNT_NO_XATTR) seq_puts(s, ",nouser_xattr"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) + if (sbflags & CIFS_MOUNT_MAP_SPECIAL_CHR) seq_puts(s, ",mapchars"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR) + if (sbflags & CIFS_MOUNT_MAP_SFM_CHR) seq_puts(s, ",mapposix"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) + if (sbflags & CIFS_MOUNT_UNX_EMUL) seq_puts(s, ",sfu"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + if (sbflags & CIFS_MOUNT_NO_BRL) seq_puts(s, ",nobrl"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_HANDLE_CACHE) + if (sbflags & CIFS_MOUNT_NO_HANDLE_CACHE) seq_puts(s, ",nohandlecache"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) + if (sbflags & CIFS_MOUNT_MODE_FROM_SID) seq_puts(s, ",modefromsid"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) + if (sbflags & CIFS_MOUNT_CIFS_ACL) seq_puts(s, ",cifsacl"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) + if (sbflags & CIFS_MOUNT_DYNPERM) seq_puts(s, ",dynperm"); if (root->d_sb->s_flags & SB_POSIXACL) seq_puts(s, ",acl"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) + if (sbflags & CIFS_MOUNT_MF_SYMLINKS) seq_puts(s, ",mfsymlinks"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) + if (sbflags & CIFS_MOUNT_FSCACHE) seq_puts(s, ",fsc"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC) + if (sbflags & CIFS_MOUNT_NOSSYNC) seq_puts(s, ",nostrictsync"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) + if (sbflags & CIFS_MOUNT_NO_PERM) seq_puts(s, ",noperm"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) + if (sbflags & CIFS_MOUNT_CIFS_BACKUPUID) seq_printf(s, ",backupuid=%u", from_kuid_munged(&init_user_ns, cifs_sb->ctx->backupuid)); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) + if (sbflags & CIFS_MOUNT_CIFS_BACKUPGID) seq_printf(s, ",backupgid=%u", from_kgid_munged(&init_user_ns, cifs_sb->ctx->backupgid)); @@ -864,7 +872,6 @@ static void cifs_umount_begin(struct super_block *sb) spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); - cifs_close_all_deferred_files(tcon); 
/* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */ /* cancel_notify_requests(tcon); */ if (tcon->ses && tcon->ses->server) { @@ -909,10 +916,10 @@ static int cifs_write_inode(struct inode *inode, struct writeback_control *wbc) static int cifs_drop_inode(struct inode *inode) { - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + unsigned int sbflags = cifs_sb_flags(CIFS_SB(inode)); /* no serverino => unconditional eviction */ - return !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) || + return !(sbflags & CIFS_MOUNT_SERVER_INUM) || inode_generic_drop(inode); } @@ -950,7 +957,7 @@ cifs_get_root(struct smb3_fs_context *ctx, struct super_block *sb) char *s, *p; char sep; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_USE_PREFIX_PATH) return dget(sb->s_root); full_path = cifs_build_path_to_root(ctx, cifs_sb, @@ -1262,7 +1269,7 @@ static int cifs_precopy_set_eof(struct inode *src_inode, struct cifsInodeInfo *s struct cifsFileInfo *writeable_srcfile; int rc = -EINVAL; - writeable_srcfile = find_writable_file(src_cifsi, FIND_WR_FSUID_ONLY); + writeable_srcfile = find_writable_file(src_cifsi, FIND_FSUID_ONLY); if (writeable_srcfile) { if (src_tcon->ses->server->ops->set_file_size) rc = src_tcon->ses->server->ops->set_file_size(
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 080ea60..709e96e 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h
@@ -20,6 +20,7 @@ #include <linux/utsname.h> #include <linux/sched/mm.h> #include <linux/netfs.h> +#include <linux/fcntl.h> #include "cifs_fs_sb.h" #include "cifsacl.h" #include <crypto/internal/hash.h> @@ -1580,24 +1581,59 @@ CIFS_I(struct inode *inode) return container_of(inode, struct cifsInodeInfo, netfs.inode); } -static inline struct cifs_sb_info * -CIFS_SB(struct super_block *sb) +static inline void *cinode_to_fsinfo(struct cifsInodeInfo *cinode) +{ + return cinode->netfs.inode.i_sb->s_fs_info; +} + +static inline void *super_to_fsinfo(struct super_block *sb) { return sb->s_fs_info; } -static inline struct cifs_sb_info * -CIFS_FILE_SB(struct file *file) +static inline void *inode_to_fsinfo(struct inode *inode) { - return CIFS_SB(file_inode(file)->i_sb); + return inode->i_sb->s_fs_info; +} + +static inline void *file_to_fsinfo(struct file *file) +{ + return file_inode(file)->i_sb->s_fs_info; +} + +static inline void *dentry_to_fsinfo(struct dentry *dentry) +{ + return dentry->d_sb->s_fs_info; +} + +static inline void *const_dentry_to_fsinfo(const struct dentry *dentry) +{ + return dentry->d_sb->s_fs_info; +} + +#define CIFS_SB(_ptr) \ + ((struct cifs_sb_info *) \ + _Generic((_ptr), \ + struct cifsInodeInfo * : cinode_to_fsinfo, \ + const struct dentry * : const_dentry_to_fsinfo, \ + struct super_block * : super_to_fsinfo, \ + struct dentry * : dentry_to_fsinfo, \ + struct inode * : inode_to_fsinfo, \ + struct file * : file_to_fsinfo)(_ptr)) + +/* + * Use atomic_t for @cifs_sb->mnt_cifs_flags as it is currently accessed + * locklessly and may be changed concurrently by mount/remount and reconnect + * paths. + */ +static inline unsigned int cifs_sb_flags(const struct cifs_sb_info *cifs_sb) +{ + return atomic_read(&cifs_sb->mnt_cifs_flags); } static inline char CIFS_DIR_SEP(const struct cifs_sb_info *cifs_sb) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) - return '/'; - else - return '\\'; + return (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_POSIX_PATHS) ? 
'/' : '\\'; } static inline void @@ -1849,12 +1885,12 @@ static inline bool is_replayable_error(int error) } -/* cifs_get_writable_file() flags */ -enum cifs_writable_file_flags { - FIND_WR_ANY = 0U, - FIND_WR_FSUID_ONLY = (1U << 0), - FIND_WR_WITH_DELETE = (1U << 1), - FIND_WR_NO_PENDING_DELETE = (1U << 2), +enum cifs_find_flags { + FIND_ANY = 0U, + FIND_FSUID_ONLY = (1U << 0), + FIND_WITH_DELETE = (1U << 1), + FIND_NO_PENDING_DELETE = (1U << 2), + FIND_OPEN_FLAGS = (1U << 3), }; #define MID_FREE 0 @@ -2314,9 +2350,8 @@ static inline bool __cifs_cache_state_check(struct cifsInodeInfo *cinode, unsigned int oplock_flags, unsigned int sb_flags) { - struct cifs_sb_info *cifs_sb = CIFS_SB(cinode->netfs.inode.i_sb); + unsigned int sflags = cifs_sb_flags(CIFS_SB(cinode)); unsigned int oplock = READ_ONCE(cinode->oplock); - unsigned int sflags = cifs_sb->mnt_cifs_flags; return (oplock & oplock_flags) || (sflags & sb_flags); } @@ -2336,4 +2371,25 @@ static inline void cifs_reset_oplock(struct cifsInodeInfo *cinode) WRITE_ONCE(cinode->oplock, 0); } +static inline bool cifs_forced_shutdown(const struct cifs_sb_info *sbi) +{ + return cifs_sb_flags(sbi) & CIFS_MOUNT_SHUTDOWN; +} + +static inline int cifs_open_create_options(unsigned int oflags, int opts) +{ + /* O_SYNC also has bit for O_DSYNC so following check picks up either */ + if (oflags & O_SYNC) + opts |= CREATE_WRITE_THROUGH; + if (oflags & O_DIRECT) + opts |= CREATE_NO_BUFFER; + return opts; +} + +/* + * The number of blocks is not related to (i_size / i_blksize), but instead + * 512 byte (2**9) size is required for calculating num blocks. + */ +#define CIFS_INO_BLOCKS(size) DIV_ROUND_UP_ULL((u64)(size), 512) + #endif /* _CIFS_GLOB_H */
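The CIFS_SB() rewrite above uses a C11 _Generic selection to dispatch on the argument's static type, letting one macro accept a super_block, inode, dentry, file or cifsInodeInfo pointer and pick the right s_fs_info accessor at compile time. A standalone userspace illustration of the same technique, using hypothetical types:

    #include <stdio.h>

    struct sb   { int id; };
    struct ino  { struct sb *sb; };
    struct filp { struct ino *ino; };

    static struct sb *from_sb(struct sb *s)     { return s; }
    static struct sb *from_ino(struct ino *i)   { return i->sb; }
    static struct sb *from_filp(struct filp *f) { return f->ino->sb; }

    /* _Generic selects one accessor by the static type of p; only the
     * chosen branch is compiled into the call. */
    #define TO_SB(p) _Generic((p),              \
            struct sb *   : from_sb,            \
            struct ino *  : from_ino,           \
            struct filp * : from_filp)(p)

    int main(void)
    {
            struct sb s = { .id = 42 };
            struct ino i = { .sb = &s };
            struct filp f = { .ino = &i };

            printf("%d %d %d\n", TO_SB(&s)->id, TO_SB(&i)->id, TO_SB(&f)->id);
            return 0;
    }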
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 96d6b53..884bfa1 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h
@@ -138,12 +138,14 @@ void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result); struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode, int flags); -int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, - struct cifsFileInfo **ret_file); +int __cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, + unsigned int find_flags, unsigned int open_flags, + struct cifsFileInfo **ret_file); int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, int flags, struct cifsFileInfo **ret_file); -struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, - bool fsuid_only); +struct cifsFileInfo *__find_readable_file(struct cifsInodeInfo *cifs_inode, + unsigned int find_flags, + unsigned int open_flags); int cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, struct cifsFileInfo **ret_file); int cifs_get_hardlink_path(struct cifs_tcon *tcon, struct inode *inode, @@ -261,6 +263,7 @@ void cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode); void cifs_close_all_deferred_files(struct cifs_tcon *tcon); +void cifs_close_all_deferred_files_sb(struct cifs_sb_info *cifs_sb); void cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, struct dentry *dentry); @@ -595,4 +598,20 @@ static inline void cifs_sg_set_buf(struct sg_table *sgtable, } } +static inline int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, + unsigned int find_flags, + struct cifsFileInfo **ret_file) +{ + find_flags &= ~FIND_OPEN_FLAGS; + return __cifs_get_writable_file(cifs_inode, find_flags, 0, ret_file); +} + +static inline struct cifsFileInfo * +find_readable_file(struct cifsInodeInfo *cinode, unsigned int find_flags) +{ + find_flags &= ~FIND_OPEN_FLAGS; + find_flags |= FIND_NO_PENDING_DELETE; + return __find_readable_file(cinode, find_flags, 0); +} + #endif /* _CIFSPROTO_H */
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 33dfe11..69b38f0c 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c
@@ -1955,6 +1955,10 @@ static int match_session(struct cifs_ses *ses, case Kerberos: if (!uid_eq(ctx->cred_uid, ses->cred_uid)) return 0; + if (strncmp(ses->user_name ?: "", + ctx->username ?: "", + CIFS_MAX_USERNAME_LEN)) + return 0; break; case NTLMv2: case RawNTLMSSP: @@ -2167,9 +2171,6 @@ void __cifs_put_smb_ses(struct cifs_ses *ses) #ifdef CONFIG_KEYS -/* strlen("cifs:a:") + CIFS_MAX_DOMAINNAME_LEN + 1 */ -#define CIFSCREDS_DESC_SIZE (7 + CIFS_MAX_DOMAINNAME_LEN + 1) - /* Populate username and pw fields from keyring if possible */ static int cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) @@ -2177,6 +2178,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) int rc = 0; int is_domain = 0; const char *delim, *payload; + size_t desc_sz; char *desc; ssize_t len; struct key *key; @@ -2185,7 +2187,9 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) struct sockaddr_in6 *sa6; const struct user_key_payload *upayload; - desc = kmalloc(CIFSCREDS_DESC_SIZE, GFP_KERNEL); + /* "cifs:a:" and "cifs:d:" are the same length; +1 for NUL terminator */ + desc_sz = strlen("cifs:a:") + CIFS_MAX_DOMAINNAME_LEN + 1; + desc = kmalloc(desc_sz, GFP_KERNEL); if (!desc) return -ENOMEM; @@ -2193,11 +2197,11 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) switch (server->dstaddr.ss_family) { case AF_INET: sa = (struct sockaddr_in *)&server->dstaddr; - sprintf(desc, "cifs:a:%pI4", &sa->sin_addr.s_addr); + snprintf(desc, desc_sz, "cifs:a:%pI4", &sa->sin_addr.s_addr); break; case AF_INET6: sa6 = (struct sockaddr_in6 *)&server->dstaddr; - sprintf(desc, "cifs:a:%pI6c", &sa6->sin6_addr.s6_addr); + snprintf(desc, desc_sz, "cifs:a:%pI6c", &sa6->sin6_addr.s6_addr); break; default: cifs_dbg(FYI, "Bad ss_family (%hu)\n", @@ -2216,7 +2220,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) } /* didn't work, try to find a domain key */ - sprintf(desc, "cifs:d:%s", ses->domainName); + snprintf(desc, desc_sz, "cifs:d:%s", ses->domainName); cifs_dbg(FYI, "%s: desc=%s\n", __func__, desc); key = request_key(&key_type_logon, desc, ""); if (IS_ERR(key)) { @@ -2236,7 +2240,6 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) /* find first : in payload */ payload = upayload->data; delim = strnchr(payload, upayload->datalen, ':'); - cifs_dbg(FYI, "payload=%s\n", payload); if (!delim) { cifs_dbg(FYI, "Unable to find ':' in payload (datalen=%d)\n", upayload->datalen); @@ -2915,8 +2918,8 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data) { struct cifs_sb_info *old = CIFS_SB(sb); struct cifs_sb_info *new = mnt_data->cifs_sb; - unsigned int oldflags = old->mnt_cifs_flags & CIFS_MOUNT_MASK; - unsigned int newflags = new->mnt_cifs_flags & CIFS_MOUNT_MASK; + unsigned int oldflags = cifs_sb_flags(old) & CIFS_MOUNT_MASK; + unsigned int newflags = cifs_sb_flags(new) & CIFS_MOUNT_MASK; if ((sb->s_flags & CIFS_MS_MASK) != (mnt_data->flags & CIFS_MS_MASK)) return 0; @@ -2971,9 +2974,9 @@ static int match_prepath(struct super_block *sb, struct smb3_fs_context *ctx = mnt_data->ctx; struct cifs_sb_info *old = CIFS_SB(sb); struct cifs_sb_info *new = mnt_data->cifs_sb; - bool old_set = (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + bool old_set = (cifs_sb_flags(old) & CIFS_MOUNT_USE_PREFIX_PATH) && old->prepath; - bool new_set = (new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + bool new_set = (cifs_sb_flags(new) & CIFS_MOUNT_USE_PREFIX_PATH) && new->prepath; if 
(tcon->origin_fullpath && @@ -3004,7 +3007,7 @@ cifs_match_super(struct super_block *sb, void *data) cifs_sb = CIFS_SB(sb); /* We do not want to use a superblock that has been shutdown */ - if (CIFS_MOUNT_SHUTDOWN & cifs_sb->mnt_cifs_flags) { + if (cifs_forced_shutdown(cifs_sb)) { spin_unlock(&cifs_tcp_ses_lock); return 0; } @@ -3469,6 +3472,8 @@ ip_connect(struct TCP_Server_Info *server) int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb) { struct smb3_fs_context *ctx = cifs_sb->ctx; + unsigned int sbflags; + int rc = 0; INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); INIT_LIST_HEAD(&cifs_sb->tcon_sb_link); @@ -3493,17 +3498,16 @@ int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb) } ctx->local_nls = cifs_sb->local_nls; - smb3_update_mnt_flags(cifs_sb); + sbflags = smb3_update_mnt_flags(cifs_sb); if (ctx->direct_io) cifs_dbg(FYI, "mounting share using direct i/o\n"); if (ctx->cache_ro) { cifs_dbg(VFS, "mounting share with read only caching. Ensure that the share will not be modified while in use.\n"); - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RO_CACHE; + sbflags |= CIFS_MOUNT_RO_CACHE; } else if (ctx->cache_rw) { cifs_dbg(VFS, "mounting share in single client RW caching mode. Ensure that no other systems will be accessing the share.\n"); - cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_RO_CACHE | - CIFS_MOUNT_RW_CACHE); + sbflags |= CIFS_MOUNT_RO_CACHE | CIFS_MOUNT_RW_CACHE; } if ((ctx->cifs_acl) && (ctx->dynperm)) @@ -3512,16 +3516,19 @@ int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb) if (ctx->prepath) { cifs_sb->prepath = kstrdup(ctx->prepath, GFP_KERNEL); if (cifs_sb->prepath == NULL) - return -ENOMEM; - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; + rc = -ENOMEM; + else + sbflags |= CIFS_MOUNT_USE_PREFIX_PATH; } - return 0; + atomic_set(&cifs_sb->mnt_cifs_flags, sbflags); + return rc; } /* Release all succeed connections */ void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx) { + struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; int rc = 0; if (mnt_ctx->tcon) @@ -3533,7 +3540,7 @@ void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx) mnt_ctx->ses = NULL; mnt_ctx->tcon = NULL; mnt_ctx->server = NULL; - mnt_ctx->cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS; + atomic_andnot(CIFS_MOUNT_POSIX_PATHS, &cifs_sb->mnt_cifs_flags); free_xid(mnt_ctx->xid); } @@ -3587,19 +3594,23 @@ int cifs_mount_get_session(struct cifs_mount_ctx *mnt_ctx) int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) { struct TCP_Server_Info *server; + struct cifs_tcon *tcon = NULL; struct cifs_sb_info *cifs_sb; struct smb3_fs_context *ctx; - struct cifs_tcon *tcon = NULL; + unsigned int sbflags; int rc = 0; - if (WARN_ON_ONCE(!mnt_ctx || !mnt_ctx->server || !mnt_ctx->ses || !mnt_ctx->fs_ctx || - !mnt_ctx->cifs_sb)) { - rc = -EINVAL; - goto out; + if (WARN_ON_ONCE(!mnt_ctx)) + return -EINVAL; + if (WARN_ON_ONCE(!mnt_ctx->server || !mnt_ctx->ses || + !mnt_ctx->fs_ctx || !mnt_ctx->cifs_sb)) { + mnt_ctx->tcon = NULL; + return -EINVAL; } server = mnt_ctx->server; ctx = mnt_ctx->fs_ctx; cifs_sb = mnt_ctx->cifs_sb; + sbflags = cifs_sb_flags(cifs_sb); /* search for existing tcon to this server share */ tcon = cifs_get_tcon(mnt_ctx->ses, ctx); @@ -3614,9 +3625,9 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) * path (i.e., do not remap / and \ and do not map any special characters) */ if (tcon->posix_extensions) { - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; - cifs_sb->mnt_cifs_flags &= ~(CIFS_MOUNT_MAP_SFM_CHR | - CIFS_MOUNT_MAP_SPECIAL_CHR); + sbflags |= 
CIFS_MOUNT_POSIX_PATHS; + sbflags &= ~(CIFS_MOUNT_MAP_SFM_CHR | + CIFS_MOUNT_MAP_SPECIAL_CHR); } #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY @@ -3643,12 +3654,11 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) /* do not care if a following call succeed - informational */ if (!tcon->pipe && server->ops->qfs_tcon) { server->ops->qfs_tcon(mnt_ctx->xid, tcon, cifs_sb); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) { + if (sbflags & CIFS_MOUNT_RO_CACHE) { if (tcon->fsDevInfo.DeviceCharacteristics & cpu_to_le32(FILE_READ_ONLY_DEVICE)) cifs_dbg(VFS, "mounted to read only share\n"); - else if ((cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_RW_CACHE) == 0) + else if (!(sbflags & CIFS_MOUNT_RW_CACHE)) cifs_dbg(VFS, "read only mount of RW share\n"); /* no need to log a RW mount of a typical RW share */ } @@ -3660,11 +3670,12 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) * Inside cifs_fscache_get_super_cookie it checks * that we do not get super cookie twice. */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) + if (sbflags & CIFS_MOUNT_FSCACHE) cifs_fscache_get_super_cookie(tcon); out: mnt_ctx->tcon = tcon; + atomic_set(&cifs_sb->mnt_cifs_flags, sbflags); return rc; } @@ -3783,7 +3794,8 @@ int cifs_is_path_remote(struct cifs_mount_ctx *mnt_ctx) cifs_sb, full_path, tcon->Flags & SMB_SHARE_IS_IN_DFS); if (rc != 0) { cifs_server_dbg(VFS, "cannot query dirs between root and final path, enabling CIFS_MOUNT_USE_PREFIX_PATH\n"); - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; + atomic_or(CIFS_MOUNT_USE_PREFIX_PATH, + &cifs_sb->mnt_cifs_flags); rc = 0; } } @@ -3863,7 +3875,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) * Force the use of prefix path to support failover on DFS paths that resolve to targets * that have different prefix paths. */ - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; + atomic_or(CIFS_MOUNT_USE_PREFIX_PATH, &cifs_sb->mnt_cifs_flags); kfree(cifs_sb->prepath); cifs_sb->prepath = ctx->prepath; ctx->prepath = NULL; @@ -4357,7 +4369,7 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb) kuid_t fsuid = current_fsuid(); int err; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_MULTIUSER)) return cifs_get_tlink(cifs_sb_master_tlink(cifs_sb)); spin_lock(&cifs_sb->tlink_tree_lock);
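Two hardening points in the connect.c hunk: the key-description buffer is now sized by an expression that lives next to the code filling it and is written with snprintf() instead of sprintf(), and the debug print of the raw keyring payload (which carries credentials) is removed. A userspace sketch of the bounded-format pattern, where DOMAIN_MAX stands in for CIFS_MAX_DOMAINNAME_LEN:

    #include <stdio.h>
    #include <string.h>

    #define DOMAIN_MAX 256

    int main(void)
    {
            /* "cifs:a:" and "cifs:d:" share a length, so one size fits both;
             * +1 for the NUL terminator. */
            size_t desc_sz = strlen("cifs:a:") + DOMAIN_MAX + 1;
            char desc[sizeof("cifs:a:") + DOMAIN_MAX];

            snprintf(desc, desc_sz, "cifs:d:%s", "EXAMPLE.COM");
            puts(desc);     /* truncates rather than overflows on long input */
            return 0;
    }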
diff --git a/fs/smb/client/dfs_cache.c b/fs/smb/client/dfs_cache.c index 9831327..83f8cf2 100644 --- a/fs/smb/client/dfs_cache.c +++ b/fs/smb/client/dfs_cache.c
@@ -1333,7 +1333,7 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) * Force the use of prefix path to support failover on DFS paths that resolve to targets * that have different prefix paths. */ - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; + atomic_or(CIFS_MOUNT_USE_PREFIX_PATH, &cifs_sb->mnt_cifs_flags); refresh_tcon_referral(tcon, true); return 0;
diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c index cb10088..6d2378e 100644 --- a/fs/smb/client/dir.c +++ b/fs/smb/client/dir.c
@@ -82,10 +82,11 @@ char *__build_path_from_dentry_optional_prefix(struct dentry *direntry, void *pa const char *tree, int tree_len, bool prefix) { - int dfsplen; - int pplen = 0; - struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(direntry); + unsigned int sbflags = cifs_sb_flags(cifs_sb); char dirsep = CIFS_DIR_SEP(cifs_sb); + int pplen = 0; + int dfsplen; char *s; if (unlikely(!page)) @@ -96,7 +97,7 @@ char *__build_path_from_dentry_optional_prefix(struct dentry *direntry, void *pa else dfsplen = 0; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) + if (sbflags & CIFS_MOUNT_USE_PREFIX_PATH) pplen = cifs_sb->prepath ? strlen(cifs_sb->prepath) + 1 : 0; s = dentry_path_raw(direntry, page, PATH_MAX); @@ -123,7 +124,7 @@ char *__build_path_from_dentry_optional_prefix(struct dentry *direntry, void *pa if (dfsplen) { s -= dfsplen; memcpy(s, tree, dfsplen); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) { + if (sbflags & CIFS_MOUNT_POSIX_PATHS) { int i; for (i = 0; i < dfsplen; i++) { if (s[i] == '\\') @@ -152,7 +153,7 @@ char *build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page static int check_name(struct dentry *direntry, struct cifs_tcon *tcon) { - struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(direntry); int i; if (unlikely(tcon->fsAttrInfo.MaxPathNameComponentLength && @@ -160,7 +161,7 @@ check_name(struct dentry *direntry, struct cifs_tcon *tcon) le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength))) return -ENAMETOOLONG; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_POSIX_PATHS)) { for (i = 0; i < direntry->d_name.len; i++) { if (direntry->d_name.name[i] == '\\') { cifs_dbg(FYI, "Invalid file name\n"); @@ -181,11 +182,12 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int rc = -ENOENT; int create_options = CREATE_NOT_DIR; int desired_access; - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); struct cifs_tcon *tcon = tlink_tcon(tlink); const char *full_path; void *page = alloc_dentry_path(); struct inode *newinode = NULL; + unsigned int sbflags = cifs_sb_flags(cifs_sb); int disposition; struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; @@ -306,6 +308,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned goto out; } + create_options |= cifs_open_create_options(oflags, create_options); /* * if we're not using unix extensions, see if we need to set * ATTR_READONLY on the create call @@ -374,7 +377,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned .device = 0, }; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + if (sbflags & CIFS_MOUNT_SET_UID) { args.uid = current_fsuid(); if (inode->i_mode & S_ISGID) args.gid = inode->i_gid; @@ -411,9 +414,9 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned if (server->ops->set_lease_key) server->ops->set_lease_key(newinode, fid); if ((*oplock & CIFS_CREATE_ACTION) && S_ISREG(newinode->i_mode)) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) + if (sbflags & CIFS_MOUNT_DYNPERM) newinode->i_mode = mode; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + if (sbflags & CIFS_MOUNT_SET_UID) { newinode->i_uid = current_fsuid(); if (inode->i_mode & S_ISGID) newinode->i_gid = inode->i_gid; @@ -458,18 +461,20 @@ int 
cifs_atomic_open(struct inode *inode, struct dentry *direntry, struct file *file, unsigned int oflags, umode_t mode) { - int rc; - unsigned int xid; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); + struct cifs_open_info_data buf = {}; + struct TCP_Server_Info *server; + struct cifsFileInfo *file_info; + struct cifs_pending_open open; + struct cifs_fid fid = {}; struct tcon_link *tlink; struct cifs_tcon *tcon; - struct TCP_Server_Info *server; - struct cifs_fid fid = {}; - struct cifs_pending_open open; + unsigned int sbflags; + unsigned int xid; __u32 oplock; - struct cifsFileInfo *file_info; - struct cifs_open_info_data buf = {}; + int rc; - if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb)))) + if (unlikely(cifs_forced_shutdown(cifs_sb))) return smb_EIO(smb_eio_trace_forced_shutdown); /* @@ -499,7 +504,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n", inode, direntry, direntry); - tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); + tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) { rc = PTR_ERR(tlink); goto out_free_xid; @@ -536,13 +541,13 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, goto out; } - if (file->f_flags & O_DIRECT && - CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { - if (CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + sbflags = cifs_sb_flags(cifs_sb); + if ((file->f_flags & O_DIRECT) && (sbflags & CIFS_MOUNT_STRICT_IO)) { + if (sbflags & CIFS_MOUNT_NO_BRL) file->f_op = &cifs_file_direct_nobrl_ops; else file->f_op = &cifs_file_direct_ops; - } + } file_info = cifs_new_fileinfo(&fid, file, tlink, oplock, buf.symlink_target); if (file_info == NULL) {
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 18f31d4..a69e05f 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c
@@ -255,7 +255,7 @@ static void cifs_begin_writeback(struct netfs_io_request *wreq) struct cifs_io_request *req = container_of(wreq, struct cifs_io_request, rreq); int ret; - ret = cifs_get_writable_file(CIFS_I(wreq->inode), FIND_WR_ANY, &req->cfile); + ret = cifs_get_writable_file(CIFS_I(wreq->inode), FIND_ANY, &req->cfile); if (ret) { cifs_dbg(VFS, "No writable handle in writepages ret=%d\n", ret); return; @@ -270,7 +270,7 @@ static void cifs_begin_writeback(struct netfs_io_request *wreq) static int cifs_init_request(struct netfs_io_request *rreq, struct file *file) { struct cifs_io_request *req = container_of(rreq, struct cifs_io_request, rreq); - struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode); struct cifsFileInfo *open_file = NULL; rreq->rsize = cifs_sb->ctx->rsize; @@ -281,7 +281,7 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file) open_file = file->private_data; rreq->netfs_priv = file->private_data; req->cfile = cifsFileInfo_get(open_file); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_RWPIDFORWARD) req->pid = req->cfile->pid; } else if (rreq->origin != NETFS_WRITEBACK) { WARN_ON_ONCE(1); @@ -584,15 +584,8 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ *********************************************************************/ disposition = cifs_get_disposition(f_flags); - /* BB pass O_SYNC flag through on file attributes .. BB */ - - /* O_SYNC also has bit for O_DSYNC so following check picks up either */ - if (f_flags & O_SYNC) - create_options |= CREATE_WRITE_THROUGH; - - if (f_flags & O_DIRECT) - create_options |= CREATE_NO_BUFFER; + create_options |= cifs_open_create_options(f_flags, create_options); retry_open: oparms = (struct cifs_open_parms) { @@ -711,8 +704,6 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, mutex_init(&cfile->fh_mutex); spin_lock_init(&cfile->file_info_lock); - cifs_sb_active(inode->i_sb); - /* * If the server returned a read oplock and we have mandatory brlocks, * set oplock level to None. @@ -767,7 +758,6 @@ static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file) struct inode *inode = d_inode(cifs_file->dentry); struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifsLockInfo *li, *tmp; - struct super_block *sb = inode->i_sb; /* * Delete any outstanding lock records. We'll lose them when the file @@ -785,7 +775,6 @@ static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file) cifs_put_tlink(cifs_file->tlink); dput(cifs_file->dentry); - cifs_sb_deactive(sb); kfree(cifs_file->symlink_target); kfree(cifs_file); } @@ -906,7 +895,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, * close because it may cause a error when we open this file * again and get at least level II oplock. 
*/ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_STRICT_IO) set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags); cifs_set_oplock_level(cifsi, 0); } @@ -955,11 +944,11 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, int cifs_file_flush(const unsigned int xid, struct inode *inode, struct cifsFileInfo *cfile) { - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); struct cifs_tcon *tcon; int rc; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOSSYNC) return 0; if (cfile && (OPEN_FMODE(cfile->f_flags) & FMODE_WRITE)) { @@ -967,7 +956,7 @@ int cifs_file_flush(const unsigned int xid, struct inode *inode, return tcon->ses->server->ops->flush(xid, tcon, &cfile->fid); } - rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); + rc = cifs_get_writable_file(CIFS_I(inode), FIND_ANY, &cfile); if (!rc) { tcon = tlink_tcon(cfile->tlink); rc = tcon->ses->server->ops->flush(xid, tcon, &cfile->fid); @@ -992,7 +981,7 @@ static int cifs_do_truncate(const unsigned int xid, struct dentry *dentry) return -ERESTARTSYS; mapping_set_error(inode->i_mapping, rc); - cfile = find_writable_file(cinode, FIND_WR_FSUID_ONLY); + cfile = find_writable_file(cinode, FIND_FSUID_ONLY); rc = cifs_file_flush(xid, inode, cfile); if (!rc) { if (cfile) { @@ -1004,7 +993,6 @@ static int cifs_do_truncate(const unsigned int xid, struct dentry *dentry) if (!rc) { netfs_resize_file(&cinode->netfs, 0, true); cifs_setsize(inode, 0); - inode->i_blocks = 0; } } if (cfile) @@ -1015,24 +1003,24 @@ static int cifs_do_truncate(const unsigned int xid, struct dentry *dentry) int cifs_open(struct inode *inode, struct file *file) { + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); + struct cifs_open_info_data data = {}; + struct cifsFileInfo *cfile = NULL; + struct TCP_Server_Info *server; + struct cifs_pending_open open; + bool posix_open_ok = false; + struct cifs_fid fid = {}; + struct tcon_link *tlink; + struct cifs_tcon *tcon; + const char *full_path; + unsigned int sbflags; int rc = -EACCES; unsigned int xid; __u32 oplock; - struct cifs_sb_info *cifs_sb; - struct TCP_Server_Info *server; - struct cifs_tcon *tcon; - struct tcon_link *tlink; - struct cifsFileInfo *cfile = NULL; void *page; - const char *full_path; - bool posix_open_ok = false; - struct cifs_fid fid = {}; - struct cifs_pending_open open; - struct cifs_open_info_data data = {}; xid = get_xid(); - cifs_sb = CIFS_SB(inode->i_sb); if (unlikely(cifs_forced_shutdown(cifs_sb))) { free_xid(xid); return smb_EIO(smb_eio_trace_forced_shutdown); @@ -1056,9 +1044,9 @@ int cifs_open(struct inode *inode, struct file *file) cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n", inode, file->f_flags, full_path); - if (file->f_flags & O_DIRECT && - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + sbflags = cifs_sb_flags(cifs_sb); + if ((file->f_flags & O_DIRECT) && (sbflags & CIFS_MOUNT_STRICT_IO)) { + if (sbflags & CIFS_MOUNT_NO_BRL) file->f_op = &cifs_file_direct_nobrl_ops; else file->f_op = &cifs_file_direct_ops; @@ -1072,32 +1060,29 @@ int cifs_open(struct inode *inode, struct file *file) /* Get the cached handle as SMB2 close is deferred */ if (OPEN_FMODE(file->f_flags) & FMODE_WRITE) { - rc = cifs_get_writable_path(tcon, full_path, - FIND_WR_FSUID_ONLY | - FIND_WR_NO_PENDING_DELETE, - &cfile); + rc = __cifs_get_writable_file(CIFS_I(inode), + FIND_FSUID_ONLY | + 
FIND_NO_PENDING_DELETE | + FIND_OPEN_FLAGS, + file->f_flags, &cfile); } else { - rc = cifs_get_readable_path(tcon, full_path, &cfile); + cfile = __find_readable_file(CIFS_I(inode), + FIND_NO_PENDING_DELETE | + FIND_OPEN_FLAGS, + file->f_flags); + rc = cfile ? 0 : -ENOENT; } if (rc == 0) { - unsigned int oflags = file->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); - unsigned int cflags = cfile->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); - - if (cifs_convert_flags(oflags, 0) == cifs_convert_flags(cflags, 0) && - (oflags & (O_SYNC|O_DIRECT)) == (cflags & (O_SYNC|O_DIRECT))) { - file->private_data = cfile; - spin_lock(&CIFS_I(inode)->deferred_lock); - cifs_del_deferred_close(cfile); - spin_unlock(&CIFS_I(inode)->deferred_lock); - goto use_cache; - } - _cifsFileInfo_put(cfile, true, false); - } else { - /* hard link on the defeered close file */ - rc = cifs_get_hardlink_path(tcon, inode, file); - if (rc) - cifs_close_deferred_file(CIFS_I(inode)); + file->private_data = cfile; + spin_lock(&CIFS_I(inode)->deferred_lock); + cifs_del_deferred_close(cfile); + spin_unlock(&CIFS_I(inode)->deferred_lock); + goto use_cache; } + /* hard link on the deferred close file */ + rc = cifs_get_hardlink_path(tcon, inode, file); + if (rc) + cifs_close_deferred_file(CIFS_I(inode)); if (server->oplocks) oplock = REQ_OPLOCK; @@ -1209,7 +1194,7 @@ cifs_relock_file(struct cifsFileInfo *cfile) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY - struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(cinode); #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING); @@ -1222,7 +1207,7 @@ cifs_relock_file(struct cifsFileInfo *cfile) #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0)) rc = cifs_push_posix_locks(cfile); else #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ @@ -1318,13 +1303,8 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) rdwr_for_fscache = 1; desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache); - - /* O_SYNC also has bit for O_DSYNC so following check picks up either */ - if (cfile->f_flags & O_SYNC) - create_options |= CREATE_WRITE_THROUGH; - - if (cfile->f_flags & O_DIRECT) - create_options |= CREATE_NO_BUFFER; + create_options |= cifs_open_create_options(cfile->f_flags, + create_options); if (server->ops->get_lease_key) server->ops->get_lease_key(inode, &cfile->fid); @@ -2011,7 +1991,7 @@ cifs_push_locks(struct cifsFileInfo *cfile) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY - struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(cinode); #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ /* we are going to update can_cache_brlcks here - need a write access */ @@ -2024,7 +2004,7 @@ cifs_push_locks(struct cifsFileInfo *cfile) #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0)) rc = cifs_push_posix_locks(cfile); else #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ @@ -2428,11 +2408,11 @@ int cifs_flock(struct file *file, int cmd, struct 
file_lock *fl) cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag, tcon->ses->server); - cifs_sb = CIFS_FILE_SB(file); + cifs_sb = CIFS_SB(file); if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0)) posix_lck = true; if (!lock && !unlock) { @@ -2455,14 +2435,14 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl) int cifs_lock(struct file *file, int cmd, struct file_lock *flock) { - int rc, xid; + struct cifs_sb_info *cifs_sb = CIFS_SB(file); + struct cifsFileInfo *cfile; int lock = 0, unlock = 0; bool wait_flag = false; bool posix_lck = false; - struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; - struct cifsFileInfo *cfile; __u32 type; + int rc, xid; rc = -EACCES; xid = get_xid(); @@ -2477,12 +2457,11 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, tcon->ses->server); - cifs_sb = CIFS_FILE_SB(file); set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags); if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0)) posix_lck = true; /* * BB add code here to normalize offset and length to account for @@ -2529,14 +2508,37 @@ void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t netfs_write_subrequest_terminated(&wdata->subreq, result); } -struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, - bool fsuid_only) +static bool open_flags_match(struct cifsInodeInfo *cinode, + unsigned int oflags, unsigned int cflags) { + struct inode *inode = &cinode->netfs.inode; + int crw = 0, orw = 0; + + oflags &= ~(O_CREAT | O_EXCL | O_TRUNC); + cflags &= ~(O_CREAT | O_EXCL | O_TRUNC); + + if (cifs_fscache_enabled(inode)) { + if (OPEN_FMODE(cflags) & FMODE_WRITE) + crw = 1; + if (OPEN_FMODE(oflags) & FMODE_WRITE) + orw = 1; + } + if (cifs_convert_flags(oflags, orw) != cifs_convert_flags(cflags, crw)) + return false; + + return (oflags & (O_SYNC | O_DIRECT)) == (cflags & (O_SYNC | O_DIRECT)); +} + +struct cifsFileInfo *__find_readable_file(struct cifsInodeInfo *cifs_inode, + unsigned int find_flags, + unsigned int open_flags) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode); + bool fsuid_only = find_flags & FIND_FSUID_ONLY; struct cifsFileInfo *open_file = NULL; - struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); /* only filter by fsuid on multiuser mounts */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_MULTIUSER)) fsuid_only = false; spin_lock(&cifs_inode->open_file_lock); @@ -2546,6 +2548,13 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) continue; + if ((find_flags & FIND_NO_PENDING_DELETE) && + open_file->status_file_deleted) + continue; + if ((find_flags & FIND_OPEN_FLAGS) && + !open_flags_match(cifs_inode, open_flags, + open_file->f_flags)) + continue; if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) { if ((!open_file->invalidHandle)) { /* found a good file */ @@ -2564,17 +2573,17 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, } /* Return -EBADF if no handle is found 
and general rc otherwise */ -int -cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, - struct cifsFileInfo **ret_file) +int __cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, + unsigned int find_flags, unsigned int open_flags, + struct cifsFileInfo **ret_file) { struct cifsFileInfo *open_file, *inv_file = NULL; struct cifs_sb_info *cifs_sb; bool any_available = false; int rc = -EBADF; unsigned int refind = 0; - bool fsuid_only = flags & FIND_WR_FSUID_ONLY; - bool with_delete = flags & FIND_WR_WITH_DELETE; + bool fsuid_only = find_flags & FIND_FSUID_ONLY; + bool with_delete = find_flags & FIND_WITH_DELETE; *ret_file = NULL; /* @@ -2589,10 +2598,10 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, return rc; } - cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); + cifs_sb = CIFS_SB(cifs_inode); /* only filter by fsuid on multiuser mounts */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_MULTIUSER)) fsuid_only = false; spin_lock(&cifs_inode->open_file_lock); @@ -2608,9 +2617,13 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, continue; if (with_delete && !(open_file->fid.access & DELETE)) continue; - if ((flags & FIND_WR_NO_PENDING_DELETE) && + if ((find_flags & FIND_NO_PENDING_DELETE) && open_file->status_file_deleted) continue; + if ((find_flags & FIND_OPEN_FLAGS) && + !open_flags_match(cifs_inode, open_flags, + open_file->f_flags)) + continue; if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { if (!open_file->invalidHandle) { /* found a good writable file */ @@ -2727,17 +2740,7 @@ cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, cinode = CIFS_I(d_inode(cfile->dentry)); spin_unlock(&tcon->open_file_lock); free_dentry_path(page); - *ret_file = find_readable_file(cinode, 0); - if (*ret_file) { - spin_lock(&cinode->open_file_lock); - if ((*ret_file)->status_file_deleted) { - spin_unlock(&cinode->open_file_lock); - cifsFileInfo_put(*ret_file); - *ret_file = NULL; - } else { - spin_unlock(&cinode->open_file_lock); - } - } + *ret_file = find_readable_file(cinode, FIND_ANY); return *ret_file ? 
0 : -ENOENT; } @@ -2787,7 +2790,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct TCP_Server_Info *server; struct cifsFileInfo *smbfile = file->private_data; struct inode *inode = file_inode(file); - struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); + struct cifs_sb_info *cifs_sb = CIFS_SB(file); rc = file_write_and_wait_range(file, start, end); if (rc) { @@ -2801,7 +2804,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) file, datasync); tcon = tlink_tcon(smbfile->tlink); - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOSSYNC)) { server = tcon->ses->server; if (server->ops->flush == NULL) { rc = -ENOSYS; @@ -2809,7 +2812,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) } if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { - smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); + smbfile = find_writable_file(CIFS_I(inode), FIND_ANY); if (smbfile) { rc = server->ops->flush(xid, tcon, &smbfile->fid); cifsFileInfo_put(smbfile); @@ -2853,7 +2856,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file->f_mapping->host; struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); ssize_t rc; rc = netfs_start_io_write(inode); @@ -2870,7 +2873,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) if (rc <= 0) goto out; - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) && + if ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) && (cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), server->vals->exclusive_lock_type, 0, NULL, CIFS_WRITE_OP))) { @@ -2893,7 +2896,7 @@ cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); struct cifsInodeInfo *cinode = CIFS_I(inode); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); struct cifsFileInfo *cfile = (struct cifsFileInfo *) iocb->ki_filp->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); @@ -2906,7 +2909,7 @@ cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) if (CIFS_CACHE_WRITE(cinode)) { if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { + ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0)) { written = netfs_file_write_iter(iocb, from); goto out; } @@ -2994,7 +2997,7 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = file_inode(iocb->ki_filp); struct cifsInodeInfo *cinode = CIFS_I(inode); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); struct cifsFileInfo *cfile = (struct cifsFileInfo *) iocb->ki_filp->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); @@ -3011,7 +3014,7 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) if (!CIFS_CACHE_READ(cinode)) return netfs_unbuffered_read_iter(iocb, to); - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) { + if ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NOPOSIXBRL) == 0) { if (iocb->ki_flags & IOCB_DIRECT) return netfs_unbuffered_read_iter(iocb, to); return netfs_buffered_read_iter(iocb, to); @@ -3130,10 +3133,9 @@ bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 
end_of_file, if (is_inode_writable(cifsInode) || ((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) { /* This inode is open for write at least once */ - struct cifs_sb_info *cifs_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(cifsInode); - cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_DIRECT_IO) { /* since no page cache to corrupt on directio we can change size safely */ return true; @@ -3165,12 +3167,6 @@ void cifs_oplock_break(struct work_struct *work) __u64 persistent_fid, volatile_fid; __u16 net_fid; - /* - * Hold a reference to the superblock to prevent it and its inodes from - * being freed while we are accessing cinode. Otherwise, _cifsFileInfo_put() - * may release the last reference to the sb and trigger inode eviction. - */ - cifs_sb_active(sb); wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, TASK_UNINTERRUPTIBLE); @@ -3181,7 +3177,7 @@ void cifs_oplock_break(struct work_struct *work) server = tcon->ses->server; scoped_guard(spinlock, &cinode->open_file_lock) { - unsigned int sbflags = cifs_sb->mnt_cifs_flags; + unsigned int sbflags = cifs_sb_flags(cifs_sb); server->ops->downgrade_oplock(server, cinode, cfile->oplock_level, cfile->oplock_epoch, &purge_cache); @@ -3255,7 +3251,6 @@ void cifs_oplock_break(struct work_struct *work) cifs_put_tlink(tlink); out: cifs_done_oplock_break(cinode); - cifs_sb_deactive(sb); } static int cifs_swap_activate(struct swap_info_struct *sis,
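The new open_flags_match() above centralizes the "can this cached handle stand in for this open?" test that cifs_open()/cifs_atomic_open() previously open-coded. A simplified userspace approximation of that test (the kernel version also folds fscache-driven read/write widening through cifs_convert_flags(), elided here):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdbool.h>
    #include <stdio.h>

    static bool flags_compatible(unsigned int a, unsigned int b)
    {
            /* Creation-time flags are irrelevant once the file exists. */
            const unsigned int ignore = O_CREAT | O_EXCL | O_TRUNC;

            a &= ~ignore;
            b &= ~ignore;
            if ((a & O_ACCMODE) != (b & O_ACCMODE))
                    return false;       /* access mode must agree */
            /* write-through and uncached semantics must also agree */
            return (a & (O_SYNC | O_DIRECT)) == (b & (O_SYNC | O_DIRECT));
    }

    int main(void)
    {
            printf("%d\n", flags_compatible(O_RDWR | O_CREAT, O_RDWR));  /* 1 */
            printf("%d\n", flags_compatible(O_RDWR, O_RDWR | O_DIRECT)); /* 0 */
            return 0;
    }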
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 09fe749..a46764c 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c
@@ -588,6 +588,10 @@ char *cifs_sanitize_prepath(char *prepath, gfp_t gfp) while (IS_DELIM(*cursor1)) cursor1++; + /* exit in case of only delimiters */ + if (!*cursor1) + return NULL; + /* copy the first letter */ *cursor2 = *cursor1; @@ -1997,7 +2001,7 @@ int smb3_init_fs_context(struct fs_context *fc) ctx->backupuid_specified = false; /* no backup intent for a user */ ctx->backupgid_specified = false; /* no backup intent for a group */ - ctx->retrans = 1; + ctx->retrans = 0; ctx->reparse_type = CIFS_REPARSE_TYPE_DEFAULT; ctx->symlink_type = CIFS_SYMLINK_TYPE_DEFAULT; ctx->nonativesocket = 0; @@ -2062,161 +2066,160 @@ smb3_cleanup_fs_context(struct smb3_fs_context *ctx) kfree(ctx); } -void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb) +unsigned int smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb) { + unsigned int sbflags = cifs_sb_flags(cifs_sb); struct smb3_fs_context *ctx = cifs_sb->ctx; if (ctx->nodfs) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_DFS; + sbflags |= CIFS_MOUNT_NO_DFS; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_DFS; + sbflags &= ~CIFS_MOUNT_NO_DFS; if (ctx->noperm) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; + sbflags |= CIFS_MOUNT_NO_PERM; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_PERM; + sbflags &= ~CIFS_MOUNT_NO_PERM; if (ctx->setuids) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID; + sbflags |= CIFS_MOUNT_SET_UID; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SET_UID; + sbflags &= ~CIFS_MOUNT_SET_UID; if (ctx->setuidfromacl) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UID_FROM_ACL; + sbflags |= CIFS_MOUNT_UID_FROM_ACL; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_UID_FROM_ACL; + sbflags &= ~CIFS_MOUNT_UID_FROM_ACL; if (ctx->server_ino) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM; + sbflags |= CIFS_MOUNT_SERVER_INUM; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; + sbflags &= ~CIFS_MOUNT_SERVER_INUM; if (ctx->remap) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SFM_CHR; + sbflags |= CIFS_MOUNT_MAP_SFM_CHR; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MAP_SFM_CHR; + sbflags &= ~CIFS_MOUNT_MAP_SFM_CHR; if (ctx->sfu_remap) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR; + sbflags |= CIFS_MOUNT_MAP_SPECIAL_CHR; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MAP_SPECIAL_CHR; + sbflags &= ~CIFS_MOUNT_MAP_SPECIAL_CHR; if (ctx->no_xattr) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR; + sbflags |= CIFS_MOUNT_NO_XATTR; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_XATTR; + sbflags &= ~CIFS_MOUNT_NO_XATTR; if (ctx->sfu_emul) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL; + sbflags |= CIFS_MOUNT_UNX_EMUL; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_UNX_EMUL; + sbflags &= ~CIFS_MOUNT_UNX_EMUL; if (ctx->nobrl) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL; + sbflags |= CIFS_MOUNT_NO_BRL; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_BRL; + sbflags &= ~CIFS_MOUNT_NO_BRL; if (ctx->nohandlecache) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_HANDLE_CACHE; + sbflags |= CIFS_MOUNT_NO_HANDLE_CACHE; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_HANDLE_CACHE; + sbflags &= ~CIFS_MOUNT_NO_HANDLE_CACHE; if (ctx->nostrictsync) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC; + sbflags |= CIFS_MOUNT_NOSSYNC; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NOSSYNC; + sbflags &= ~CIFS_MOUNT_NOSSYNC; if (ctx->mand_lock) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL; + sbflags |= CIFS_MOUNT_NOPOSIXBRL; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NOPOSIXBRL; + sbflags &= ~CIFS_MOUNT_NOPOSIXBRL; if (ctx->rwpidforward) - 
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD; + sbflags |= CIFS_MOUNT_RWPIDFORWARD; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_RWPIDFORWARD; + sbflags &= ~CIFS_MOUNT_RWPIDFORWARD; if (ctx->mode_ace) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MODE_FROM_SID; + sbflags |= CIFS_MOUNT_MODE_FROM_SID; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MODE_FROM_SID; + sbflags &= ~CIFS_MOUNT_MODE_FROM_SID; if (ctx->cifs_acl) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL; + sbflags |= CIFS_MOUNT_CIFS_ACL; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_ACL; + sbflags &= ~CIFS_MOUNT_CIFS_ACL; if (ctx->backupuid_specified) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPUID; + sbflags |= CIFS_MOUNT_CIFS_BACKUPUID; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_BACKUPUID; + sbflags &= ~CIFS_MOUNT_CIFS_BACKUPUID; if (ctx->backupgid_specified) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPGID; + sbflags |= CIFS_MOUNT_CIFS_BACKUPGID; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_BACKUPGID; + sbflags &= ~CIFS_MOUNT_CIFS_BACKUPGID; if (ctx->override_uid) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID; + sbflags |= CIFS_MOUNT_OVERR_UID; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_OVERR_UID; + sbflags &= ~CIFS_MOUNT_OVERR_UID; if (ctx->override_gid) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID; + sbflags |= CIFS_MOUNT_OVERR_GID; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_OVERR_GID; + sbflags &= ~CIFS_MOUNT_OVERR_GID; if (ctx->dynperm) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM; + sbflags |= CIFS_MOUNT_DYNPERM; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_DYNPERM; + sbflags &= ~CIFS_MOUNT_DYNPERM; if (ctx->fsc) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_FSCACHE; + sbflags |= CIFS_MOUNT_FSCACHE; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_FSCACHE; + sbflags &= ~CIFS_MOUNT_FSCACHE; if (ctx->multiuser) - cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER | - CIFS_MOUNT_NO_PERM); + sbflags |= CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_NO_PERM; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MULTIUSER; + sbflags &= ~CIFS_MOUNT_MULTIUSER; if (ctx->strict_io) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO; + sbflags |= CIFS_MOUNT_STRICT_IO; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_STRICT_IO; + sbflags &= ~CIFS_MOUNT_STRICT_IO; if (ctx->direct_io) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; + sbflags |= CIFS_MOUNT_DIRECT_IO; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_DIRECT_IO; + sbflags &= ~CIFS_MOUNT_DIRECT_IO; if (ctx->mfsymlinks) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MF_SYMLINKS; + sbflags |= CIFS_MOUNT_MF_SYMLINKS; else - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MF_SYMLINKS; - if (ctx->mfsymlinks) { - if (ctx->sfu_emul) { - /* - * Our SFU ("Services for Unix") emulation allows now - * creating new and reading existing SFU symlinks. - * Older Linux kernel versions were not able to neither - * read existing nor create new SFU symlinks. But - * creating and reading SFU style mknod and FIFOs was - * supported for long time. When "mfsymlinks" and - * "sfu" are both enabled at the same time, it allows - * reading both types of symlinks, but will only create - * them with mfsymlinks format. This allows better - * Apple compatibility, compatibility with older Linux - * kernel clients (probably better for Samba too) - * while still recognizing old Windows style symlinks. 
- */ - cifs_dbg(VFS, "mount options mfsymlinks and sfu both enabled\n"); - } - } - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SHUTDOWN; + sbflags &= ~CIFS_MOUNT_MF_SYMLINKS; - return; + if (ctx->mfsymlinks && ctx->sfu_emul) { + /* + * Our SFU ("Services for Unix") emulation now allows + * creating new and reading existing SFU symlinks. + * Older Linux kernel versions could neither read + * existing nor create new SFU symlinks, although + * creating and reading SFU-style mknod and FIFOs has + * long been supported. When "mfsymlinks" and "sfu" are + * both enabled at the same time, both types of symlinks + * can be read, but new ones are created only in the + * mfsymlinks format. This allows better Apple + * compatibility and compatibility with older Linux + * kernel clients (probably better for Samba too), + * while still recognizing old Windows style symlinks. + */ + cifs_dbg(VFS, "mount options mfsymlinks and sfu both enabled\n"); + } + sbflags &= ~CIFS_MOUNT_SHUTDOWN; + atomic_set(&cifs_sb->mnt_cifs_flags, sbflags); + return sbflags; }
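A note on the conversion above: smb3_update_mnt_flags() now builds the whole flag word in a local variable and publishes it with a single atomic_set(), returning the snapshot so callers can keep testing bits without re-reading. The cifs_sb_flags() accessor itself is not part of this hunk; a minimal sketch of what it presumably looks like, assuming mnt_cifs_flags has been converted to an atomic_t in cifsglob.h:

#include <linux/atomic.h>

/* assumed layout: mnt_cifs_flags was a plain unsigned int before */
struct cifs_sb_info_sketch {
	atomic_t mnt_cifs_flags;
};

/* readers take one stable snapshot and test bits against it */
static inline unsigned int cifs_sb_flags_sketch(struct cifs_sb_info_sketch *sb)
{
	return (unsigned int)atomic_read(&sb->mnt_cifs_flags);
}

The snapshot style matters: testing several bits against one local copy, as the rewritten callers throughout this series do, guarantees a consistent view even if another thread updates the word between the tests.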
diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 49b2a6f..0b64fcb 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h
@@ -374,7 +374,7 @@ int smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx); int smb3_sync_session_ctx_passwords(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses); -void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb); +unsigned int smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb); /* * max deferred close timeout (jiffies) - 2^30
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index d4d3cfe..888f9e3 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c
@@ -40,32 +40,33 @@ static void cifs_set_netfs_context(struct inode *inode) static void cifs_set_ops(struct inode *inode) { - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); struct netfs_inode *ictx = netfs_inode(inode); + unsigned int sbflags = cifs_sb_flags(cifs_sb); switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_op = &cifs_file_inode_ops; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { + if (sbflags & CIFS_MOUNT_DIRECT_IO) { set_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + if (sbflags & CIFS_MOUNT_NO_BRL) inode->i_fop = &cifs_file_direct_nobrl_ops; else inode->i_fop = &cifs_file_direct_ops; - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + } else if (sbflags & CIFS_MOUNT_STRICT_IO) { + if (sbflags & CIFS_MOUNT_NO_BRL) inode->i_fop = &cifs_file_strict_nobrl_ops; else inode->i_fop = &cifs_file_strict_ops; - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + } else if (sbflags & CIFS_MOUNT_NO_BRL) inode->i_fop = &cifs_file_nobrl_ops; else { /* not direct, send byte range locks */ inode->i_fop = &cifs_file_ops; } /* check if server can support readahead */ - if (cifs_sb_master_tcon(cifs_sb)->ses->server->max_read < - PAGE_SIZE + MAX_CIFS_HDR_SIZE) + if (tcon->ses->server->max_read < PAGE_SIZE + MAX_CIFS_HDR_SIZE) inode->i_data.a_ops = &cifs_addr_ops_smallbuf; else inode->i_data.a_ops = &cifs_addr_ops; @@ -194,8 +195,8 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr, inode->i_gid = fattr->cf_gid; /* if dynperm is set, don't clobber existing mode */ - if (inode_state_read(inode) & I_NEW || - !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) + if ((inode_state_read(inode) & I_NEW) || + !(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_DYNPERM)) inode->i_mode = fattr->cf_mode; cifs_i->cifsAttrs = fattr->cf_cifsattrs; @@ -218,13 +219,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr, */ if (is_size_safe_to_change(cifs_i, fattr->cf_eof, from_readdir)) { i_size_write(inode, fattr->cf_eof); - - /* - * i_blocks is not related to (i_size / i_blksize), - * but instead 512 byte (2**9) size is required for - * calculating num blocks. - */ - inode->i_blocks = (512 - 1 + fattr->cf_bytes) >> 9; + inode->i_blocks = CIFS_INO_BLOCKS(fattr->cf_bytes); } if (S_ISLNK(fattr->cf_mode) && fattr->cf_symlink_target) { @@ -248,10 +243,8 @@ cifs_fill_uniqueid(struct super_block *sb, struct cifs_fattr *fattr) { struct cifs_sb_info *cifs_sb = CIFS_SB(sb); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) - return; - - fattr->cf_uniqueid = iunique(sb, ROOT_I); + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM)) + fattr->cf_uniqueid = iunique(sb, ROOT_I); } /* Fill a cifs_fattr struct with info from FILE_UNIX_BASIC_INFO. 
*/ @@ -259,6 +252,8 @@ void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, struct cifs_sb_info *cifs_sb) { + unsigned int sbflags; + memset(fattr, 0, sizeof(*fattr)); fattr->cf_uniqueid = le64_to_cpu(info->UniqueId); fattr->cf_bytes = le64_to_cpu(info->NumOfBytes); @@ -317,8 +312,9 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, break; } + sbflags = cifs_sb_flags(cifs_sb); fattr->cf_uid = cifs_sb->ctx->linux_uid; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) { + if (!(sbflags & CIFS_MOUNT_OVERR_UID)) { u64 id = le64_to_cpu(info->Uid); if (id < ((uid_t)-1)) { kuid_t uid = make_kuid(&init_user_ns, id); @@ -328,7 +324,7 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, } fattr->cf_gid = cifs_sb->ctx->linux_gid; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) { + if (!(sbflags & CIFS_MOUNT_OVERR_GID)) { u64 id = le64_to_cpu(info->Gid); if (id < ((gid_t)-1)) { kgid_t gid = make_kgid(&init_user_ns, id); @@ -382,7 +378,7 @@ static int update_inode_info(struct super_block *sb, * * If file type or uniqueid is different, return error. */ - if (unlikely((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) && + if (unlikely((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM) && CIFS_I(*inode)->uniqueid != fattr->cf_uniqueid)) { CIFS_I(*inode)->time = 0; /* force reval */ return -ESTALE; @@ -468,7 +464,7 @@ static int cifs_get_unix_fattr(const unsigned char *full_path, cifs_fill_uniqueid(sb, fattr); /* check for Minshall+French symlinks */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_MF_SYMLINKS) { tmprc = check_mf_symlink(xid, tcon, cifs_sb, fattr, full_path); cifs_dbg(FYI, "check_mf_symlink: %d\n", tmprc); } @@ -1081,7 +1077,7 @@ cifs_backup_query_path_info(int xid, else if ((tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find) == 0) info.info_level = SMB_FIND_FILE_INFO_STANDARD; - else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) + else if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM) info.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; else /* no srvino useful for fallback to some netapp */ info.info_level = SMB_FIND_FILE_DIRECTORY_INFO; @@ -1109,7 +1105,7 @@ static void cifs_set_fattr_ino(int xid, struct cifs_tcon *tcon, struct super_blo struct TCP_Server_Info *server = tcon->ses->server; int rc; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM)) { if (*inode) fattr->cf_uniqueid = CIFS_I(*inode)->uniqueid; else @@ -1263,14 +1259,15 @@ static int cifs_get_fattr(struct cifs_open_info_data *data, struct inode **inode, const char *full_path) { - struct cifs_open_info_data tmp_data = {}; - struct cifs_tcon *tcon; - struct TCP_Server_Info *server; - struct tcon_link *tlink; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_open_info_data tmp_data = {}; void *smb1_backup_rsp_buf = NULL; - int rc = 0; + struct TCP_Server_Info *server; + struct cifs_tcon *tcon; + struct tcon_link *tlink; + unsigned int sbflags; int tmprc = 0; + int rc = 0; tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) @@ -1370,16 +1367,17 @@ static int cifs_get_fattr(struct cifs_open_info_data *data, #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY handle_mnt_opt: #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + sbflags = cifs_sb_flags(cifs_sb); /* query for SFU type info if supported and needed */ if ((fattr->cf_cifsattrs & ATTR_SYSTEM) && - (cifs_sb->mnt_cifs_flags & 
CIFS_MOUNT_UNX_EMUL)) { + (sbflags & CIFS_MOUNT_UNX_EMUL)) { tmprc = cifs_sfu_type(fattr, full_path, cifs_sb, xid); if (tmprc) cifs_dbg(FYI, "cifs_sfu_type failed: %d\n", tmprc); } /* fill in 0777 bits from ACL */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) { + if (sbflags & CIFS_MOUNT_MODE_FROM_SID) { rc = cifs_acl_to_fattr(cifs_sb, fattr, *inode, true, full_path, fid); if (rc == -EREMOTE) @@ -1389,7 +1387,7 @@ static int cifs_get_fattr(struct cifs_open_info_data *data, __func__, rc); goto out; } - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { + } else if (sbflags & CIFS_MOUNT_CIFS_ACL) { rc = cifs_acl_to_fattr(cifs_sb, fattr, *inode, false, full_path, fid); if (rc == -EREMOTE) @@ -1399,7 +1397,7 @@ static int cifs_get_fattr(struct cifs_open_info_data *data, __func__, rc); goto out; } - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) + } else if (sbflags & CIFS_MOUNT_UNX_EMUL) /* fill in remaining high mode bits e.g. SUID, VTX */ cifs_sfu_mode(fattr, full_path, cifs_sb, xid); else if (!(tcon->posix_extensions)) @@ -1409,7 +1407,7 @@ static int cifs_get_fattr(struct cifs_open_info_data *data, /* check for Minshall+French symlinks */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { + if (sbflags & CIFS_MOUNT_MF_SYMLINKS) { tmprc = check_mf_symlink(xid, tcon, cifs_sb, fattr, full_path); cifs_dbg(FYI, "check_mf_symlink: %d\n", tmprc); } @@ -1509,7 +1507,7 @@ static int smb311_posix_get_fattr(struct cifs_open_info_data *data, * 3. Tweak fattr based on mount options */ /* check for Minshall+French symlinks */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_MF_SYMLINKS) { tmprc = check_mf_symlink(xid, tcon, cifs_sb, fattr, full_path); cifs_dbg(FYI, "check_mf_symlink: %d\n", tmprc); } @@ -1660,7 +1658,7 @@ struct inode *cifs_root_iget(struct super_block *sb) int len; int rc; - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) + if ((cifs_sb_flags(cifs_sb) & CIFS_MOUNT_USE_PREFIX_PATH) && cifs_sb->prepath) { len = strlen(cifs_sb->prepath); path = kzalloc(len + 2 /* leading sep + null */, GFP_KERNEL); @@ -2098,8 +2096,9 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode, const char *full_path, struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon, const unsigned int xid) { - int rc = 0; struct inode *inode = NULL; + unsigned int sbflags; + int rc = 0; if (tcon->posix_extensions) { rc = smb311_posix_get_inode_info(&inode, full_path, @@ -2139,6 +2138,7 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode, if (parent->i_mode & S_ISGID) mode |= S_ISGID; + sbflags = cifs_sb_flags(cifs_sb); #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (tcon->unix_ext) { struct cifs_unix_set_info_args args = { @@ -2148,7 +2148,7 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode, .mtime = NO_CHANGE_64, .device = 0, }; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + if (sbflags & CIFS_MOUNT_SET_UID) { args.uid = current_fsuid(); if (parent->i_mode & S_ISGID) args.gid = parent->i_gid; @@ -2166,14 +2166,14 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode, { #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ struct TCP_Server_Info *server = tcon->ses->server; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && + if (!(sbflags & CIFS_MOUNT_CIFS_ACL) && (mode & S_IWUGO) == 0 && server->ops->mkdir_setinfo) server->ops->mkdir_setinfo(inode, full_path, cifs_sb, tcon, xid); - if (cifs_sb->mnt_cifs_flags & 
CIFS_MOUNT_DYNPERM) + if (sbflags & CIFS_MOUNT_DYNPERM) inode->i_mode = (mode | S_IFDIR); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + if (sbflags & CIFS_MOUNT_SET_UID) { inode->i_uid = current_fsuid(); if (inode->i_mode & S_ISGID) inode->i_gid = parent->i_gid; @@ -2686,7 +2686,7 @@ cifs_dentry_needs_reval(struct dentry *dentry) { struct inode *inode = d_inode(dentry); struct cifsInodeInfo *cifs_i = CIFS_I(inode); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); struct cached_fid *cfid = NULL; @@ -2727,7 +2727,7 @@ cifs_dentry_needs_reval(struct dentry *dentry) } /* hardlinked files w/ noserverino get "special" treatment */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) && + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM) && S_ISREG(inode->i_mode) && inode->i_nlink != 1) return true; @@ -2752,10 +2752,10 @@ cifs_wait_bit_killable(struct wait_bit_key *key, int mode) int cifs_revalidate_mapping(struct inode *inode) { - int rc; struct cifsInodeInfo *cifs_inode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); unsigned long *flags = &cifs_inode->flags; - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + int rc; /* swapfiles are not supposed to be shared */ if (IS_SWAPFILE(inode)) @@ -2768,7 +2768,7 @@ cifs_revalidate_mapping(struct inode *inode) if (test_and_clear_bit(CIFS_INO_INVALID_MAPPING, flags)) { /* for cache=singleclient, do not invalidate */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_RW_CACHE) goto skip_invalidate; cifs_inode->netfs.zero_point = cifs_inode->netfs.remote_i_size; @@ -2892,10 +2892,11 @@ int cifs_revalidate_dentry(struct dentry *dentry) int cifs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { - struct dentry *dentry = path->dentry; - struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(path->dentry); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + struct dentry *dentry = path->dentry; struct inode *inode = d_inode(dentry); + unsigned int sbflags; int rc; if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb)))) @@ -2952,12 +2953,13 @@ int cifs_getattr(struct mnt_idmap *idmap, const struct path *path, * enabled, and the admin hasn't overridden them, set the ownership * to the fsuid/fsgid of the current process. */ - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) && - !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && + sbflags = cifs_sb_flags(cifs_sb); + if ((sbflags & CIFS_MOUNT_MULTIUSER) && + !(sbflags & CIFS_MOUNT_CIFS_ACL) && !tcon->unix_ext) { - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) + if (!(sbflags & CIFS_MOUNT_OVERR_UID)) stat->uid = current_fsuid(); - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) + if (!(sbflags & CIFS_MOUNT_OVERR_GID)) stat->gid = current_fsgid(); } return 0; @@ -2989,7 +2991,7 @@ int cifs_fiemap(struct inode *inode, struct fiemap_extent_info *fei, u64 start, } } - cfile = find_readable_file(cifs_i, false); + cfile = find_readable_file(cifs_i, FIND_ANY); if (cfile == NULL) return -EINVAL; @@ -3007,6 +3009,11 @@ void cifs_setsize(struct inode *inode, loff_t offset) { spin_lock(&inode->i_lock); i_size_write(inode, offset); + /* + * Until we can query the server for actual allocation size, + * this is best estimate we have for blocks allocated for a file. 
+ */ + inode->i_blocks = CIFS_INO_BLOCKS(offset); spin_unlock(&inode->i_lock); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); truncate_pagecache(inode, offset); @@ -3042,7 +3049,7 @@ int cifs_file_set_size(const unsigned int xid, struct dentry *dentry, size, false); cifs_dbg(FYI, "%s: set_file_size: rc = %d\n", __func__, rc); } else { - open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY); + open_file = find_writable_file(cifsInode, FIND_FSUID_ONLY); if (open_file) { tcon = tlink_tcon(open_file->tlink); server = tcon->ses->server; @@ -3079,14 +3086,6 @@ int cifs_file_set_size(const unsigned int xid, struct dentry *dentry, if (rc == 0) { netfs_resize_file(&cifsInode->netfs, size, true); cifs_setsize(inode, size); - /* - * i_blocks is not related to (i_size / i_blksize), but instead - * 512 byte (2**9) size is required for calculating num blocks. - * Until we can query the server for actual allocation size, - * this is best estimate we have for blocks allocated for a file - * Number of blocks must be rounded up so size 1 is not 0 blocks - */ - inode->i_blocks = (512 - 1 + size) >> 9; } return rc; @@ -3102,7 +3101,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) void *page = alloc_dentry_path(); struct inode *inode = d_inode(direntry); struct cifsInodeInfo *cifsInode = CIFS_I(inode); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); struct tcon_link *tlink; struct cifs_tcon *pTcon; struct cifs_unix_set_info_args *args = NULL; @@ -3113,7 +3112,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) xid = get_xid(); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_PERM) attrs->ia_valid |= ATTR_FORCE; rc = setattr_prepare(&nop_mnt_idmap, direntry, attrs); @@ -3211,7 +3210,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) open_file->fid.netfid, open_file->pid); } else { - open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY); + open_file = find_writable_file(cifsInode, FIND_FSUID_ONLY); if (open_file) { pTcon = tlink_tcon(open_file->tlink); rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, @@ -3266,26 +3265,26 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) static int cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) { - unsigned int xid; + struct inode *inode = d_inode(direntry); + struct cifsInodeInfo *cifsInode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); + unsigned int sbflags = cifs_sb_flags(cifs_sb); + struct cifsFileInfo *cfile = NULL; + void *page = alloc_dentry_path(); + __u64 mode = NO_CHANGE_64; kuid_t uid = INVALID_UID; kgid_t gid = INVALID_GID; - struct inode *inode = d_inode(direntry); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - struct cifsInodeInfo *cifsInode = CIFS_I(inode); - struct cifsFileInfo *cfile = NULL; const char *full_path; - void *page = alloc_dentry_path(); - int rc = -EACCES; __u32 dosattr = 0; - __u64 mode = NO_CHANGE_64; - bool posix = cifs_sb_master_tcon(cifs_sb)->posix_extensions; + int rc = -EACCES; + unsigned int xid; xid = get_xid(); cifs_dbg(FYI, "setattr on file %pd attrs->ia_valid 0x%x\n", direntry, attrs->ia_valid); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) + if (sbflags & CIFS_MOUNT_NO_PERM) attrs->ia_valid |= ATTR_FORCE; rc = setattr_prepare(&nop_mnt_idmap, direntry, attrs); @@ -3346,8 +3345,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) if (attrs->ia_valid & ATTR_GID) gid = 
attrs->ia_gid; - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) || - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)) { + if (sbflags & (CIFS_MOUNT_CIFS_ACL | CIFS_MOUNT_MODE_FROM_SID)) { if (uid_valid(uid) || gid_valid(gid)) { mode = NO_CHANGE_64; rc = id_mode_to_cifs_acl(inode, full_path, &mode, @@ -3358,9 +3356,9 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) goto cifs_setattr_exit; } } - } else - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) + } else if (!(sbflags & CIFS_MOUNT_SET_UID)) { attrs->ia_valid &= ~(ATTR_UID | ATTR_GID); + } /* skip mode change if it's just for clearing setuid/setgid */ if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) @@ -3369,9 +3367,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) if (attrs->ia_valid & ATTR_MODE) { mode = attrs->ia_mode; rc = 0; - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) || - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) || - posix) { + if ((sbflags & (CIFS_MOUNT_CIFS_ACL | CIFS_MOUNT_MODE_FROM_SID)) || + cifs_sb_master_tcon(cifs_sb)->posix_extensions) { rc = id_mode_to_cifs_acl(inode, full_path, &mode, INVALID_UID, INVALID_GID); if (rc) { @@ -3393,7 +3390,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) dosattr = cifsInode->cifsAttrs | ATTR_READONLY; /* fix up mode if we're not using dynperm */ - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) + if ((sbflags & CIFS_MOUNT_DYNPERM) == 0) attrs->ia_mode = inode->i_mode & ~S_IWUGO; } else if ((mode & S_IWUGO) && (cifsInode->cifsAttrs & ATTR_READONLY)) { @@ -3404,7 +3401,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) dosattr |= ATTR_NORMAL; /* reset local inode permissions to normal */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { + if (!(sbflags & CIFS_MOUNT_DYNPERM)) { attrs->ia_mode &= ~(S_IALLUGO); if (S_ISDIR(inode->i_mode)) attrs->ia_mode |= @@ -3413,7 +3410,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) attrs->ia_mode |= cifs_sb->ctx->file_mode; } - } else if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { + } else if (!(sbflags & CIFS_MOUNT_DYNPERM)) { /* ignore mode change - ATTR_READONLY hasn't changed */ attrs->ia_valid &= ~ATTR_MODE; }
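The repeated open-coded i_blocks math in inode.c is now hidden behind CIFS_INO_BLOCKS(). The macro is introduced elsewhere in the series; a plausible definition, matching the `(512 - 1 + bytes) >> 9` expression it replaces, together with a quick sanity check (standard C, runnable in user space):

#include <stdio.h>

/* assumed definition; i_blocks is always counted in 512-byte units */
#define CIFS_INO_BLOCKS(bytes)	(((unsigned long long)(bytes) + 512 - 1) >> 9)

int main(void)
{
	/* round up so a nonzero size never reports zero blocks */
	printf("%llu\n", CIFS_INO_BLOCKS(1));	/* 1 */
	printf("%llu\n", CIFS_INO_BLOCKS(512));	/* 1 */
	printf("%llu\n", CIFS_INO_BLOCKS(513));	/* 2 */
	return 0;
}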
diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index 8dc2651..9afab32 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c
@@ -216,7 +216,7 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) */ case CIFS_GOING_FLAGS_LOGFLUSH: case CIFS_GOING_FLAGS_NOLOGFLUSH: - sbi->mnt_cifs_flags |= CIFS_MOUNT_SHUTDOWN; + atomic_or(CIFS_MOUNT_SHUTDOWN, &sbi->mnt_cifs_flags); goto shutdown_good; default: rc = -EINVAL;
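Single-bit updates like the shutdown case above use the generic atomic bitwise helpers rather than the old non-atomic `|=`/`&= ~`, whose read-modify-write could silently drop a concurrent writer's bits. The two idioms used across these hunks, sketched:

#include <linux/atomic.h>

static void set_flag_sketch(atomic_t *flags, unsigned int bit)
{
	atomic_or(bit, flags);		/* e.g. CIFS_MOUNT_SHUTDOWN */
}

static void clear_flag_sketch(atomic_t *flags, unsigned int bit)
{
	atomic_andnot(bit, flags);	/* e.g. CIFS_MOUNT_SERVER_INUM */
}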
diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index a2f7bfa..434e8fe 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c
@@ -544,14 +544,15 @@ int cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, struct dentry *direntry, const char *symname) { - int rc = -EOPNOTSUPP; - unsigned int xid; - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); + struct inode *newinode = NULL; struct tcon_link *tlink; struct cifs_tcon *pTcon; const char *full_path; + int rc = -EOPNOTSUPP; + unsigned int sbflags; + unsigned int xid; void *page; - struct inode *newinode = NULL; if (unlikely(cifs_forced_shutdown(cifs_sb))) return smb_EIO(smb_eio_trace_forced_shutdown); @@ -580,6 +581,7 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, cifs_dbg(FYI, "symname is %s\n", symname); /* BB what if DFS and this volume is on different share? BB */ + sbflags = cifs_sb_flags(cifs_sb); rc = -EOPNOTSUPP; switch (cifs_symlink_type(cifs_sb)) { case CIFS_SYMLINK_TYPE_UNIX: @@ -594,14 +596,14 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, break; case CIFS_SYMLINK_TYPE_MFSYMLINKS: - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { + if (sbflags & CIFS_MOUNT_MF_SYMLINKS) { rc = create_mf_symlink(xid, pTcon, cifs_sb, full_path, symname); } break; case CIFS_SYMLINK_TYPE_SFU: - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { + if (sbflags & CIFS_MOUNT_UNX_EMUL) { rc = __cifs_sfu_make_node(xid, inode, direntry, pTcon, full_path, S_IFLNK, 0, symname);
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 22cde46..2aff1ca 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c
@@ -28,6 +28,11 @@ #include "fs_context.h" #include "cached_dir.h" +struct tcon_list { + struct list_head entry; + struct cifs_tcon *tcon; +}; + /* The xid serves as a useful identifier for each incoming vfs request, in a similar way to the mid which is useful to track each sent smb, and CurrentXid can also provide a running counter (although it @@ -275,13 +280,15 @@ dump_smb(void *buf, int smb_buf_length) void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { + unsigned int sbflags = cifs_sb_flags(cifs_sb); + + if (sbflags & CIFS_MOUNT_SERVER_INUM) { struct cifs_tcon *tcon = NULL; if (cifs_sb->master_tlink) tcon = cifs_sb_master_tcon(cifs_sb); - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; + atomic_andnot(CIFS_MOUNT_SERVER_INUM, &cifs_sb->mnt_cifs_flags); cifs_sb->mnt_cifs_serverino_autodisabled = true; cifs_dbg(VFS, "Autodisabling the use of server inode numbers on %s\n", tcon ? tcon->tree_name : "new server"); @@ -382,11 +389,13 @@ void cifs_done_oplock_break(struct cifsInodeInfo *cinode) bool backup_cred(struct cifs_sb_info *cifs_sb) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) { + unsigned int sbflags = cifs_sb_flags(cifs_sb); + + if (sbflags & CIFS_MOUNT_CIFS_BACKUPUID) { if (uid_eq(cifs_sb->ctx->backupuid, current_fsuid())) return true; } - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) { + if (sbflags & CIFS_MOUNT_CIFS_BACKUPGID) { if (in_group_p(cifs_sb->ctx->backupgid)) return true; } @@ -550,6 +559,43 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon) } } +void cifs_close_all_deferred_files_sb(struct cifs_sb_info *cifs_sb) +{ + struct rb_root *root = &cifs_sb->tlink_tree; + struct rb_node *node; + struct cifs_tcon *tcon; + struct tcon_link *tlink; + struct tcon_list *tmp_list, *q; + LIST_HEAD(tcon_head); + + spin_lock(&cifs_sb->tlink_tree_lock); + for (node = rb_first(root); node; node = rb_next(node)) { + tlink = rb_entry(node, struct tcon_link, tl_rbnode); + tcon = tlink_tcon(tlink); + if (IS_ERR(tcon)) + continue; + tmp_list = kmalloc_obj(struct tcon_list, GFP_ATOMIC); + if (tmp_list == NULL) + break; + tmp_list->tcon = tcon; + /* Take a reference on tcon to prevent it from being freed */ + spin_lock(&tcon->tc_lock); + ++tcon->tc_count; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_close_defer_files); + spin_unlock(&tcon->tc_lock); + list_add_tail(&tmp_list->entry, &tcon_head); + } + spin_unlock(&cifs_sb->tlink_tree_lock); + + list_for_each_entry_safe(tmp_list, q, &tcon_head, entry) { + cifs_close_all_deferred_files(tmp_list->tcon); + list_del(&tmp_list->entry); + cifs_put_tcon(tmp_list->tcon, netfs_trace_tcon_ref_put_close_defer_files); + kfree(tmp_list); + } +} + void cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, struct dentry *dentry) { @@ -955,7 +1001,7 @@ int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix) convert_delimiter(cifs_sb->prepath, CIFS_DIR_SEP(cifs_sb)); } - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; + atomic_or(CIFS_MOUNT_USE_PREFIX_PATH, &cifs_sb->mnt_cifs_flags); return 0; } @@ -984,7 +1030,7 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, * look up or tcon is not DFS. */ if (strlen(full_path) < 2 || !cifs_sb || - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) || + (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_DFS) || !is_tcon_dfs(tcon)) return 0;
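cifs_close_all_deferred_files_sb() above follows the standard two-phase pattern for draining a spinlock-protected tree: take references and GFP_ATOMIC allocations while holding the lock, then do the sleeping work (closing files, cifs_put_tcon()) after dropping it. A stripped-down illustration with hypothetical names:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct staged_ref {
	struct list_head entry;
	void *obj;			/* reference pinned under the lock */
};

static void drain_sketch(spinlock_t *lock, struct list_head *src,
			 void *(*get)(void *), void (*slow_put)(void *))
{
	struct staged_ref *it, *next;
	LIST_HEAD(staged);

	spin_lock(lock);
	list_for_each_entry(it, src, entry) {
		struct staged_ref *copy = kmalloc(sizeof(*copy), GFP_ATOMIC);

		if (!copy)
			break;			/* best effort, as in the patch */
		copy->obj = get(it->obj);	/* take a ref: must not sleep */
		list_add_tail(&copy->entry, &staged);
	}
	spin_unlock(lock);

	list_for_each_entry_safe(it, next, &staged, entry) {
		slow_put(it->obj);	/* may sleep: lock is no longer held */
		list_del(&it->entry);
		kfree(it);
	}
}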
diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 8615a87..be22bbc 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c
@@ -121,7 +121,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, * want to clobber the existing one with the one that * the readdir code created. */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_SERVER_INUM)) fattr->cf_uniqueid = CIFS_I(inode)->uniqueid; /* @@ -177,6 +177,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) struct cifs_open_info_data data = { .reparse = { .tag = fattr->cf_cifstag, }, }; + unsigned int sbflags; fattr->cf_uid = cifs_sb->ctx->linux_uid; fattr->cf_gid = cifs_sb->ctx->linux_gid; @@ -215,12 +216,12 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) * may look wrong since the inodes may not have timed out by the time * "ls" does a stat() call on them. */ - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) || - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)) + sbflags = cifs_sb_flags(cifs_sb); + if (sbflags & (CIFS_MOUNT_CIFS_ACL | CIFS_MOUNT_MODE_FROM_SID)) fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL && - fattr->cf_cifsattrs & ATTR_SYSTEM) { + if ((sbflags & CIFS_MOUNT_UNX_EMUL) && + (fattr->cf_cifsattrs & ATTR_SYSTEM)) { if (fattr->cf_eof == 0) { fattr->cf_mode &= ~S_IFMT; fattr->cf_mode |= S_IFIFO; @@ -345,13 +346,14 @@ static int _initiate_cifs_search(const unsigned int xid, struct file *file, const char *full_path) { + struct cifs_sb_info *cifs_sb = CIFS_SB(file); + struct tcon_link *tlink = NULL; + struct TCP_Server_Info *server; + struct cifsFileInfo *cifsFile; + struct cifs_tcon *tcon; + unsigned int sbflags; __u16 search_flags; int rc = 0; - struct cifsFileInfo *cifsFile; - struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); - struct tcon_link *tlink = NULL; - struct cifs_tcon *tcon; - struct TCP_Server_Info *server; if (file->private_data == NULL) { tlink = cifs_sb_tlink(cifs_sb); @@ -385,6 +387,7 @@ _initiate_cifs_search(const unsigned int xid, struct file *file, cifs_dbg(FYI, "Full path: %s start at: %lld\n", full_path, file->f_pos); ffirst_retry: + sbflags = cifs_sb_flags(cifs_sb); /* test for Unix extensions */ /* but now check for them on the share/mount not on the SMB session */ /* if (cap_unix(tcon->ses) { */ @@ -395,7 +398,7 @@ _initiate_cifs_search(const unsigned int xid, struct file *file, else if ((tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find) == 0) { cifsFile->srch_inf.info_level = SMB_FIND_FILE_INFO_STANDARD; - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { + } else if (sbflags & CIFS_MOUNT_SERVER_INUM) { cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; } else /* not srvinos - BB fixme add check for backlevel? 
*/ { cifsFile->srch_inf.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO; @@ -411,8 +414,7 @@ _initiate_cifs_search(const unsigned int xid, struct file *file, if (rc == 0) { cifsFile->invalidHandle = false; - } else if ((rc == -EOPNOTSUPP) && - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { + } else if (rc == -EOPNOTSUPP && (sbflags & CIFS_MOUNT_SERVER_INUM)) { cifs_autodisable_serverino(cifs_sb); goto ffirst_retry; } @@ -690,7 +692,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, loff_t first_entry_in_buffer; loff_t index_to_find = pos; struct cifsFileInfo *cfile = file->private_data; - struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); + struct cifs_sb_info *cifs_sb = CIFS_SB(file); struct TCP_Server_Info *server = tcon->ses->server; /* check if index in the buffer */ @@ -955,6 +957,7 @@ static int cifs_filldir(char *find_entry, struct file *file, struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifs_dirent de = { NULL, }; struct cifs_fattr fattr; + unsigned int sbflags; struct qstr name; int rc = 0; @@ -1019,15 +1022,15 @@ static int cifs_filldir(char *find_entry, struct file *file, break; } - if (de.ino && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { + sbflags = cifs_sb_flags(cifs_sb); + if (de.ino && (sbflags & CIFS_MOUNT_SERVER_INUM)) { fattr.cf_uniqueid = de.ino; } else { fattr.cf_uniqueid = iunique(sb, ROOT_I); cifs_autodisable_serverino(cifs_sb); } - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) && - couldbe_mf_symlink(&fattr)) + if ((sbflags & CIFS_MOUNT_MF_SYMLINKS) && couldbe_mf_symlink(&fattr)) /* * trying to get the type and mode can be slow, * so just call those regular files for now, and mark @@ -1058,7 +1061,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) const char *full_path; void *page = alloc_dentry_path(); struct cached_fid *cfid = NULL; - struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); + struct cifs_sb_info *cifs_sb = CIFS_SB(file); xid = get_xid();
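One subtlety in _initiate_cifs_search() above: the sbflags snapshot is taken after the ffirst_retry label, not before it. The -EOPNOTSUPP path calls cifs_autodisable_serverino(), which clears CIFS_MOUNT_SERVER_INUM, so the retry must re-read the flags to pick the fallback info level; a stale snapshot would retry the same level forever. Schematically, with a hypothetical find_first() helper standing in for CIFSFindFirst():

static int initiate_search_sketch(struct cifs_sb_info *cifs_sb)
{
	unsigned int sbflags;
	int rc;

retry:
	sbflags = cifs_sb_flags(cifs_sb);	/* fresh snapshot each pass */
	rc = find_first(sbflags & CIFS_MOUNT_SERVER_INUM);
	if (rc == -EOPNOTSUPP && (sbflags & CIFS_MOUNT_SERVER_INUM)) {
		cifs_autodisable_serverino(cifs_sb);	/* clears the bit */
		goto retry;		/* next pass picks the fallback level */
	}
	return rc;
}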
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index ce9b923..cd1e1ea 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c
@@ -55,17 +55,18 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, const char *full_path, const char *symname) { struct reparse_symlink_data_buffer *buf = NULL; - struct cifs_open_info_data data = {}; - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); const char *symroot = cifs_sb->ctx->symlinkroot; - struct inode *new; - struct kvec iov; - __le16 *path = NULL; - bool directory; - char *symlink_target = NULL; - char *sym = NULL; + struct cifs_open_info_data data = {}; char sep = CIFS_DIR_SEP(cifs_sb); + char *symlink_target = NULL; u16 len, plen, poff, slen; + unsigned int sbflags; + __le16 *path = NULL; + struct inode *new; + char *sym = NULL; + struct kvec iov; + bool directory; int rc = 0; if (strlen(symname) > REPARSE_SYM_PATH_MAX) @@ -83,8 +84,8 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, .symlink_target = symlink_target, }; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && - symroot && symname[0] == '/') { + sbflags = cifs_sb_flags(cifs_sb); + if (!(sbflags & CIFS_MOUNT_POSIX_PATHS) && symroot && symname[0] == '/') { /* * This is a request to create an absolute symlink on the server * which does not support POSIX paths, and expects symlink in @@ -164,7 +165,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, * mask these characters in NT object prefix by '_' and then change * them back. */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') + if (!(sbflags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') sym[0] = sym[1] = sym[2] = sym[5] = '_'; path = cifs_convert_path_to_utf16(sym, cifs_sb); @@ -173,7 +174,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, goto out; } - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') { + if (!(sbflags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') { sym[0] = '\\'; sym[1] = sym[2] = '?'; sym[5] = ':'; @@ -197,7 +198,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, slen = 2 * UniStrnlen((wchar_t *)path, REPARSE_SYM_PATH_MAX); poff = 0; plen = slen; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') { + if (!(sbflags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') { /* * For absolute NT symlinks skip leading "\\??\\" in PrintName as * PrintName is user visible location in DOS/Win32 format (not in NT format). @@ -824,7 +825,7 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, goto out; } - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && + if (!(cifs_sb_flags(cifs_sb) & CIFS_MOUNT_POSIX_PATHS) && symroot && !relative) { /* * This is an absolute symlink from the server which does not
diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h index 570b0d2..0164dc4 100644 --- a/fs/smb/client/reparse.h +++ b/fs/smb/client/reparse.h
@@ -33,7 +33,7 @@ static inline kuid_t wsl_make_kuid(struct cifs_sb_info *cifs_sb, { u32 uid = le32_to_cpu(*(__le32 *)ptr); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_OVERR_UID) return cifs_sb->ctx->linux_uid; return make_kuid(current_user_ns(), uid); } @@ -43,7 +43,7 @@ static inline kgid_t wsl_make_kgid(struct cifs_sb_info *cifs_sb, { u32 gid = le32_to_cpu(*(__le32 *)ptr); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_OVERR_GID) return cifs_sb->ctx->linux_gid; return make_kgid(current_user_ns(), gid); }
diff --git a/fs/smb/client/smb1encrypt.c b/fs/smb/client/smb1encrypt.c index 0dbbce2..bf10fde 100644 --- a/fs/smb/client/smb1encrypt.c +++ b/fs/smb/client/smb1encrypt.c
@@ -11,6 +11,7 @@ #include <linux/fips.h> #include <crypto/md5.h> +#include <crypto/utils.h> #include "cifsproto.h" #include "smb1proto.h" #include "cifs_debug.h" @@ -131,7 +132,7 @@ int cifs_verify_signature(struct smb_rqst *rqst, /* cifs_dump_mem("what we think it should be: ", what_we_think_sig_should_be, 16); */ - if (memcmp(server_response_sig, what_we_think_sig_should_be, 8)) + if (crypto_memneq(server_response_sig, what_we_think_sig_should_be, 8)) return -EACCES; else return 0;
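The memcmp() to crypto_memneq() switch above is a hardening fix: memcmp() returns as soon as it hits a mismatching byte, so verification time can leak how many leading bytes of a forged signature were guessed correctly. crypto_memneq(), declared in the <crypto/utils.h> header this hunk adds, runs in time independent of the data. Conceptually (the real implementation is more careful about compiler optimizations):

static inline int memneq_sketch(const void *a, const void *b, size_t n)
{
	const unsigned char *pa = a, *pb = b;
	unsigned char diff = 0;
	size_t i;

	for (i = 0; i < n; i++)
		diff |= pa[i] ^ pb[i];	/* no data-dependent early exit */

	return diff != 0;		/* nonzero iff the buffers differ */
}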
diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index aed49aa..9694117 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c
@@ -49,6 +49,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, if (!CIFSSMBQFSUnixInfo(xid, tcon)) { __u64 cap = le64_to_cpu(tcon->fsUnixInfo.Capability); + unsigned int sbflags; cifs_dbg(FYI, "unix caps which server supports %lld\n", cap); /* @@ -75,14 +76,16 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, if (cap & CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP) cifs_dbg(VFS, "per-share encryption not supported yet\n"); + if (cifs_sb) + sbflags = cifs_sb_flags(cifs_sb); + cap &= CIFS_UNIX_CAP_MASK; if (ctx && ctx->no_psx_acl) cap &= ~CIFS_UNIX_POSIX_ACL_CAP; else if (CIFS_UNIX_POSIX_ACL_CAP & cap) { cifs_dbg(FYI, "negotiated posix acl support\n"); if (cifs_sb) - cifs_sb->mnt_cifs_flags |= - CIFS_MOUNT_POSIXACL; + sbflags |= CIFS_MOUNT_POSIXACL; } if (ctx && ctx->posix_paths == 0) @@ -90,10 +93,12 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) { cifs_dbg(FYI, "negotiate posix pathnames\n"); if (cifs_sb) - cifs_sb->mnt_cifs_flags |= - CIFS_MOUNT_POSIX_PATHS; + sbflags |= CIFS_MOUNT_POSIX_PATHS; } + if (cifs_sb) + atomic_set(&cifs_sb->mnt_cifs_flags, sbflags); + cifs_dbg(FYI, "Negotiate caps 0x%x\n", (int)cap); #ifdef CONFIG_CIFS_DEBUG2 if (cap & CIFS_UNIX_FCNTL_CAP) @@ -955,7 +960,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, struct cifs_tcon *tcon; /* if the file is already open for write, just use that fileid */ - open_file = find_writable_file(cinode, FIND_WR_FSUID_ONLY); + open_file = find_writable_file(cinode, FIND_FSUID_ONLY); if (open_file) { fid.netfid = open_file->fid.netfid; @@ -1147,7 +1152,7 @@ static int cifs_oplock_response(struct cifs_tcon *tcon, __u64 persistent_fid, __u64 volatile_fid, __u16 net_fid, struct cifsInodeInfo *cinode, unsigned int oplock) { - unsigned int sbflags = CIFS_SB(cinode->netfs.inode.i_sb)->mnt_cifs_flags; + unsigned int sbflags = cifs_sb_flags(CIFS_SB(cinode)); __u8 op; op = !!((oplock & CIFS_CACHE_READ_FLG) || (sbflags & CIFS_MOUNT_RO_CACHE)); @@ -1282,7 +1287,8 @@ cifs_make_node(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, umode_t mode, dev_t dev) { - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode); + unsigned int sbflags = cifs_sb_flags(cifs_sb); struct inode *newinode = NULL; int rc; @@ -1298,7 +1304,7 @@ cifs_make_node(unsigned int xid, struct inode *inode, .mtime = NO_CHANGE_64, .device = dev, }; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + if (sbflags & CIFS_MOUNT_SET_UID) { args.uid = current_fsuid(); args.gid = current_fsgid(); } else { @@ -1317,7 +1323,7 @@ cifs_make_node(unsigned int xid, struct inode *inode, if (rc == 0) d_instantiate(dentry, newinode); return rc; - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { + } else if (sbflags & CIFS_MOUNT_UNX_EMUL) { /* * Check if mounted with mount parm 'sfu' mount parm. * SFU emulation should work with all servers
diff --git a/fs/smb/client/smb1transport.c b/fs/smb/client/smb1transport.c index 38d6d55..53abb29 100644 --- a/fs/smb/client/smb1transport.c +++ b/fs/smb/client/smb1transport.c
@@ -460,7 +460,7 @@ check_smb_hdr(struct smb_hdr *smb) return 0; /* - * Windows NT server returns error resposne (e.g. STATUS_DELETE_PENDING + * Windows NT server returns error response (e.g. STATUS_DELETE_PENDING * or STATUS_OBJECT_NAME_NOT_FOUND or ERRDOS/ERRbadfile or any other) * for some TRANS2 requests without the RESPONSE flag set in header. */
diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c index 1ab41de..ed651c9 100644 --- a/fs/smb/client/smb2file.c +++ b/fs/smb/client/smb2file.c
@@ -72,7 +72,7 @@ int smb2_fix_symlink_target_type(char **target, bool directory, struct cifs_sb_i * POSIX server does not distinguish between symlinks to file and * symlink directory. So nothing is needed to fix on the client side. */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_POSIX_PATHS) return 0; if (!*target)
diff --git a/fs/smb/client/smb2glob.h b/fs/smb/client/smb2glob.h index e56e4d4..19da74b 100644 --- a/fs/smb/client/smb2glob.h +++ b/fs/smb/client/smb2glob.h
@@ -46,4 +46,16 @@ enum smb2_compound_ops { #define END_OF_CHAIN 4 #define RELATED_REQUEST 8 +/* + ***************************************************************** + * Struct definitions go here + ***************************************************************** + */ + +struct status_to_posix_error { + __u32 smb2_status; + int posix_error; + char *status_string; +}; + #endif /* _SMB2_GLOB_H */
diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 195a38f..364bdcf 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c
@@ -325,7 +325,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, cfile->fid.volatile_fid, SMB_FIND_FILE_POSIX_INFO, SMB2_O_INFO_FILE, 0, - sizeof(struct smb311_posix_qinfo *) + + sizeof(struct smb311_posix_qinfo) + (PATH_MAX * 2) + (sizeof(struct smb_sid) * 2), 0, NULL); } else { @@ -335,7 +335,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, COMPOUND_FID, SMB_FIND_FILE_POSIX_INFO, SMB2_O_INFO_FILE, 0, - sizeof(struct smb311_posix_qinfo *) + + sizeof(struct smb311_posix_qinfo) + (PATH_MAX * 2) + (sizeof(struct smb_sid) * 2), 0, NULL); } @@ -1156,7 +1156,7 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name, cifs_i = CIFS_I(inode); dosattrs = cifs_i->cifsAttrs | ATTR_READONLY; data.Attributes = cpu_to_le32(dosattrs); - cifs_get_writable_path(tcon, name, FIND_WR_ANY, &cfile); + cifs_get_writable_path(tcon, name, FIND_ANY, &cfile); oparms = CIFS_OPARMS(cifs_sb, tcon, name, FILE_WRITE_ATTRIBUTES, FILE_CREATE, CREATE_NOT_FILE, ACL_NO_MODE); tmprc = smb2_compound_op(xid, tcon, cifs_sb, name, @@ -1216,6 +1216,7 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, memset(resp_buftype, 0, sizeof(resp_buftype)); memset(rsp_iov, 0, sizeof(rsp_iov)); + memset(open_iov, 0, sizeof(open_iov)); rqst[0].rq_iov = open_iov; rqst[0].rq_nvec = ARRAY_SIZE(open_iov); @@ -1240,14 +1241,15 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, creq = rqst[0].rq_iov[0].iov_base; creq->ShareAccess = FILE_SHARE_DELETE_LE; + memset(&close_iov, 0, sizeof(close_iov)); rqst[1].rq_iov = &close_iov; rqst[1].rq_nvec = 1; rc = SMB2_close_init(tcon, server, &rqst[1], COMPOUND_FID, COMPOUND_FID, false); - smb2_set_related(&rqst[1]); if (rc) goto err_free; + smb2_set_related(&rqst[1]); if (retries) { /* Back-off before retry */ @@ -1334,14 +1336,13 @@ int smb2_rename_path(const unsigned int xid, __u32 co = file_create_options(source_dentry); drop_cached_dir_by_name(xid, tcon, from_name, cifs_sb); - cifs_get_writable_path(tcon, from_name, FIND_WR_WITH_DELETE, &cfile); + cifs_get_writable_path(tcon, from_name, FIND_WITH_DELETE, &cfile); int rc = smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, co, DELETE, SMB2_OP_RENAME, cfile, source_dentry); if (rc == -EINVAL) { cifs_dbg(FYI, "invalid lease key, resending request without lease"); - cifs_get_writable_path(tcon, from_name, - FIND_WR_WITH_DELETE, &cfile); + cifs_get_writable_path(tcon, from_name, FIND_WITH_DELETE, &cfile); rc = smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, co, DELETE, SMB2_OP_RENAME, cfile, NULL); } @@ -1375,7 +1376,7 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, in_iov.iov_base = &eof; in_iov.iov_len = sizeof(eof); - cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); + cifs_get_writable_path(tcon, full_path, FIND_ANY, &cfile); oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, FILE_WRITE_DATA, FILE_OPEN, 0, ACL_NO_MODE); @@ -1385,7 +1386,7 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, cfile, NULL, NULL, dentry); if (rc == -EINVAL) { cifs_dbg(FYI, "invalid lease key, resending request without lease"); - cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); + cifs_get_writable_path(tcon, full_path, FIND_ANY, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, &in_iov, &(int){SMB2_OP_SET_EOF}, 1, @@ -1415,7 +1416,7 @@ smb2_set_file_info(struct inode *inode, const char *full_path, (buf->LastWriteTime == 0) && (buf->ChangeTime == 0)) { if 
(buf->Attributes == 0) goto out; /* would be a no op, no sense sending this */ - cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); + cifs_get_writable_path(tcon, full_path, FIND_ANY, &cfile); } oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, FILE_WRITE_ATTRIBUTES, @@ -1474,7 +1475,7 @@ struct inode *smb2_create_reparse_inode(struct cifs_open_info_data *data, if (tcon->posix_extensions) { cmds[1] = SMB2_OP_POSIX_QUERY_INFO; - cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); + cifs_get_writable_path(tcon, full_path, FIND_ANY, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, in_iov, cmds, 2, cfile, out_iov, out_buftype, NULL); if (!rc) { @@ -1483,7 +1484,7 @@ struct inode *smb2_create_reparse_inode(struct cifs_open_info_data *data, } } else { cmds[1] = SMB2_OP_QUERY_INFO; - cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); + cifs_get_writable_path(tcon, full_path, FIND_ANY, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, in_iov, cmds, 2, cfile, out_iov, out_buftype, NULL); if (!rc) { @@ -1634,13 +1635,12 @@ int smb2_rename_pending_delete(const char *full_path, iov[1].iov_base = utf16_path; iov[1].iov_len = sizeof(*utf16_path) * UniStrlen((wchar_t *)utf16_path); - cifs_get_writable_path(tcon, full_path, FIND_WR_WITH_DELETE, &cfile); + cifs_get_writable_path(tcon, full_path, FIND_WITH_DELETE, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, iov, cmds, num_cmds, cfile, NULL, NULL, dentry); if (rc == -EINVAL) { cifs_dbg(FYI, "invalid lease key, resending request without lease\n"); - cifs_get_writable_path(tcon, full_path, - FIND_WR_WITH_DELETE, &cfile); + cifs_get_writable_path(tcon, full_path, FIND_WITH_DELETE, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, iov, cmds, num_cmds, cfile, NULL, NULL, NULL); }
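The `sizeof(struct smb311_posix_qinfo *)` to `sizeof(struct smb311_posix_qinfo)` change above fixes a classic sizeof-of-pointer bug: the old expression reserved only pointer-sized space (8 bytes on 64-bit) for the fixed part of the query buffer instead of the structure itself, which went unnoticed only because the PATH_MAX and SID padding kept the total large. The bug class in miniature (standard C):

#include <stdio.h>

struct qinfo { char fixed_part[96]; };	/* stand-in for the real struct */

int main(void)
{
	printf("sizeof(struct qinfo *) = %zu\n", sizeof(struct qinfo *)); /* 8 on 64-bit */
	printf("sizeof(struct qinfo)   = %zu\n", sizeof(struct qinfo));   /* 96 */
	return 0;
}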
diff --git a/fs/smb/client/smb2maperror.c b/fs/smb/client/smb2maperror.c index cd03636..2b8782c 100644 --- a/fs/smb/client/smb2maperror.c +++ b/fs/smb/client/smb2maperror.c
@@ -8,7 +8,6 @@ * */ #include <linux/errno.h> -#include "cifsglob.h" #include "cifsproto.h" #include "cifs_debug.h" #include "smb2proto.h" @@ -16,12 +15,6 @@ #include "../common/smb2status.h" #include "trace.h" -struct status_to_posix_error { - __u32 smb2_status; - int posix_error; - char *status_string; -}; - static const struct status_to_posix_error smb2_error_map_table[] = { /* * Automatically generated by the `gen_smb2_mapping` script, @@ -115,10 +108,22 @@ int __init smb2_init_maperror(void) return 0; } -#define SMB_CLIENT_KUNIT_AVAILABLE \ ((IS_MODULE(CONFIG_CIFS) && IS_ENABLED(CONFIG_KUNIT)) || \ (IS_BUILTIN(CONFIG_CIFS) && IS_BUILTIN(CONFIG_KUNIT))) +#if IS_ENABLED(CONFIG_SMB_KUNIT_TESTS) +#define EXPORT_SYMBOL_FOR_SMB_TEST(sym) \ + EXPORT_SYMBOL_FOR_MODULES(sym, "smb2maperror_test") -#if SMB_CLIENT_KUNIT_AVAILABLE && IS_ENABLED(CONFIG_SMB_KUNIT_TESTS) -#include "smb2maperror_test.c" -#endif /* CONFIG_SMB_KUNIT_TESTS */ +/* Redundant forward declaration to silence the missing-prototype build warning. */ +const struct status_to_posix_error *smb2_get_err_map_test(__u32 smb2_status); + +const struct status_to_posix_error *smb2_get_err_map_test(__u32 smb2_status) +{ + return smb2_get_err_map(smb2_status); +} +EXPORT_SYMBOL_FOR_SMB_TEST(smb2_get_err_map_test); + +const struct status_to_posix_error *smb2_error_map_table_test = smb2_error_map_table; +EXPORT_SYMBOL_FOR_SMB_TEST(smb2_error_map_table_test); + +unsigned int smb2_error_map_num = ARRAY_SIZE(smb2_error_map_table); +EXPORT_SYMBOL_FOR_SMB_TEST(smb2_error_map_num); +#endif
diff --git a/fs/smb/client/smb2maperror_test.c b/fs/smb/client/smb2maperror_test.c index 38ea6b8..8c47dea 100644 --- a/fs/smb/client/smb2maperror_test.c +++ b/fs/smb/client/smb2maperror_test.c
@@ -9,13 +9,18 @@ */ #include <kunit/test.h> +#include "smb2glob.h" + +const struct status_to_posix_error *smb2_get_err_map_test(__u32 smb2_status); +extern const struct status_to_posix_error *smb2_error_map_table_test; +extern unsigned int smb2_error_map_num; static void test_cmp_map(struct kunit *test, const struct status_to_posix_error *expect) { const struct status_to_posix_error *result; - result = smb2_get_err_map(expect->smb2_status); + result = smb2_get_err_map_test(expect->smb2_status); KUNIT_EXPECT_PTR_NE(test, NULL, result); KUNIT_EXPECT_EQ(test, expect->smb2_status, result->smb2_status); KUNIT_EXPECT_EQ(test, expect->posix_error, result->posix_error); @@ -26,8 +31,8 @@ static void maperror_test_check_search(struct kunit *test) { unsigned int i; - for (i = 0; i < ARRAY_SIZE(smb2_error_map_table); i++) - test_cmp_map(test, &smb2_error_map_table[i]); + for (i = 0; i < smb2_error_map_num; i++) + test_cmp_map(test, &smb2_error_map_table_test[i]); } static struct kunit_case maperror_test_cases[] = { @@ -43,3 +48,4 @@ static struct kunit_suite maperror_suite = { kunit_test_suite(maperror_suite); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests of SMB2 maperror");
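Context for the two test hunks above: the old scheme #include'd the KUnit test directly into smb2maperror.c so it could see static symbols. The new scheme builds the test as its own module and exports three thin shims (smb2_get_err_map_test() plus the table pointer and entry count), gated by CONFIG_SMB_KUNIT_TESTS and restricted via EXPORT_SYMBOL_FOR_MODULES so only the named test module can link against them. The same pattern for any static helper, sketched with hypothetical names:

static int my_helper(int x)	/* the symbol under test stays static */
{
	return x * 2;
}

#if IS_ENABLED(CONFIG_MY_KUNIT_TESTS)
/* redundant declaration avoids a -Wmissing-prototypes warning */
int my_helper_test(int x);

int my_helper_test(int x)
{
	return my_helper(x);
}
/* linkable only from the "my_test" module, not the whole kernel */
EXPORT_SYMBOL_FOR_MODULES(my_helper_test, "my_test");
#endif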
diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index e19674d..973fce3 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c
@@ -455,17 +455,8 @@ smb2_calc_size(void *buf) __le16 * cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb) { - int len; const char *start_of_path; - __le16 *to; - int map_type; - - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR) - map_type = SFM_MAP_UNI_RSVD; - else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) - map_type = SFU_MAP_UNI_RSVD; - else - map_type = NO_MAP_UNI_RSVD; + int len; /* Windows doesn't allow paths beginning with \ */ if (from[0] == '\\') @@ -479,14 +470,13 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb) } else start_of_path = from; - to = cifs_strndup_to_utf16(start_of_path, PATH_MAX, &len, - cifs_sb->local_nls, map_type); - return to; + return cifs_strndup_to_utf16(start_of_path, PATH_MAX, &len, + cifs_sb->local_nls, cifs_remap(cifs_sb)); } __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode, unsigned int oplock) { - unsigned int sbflags = CIFS_SB(cinode->netfs.inode.i_sb)->mnt_cifs_flags; + unsigned int sbflags = cifs_sb_flags(CIFS_SB(cinode)); __le32 lease = 0; if ((oplock & CIFS_CACHE_WRITE_FLG) || (sbflags & CIFS_MOUNT_RW_CACHE))
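cifs_convert_path_to_utf16() above drops its open-coded map-type selection in favor of cifs_remap(). That helper is not shown in this diff; presumably it encapsulates the same precedence (SFM over SFU, otherwise no remapping of reserved characters), roughly:

static inline int cifs_remap_sketch(struct cifs_sb_info *cifs_sb)
{
	unsigned int sbflags = cifs_sb_flags(cifs_sb);

	if (sbflags & CIFS_MOUNT_MAP_SFM_CHR)
		return SFM_MAP_UNI_RSVD;
	if (sbflags & CIFS_MOUNT_MAP_SPECIAL_CHR)
		return SFU_MAP_UNI_RSVD;
	return NO_MAP_UNI_RSVD;
}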
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index fea9a35..509fcea 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c
@@ -628,6 +628,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, struct smb_sockaddr_in6 *p6; struct cifs_server_iface *info = NULL, *iface = NULL, *niface = NULL; struct cifs_server_iface tmp_iface; + __be16 port; ssize_t bytes_left; size_t next = 0; int nb_iface = 0; @@ -662,6 +663,15 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, goto out; } + spin_lock(&ses->server->srv_lock); + if (ses->server->dstaddr.ss_family == AF_INET) + port = ((struct sockaddr_in *)&ses->server->dstaddr)->sin_port; + else if (ses->server->dstaddr.ss_family == AF_INET6) + port = ((struct sockaddr_in6 *)&ses->server->dstaddr)->sin6_port; + else + port = cpu_to_be16(CIFS_PORT); + spin_unlock(&ses->server->srv_lock); + while (bytes_left >= (ssize_t)sizeof(*p)) { memset(&tmp_iface, 0, sizeof(tmp_iface)); /* default to 1Gbps when link speed is unset */ @@ -682,7 +692,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, memcpy(&addr4->sin_addr, &p4->IPv4Address, 4); /* [MS-SMB2] 2.2.32.5.1.1 Clients MUST ignore these */ - addr4->sin_port = cpu_to_be16(CIFS_PORT); + addr4->sin_port = port; cifs_dbg(FYI, "%s: ipv4 %pI4\n", __func__, &addr4->sin_addr); @@ -696,7 +706,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, /* [MS-SMB2] 2.2.32.5.1.2 Clients MUST ignore these */ addr6->sin6_flowinfo = 0; addr6->sin6_scope_id = 0; - addr6->sin6_port = cpu_to_be16(CIFS_PORT); + addr6->sin6_port = port; cifs_dbg(FYI, "%s: ipv6 %pI6\n", __func__, &addr6->sin6_addr); @@ -986,7 +996,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, rc = -EREMOTE; } if (rc == -EREMOTE && IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) && - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)) + (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_DFS)) rc = -EOPNOTSUPP; goto out; } @@ -1487,6 +1497,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, { struct smb2_file_network_open_info file_inf; struct inode *inode; + u64 asize; int rc; rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid, @@ -1510,14 +1521,9 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, inode_set_atime_to_ts(inode, cifs_NTtimeToUnix(file_inf.LastAccessTime)); - /* - * i_blocks is not related to (i_size / i_blksize), - * but instead 512 byte (2**9) size is required for - * calculating num blocks. - */ - if (le64_to_cpu(file_inf.AllocationSize) > 4096) - inode->i_blocks = - (512 - 1 + le64_to_cpu(file_inf.AllocationSize)) >> 9; + asize = le64_to_cpu(file_inf.AllocationSize); + if (asize > 4096) + inode->i_blocks = CIFS_INO_BLOCKS(asize); /* End of file and Attributes should not have to be updated on close */ spin_unlock(&inode->i_lock); @@ -2194,14 +2200,6 @@ smb2_duplicate_extents(const unsigned int xid, rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false); if (rc) goto duplicate_extents_out; - - /* - * Although also could set plausible allocation size (i_blocks) - * here in addition to setting the file size, in reflink - * it is likely that the target file is sparse. 
Its allocation - * size will be queried on next revalidate, but it is important - * to make sure that file's cached size is updated immediately - */ netfs_resize_file(netfs_inode(inode), dest_off + len, true); cifs_setsize(inode, dest_off + len); } @@ -2691,7 +2689,7 @@ static int smb2_oplock_response(struct cifs_tcon *tcon, __u64 persistent_fid, __u64 volatile_fid, __u16 net_fid, struct cifsInodeInfo *cinode, unsigned int oplock) { - unsigned int sbflags = CIFS_SB(cinode->netfs.inode.i_sb)->mnt_cifs_flags; + unsigned int sbflags = cifs_sb_flags(CIFS_SB(cinode)); __u8 op; if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) @@ -3352,7 +3350,7 @@ get_smb2_acl(struct cifs_sb_info *cifs_sb, struct cifsFileInfo *open_file = NULL; if (inode && !(info & SACL_SECINFO)) - open_file = find_readable_file(CIFS_I(inode), true); + open_file = find_readable_file(CIFS_I(inode), FIND_FSUID_ONLY); if (!open_file || (info & SACL_SECINFO)) return get_smb2_acl_by_path(cifs_sb, path, pacllen, info); @@ -3898,7 +3896,7 @@ static loff_t smb3_llseek(struct file *file, struct cifs_tcon *tcon, loff_t offs * some servers (Windows2016) will not reflect recent writes in * QUERY_ALLOCATED_RANGES until SMB2_flush is called. */ - wrcfile = find_writable_file(cifsi, FIND_WR_ANY); + wrcfile = find_writable_file(cifsi, FIND_ANY); if (wrcfile) { filemap_write_and_wait(inode->i_mapping); smb2_flush_file(xid, tcon, &wrcfile->fid); @@ -5332,7 +5330,7 @@ static int smb2_make_node(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, umode_t mode, dev_t dev) { - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + unsigned int sbflags = cifs_sb_flags(CIFS_SB(inode)); int rc = -EOPNOTSUPP; /* @@ -5341,7 +5339,7 @@ static int smb2_make_node(unsigned int xid, struct inode *inode, * supports block and char device, socket & fifo, * and was used by default in earlier versions of Windows */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { + if (sbflags & CIFS_MOUNT_UNX_EMUL) { rc = cifs_sfu_make_node(xid, inode, dentry, tcon, full_path, mode, dev); } else if (CIFS_REPARSE_SUPPORT(tcon)) {
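The smb2_close_getattr() hunk above replaces the open-coded i_blocks math with CIFS_INO_BLOCKS(). The macro definition is outside this diff; assuming it preserves the removed expression, it would read:

/* Assumed equivalent of the removed math: i_blocks is counted in
 * 512-byte (1 << 9) units, independent of i_blksize. */
#define CIFS_INO_BLOCKS(asize)	(((asize) + 512 - 1) >> 9)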
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index ef655ac..5188218 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c
@@ -1714,19 +1714,17 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) is_binding = (ses->ses_status == SES_GOOD); spin_unlock(&ses->ses_lock); - /* keep session key if binding */ - if (!is_binding) { - kfree_sensitive(ses->auth_key.response); - ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, - GFP_KERNEL); - if (!ses->auth_key.response) { - cifs_dbg(VFS, "Kerberos can't allocate (%u bytes) memory\n", - msg->sesskey_len); - rc = -ENOMEM; - goto out_put_spnego_key; - } - ses->auth_key.len = msg->sesskey_len; + kfree_sensitive(ses->auth_key.response); + ses->auth_key.response = kmemdup(msg->data, + msg->sesskey_len, + GFP_KERNEL); + if (!ses->auth_key.response) { + cifs_dbg(VFS, "%s: can't allocate (%u bytes) memory\n", + __func__, msg->sesskey_len); + rc = -ENOMEM; + goto out_put_spnego_key; } + ses->auth_key.len = msg->sesskey_len; sess_data->iov[1].iov_base = msg->data + msg->sesskey_len; sess_data->iov[1].iov_len = msg->secblob_len; @@ -3182,22 +3180,19 @@ SMB2_open_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, } if ((oparms->disposition != FILE_OPEN) && (oparms->cifs_sb)) { + unsigned int sbflags = cifs_sb_flags(oparms->cifs_sb); bool set_mode; bool set_owner; - if ((oparms->cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) && - (oparms->mode != ACL_NO_MODE)) + if ((sbflags & CIFS_MOUNT_MODE_FROM_SID) && + oparms->mode != ACL_NO_MODE) { set_mode = true; - else { + } else { set_mode = false; oparms->mode = ACL_NO_MODE; } - if (oparms->cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL) - set_owner = true; - else - set_owner = false; - + set_owner = sbflags & CIFS_MOUNT_UID_FROM_ACL; if (set_owner | set_mode) { cifs_dbg(FYI, "add sd with mode 0x%x\n", oparms->mode); rc = add_sd_context(iov, &n_iov, oparms->mode, set_owner); @@ -3994,24 +3989,6 @@ int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, NULL); } -#if 0 -/* currently unused, as now we are doing compounding instead (see smb311_posix_query_path_info) */ -int -SMB311_posix_query_info(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, - struct smb311_posix_qinfo *data, u32 *plen) -{ - size_t output_len = sizeof(struct smb311_posix_qinfo *) + - (sizeof(struct smb_sid) * 2) + (PATH_MAX * 2); - *plen = 0; - - return query_info(xid, tcon, persistent_fid, volatile_fid, - SMB_FIND_FILE_POSIX_INFO, SMB2_O_INFO_FILE, 0, - output_len, sizeof(struct smb311_posix_qinfo), (void **)&data, plen); - /* Note caller must free "data" (passed in above). It may be allocated in query_info call */ -} -#endif - int SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, @@ -5330,7 +5307,10 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, memset(&rqst, 0, sizeof(struct smb_rqst)); rqst.rq_iov = iov; - rqst.rq_nvec = n_vec + 1; + /* iov[0] is the SMB header; move payload to rq_iter for encryption safety */ + rqst.rq_nvec = 1; + iov_iter_kvec(&rqst.rq_iter, ITER_SOURCE, &iov[1], n_vec, + io_parms->length); if (retries) { /* Back-off before retry */
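The SMB2_write() hunk above stops packing payload buffers into rq_iov and instead describes them through rq_iter, so the signing/encryption path sees exactly one header kvec plus an iterator. An illustrative fragment of the same pattern (the variable names here are assumptions, not from the patch):

struct kvec hdr = { .iov_base = smb2_hdr, .iov_len = hdr_len };

rqst.rq_iov  = &hdr;	/* iov[0]: protocol header only */
rqst.rq_nvec = 1;
/* the payload travels as an iterator over the remaining kvecs */
iov_iter_kvec(&rqst.rq_iter, ITER_SOURCE, payload_vecs, n_vec,
	      payload_len);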
diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h index 78bb99f..30d7009 100644 --- a/fs/smb/client/smb2pdu.h +++ b/fs/smb/client/smb2pdu.h
@@ -224,7 +224,7 @@ struct smb2_file_reparse_point_info { __le32 Tag; } __packed; -/* See MS-FSCC 2.4.21 */ +/* See MS-FSCC 2.4.26 */ struct smb2_file_id_information { __le64 VolumeSerialNumber; __u64 PersistentFileId; /* opaque endianness */ @@ -251,7 +251,10 @@ struct smb2_file_id_extd_directory_info { extern char smb2_padding[7]; -/* equivalent of the contents of SMB3.1.1 POSIX open context response */ +/* + * See POSIX-SMB2 2.2.14.2.16 + * Link: https://gitlab.com/samba-team/smb3-posix-spec/-/blob/master/smb3_posix_extensions.md + */ struct create_posix_rsp { u32 nlink; u32 reparse_tag;
diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 881e42c..230bb1e 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h
@@ -167,9 +167,6 @@ int SMB2_flush_init(const unsigned int xid, struct smb_rqst *rqst, struct cifs_tcon *tcon, struct TCP_Server_Info *server, u64 persistent_fid, u64 volatile_fid); void SMB2_flush_free(struct smb_rqst *rqst); -int SMB311_posix_query_info(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, - struct smb311_posix_qinfo *data, u32 *plen); int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, struct smb2_file_all_info *data);
diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 8b9000a..81be2b2 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c
@@ -20,6 +20,7 @@ #include <linux/highmem.h> #include <crypto/aead.h> #include <crypto/sha2.h> +#include <crypto/utils.h> #include "cifsglob.h" #include "cifsproto.h" #include "smb2proto.h" @@ -617,7 +618,8 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) if (rc) return rc; - if (memcmp(server_response_sig, shdr->Signature, SMB2_SIGNATURE_SIZE)) { + if (crypto_memneq(server_response_sig, shdr->Signature, + SMB2_SIGNATURE_SIZE)) { cifs_dbg(VFS, "sign fail cmd 0x%x message id 0x%llx\n", shdr->Command, shdr->MessageId); return -EACCES;
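The memcmp()-to-crypto_memneq() swap above is a timing hardening: memcmp() may return at the first differing byte, so response times can reveal how long a forged signature's matching prefix is. crypto_memneq() (from <crypto/utils.h>) touches every byte regardless. The verification idiom, sketched:

/* Constant-time MAC check: any mismatch costs the same time, so an
 * attacker cannot byte-wise binary-search a valid signature. */
if (crypto_memneq(computed_sig, received_sig, SMB2_SIGNATURE_SIZE))
	return -EACCES;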
diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 9228f95..acfbb63 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h
@@ -176,6 +176,7 @@ EM(netfs_trace_tcon_ref_get_cached_laundromat, "GET Ch-Lau") \ EM(netfs_trace_tcon_ref_get_cached_lease_break, "GET Ch-Lea") \ EM(netfs_trace_tcon_ref_get_cancelled_close, "GET Cn-Cls") \ + EM(netfs_trace_tcon_ref_get_close_defer_files, "GET Cl-Def") \ EM(netfs_trace_tcon_ref_get_dfs_refer, "GET DfsRef") \ EM(netfs_trace_tcon_ref_get_find, "GET Find ") \ EM(netfs_trace_tcon_ref_get_find_sess_tcon, "GET FndSes") \ @@ -187,6 +188,7 @@ EM(netfs_trace_tcon_ref_put_cancelled_close, "PUT Cn-Cls") \ EM(netfs_trace_tcon_ref_put_cancelled_close_fid, "PUT Cn-Fid") \ EM(netfs_trace_tcon_ref_put_cancelled_mid, "PUT Cn-Mid") \ + EM(netfs_trace_tcon_ref_put_close_defer_files, "PUT Cl-Def") \ EM(netfs_trace_tcon_ref_put_mnt_ctx, "PUT MntCtx") \ EM(netfs_trace_tcon_ref_put_dfs_refer, "PUT DfsRfr") \ EM(netfs_trace_tcon_ref_put_reconnect_server, "PUT Reconn") \
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 75697f6..05f8099 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c
@@ -807,16 +807,21 @@ cifs_cancelled_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) } /* - * Return a channel (master if none) of @ses that can be used to send - * regular requests. + * cifs_pick_channel - pick an eligible channel for network operations * - * If we are currently binding a new channel (negprot/sess.setup), - * return the new incomplete channel. + * @ses: session reference + * + * Select an eligible channel (not terminating and not marked as needing + * reconnect), preferring the least loaded one. If no eligible channel is + * found, fall back to the primary channel (index 0). + * + * Return: TCP_Server_Info pointer for the chosen channel, or NULL if @ses is + * NULL. */ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) { uint index = 0; - unsigned int min_in_flight = UINT_MAX, max_in_flight = 0; + unsigned int min_in_flight = UINT_MAX; struct TCP_Server_Info *server = NULL; int i, start, cur; @@ -846,14 +851,8 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) min_in_flight = server->in_flight; index = cur; } - if (server->in_flight > max_in_flight) - max_in_flight = server->in_flight; } - /* if all channels are equally loaded, fall back to round-robin */ - if (min_in_flight == max_in_flight) - index = (uint)start % ses->chan_count; - server = ses->chans[index].server; spin_unlock(&ses->chan_lock);
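With the round-robin fallback dropped, cifs_pick_channel() reduces to a pure least-loaded scan. A simplified sketch of the selection loop under the same assumptions (runs under chan_lock; the eligibility filtering shown is approximate):

unsigned int min_in_flight = UINT_MAX;
uint index = 0;
int i;

for (i = 0; i < ses->chan_count; i++) {
	struct TCP_Server_Info *srv = ses->chans[i].server;

	if (!srv || srv->terminate)
		continue;	/* ineligible channel */
	if (srv->in_flight < min_in_flight) {
		min_in_flight = srv->in_flight;
		index = i;	/* ties keep the lowest index */
	}
}
server = ses->chans[index].server;	/* index 0 = primary fallback */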
diff --git a/fs/smb/client/xattr.c b/fs/smb/client/xattr.c index e1a7d9a..23227f2 100644 --- a/fs/smb/client/xattr.c +++ b/fs/smb/client/xattr.c
@@ -149,7 +149,7 @@ static int cifs_xattr_set(const struct xattr_handler *handler, break; } - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_XATTR) goto out; if (pTcon->ses->server->ops->set_EA) { @@ -309,7 +309,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler, break; } - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_XATTR) goto out; if (pTcon->ses->server->ops->query_all_EAs) @@ -398,7 +398,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) if (unlikely(cifs_forced_shutdown(cifs_sb))) return smb_EIO(smb_eio_trace_forced_shutdown); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_NO_XATTR) return -EOPNOTSUPP; tlink = cifs_sb_tlink(cifs_sb);
diff --git a/fs/smb/server/Kconfig b/fs/smb/server/Kconfig index 2775162..1259487 100644 --- a/fs/smb/server/Kconfig +++ b/fs/smb/server/Kconfig
@@ -13,6 +13,7 @@ select CRYPTO_LIB_MD5 select CRYPTO_LIB_SHA256 select CRYPTO_LIB_SHA512 + select CRYPTO_LIB_UTILS select CRYPTO_CMAC select CRYPTO_AEAD2 select CRYPTO_CCM
diff --git a/fs/smb/server/auth.c b/fs/smb/server/auth.c index 580c4d3..af5f403 100644 --- a/fs/smb/server/auth.c +++ b/fs/smb/server/auth.c
@@ -15,6 +15,7 @@ #include <crypto/aead.h> #include <crypto/md5.h> #include <crypto/sha2.h> +#include <crypto/utils.h> #include <linux/random.h> #include <linux/scatterlist.h> @@ -165,7 +166,8 @@ int ksmbd_auth_ntlmv2(struct ksmbd_conn *conn, struct ksmbd_session *sess, ntlmv2_rsp, CIFS_HMAC_MD5_HASH_SIZE, sess->sess_key); - if (memcmp(ntlmv2->ntlmv2_hash, ntlmv2_rsp, CIFS_HMAC_MD5_HASH_SIZE) != 0) + if (crypto_memneq(ntlmv2->ntlmv2_hash, ntlmv2_rsp, + CIFS_HMAC_MD5_HASH_SIZE)) return -EINVAL; return 0; } @@ -587,12 +589,8 @@ static int generate_smb3signingkey(struct ksmbd_session *sess, if (!(conn->dialect >= SMB30_PROT_ID && signing->binding)) memcpy(chann->smb3signingkey, key, SMB3_SIGN_KEY_SIZE); - ksmbd_debug(AUTH, "dumping generated AES signing keys\n"); + ksmbd_debug(AUTH, "generated SMB3 signing key\n"); ksmbd_debug(AUTH, "Session Id %llu\n", sess->id); - ksmbd_debug(AUTH, "Session Key %*ph\n", - SMB2_NTLMV2_SESSKEY_SIZE, sess->sess_key); - ksmbd_debug(AUTH, "Signing Key %*ph\n", - SMB3_SIGN_KEY_SIZE, key); return 0; } @@ -650,23 +648,9 @@ static void generate_smb3encryptionkey(struct ksmbd_conn *conn, ptwin->decryption.context, sess->smb3decryptionkey, SMB3_ENC_DEC_KEY_SIZE); - ksmbd_debug(AUTH, "dumping generated AES encryption keys\n"); + ksmbd_debug(AUTH, "generated SMB3 encryption/decryption keys\n"); ksmbd_debug(AUTH, "Cipher type %d\n", conn->cipher_type); ksmbd_debug(AUTH, "Session Id %llu\n", sess->id); - ksmbd_debug(AUTH, "Session Key %*ph\n", - SMB2_NTLMV2_SESSKEY_SIZE, sess->sess_key); - if (conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM || - conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM) { - ksmbd_debug(AUTH, "ServerIn Key %*ph\n", - SMB3_GCM256_CRYPTKEY_SIZE, sess->smb3encryptionkey); - ksmbd_debug(AUTH, "ServerOut Key %*ph\n", - SMB3_GCM256_CRYPTKEY_SIZE, sess->smb3decryptionkey); - } else { - ksmbd_debug(AUTH, "ServerIn Key %*ph\n", - SMB3_GCM128_CRYPTKEY_SIZE, sess->smb3encryptionkey); - ksmbd_debug(AUTH, "ServerOut Key %*ph\n", - SMB3_GCM128_CRYPTKEY_SIZE, sess->smb3decryptionkey); - } } void ksmbd_gen_smb30_encryptionkey(struct ksmbd_conn *conn,
diff --git a/fs/smb/server/mgmt/tree_connect.c b/fs/smb/server/mgmt/tree_connect.c index a72d7e4..58e5b85 100644 --- a/fs/smb/server/mgmt/tree_connect.c +++ b/fs/smb/server/mgmt/tree_connect.c
@@ -102,8 +102,10 @@ ksmbd_tree_conn_connect(struct ksmbd_work *work, const char *share_name) void ksmbd_tree_connect_put(struct ksmbd_tree_connect *tcon) { - if (atomic_dec_and_test(&tcon->refcount)) + if (atomic_dec_and_test(&tcon->refcount)) { + ksmbd_share_config_put(tcon->share_conf); kfree(tcon); + } } static int __ksmbd_tree_conn_disconnect(struct ksmbd_session *sess, @@ -113,10 +115,11 @@ static int __ksmbd_tree_conn_disconnect(struct ksmbd_session *sess, ret = ksmbd_ipc_tree_disconnect_request(sess->id, tree_conn->id); ksmbd_release_tree_conn_id(sess, tree_conn->id); - ksmbd_share_config_put(tree_conn->share_conf); ksmbd_counter_dec(KSMBD_COUNTER_TREE_CONNS); - if (atomic_dec_and_test(&tree_conn->refcount)) + if (atomic_dec_and_test(&tree_conn->refcount)) { + ksmbd_share_config_put(tree_conn->share_conf); kfree(tree_conn); + } return ret; }
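The tree_connect change above closes a use-after-free window: the disconnect path used to drop the share config unconditionally while ksmbd_tree_connect_put() could still reach it through a live reference. After the fix both release paths observe the standard refcount invariant, sketched:

/* Only the thread that drops the final reference may release
 * resources reachable from the object. */
if (atomic_dec_and_test(&tcon->refcount)) {
	ksmbd_share_config_put(tcon->share_conf);	/* last user */
	kfree(tcon);
}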
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 09d9878..9b2bb87 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c
@@ -82,11 +82,19 @@ static void lease_del_list(struct oplock_info *opinfo) spin_unlock(&lb->lb_lock); } -static void lb_add(struct lease_table *lb) +static struct lease_table *alloc_lease_table(struct oplock_info *opinfo) { - write_lock(&lease_list_lock); - list_add(&lb->l_entry, &lease_table_list); - write_unlock(&lease_list_lock); + struct lease_table *lb; + + lb = kmalloc_obj(struct lease_table, KSMBD_DEFAULT_GFP); + if (!lb) + return NULL; + + memcpy(lb->client_guid, opinfo->conn->ClientGUID, + SMB2_CLIENT_GUID_SIZE); + INIT_LIST_HEAD(&lb->lease_list); + spin_lock_init(&lb->lb_lock); + return lb; } static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx) @@ -120,7 +128,7 @@ static void free_lease(struct oplock_info *opinfo) kfree(lease); } -static void free_opinfo(struct oplock_info *opinfo) +static void __free_opinfo(struct oplock_info *opinfo) { if (opinfo->is_lease) free_lease(opinfo); @@ -129,6 +137,18 @@ static void free_opinfo(struct oplock_info *opinfo) kfree(opinfo); } +static void free_opinfo_rcu(struct rcu_head *rcu) +{ + struct oplock_info *opinfo = container_of(rcu, struct oplock_info, rcu); + + __free_opinfo(opinfo); +} + +static void free_opinfo(struct oplock_info *opinfo) +{ + call_rcu(&opinfo->rcu, free_opinfo_rcu); +} + struct oplock_info *opinfo_get(struct ksmbd_file *fp) { struct oplock_info *opinfo; @@ -176,9 +196,9 @@ void opinfo_put(struct oplock_info *opinfo) free_opinfo(opinfo); } -static void opinfo_add(struct oplock_info *opinfo) +static void opinfo_add(struct oplock_info *opinfo, struct ksmbd_file *fp) { - struct ksmbd_inode *ci = opinfo->o_fp->f_ci; + struct ksmbd_inode *ci = fp->f_ci; down_write(&ci->m_lock); list_add(&opinfo->op_entry, &ci->m_op_list); @@ -1030,34 +1050,27 @@ static void copy_lease(struct oplock_info *op1, struct oplock_info *op2) lease2->version = lease1->version; } -static int add_lease_global_list(struct oplock_info *opinfo) +static void add_lease_global_list(struct oplock_info *opinfo, + struct lease_table *new_lb) { struct lease_table *lb; - read_lock(&lease_list_lock); + write_lock(&lease_list_lock); list_for_each_entry(lb, &lease_table_list, l_entry) { if (!memcmp(lb->client_guid, opinfo->conn->ClientGUID, SMB2_CLIENT_GUID_SIZE)) { opinfo->o_lease->l_lb = lb; lease_add_list(opinfo); - read_unlock(&lease_list_lock); - return 0; + write_unlock(&lease_list_lock); + kfree(new_lb); + return; } } - read_unlock(&lease_list_lock); - lb = kmalloc_obj(struct lease_table, KSMBD_DEFAULT_GFP); - if (!lb) - return -ENOMEM; - - memcpy(lb->client_guid, opinfo->conn->ClientGUID, - SMB2_CLIENT_GUID_SIZE); - INIT_LIST_HEAD(&lb->lease_list); - spin_lock_init(&lb->lb_lock); - opinfo->o_lease->l_lb = lb; + opinfo->o_lease->l_lb = new_lb; lease_add_list(opinfo); - lb_add(lb); - return 0; + list_add(&new_lb->l_entry, &lease_table_list); + write_unlock(&lease_list_lock); } static void set_oplock_level(struct oplock_info *opinfo, int level, @@ -1123,10 +1136,12 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp) rcu_read_lock(); opinfo = rcu_dereference(fp->f_opinfo); - rcu_read_unlock(); - if (!opinfo || !opinfo->is_lease || opinfo->o_lease->version != 2) + if (!opinfo || !opinfo->is_lease || opinfo->o_lease->version != 2) { + rcu_read_unlock(); return; + } + rcu_read_unlock(); p_ci = ksmbd_inode_lookup_lock(fp->filp->f_path.dentry->d_parent); if (!p_ci) @@ -1175,6 +1190,7 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, int err = 0; struct oplock_info *opinfo = NULL, *prev_opinfo = NULL; struct 
ksmbd_inode *ci = fp->f_ci; + struct lease_table *new_lb = NULL; bool prev_op_has_lease; __le32 prev_op_state = 0; @@ -1277,20 +1293,37 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, set_oplock_level(opinfo, req_op_level, lctx); out: - rcu_assign_pointer(fp->f_opinfo, opinfo); + /* + * Set o_fp before any publication so that concurrent readers + * (e.g. find_same_lease_key() on the lease list) that + * dereference opinfo->o_fp don't hit a NULL pointer. + * + * Keep the original publication order so concurrent opens can + * still observe the in-flight grant via ci->m_op_list, but make + * everything after opinfo_add() no-fail by preallocating any new + * lease_table first. + */ opinfo->o_fp = fp; + if (opinfo->is_lease) { + new_lb = alloc_lease_table(opinfo); + if (!new_lb) { + err = -ENOMEM; + goto err_out; + } + } opinfo_count_inc(fp); - opinfo_add(opinfo); - if (opinfo->is_lease) { - err = add_lease_global_list(opinfo); - if (err) - goto err_out; - } + opinfo_add(opinfo, fp); + + if (opinfo->is_lease) + add_lease_global_list(opinfo, new_lb); + + rcu_assign_pointer(fp->f_opinfo, opinfo); return 0; err_out: - free_opinfo(opinfo); + kfree(new_lb); + opinfo_put(opinfo); return err; }
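free_opinfo() above now defers the release through call_rcu(), which is what legitimizes lockless readers such as the proc dump reworked later in this series (vfs_cache.c). The reader side the conversion protects, as a hedged sketch (the helper name is illustrative):

/* Readers may touch the opinfo only inside the RCU read-side
 * section; call_rcu() guarantees the memory outlives it. */
static int opinfo_peek_level(struct ksmbd_file *fp)
{
	struct oplock_info *opinfo;
	int level = -1;

	rcu_read_lock();
	opinfo = rcu_dereference(fp->f_opinfo);
	if (opinfo)
		level = opinfo->level;	/* safe: free is deferred */
	rcu_read_unlock();
	return level;
}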
diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index 9a56eaa..921e319 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h
@@ -69,8 +69,9 @@ struct oplock_info { struct lease *o_lease; struct list_head op_entry; struct list_head lease_entry; - wait_queue_head_t oplock_q; /* Other server threads */ - wait_queue_head_t oplock_brk; /* oplock breaking wait */ + wait_queue_head_t oplock_q; /* Other server threads */ + wait_queue_head_t oplock_brk; /* oplock breaking wait */ + struct rcu_head rcu; }; struct lease_break_info {
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 95901a7..8e4cfdc 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c
@@ -4,6 +4,7 @@ * Copyright (C) 2018 Samsung Electronics Co., Ltd. */ +#include <crypto/utils.h> #include <linux/inetdevice.h> #include <net/addrconf.h> #include <linux/syscalls.h> @@ -125,6 +126,8 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work) pr_err("The first operation in the compound does not have tcon\n"); return -EINVAL; } + if (work->tcon->t_state != TREE_CONNECTED) + return -ENOENT; if (tree_id != UINT_MAX && work->tcon->id != tree_id) { pr_err("tree id(%u) is different with id(%u) in first operation\n", tree_id, work->tcon->id); @@ -1936,8 +1939,14 @@ int smb2_sess_setup(struct ksmbd_work *work) if (sess->user && sess->user->flags & KSMBD_USER_FLAG_DELAY_SESSION) try_delay = true; - sess->last_active = jiffies; - sess->state = SMB2_SESSION_EXPIRED; + /* + * For binding requests, session belongs to another + * connection. Do not expire it. + */ + if (!(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) { + sess->last_active = jiffies; + sess->state = SMB2_SESSION_EXPIRED; + } ksmbd_user_session_put(sess); work->sess = NULL; if (try_delay) { @@ -1947,6 +1956,7 @@ int smb2_sess_setup(struct ksmbd_work *work) } } smb2_set_err_rsp(work); + conn->binding = false; } else { unsigned int iov_len; @@ -2827,7 +2837,11 @@ static int parse_durable_handle_context(struct ksmbd_work *work, goto out; } - dh_info->fp->conn = conn; + if (dh_info->fp->conn) { + ksmbd_put_durable_fd(dh_info->fp); + err = -EBADF; + goto out; + } dh_info->reconnected = true; goto out; } @@ -3011,13 +3025,14 @@ int smb2_open(struct ksmbd_work *work) goto err_out2; } + fp = dh_info.fp; + if (ksmbd_override_fsids(work)) { rc = -ENOMEM; ksmbd_put_durable_fd(dh_info.fp); goto err_out2; } - fp = dh_info.fp; file_info = FILE_OPENED; rc = ksmbd_vfs_getattr(&fp->filp->f_path, &stat); @@ -3387,20 +3402,24 @@ int smb2_open(struct ksmbd_work *work) KSMBD_SHARE_FLAG_ACL_XATTR)) { struct smb_fattr fattr; struct smb_ntsd *pntsd; - int pntsd_size, ace_num = 0; + int pntsd_size; + size_t scratch_len; ksmbd_acls_fattr(&fattr, idmap, inode); - if (fattr.cf_acls) - ace_num = fattr.cf_acls->a_count; - if (fattr.cf_dacls) - ace_num += fattr.cf_dacls->a_count; + scratch_len = smb_acl_sec_desc_scratch_len(&fattr, + NULL, 0, + OWNER_SECINFO | GROUP_SECINFO | + DACL_SECINFO); + if (!scratch_len || scratch_len == SIZE_MAX) { + rc = -EFBIG; + posix_acl_release(fattr.cf_acls); + posix_acl_release(fattr.cf_dacls); + goto err_out; + } - pntsd = kmalloc(sizeof(struct smb_ntsd) + - sizeof(struct smb_sid) * 3 + - sizeof(struct smb_acl) + - sizeof(struct smb_ace) * ace_num * 2, - KSMBD_DEFAULT_GFP); + pntsd = kvzalloc(scratch_len, KSMBD_DEFAULT_GFP); if (!pntsd) { + rc = -ENOMEM; posix_acl_release(fattr.cf_acls); posix_acl_release(fattr.cf_dacls); goto err_out; @@ -3415,7 +3434,7 @@ int smb2_open(struct ksmbd_work *work) posix_acl_release(fattr.cf_acls); posix_acl_release(fattr.cf_dacls); if (rc) { - kfree(pntsd); + kvfree(pntsd); goto err_out; } @@ -3425,7 +3444,7 @@ int smb2_open(struct ksmbd_work *work) pntsd, pntsd_size, false); - kfree(pntsd); + kvfree(pntsd); if (rc) pr_err("failed to store ntacl in xattr : %d\n", rc); @@ -3615,10 +3634,8 @@ int smb2_open(struct ksmbd_work *work) reconnected_fp: rsp->StructureSize = cpu_to_le16(89); - rcu_read_lock(); - opinfo = rcu_dereference(fp->f_opinfo); + opinfo = opinfo_get(fp); rsp->OplockLevel = opinfo != NULL ? 
opinfo->level : 0; - rcu_read_unlock(); rsp->Flags = 0; rsp->CreateAction = cpu_to_le32(file_info); rsp->CreationTime = cpu_to_le64(fp->create_time); @@ -3659,6 +3676,7 @@ int smb2_open(struct ksmbd_work *work) next_ptr = &lease_ccontext->Next; next_off = conn->vals->create_lease_size; } + opinfo_put(opinfo); if (maximal_access_ctxt) { struct create_context *mxac_ccontext; @@ -4438,8 +4456,9 @@ int smb2_query_dir(struct ksmbd_work *work) d_info.wptr = (char *)rsp->Buffer; d_info.rptr = (char *)rsp->Buffer; d_info.out_buf_len = - smb2_calc_max_out_buf_len(work, 8, - le32_to_cpu(req->OutputBufferLength)); + smb2_calc_max_out_buf_len(work, + offsetof(struct smb2_query_directory_rsp, Buffer), + le32_to_cpu(req->OutputBufferLength)); if (d_info.out_buf_len < 0) { rc = -EINVAL; goto err_out; @@ -4706,8 +4725,9 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp, } buf_free_len = - smb2_calc_max_out_buf_len(work, 8, - le32_to_cpu(req->OutputBufferLength)); + smb2_calc_max_out_buf_len(work, + offsetof(struct smb2_query_info_rsp, Buffer), + le32_to_cpu(req->OutputBufferLength)); if (buf_free_len < 0) return -EINVAL; @@ -4924,7 +4944,8 @@ static int get_file_all_info(struct ksmbd_work *work, int conv_len; char *filename; u64 time; - int ret; + int ret, buf_free_len, filename_len; + struct smb2_query_info_req *req = ksmbd_req_buf_next(work); if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) { ksmbd_debug(SMB, "no right to read the attributes : 0x%x\n", @@ -4936,6 +4957,16 @@ static int get_file_all_info(struct ksmbd_work *work, if (IS_ERR(filename)) return PTR_ERR(filename); + filename_len = strlen(filename); + buf_free_len = smb2_calc_max_out_buf_len(work, + offsetof(struct smb2_query_info_rsp, Buffer) + + offsetof(struct smb2_file_all_info, FileName), + le32_to_cpu(req->OutputBufferLength)); + if (buf_free_len < (filename_len + 1) * 2) { + kfree(filename); + return -EINVAL; + } + ret = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (ret) { @@ -4979,7 +5010,8 @@ static int get_file_all_info(struct ksmbd_work *work, file_info->Mode = fp->coption; file_info->AlignmentRequirement = 0; conv_len = smbConvertToUTF16((__le16 *)file_info->FileName, filename, - PATH_MAX, conn->local_nls, 0); + min(filename_len, PATH_MAX), + conn->local_nls, 0); conv_len *= 2; file_info->FileNameLength = cpu_to_le32(conv_len); rsp->OutputBufferLength = @@ -5033,8 +5065,9 @@ static int get_file_stream_info(struct ksmbd_work *work, file_info = (struct smb2_file_stream_info *)rsp->Buffer; buf_free_len = - smb2_calc_max_out_buf_len(work, 8, - le32_to_cpu(req->OutputBufferLength)); + smb2_calc_max_out_buf_len(work, + offsetof(struct smb2_query_info_rsp, Buffer), + le32_to_cpu(req->OutputBufferLength)); if (buf_free_len < 0) goto out; @@ -5343,8 +5376,9 @@ static int smb2_get_info_file(struct ksmbd_work *work, if (test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_PIPE)) { /* smb2 info file called for pipe */ - return smb2_get_info_file_pipe(work->sess, req, rsp, + rc = smb2_get_info_file_pipe(work->sess, req, rsp, work->response_buf); + goto iov_pin_out; } if (work->next_smb2_rcv_hdr_off) { @@ -5444,6 +5478,12 @@ static int smb2_get_info_file(struct ksmbd_work *work, rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength), rsp, work->response_buf); ksmbd_fd_put(work, fp); + +iov_pin_out: + if (!rc) + rc = ksmbd_iov_pin_rsp(work, (void *)rsp, + offsetof(struct smb2_query_info_rsp, Buffer) + + le32_to_cpu(rsp->OutputBufferLength)); return rc; } @@ -5451,7 +5491,6 
@@ static int smb2_get_info_filesystem(struct ksmbd_work *work, struct smb2_query_info_req *req, struct smb2_query_info_rsp *rsp) { - struct ksmbd_session *sess = work->sess; struct ksmbd_conn *conn = work->conn; struct ksmbd_share_config *share = work->tcon->share_conf; int fsinfoclass = 0; @@ -5588,10 +5627,11 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info = (struct object_id_info *)(rsp->Buffer); - if (!user_guest(sess->user)) - memcpy(info->objid, user_passkey(sess->user), 16); + if (path.mnt->mnt_sb->s_uuid_len == 16) + memcpy(info->objid, path.mnt->mnt_sb->s_uuid.b, + path.mnt->mnt_sb->s_uuid_len); else - memset(info->objid, 0, 16); + memcpy(info->objid, &stfs.f_fsid, sizeof(stfs.f_fsid)); info->extended_info.magic = cpu_to_le32(EXTENDED_INFO_MAGIC); info->extended_info.version = cpu_to_le32(1); @@ -5670,6 +5710,11 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength), rsp, work->response_buf); path_put(&path); + + if (!rc) + rc = ksmbd_iov_pin_rsp(work, (void *)rsp, + offsetof(struct smb2_query_info_rsp, Buffer) + + le32_to_cpu(rsp->OutputBufferLength)); return rc; } @@ -5679,13 +5724,14 @@ static int smb2_get_info_sec(struct ksmbd_work *work, { struct ksmbd_file *fp; struct mnt_idmap *idmap; - struct smb_ntsd *pntsd = (struct smb_ntsd *)rsp->Buffer, *ppntsd = NULL; + struct smb_ntsd *pntsd = NULL, *ppntsd = NULL; struct smb_fattr fattr = {{0}}; struct inode *inode; __u32 secdesclen = 0; unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID; int addition_info = le32_to_cpu(req->AdditionalInformation); - int rc = 0, ppntsd_size = 0; + int rc = 0, ppntsd_size = 0, max_len; + size_t scratch_len = 0; if (addition_info & ~(OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO | PROTECTED_DACL_SECINFO | @@ -5693,6 +5739,11 @@ static int smb2_get_info_sec(struct ksmbd_work *work, ksmbd_debug(SMB, "Unsupported addition info: 0x%x)\n", addition_info); + pntsd = kzalloc(ALIGN(sizeof(struct smb_ntsd), 8), + KSMBD_DEFAULT_GFP); + if (!pntsd) + return -ENOMEM; + pntsd->revision = cpu_to_le16(1); pntsd->type = cpu_to_le16(SELF_RELATIVE | DACL_PROTECTED); pntsd->osidoffset = 0; @@ -5701,9 +5752,7 @@ static int smb2_get_info_sec(struct ksmbd_work *work, pntsd->dacloffset = 0; secdesclen = sizeof(struct smb_ntsd); - rsp->OutputBufferLength = cpu_to_le32(secdesclen); - - return 0; + goto iov_pin; } if (work->next_smb2_rcv_hdr_off) { @@ -5735,18 +5784,58 @@ static int smb2_get_info_sec(struct ksmbd_work *work, &ppntsd); /* Check if sd buffer size exceeds response buffer size */ - if (smb2_resp_buf_len(work, 8) > ppntsd_size) - rc = build_sec_desc(idmap, pntsd, ppntsd, ppntsd_size, - addition_info, &secdesclen, &fattr); + max_len = smb2_calc_max_out_buf_len(work, + offsetof(struct smb2_query_info_rsp, Buffer), + le32_to_cpu(req->OutputBufferLength)); + if (max_len < 0) { + rc = -EINVAL; + goto release_acl; + } + + scratch_len = smb_acl_sec_desc_scratch_len(&fattr, ppntsd, + ppntsd_size, addition_info); + if (!scratch_len || scratch_len == SIZE_MAX) { + rc = -EFBIG; + goto release_acl; + } + + pntsd = kvzalloc(scratch_len, KSMBD_DEFAULT_GFP); + if (!pntsd) { + rc = -ENOMEM; + goto release_acl; + } + + rc = build_sec_desc(idmap, pntsd, ppntsd, ppntsd_size, + addition_info, &secdesclen, &fattr); + +release_acl: posix_acl_release(fattr.cf_acls); posix_acl_release(fattr.cf_dacls); kfree(ppntsd); ksmbd_fd_put(work, fp); - if (rc) - return rc; + if (!rc && ALIGN(secdesclen, 8) > scratch_len) + rc = -EFBIG; + if (rc) + goto err_out; 
+ +iov_pin: rsp->OutputBufferLength = cpu_to_le32(secdesclen); - return 0; + rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength), + rsp, work->response_buf); + if (rc) + goto err_out; + + rc = ksmbd_iov_pin_rsp_read(work, (void *)rsp, + offsetof(struct smb2_query_info_rsp, Buffer), + pntsd, secdesclen); +err_out: + if (rc) { + rsp->OutputBufferLength = 0; + kvfree(pntsd); + } + + return rc; } /** @@ -5770,6 +5859,9 @@ int smb2_query_info(struct ksmbd_work *work) goto err_out; } + rsp->StructureSize = cpu_to_le16(9); + rsp->OutputBufferOffset = cpu_to_le16(72); + switch (req->InfoType) { case SMB2_O_INFO_FILE: ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILE\n"); @@ -5790,14 +5882,6 @@ int smb2_query_info(struct ksmbd_work *work) } ksmbd_revert_fsids(work); - if (!rc) { - rsp->StructureSize = cpu_to_le16(9); - rsp->OutputBufferOffset = cpu_to_le16(72); - rc = ksmbd_iov_pin_rsp(work, (void *)rsp, - offsetof(struct smb2_query_info_rsp, Buffer) + - le32_to_cpu(rsp->OutputBufferLength)); - } - err_out: if (rc < 0) { if (rc == -EACCES) @@ -5808,6 +5892,8 @@ int smb2_query_info(struct ksmbd_work *work) rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR; else if (rc == -ENOMEM) rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES; + else if (rc == -EINVAL && rsp->hdr.Status == 0) + rsp->hdr.Status = STATUS_INVALID_PARAMETER; else if (rc == -EOPNOTSUPP || rsp->hdr.Status == 0) rsp->hdr.Status = STATUS_INVALID_INFO_CLASS; smb2_set_err_rsp(work); @@ -7578,14 +7664,15 @@ int smb2_lock(struct ksmbd_work *work) rc = vfs_lock_file(filp, smb_lock->cmd, flock, NULL); skip: if (smb_lock->flags & SMB2_LOCKFLAG_UNLOCK) { + locks_free_lock(flock); + kfree(smb_lock); if (!rc) { ksmbd_debug(SMB, "File unlocked\n"); } else if (rc == -ENOENT) { rsp->hdr.Status = STATUS_NOT_LOCKED; + err = rc; goto out; } - locks_free_lock(flock); - kfree(smb_lock); } else { if (rc == FILE_LOCK_DEFERRED) { void **argv; @@ -7654,6 +7741,9 @@ int smb2_lock(struct ksmbd_work *work) spin_unlock(&work->conn->llist_lock); ksmbd_debug(SMB, "successful in taking lock\n"); } else { + locks_free_lock(flock); + kfree(smb_lock); + err = rc; goto out; } } @@ -7684,13 +7774,17 @@ int smb2_lock(struct ksmbd_work *work) struct file_lock *rlock = NULL; rlock = smb_flock_init(filp); - rlock->c.flc_type = F_UNLCK; - rlock->fl_start = smb_lock->start; - rlock->fl_end = smb_lock->end; + if (rlock) { + rlock->c.flc_type = F_UNLCK; + rlock->fl_start = smb_lock->start; + rlock->fl_end = smb_lock->end; - rc = vfs_lock_file(filp, F_SETLK, rlock, NULL); - if (rc) - pr_err("rollback unlock fail : %d\n", rc); + rc = vfs_lock_file(filp, F_SETLK, rlock, NULL); + if (rc) + pr_err("rollback unlock fail : %d\n", rc); + } else { + pr_err("rollback unlock alloc failed\n"); + } list_del(&smb_lock->llist); spin_lock(&work->conn->llist_lock); @@ -7700,7 +7794,8 @@ int smb2_lock(struct ksmbd_work *work) spin_unlock(&work->conn->llist_lock); locks_free_lock(smb_lock->fl); - locks_free_lock(rlock); + if (rlock) + locks_free_lock(rlock); kfree(smb_lock); } out2: @@ -8183,8 +8278,9 @@ int smb2_ioctl(struct ksmbd_work *work) buffer = (char *)req + le32_to_cpu(req->InputOffset); cnt_code = le32_to_cpu(req->CtlCode); - ret = smb2_calc_max_out_buf_len(work, 48, - le32_to_cpu(req->MaxOutputResponse)); + ret = smb2_calc_max_out_buf_len(work, + offsetof(struct smb2_ioctl_rsp, Buffer), + le32_to_cpu(req->MaxOutputResponse)); if (ret < 0) { rsp->hdr.Status = STATUS_INVALID_PARAMETER; goto out; @@ -8880,7 +8976,7 @@ int smb2_check_sign_req(struct ksmbd_work *work) ksmbd_sign_smb2_pdu(work->conn, 
work->sess->sess_key, iov, 1, signature); - if (memcmp(signature, signature_req, SMB2_SIGNATURE_SIZE)) { + if (crypto_memneq(signature, signature_req, SMB2_SIGNATURE_SIZE)) { pr_err("bad smb2 signature\n"); return 0; } @@ -8968,7 +9064,7 @@ int smb3_check_sign_req(struct ksmbd_work *work) if (ksmbd_sign_smb3_pdu(conn, signing_key, iov, 1, signature)) return 0; - if (memcmp(signature, signature_req, SMB2_SIGNATURE_SIZE)) { + if (crypto_memneq(signature, signature_req, SMB2_SIGNATURE_SIZE)) { pr_err("bad smb2 signature\n"); return 0; }
diff --git a/fs/smb/server/smb2pdu.h b/fs/smb/server/smb2pdu.h index 257c6d2..8b6eafb 100644 --- a/fs/smb/server/smb2pdu.h +++ b/fs/smb/server/smb2pdu.h
@@ -83,7 +83,10 @@ struct create_durable_rsp { } Data; } __packed; -/* equivalent of the contents of SMB3.1.1 POSIX open context response */ +/* + * See POSIX-SMB2 2.2.14.2.16 + * Link: https://gitlab.com/samba-team/smb3-posix-spec/-/blob/master/smb3_posix_extensions.md + */ struct create_posix_rsp { struct create_context_hdr ccontext; __u8 Name[16];
diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 49c2abb..c30d018 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c
@@ -915,6 +915,49 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, return 0; } +size_t smb_acl_sec_desc_scratch_len(struct smb_fattr *fattr, + struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info) +{ + size_t len = sizeof(struct smb_ntsd); + size_t tmp; + + if (addition_info & OWNER_SECINFO) + len += sizeof(struct smb_sid); + if (addition_info & GROUP_SECINFO) + len += sizeof(struct smb_sid); + if (!(addition_info & DACL_SECINFO)) + return len; + + len += sizeof(struct smb_acl); + if (ppntsd && ppntsd_size > 0) { + unsigned int dacl_offset = le32_to_cpu(ppntsd->dacloffset); + + if (dacl_offset < ppntsd_size && + check_add_overflow(len, ppntsd_size - dacl_offset, &len)) + return 0; + } + + if (fattr->cf_acls) { + if (check_mul_overflow((size_t)fattr->cf_acls->a_count, + 2 * sizeof(struct smb_ace), &tmp) || + check_add_overflow(len, tmp, &len)) + return 0; + } else { + /* default/minimum DACL */ + if (check_add_overflow(len, 5 * sizeof(struct smb_ace), &len)) + return 0; + } + + if (fattr->cf_dacls) { + if (check_mul_overflow((size_t)fattr->cf_dacls->a_count, + sizeof(struct smb_ace), &tmp) || + check_add_overflow(len, tmp, &len)) + return 0; + } + + return len; +} + /* Convert permission bits from mode to equivalent CIFS ACL */ int build_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd,
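smb_acl_sec_desc_scratch_len() above builds its total with the <linux/overflow.h> helpers, which return true on overflow and store the result otherwise; returning 0 gives callers a single "unrepresentable" signal. The core idiom in isolation (names illustrative):

#include <linux/overflow.h>

/* Sizing-helper sketch: 0 means the total cannot be represented. */
static size_t scratch_bytes(size_t base, size_t count, size_t elem)
{
	size_t tmp, len = base;

	if (check_mul_overflow(count, elem, &tmp) ||
	    check_add_overflow(len, tmp, &len))
		return 0;
	return len;
}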
diff --git a/fs/smb/server/smbacl.h b/fs/smb/server/smbacl.h index 355adae..ab21ba2c 100644 --- a/fs/smb/server/smbacl.h +++ b/fs/smb/server/smbacl.h
@@ -101,6 +101,8 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, bool type_check, bool get_write); void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid); void ksmbd_init_domain(u32 *sub_auth); +size_t smb_acl_sec_desc_scratch_len(struct smb_fattr *fattr, + struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info); static inline uid_t posix_acl_uid_translate(struct mnt_idmap *idmap, struct posix_acl_entry *pace)
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 7c53b78..1885724 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c
@@ -2540,9 +2540,9 @@ static int smb_direct_prepare(struct ksmbd_transport *t) goto put; req = (struct smbdirect_negotiate_req *)recvmsg->packet; - sp->max_recv_size = min_t(int, sp->max_recv_size, + sp->max_recv_size = min_t(u32, sp->max_recv_size, le32_to_cpu(req->preferred_send_size)); - sp->max_send_size = min_t(int, sp->max_send_size, + sp->max_send_size = min_t(u32, sp->max_send_size, le32_to_cpu(req->max_receive_size)); sp->max_fragmented_send_size = le32_to_cpu(req->max_fragmented_size);
diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index ff4ea41..168f2dd 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c
@@ -87,11 +87,7 @@ static int proc_show_files(struct seq_file *m, void *v) rcu_read_lock(); opinfo = rcu_dereference(fp->f_opinfo); - rcu_read_unlock(); - - if (!opinfo) { - seq_printf(m, " %-15s", " "); - } else { + if (opinfo) { const struct ksmbd_const_name *const_names; int count; unsigned int level; @@ -105,8 +101,12 @@ static int proc_show_files(struct seq_file *m, void *v) count = ARRAY_SIZE(ksmbd_oplock_const_names); level = opinfo->level; } + rcu_read_unlock(); ksmbd_proc_show_const_name(m, " %-15s", const_names, count, level); + } else { + rcu_read_unlock(); + seq_printf(m, " %-15s", " "); } seq_printf(m, " %#010x %#010x %s\n",
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 8e958db..67abd4d 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c
@@ -344,6 +344,9 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer, if (unlikely(length < 0)) return -EIO; + if (unlikely(*offset < 0 || *offset >= SQUASHFS_METADATA_SIZE)) + return -EIO; + while (length) { entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0); if (entry->error) {
diff --git a/fs/tests/exec_kunit.c b/fs/tests/exec_kunit.c index f412d1a0..1c32cac 100644 --- a/fs/tests/exec_kunit.c +++ b/fs/tests/exec_kunit.c
@@ -94,9 +94,6 @@ static const struct bprm_stack_limits_result bprm_stack_limits_results[] = { { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * 3 + sizeof(void *)), .argc = 0, .envc = 0 }, .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) }, - { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * + sizeof(void *)), - .argc = 0, .envc = 0 }, - .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) }, { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * _STK_LIM, .argc = 0, .envc = 0 }, .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 7fae800..23e8940 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c
@@ -181,22 +181,23 @@ static void udf_write_failed(struct address_space *mapping, loff_t to) } } -static int udf_adinicb_writepages(struct address_space *mapping, - struct writeback_control *wbc) +static int udf_handle_page_wb(struct folio *folio, + struct writeback_control *wbc) { - struct inode *inode = mapping->host; + struct inode *inode = folio->mapping->host; struct udf_inode_info *iinfo = UDF_I(inode); - struct folio *folio = NULL; - int error = 0; - while ((folio = writeback_iter(mapping, wbc, folio, &error))) { - BUG_ON(!folio_test_locked(folio)); - BUG_ON(folio->index != 0); - memcpy_from_file_folio(iinfo->i_data + iinfo->i_lenEAttr, folio, + /* + * Inodes in the normal format are handled by the generic code. This + * check is race-free as the folio lock protects us from inode type + * conversion. + */ + if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) + return 1; + + memcpy_from_file_folio(iinfo->i_data + iinfo->i_lenEAttr, folio, 0, i_size_read(inode)); - folio_unlock(folio); - } - + folio_unlock(folio); mark_inode_dirty(inode); return 0; } @@ -204,12 +205,8 @@ static int udf_adinicb_writepages(struct address_space *mapping, static int udf_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct inode *inode = mapping->host; - struct udf_inode_info *iinfo = UDF_I(inode); - - if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) - return udf_adinicb_writepages(mapping, wbc); - return mpage_writepages(mapping, wbc, udf_get_block_wb); + return __mpage_writepages(mapping, wbc, udf_get_block_wb, + udf_handle_page_wb); } static void udf_adinicb_read_folio(struct folio *folio)
diff --git a/fs/verity/Kconfig b/fs/verity/Kconfig index 76d1c59..b208829 100644 --- a/fs/verity/Kconfig +++ b/fs/verity/Kconfig
@@ -2,6 +2,9 @@ config FS_VERITY bool "FS Verity (read-only file-based authenticity protection)" + # Filesystems cache the Merkle tree at a 64K aligned offset in the + # pagecache. That approach assumes the page size is at most 64K. + depends on PAGE_SHIFT <= 16 select CRYPTO_HASH_INFO select CRYPTO_LIB_SHA256 select CRYPTO_LIB_SHA512
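The new Kconfig dependency encodes the constraint spelled out in the comment: PAGE_SIZE is 1 << PAGE_SHIFT, so PAGE_SHIFT <= 16 is exactly PAGE_SIZE <= 64K, the alignment the Merkle-tree cache relies on. Expressed as an equivalent compile-time assertion (illustrative only, not part of the patch):

/* Equivalent of "depends on PAGE_SHIFT <= 16". */
BUILD_BUG_ON(PAGE_SIZE > (1UL << 16));	/* 64K */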
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 9c6765c..bd8fbb4 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c
@@ -872,6 +872,34 @@ xfs_ag_shrink_space( return err2; } +void +xfs_growfs_compute_deltas( + struct xfs_mount *mp, + xfs_rfsblock_t nb, + int64_t *deltap, + xfs_agnumber_t *nagcountp) +{ + xfs_rfsblock_t nb_div, nb_mod; + int64_t delta; + xfs_agnumber_t nagcount; + + nb_div = nb; + nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks); + if (nb_mod && nb_mod >= XFS_MIN_AG_BLOCKS) + nb_div++; + else if (nb_mod) + nb = nb_div * mp->m_sb.sb_agblocks; + + if (nb_div > XFS_MAX_AGNUMBER + 1) { + nb_div = XFS_MAX_AGNUMBER + 1; + nb = nb_div * mp->m_sb.sb_agblocks; + } + nagcount = nb_div; + delta = nb - mp->m_sb.sb_dblocks; + *deltap = delta; + *nagcountp = nagcount; +} + /* * Extent the AG indicated by the @id by the length passed in */
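xfs_growfs_compute_deltas() above factors the growfs sizing rules into a reusable helper. A hedged usage sketch (caller-side names assumed):

int64_t		delta;
xfs_agnumber_t	nagcount;

/* Rounds @nb down when the trailing runt AG would be smaller than
 * XFS_MIN_AG_BLOCKS and clamps the AG count to XFS_MAX_AGNUMBER + 1;
 * delta is negative when the new size shrinks the fs. */
xfs_growfs_compute_deltas(mp, nb, &delta, &nagcount);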
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 1f24cfa..3cd4790 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h
@@ -331,6 +331,9 @@ struct aghdr_init_data { int xfs_ag_init_headers(struct xfs_mount *mp, struct aghdr_init_data *id); int xfs_ag_shrink_space(struct xfs_perag *pag, struct xfs_trans **tpp, xfs_extlen_t delta); +void +xfs_growfs_compute_deltas(struct xfs_mount *mp, xfs_rfsblock_t nb, + int64_t *deltap, xfs_agnumber_t *nagcountp); int xfs_ag_extend_space(struct xfs_perag *pag, struct xfs_trans *tp, xfs_extlen_t len); int xfs_ag_get_geometry(struct xfs_perag *pag, struct xfs_ag_geometry *ageo);
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 8244305..67fd9c7 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h
@@ -55,7 +55,8 @@ struct xfs_attr_list_context { struct xfs_trans *tp; struct xfs_inode *dp; /* inode */ struct xfs_attrlist_cursor_kern cursor; /* position in list */ - void *buffer; /* output buffer */ + /* output buffer */ + void *buffer __counted_by_ptr(bufsize); /* * Abort attribute list iteration if non-zero. Can be used to pass
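The __counted_by_ptr() annotation above ties the output buffer to its bufsize member so __builtin_dynamic_object_size()-based checkers (FORTIFY_SOURCE, UBSAN bounds) can see the real extent of the allocation. A minimal illustration, assuming toolchain support for the attribute:

/* Writes through ->buffer can now be checked against ->bufsize. */
struct demo_ctx {
	int	bufsize;
	void	*buffer __counted_by_ptr(bufsize);
};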
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 47f48ae..2b78041 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -1416,6 +1416,28 @@ xfs_attr3_leaf_create( } /* + * Reinitialize an existing attr fork block as an empty leaf, and attach + * the buffer to tp. + */ +int +xfs_attr3_leaf_init( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t blkno) +{ + struct xfs_buf *bp = NULL; + struct xfs_da_args args = { + .trans = tp, + .dp = dp, + .owner = dp->i_ino, + .geo = dp->i_mount->m_attr_geo, + }; + + ASSERT(tp != NULL); + + return xfs_attr3_leaf_create(&args, blkno, &bp); +} +/* * Split the leaf node, rebalance, then add the new entry. * * Returns 0 if the entry was added, 1 if a further split is needed or a
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index aca46da..72639ef 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -87,6 +87,9 @@ int xfs_attr3_leaf_list_int(struct xfs_buf *bp, /* * Routines used for shrinking the Btree. */ + +int xfs_attr3_leaf_init(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t blkno); int xfs_attr3_leaf_toosmall(struct xfs_da_state *state, int *retval); void xfs_attr3_leaf_unbalance(struct xfs_da_state *state, struct xfs_da_state_blk *drop_blk,
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 766631f..ad801b7 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -1506,21 +1506,20 @@ xfs_da3_fixhashpath( } /* - * Remove an entry from an intermediate node. + * Internal implementation to remove an entry from an intermediate node. */ STATIC void -xfs_da3_node_remove( - struct xfs_da_state *state, - struct xfs_da_state_blk *drop_blk) +__xfs_da3_node_remove( + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_da_geometry *geo, + struct xfs_da_state_blk *drop_blk) { struct xfs_da_intnode *node; struct xfs_da3_icnode_hdr nodehdr; struct xfs_da_node_entry *btree; int index; int tmp; - struct xfs_inode *dp = state->args->dp; - - trace_xfs_da_node_remove(state->args); node = drop_blk->bp->b_addr; xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node); @@ -1536,17 +1535,17 @@ xfs_da3_node_remove( tmp = nodehdr.count - index - 1; tmp *= (uint)sizeof(xfs_da_node_entry_t); memmove(&btree[index], &btree[index + 1], tmp); - xfs_trans_log_buf(state->args->trans, drop_blk->bp, + xfs_trans_log_buf(tp, drop_blk->bp, XFS_DA_LOGRANGE(node, &btree[index], tmp)); index = nodehdr.count - 1; } memset(&btree[index], 0, sizeof(xfs_da_node_entry_t)); - xfs_trans_log_buf(state->args->trans, drop_blk->bp, + xfs_trans_log_buf(tp, drop_blk->bp, XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index]))); nodehdr.count -= 1; xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr); - xfs_trans_log_buf(state->args->trans, drop_blk->bp, - XFS_DA_LOGRANGE(node, &node->hdr, state->args->geo->node_hdr_size)); + xfs_trans_log_buf(tp, drop_blk->bp, + XFS_DA_LOGRANGE(node, &node->hdr, geo->node_hdr_size)); /* * Copy the last hash value from the block to propagate upwards. @@ -1555,6 +1554,38 @@ xfs_da3_node_remove( } /* + * Remove an entry from an intermediate node. + */ +STATIC void +xfs_da3_node_remove( + struct xfs_da_state *state, + struct xfs_da_state_blk *drop_blk) +{ + trace_xfs_da_node_remove(state->args); + + __xfs_da3_node_remove(state->args->trans, state->args->dp, + state->args->geo, drop_blk); +} + +/* + * Remove an entry from an intermediate attr node at the specified index. + */ +void +xfs_attr3_node_entry_remove( + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_buf *bp, + int index) +{ + struct xfs_da_state_blk blk = { + .index = index, + .bp = bp, + }; + + __xfs_da3_node_remove(tp, dp, dp->i_mount->m_attr_geo, &blk); +} + +/* * Unbalance the elements between two intermediate nodes, * move all Btree elements from one node into another. */ @@ -2716,12 +2747,8 @@ xfs_dabuf_map( * larger one that needs to be free by the caller. */ if (nirecs > 1) { - map = kzalloc(nirecs * sizeof(struct xfs_buf_map), - GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); - if (!map) { - error = -ENOMEM; - goto out_free_irecs; - } + map = kcalloc(nirecs, sizeof(struct xfs_buf_map), + GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); *mapp = map; }
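Two things change in the xfs_dabuf_map() hunk above: kcalloc() guards the nirecs * sizeof() multiplication against overflow, and because __GFP_NOFAIL allocations never return NULL, the removed error branch was dead code. The reasoning, inline:

/* __GFP_NOFAIL makes the allocator retry indefinitely instead of
 * returning NULL, so no error check is needed afterwards; kcalloc()
 * also rejects an overflowing nirecs * sizeof() product. */
map = kcalloc(nirecs, sizeof(struct xfs_buf_map),
	      GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);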
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 354d5d6..afcf2d3 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -184,6 +184,8 @@ int xfs_da3_split(xfs_da_state_t *state); int xfs_da3_join(xfs_da_state_t *state); void xfs_da3_fixhashpath(struct xfs_da_state *state, struct xfs_da_state_path *path_to_to_fix); +void xfs_attr3_node_entry_remove(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_buf *bp, int index); /* * Routines used for finding things in the Btree.
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 472c261..c690971 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c
@@ -809,7 +809,7 @@ xfs_defer_can_append( /* Paused items cannot absorb more work */ if (dfp->dfp_flags & XFS_DEFER_PAUSED) - return NULL; + return false; /* Already full? */ if (ops->max_items && dfp->dfp_count >= ops->max_items)
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index a017016..3794e54 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -268,6 +268,10 @@ xfs_inode_from_disk( } if (xfs_is_reflink_inode(ip)) xfs_ifork_init_cow(ip); + if (xfs_is_metadir_inode(ip)) { + XFS_STATS_DEC(ip->i_mount, xs_inodes_active); + XFS_STATS_INC(ip->i_mount, xs_inodes_meta); + } return 0; out_destroy_data_fork:
diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c index cf239f86..71f004e 100644 --- a/fs/xfs/libxfs/xfs_metafile.c +++ b/fs/xfs/libxfs/xfs_metafile.c
@@ -61,6 +61,9 @@ xfs_metafile_set_iflag( ip->i_diflags2 |= XFS_DIFLAG2_METADATA; ip->i_metatype = metafile_type; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + XFS_STATS_DEC(ip->i_mount, xs_inodes_active); + XFS_STATS_INC(ip->i_mount, xs_inodes_meta); } /* Clear the metadata directory inode flag. */ @@ -74,6 +77,8 @@ xfs_metafile_clear_iflag( ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + XFS_STATS_INC(ip->i_mount, xs_inodes_active); + XFS_STATS_DEC(ip->i_mount, xs_inodes_meta); } /*
diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h index 2e9715c..23cde12 100644 --- a/fs/xfs/libxfs/xfs_ondisk.h +++ b/fs/xfs/libxfs/xfs_ondisk.h
@@ -73,7 +73,7 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_free_hdr, 64); XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf, 64); XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf_hdr, 64); - XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_entry, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_entry, 8); XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_hdr, 32); XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_map, 4); XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_name_local, 4); @@ -116,7 +116,7 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_da_intnode, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_da_node_entry, 8); XFS_CHECK_STRUCT_SIZE(struct xfs_da_node_hdr, 16); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_free, 4); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_free, 4); XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_hdr, 16); XFS_CHECK_OFFSET(struct xfs_dir2_data_unused, freetag, 0); XFS_CHECK_OFFSET(struct xfs_dir2_data_unused, length, 2); @@ -136,16 +136,7 @@ xfs_check_ondisk_structs(void) /* ondisk dir/attr structures from xfs/122 */ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_sf_entry, 3); XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_free, 4); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_hdr, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_unused, 6); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free, 16); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free_hdr, 16); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf, 16); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_entry, 8); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_hdr, 16); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_tail, 4); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_entry, 3); - XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_hdr, 10); /* log structures */ XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88); @@ -217,11 +208,6 @@ xfs_check_ondisk_structs(void) XFS_CHECK_OFFSET(struct xfs_dir3_free, hdr.hdr.magic, 0); XFS_CHECK_OFFSET(struct xfs_attr3_leafblock, hdr.info.hdr, 0); - XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat, 192); - XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers, 24); - XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req, 64); - XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req, 64); - /* * Make sure the incore inode timestamp range corresponds to hand * converted values based on the ondisk format specification. @@ -301,6 +287,39 @@ xfs_check_ondisk_structs(void) XFS_CHECK_SB_OFFSET(sb_pad, 281); XFS_CHECK_SB_OFFSET(sb_rtstart, 288); XFS_CHECK_SB_OFFSET(sb_rtreserved, 296); + + /* + * ioctl UABI + * + * Due to different padding/alignment requirements across + * different architectures, some structures are omitted from + * the size checks. In addition, structures with architecture + * dependent size fields are also omitted (e.g. __kernel_long_t).
+ */ + XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat, 192); + XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers, 24); + XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req, 64); + XFS_CHECK_STRUCT_SIZE(struct dioattr, 12); + XFS_CHECK_STRUCT_SIZE(struct getbmap, 32); + XFS_CHECK_STRUCT_SIZE(struct getbmapx, 48); + XFS_CHECK_STRUCT_SIZE(struct xfs_attrlist_cursor, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_attrlist, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_attrlist_ent, 4); + XFS_CHECK_STRUCT_SIZE(struct xfs_ag_geometry, 128); + XFS_CHECK_STRUCT_SIZE(struct xfs_rtgroup_geometry, 128); + XFS_CHECK_STRUCT_SIZE(struct xfs_error_injection, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_fsop_geom, 256); + XFS_CHECK_STRUCT_SIZE(struct xfs_fsop_geom_v4, 112); + XFS_CHECK_STRUCT_SIZE(struct xfs_fsop_counts, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_fsop_resblks, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_growfs_log, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_bulk_ireq, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_fs_eofblocks, 128); + XFS_CHECK_STRUCT_SIZE(struct xfs_fsid, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_scrub_metadata, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_scrub_vec, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_scrub_vec_head, 40); } #endif /* __XFS_ONDISK_H */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 38d16fe..47322ad 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c
@@ -1347,6 +1347,9 @@ xfs_log_sb( * feature was introduced. This counter can go negative due to the way * we handle nearly-lockless reservations, so we must use the _positive * variant here to avoid writing out nonsense frextents. + * + * RT groups are only supported on v5 file systems, which always + * have lazy SB counters. */ if (xfs_has_rtgroups(mp) && !xfs_has_zoned(mp)) { mp->m_sb.sb_frextents =
diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index 9dc55c9..23b80c5 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c
@@ -177,7 +177,7 @@ xrep_dir_teardown( rd->dir_names = NULL; if (rd->dir_entries) xfarray_destroy(rd->dir_entries); - rd->dir_names = NULL; + rd->dir_entries = NULL; } /* Set up for a directory repair. */
diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index 52a108f..33c6db6 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c
@@ -442,6 +442,11 @@ xrep_adoption_check_dcache( return 0; d_child = try_lookup_noperm(&qname, d_orphanage); + if (IS_ERR(d_child)) { + dput(d_orphanage); + return PTR_ERR(d_child); + } + if (d_child) { trace_xrep_adoption_check_child(sc->mp, d_child); @@ -479,7 +484,7 @@ xrep_adoption_zap_dcache( return; d_child = try_lookup_noperm(&qname, d_orphanage); - while (d_child != NULL) { + while (!IS_ERR_OR_NULL(d_child)) { trace_xrep_adoption_invalidate_child(sc->mp, d_child); ASSERT(d_is_negative(d_child));
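try_lookup_noperm() has three distinct outcomes, and the orphanage fixes above now handle all of them instead of treating an ERR_PTR as a usable dentry. The full pattern, sketched:

d_child = try_lookup_noperm(&qname, d_orphanage);
if (IS_ERR(d_child))		/* hard lookup failure */
	return PTR_ERR(d_child);
if (!d_child)			/* no dentry cached */
	return 0;
/* positive or negative dentry: inspect it, then dput(d_child) */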
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 1d25bd5..222812f 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c
@@ -171,8 +171,10 @@ xchk_quota_item( error = xchk_quota_item_bmap(sc, dq, offset); xchk_iunlock(sc, XFS_ILOCK_SHARED); - if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error)) + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error)) { + mutex_unlock(&dq->q_qlock); return error; + } /* * Warn if the hard limits are larger than the fs.
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 39ea651..286c5f5 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h
@@ -972,20 +972,12 @@ TRACE_EVENT(xfile_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char *path; - __entry->ino = file_inode(xf->file)->i_ino; - path = file_path(xf->file, __entry->pathname, MAXNAMELEN); - if (IS_ERR(path)) - strncpy(__entry->pathname, "(unknown)", - sizeof(__entry->pathname)); ), - TP_printk("xfino 0x%lx path '%s'", - __entry->ino, - __entry->pathname) + TP_printk("xfino 0x%lx", + __entry->ino) ); TRACE_EVENT(xfile_destroy,
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 9233199..a5b69c0 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c
@@ -140,7 +140,7 @@ xfs_attr3_node_inactive( xfs_daddr_t parent_blkno, child_blkno; struct xfs_buf *child_bp; struct xfs_da3_icnode_hdr ichdr; - int error, i; + int error; /* * Since this code is recursive (gasp!) we must protect ourselves. @@ -152,7 +152,7 @@ xfs_attr3_node_inactive( return -EFSCORRUPTED; } - xfs_da3_node_hdr_from_disk(dp->i_mount, &ichdr, bp->b_addr); + xfs_da3_node_hdr_from_disk(mp, &ichdr, bp->b_addr); parent_blkno = xfs_buf_daddr(bp); if (!ichdr.count) { xfs_trans_brelse(*trans, bp); @@ -167,7 +167,7 @@ xfs_attr3_node_inactive( * over the leaves removing all of them. If this is higher up * in the tree, recurse downward. */ - for (i = 0; i < ichdr.count; i++) { + while (ichdr.count > 0) { /* * Read the subsidiary block to see what we have to work with. * Don't do this in a transaction. This is a depth-first @@ -218,29 +218,32 @@ xfs_attr3_node_inactive( xfs_trans_binval(*trans, child_bp); child_bp = NULL; - /* - * If we're not done, re-read the parent to get the next - * child block number. - */ - if (i + 1 < ichdr.count) { - struct xfs_da3_icnode_hdr phdr; + error = xfs_da3_node_read_mapped(*trans, dp, + parent_blkno, &bp, XFS_ATTR_FORK); + if (error) + return error; - error = xfs_da3_node_read_mapped(*trans, dp, - parent_blkno, &bp, XFS_ATTR_FORK); + /* + * Remove the entry from the parent node so it cannot be indexed again. + */ + xfs_attr3_node_entry_remove(*trans, dp, bp, 0); + + xfs_da3_node_hdr_from_disk(mp, &ichdr, bp->b_addr); + bp = NULL; + + if (ichdr.count > 0) { + /* + * If we're not done, get the next child block number. + */ + child_fsb = be32_to_cpu(ichdr.btree[0].before); + + /* + * Atomically commit the whole invalidate stuff. + */ + error = xfs_trans_roll_inode(trans, dp); if (error) return error; - xfs_da3_node_hdr_from_disk(dp->i_mount, &phdr, - bp->b_addr); - child_fsb = be32_to_cpu(phdr.btree[i + 1].before); - xfs_trans_brelse(*trans, bp); - bp = NULL; } - /* - * Atomically commit the whole invalidate stuff. - */ - error = xfs_trans_roll_inode(trans, dp); - if (error) - return error; } return 0; @@ -257,10 +260,8 @@ xfs_attr3_root_inactive( struct xfs_trans **trans, struct xfs_inode *dp) { - struct xfs_mount *mp = dp->i_mount; struct xfs_da_blkinfo *info; struct xfs_buf *bp; - xfs_daddr_t blkno; int error; /* @@ -272,7 +273,6 @@ xfs_attr3_root_inactive( error = xfs_da3_node_read(*trans, dp, 0, &bp, XFS_ATTR_FORK); if (error) return error; - blkno = xfs_buf_daddr(bp); /* * Invalidate the tree, even if the "tree" is only a single leaf block. @@ -283,10 +283,26 @@ xfs_attr3_root_inactive( case cpu_to_be16(XFS_DA_NODE_MAGIC): case cpu_to_be16(XFS_DA3_NODE_MAGIC): error = xfs_attr3_node_inactive(trans, dp, bp, 1); + /* + * An empty root node block is not allowed, so convert it to a leaf. + */ + if (!error) + error = xfs_attr3_leaf_init(*trans, dp, 0); + if (!error) + error = xfs_trans_roll_inode(trans, dp); break; case cpu_to_be16(XFS_ATTR_LEAF_MAGIC): case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC): error = xfs_attr3_leaf_inactive(trans, dp, bp); + /* + * Reinit the leaf before truncating extents so that a crash + * mid-truncation leaves an empty leaf rather than one with + * entries that may reference freed remote value blocks. + */ + if (!error) + error = xfs_attr3_leaf_init(*trans, dp, 0); + if (!error) + error = xfs_trans_roll_inode(trans, dp); break; default: xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); @@ -295,21 +311,6 @@ xfs_attr3_root_inactive( xfs_trans_brelse(*trans, bp); break; } - if (error) - return error; - - /* - * Invalidate the incore copy of the root block. 
- */ - error = xfs_trans_get_buf(*trans, mp->m_ddev_targp, blkno, - XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0, &bp); - if (error) - return error; - xfs_trans_binval(*trans, bp); /* remove from cache */ - /* - * Commit the invalidate and start the next transaction. - */ - error = xfs_trans_roll_inode(trans, dp); return error; } @@ -328,6 +329,7 @@ xfs_attr_inactive( { struct xfs_trans *trans; struct xfs_mount *mp; + struct xfs_buf *bp; int lock_mode = XFS_ILOCK_SHARED; int error = 0; @@ -363,10 +365,27 @@ xfs_attr_inactive( * removal below. */ if (dp->i_af.if_nextents > 0) { + /* + * Invalidate and truncate all blocks but leave the root block. + */ error = xfs_attr3_root_inactive(&trans, dp); if (error) goto out_cancel; + error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, + XFS_FSB_TO_B(mp, mp->m_attr_geo->fsbcount)); + if (error) + goto out_cancel; + + /* + * Invalidate and truncate the root block and ensure that the + * operation is completed within a single transaction. + */ + error = xfs_da_get_buf(trans, dp, 0, &bp, XFS_ATTR_FORK); + if (error) + goto out_cancel; + + xfs_trans_binval(trans, bp); error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); if (error) goto out_cancel;
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 354472b..deab14f 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c
@@ -653,7 +653,6 @@ xfs_attri_recover_work( break; } if (error) { - xfs_irele(ip); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attrp, sizeof(*attrp)); return ERR_PTR(-EFSCORRUPTED); @@ -1047,8 +1046,8 @@ xlog_recover_attri_commit_pass2( break; case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: - /* Log item, attr name, attr value */ - if (item->ri_total != 3) { + /* Log item, attr name, optional attr value */ + if (item->ri_total != 2 + !!attri_formatp->alfi_value_len) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, len); return -EFSCORRUPTED; @@ -1132,52 +1131,6 @@ xlog_recover_attri_commit_pass2( return -EFSCORRUPTED; } - switch (op) { - case XFS_ATTRI_OP_FLAGS_REMOVE: - /* Regular remove operations operate only on names. */ - if (attr_value != NULL || value_len != 0) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - attri_formatp, len); - return -EFSCORRUPTED; - } - fallthrough; - case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: - case XFS_ATTRI_OP_FLAGS_PPTR_SET: - case XFS_ATTRI_OP_FLAGS_SET: - case XFS_ATTRI_OP_FLAGS_REPLACE: - /* - * Regular xattr set/remove/replace operations require a name - * and do not take a newname. Values are optional for set and - * replace. - * - * Name-value set/remove operations must have a name, do not - * take a newname, and can take a value. - */ - if (attr_name == NULL || name_len == 0) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - attri_formatp, len); - return -EFSCORRUPTED; - } - break; - case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: - /* - * Name-value replace operations require the caller to - * specify the old and new names and values explicitly. - * Values are optional. - */ - if (attr_name == NULL || name_len == 0) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - attri_formatp, len); - return -EFSCORRUPTED; - } - if (attr_new_name == NULL || new_name_len == 0) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - attri_formatp, len); - return -EFSCORRUPTED; - } - break; - } - /* * Memory alloc failure will cause replay to abort. We attach the * name/value buffer to the recovered incore log item and drop our
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index e8775f2..b237a25 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c
@@ -245,7 +245,7 @@ xfs_bmap_update_diff_items( struct xfs_bmap_intent *ba = bi_entry(a); struct xfs_bmap_intent *bb = bi_entry(b); - return ba->bi_owner->i_ino - bb->bi_owner->i_ino; + return cmp_int(ba->bi_owner->i_ino, bb->bi_owner->i_ino); } /* Log bmap updates in the intent item. */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 2b208e2c..69e9bc5 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c
@@ -1439,9 +1439,15 @@ xfs_qm_dqflush( return 0; out_abort: + /* + * Shut down the log before removing the dquot item from the AIL. + * Otherwise, the log tail may advance past this item's LSN while + * log writes are still in progress, making these unflushed changes + * unrecoverable on the next mount. + */ + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); dqp->q_flags &= ~XFS_DQFLAG_DIRTY; xfs_trans_ail_delete(lip, 0); - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); xfs_dqfunlock(dqp); return error; }
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 491e2a7..65a0e69 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c
@@ -125,6 +125,7 @@ xfs_qm_dquot_logitem_push( struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); struct xfs_dquot *dqp = qlip->qli_dquot; struct xfs_buf *bp; + struct xfs_ail *ailp = lip->li_ailp; uint rval = XFS_ITEM_SUCCESS; int error; @@ -153,7 +154,7 @@ xfs_qm_dquot_logitem_push( goto out_unlock; } - spin_unlock(&lip->li_ailp->ail_lock); + spin_unlock(&ailp->ail_lock); error = xfs_dquot_use_attached_buf(dqp, &bp); if (error == -EAGAIN) { @@ -172,9 +173,13 @@ xfs_qm_dquot_logitem_push( rval = XFS_ITEM_FLUSHING; } xfs_buf_relse(bp); + /* + * The buffer no longer protects the log item from reclaim, so + * do not reference lip after this point. + */ out_relock_ail: - spin_lock(&lip->li_ailp->ail_lock); + spin_lock(&ailp->ail_lock); out_unlock: mutex_unlock(&dqp->q_qlock); return rval;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 17255c4..8d64d90 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c
@@ -95,18 +95,17 @@ xfs_growfs_data_private( struct xfs_growfs_data *in) /* growfs data input struct */ { xfs_agnumber_t oagcount = mp->m_sb.sb_agcount; + xfs_rfsblock_t nb = in->newblocks; struct xfs_buf *bp; int error; xfs_agnumber_t nagcount; xfs_agnumber_t nagimax = 0; - xfs_rfsblock_t nb, nb_div, nb_mod; int64_t delta; bool lastag_extended = false; struct xfs_trans *tp; struct aghdr_init_data id = {}; struct xfs_perag *last_pag; - nb = in->newblocks; error = xfs_sb_validate_fsb_count(&mp->m_sb, nb); if (error) return error; @@ -125,20 +124,8 @@ xfs_growfs_data_private( mp->m_sb.sb_rextsize); if (error) return error; + xfs_growfs_compute_deltas(mp, nb, &delta, &nagcount); - nb_div = nb; - nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks); - if (nb_mod && nb_mod >= XFS_MIN_AG_BLOCKS) - nb_div++; - else if (nb_mod) - nb = nb_div * mp->m_sb.sb_agblocks; - - if (nb_div > XFS_MAX_AGNUMBER + 1) { - nb_div = XFS_MAX_AGNUMBER + 1; - nb = nb_div * mp->m_sb.sb_agblocks; - } - nagcount = nb_div; - delta = nb - mp->m_sb.sb_dblocks; /* * Reject filesystems with a single AG because they are not * supported, and reject a shrink operation that would cause a
diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c index d1291ca..2b8617a 100644 --- a/fs/xfs/xfs_handle.c +++ b/fs/xfs/xfs_handle.c
@@ -443,8 +443,8 @@ xfs_ioc_attr_list( context.dp = dp; context.resynch = 1; context.attr_filter = xfs_attr_filter(flags); - context.buffer = buffer; context.bufsize = round_down(bufsize, sizeof(uint32_t)); + context.buffer = buffer; context.firstu = context.bufsize; context.put_listent = xfs_ioc_attr_put_listent;
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index 1691237..239b843 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c
@@ -314,6 +314,22 @@ xfs_rgno_mark_sick( xfs_rtgroup_put(rtg); } +static inline void xfs_inode_report_fserror(struct xfs_inode *ip) +{ + /* + * Do not report inodes being constructed or freed, or metadata inodes, + * to fsnotify. + */ + if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIM) || + xfs_is_internal_inode(ip)) { + fserror_report_metadata(ip->i_mount->m_super, -EFSCORRUPTED, + GFP_NOFS); + return; + } + + fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS); +} + /* Mark the unhealthy parts of an inode. */ void xfs_inode_mark_sick( @@ -339,7 +355,7 @@ xfs_inode_mark_sick( inode_state_clear(VFS_I(ip), I_DONTCACHE); spin_unlock(&VFS_I(ip)->i_lock); - fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS); + xfs_inode_report_fserror(ip); if (mask) xfs_healthmon_report_inode(ip, XFS_HEALTHMON_SICK, old_mask, mask); @@ -371,7 +387,7 @@ xfs_inode_mark_corrupt( inode_state_clear(VFS_I(ip), I_DONTCACHE); spin_unlock(&VFS_I(ip)->i_lock); - fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS); + xfs_inode_report_fserror(ip); if (mask) xfs_healthmon_report_inode(ip, XFS_HEALTHMON_CORRUPT, old_mask, mask);
diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c index e37c18c..26c325d 100644 --- a/fs/xfs/xfs_healthmon.c +++ b/fs/xfs/xfs_healthmon.c
@@ -69,7 +69,7 @@ xfs_healthmon_get( struct xfs_healthmon *hm; rcu_read_lock(); - hm = mp->m_healthmon; + hm = rcu_dereference(mp->m_healthmon); if (hm && !refcount_inc_not_zero(&hm->ref)) hm = NULL; rcu_read_unlock(); @@ -110,13 +110,13 @@ xfs_healthmon_attach( struct xfs_healthmon *hm) { spin_lock(&xfs_healthmon_lock); - if (mp->m_healthmon != NULL) { + if (rcu_access_pointer(mp->m_healthmon) != NULL) { spin_unlock(&xfs_healthmon_lock); return -EEXIST; } refcount_inc(&hm->ref); - mp->m_healthmon = hm; + rcu_assign_pointer(mp->m_healthmon, hm); hm->mount_cookie = (uintptr_t)mp->m_super; spin_unlock(&xfs_healthmon_lock); @@ -128,16 +128,29 @@ STATIC void xfs_healthmon_detach( struct xfs_healthmon *hm) { + struct xfs_mount *mp; + spin_lock(&xfs_healthmon_lock); if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) { spin_unlock(&xfs_healthmon_lock); return; } - XFS_M((struct super_block *)hm->mount_cookie)->m_healthmon = NULL; + mp = XFS_M((struct super_block *)hm->mount_cookie); + rcu_assign_pointer(mp->m_healthmon, NULL); hm->mount_cookie = DETACHED_MOUNT_COOKIE; spin_unlock(&xfs_healthmon_lock); + /* + * Wake up any readers that might remain. This can happen if unmount + * races with the healthmon fd owner entering ->read_iter, having + * already emptied the event queue. + * + * In the ->release case there shouldn't be any readers because the + * only users of the waiter are read and poll. + */ + wake_up_all(&hm->wait); + trace_xfs_healthmon_detach(hm); xfs_healthmon_put(hm); } @@ -1024,13 +1037,6 @@ xfs_healthmon_release( * process can create another health monitor file. */ xfs_healthmon_detach(hm); - - /* - * Wake up any readers that might be left. There shouldn't be any - * because the only users of the waiter are read and poll. - */ - wake_up_all(&hm->wait); - xfs_healthmon_put(hm); return 0; }
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index dbaab4a..2040a92 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c
@@ -106,7 +106,7 @@ xfs_inode_alloc( mapping_set_folio_min_order(VFS_I(ip)->i_mapping, M_IGEO(mp)->min_folio_order); - XFS_STATS_INC(mp, vn_active); + XFS_STATS_INC(mp, xs_inodes_active); ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(ip->i_ino == 0); @@ -159,7 +159,6 @@ xfs_inode_free_callback( ASSERT(!test_bit(XFS_LI_IN_AIL, &ip->i_itemp->ili_item.li_flags)); xfs_inode_item_destroy(ip); - ip->i_itemp = NULL; } kmem_cache_free(xfs_inode_cache, ip); @@ -172,7 +171,10 @@ __xfs_inode_free( /* asserts to verify all state is correct here */ ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!ip->i_itemp || list_empty(&ip->i_itemp->ili_item.li_bio_list)); - XFS_STATS_DEC(ip->i_mount, vn_active); + if (xfs_is_metadir_inode(ip)) + XFS_STATS_DEC(ip->i_mount, xs_inodes_meta); + else + XFS_STATS_DEC(ip->i_mount, xs_inodes_active); call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); } @@ -636,6 +638,14 @@ xfs_iget_cache_miss( if (!ip) return -ENOMEM; + /* + * Set XFS_INEW as early as possible so that the health code won't pass + * the inode to the fserror code if the ondisk inode cannot be loaded. + * We're going to free the xfs_inode immediately if that happens, which + * would lead to UAF problems. + */ + xfs_iflags_set(ip, XFS_INEW); + error = xfs_imap(pag, tp, ip->i_ino, &ip->i_imap, flags); if (error) goto out_destroy; @@ -713,7 +723,6 @@ xfs_iget_cache_miss( ip->i_udquot = NULL; ip->i_gdquot = NULL; ip->i_pdquot = NULL; - xfs_iflags_set(ip, XFS_INEW); /* insert the new inode */ spin_lock(&pag->pag_ici_lock); @@ -2234,7 +2243,7 @@ xfs_inode_mark_reclaimable( struct xfs_mount *mp = ip->i_mount; bool need_inactive; - XFS_STATS_INC(mp, vn_reclaim); + XFS_STATS_INC(mp, xs_inode_mark_reclaimable); /* * We should never get here with any of the reclaim flags already set.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 50c0404f9..beaa26e 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c
@@ -1048,7 +1048,8 @@ xfs_itruncate_extents_flags( xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); if (icount_read(VFS_I(ip))) xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL); - ASSERT(new_size <= XFS_ISIZE(ip)); + if (whichfork == XFS_DATA_FORK) + ASSERT(new_size <= XFS_ISIZE(ip)); ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(ip->i_itemp != NULL); ASSERT(ip->i_itemp->ili_lock_flags == 0);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 8913036..4ae81ee 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c
@@ -746,6 +746,7 @@ xfs_inode_item_push( struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; struct xfs_buf *bp = lip->li_buf; + struct xfs_ail *ailp = lip->li_ailp; uint rval = XFS_ITEM_SUCCESS; int error; @@ -771,7 +772,7 @@ xfs_inode_item_push( if (!xfs_buf_trylock(bp)) return XFS_ITEM_LOCKED; - spin_unlock(&lip->li_ailp->ail_lock); + spin_unlock(&ailp->ail_lock); /* * We need to hold a reference for flushing the cluster buffer as it may @@ -795,7 +796,11 @@ xfs_inode_item_push( rval = XFS_ITEM_LOCKED; } - spin_lock(&lip->li_ailp->ail_lock); + /* + * The buffer no longer protects the log item from reclaim, so + * do not reference lip after this point. + */ + spin_lock(&ailp->ail_lock); return rval; }
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b96f262..f807f8f 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c
@@ -1357,6 +1357,8 @@ xlog_alloc_log( if (xfs_has_logv2(mp) && mp->m_sb.sb_logsunit > 1) log->l_iclog_roundoff = mp->m_sb.sb_logsunit; + else if (mp->m_sb.sb_logsectsize > 0) + log->l_iclog_roundoff = mp->m_sb.sb_logsectsize; else log->l_iclog_roundoff = BBSIZE;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 9c295ab..ef1ea8a 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c
@@ -608,8 +608,9 @@ xfs_unmount_check( * have been retrying in the background. This will prevent never-ending * retries in AIL pushing from hanging the unmount. * - * Finally, we can push the AIL to clean all the remaining dirty objects, then - * reclaim the remaining inodes that are still in memory at this point in time. + * Stop inodegc and background reclaim before pushing the AIL so that they + * are not running while the AIL is being flushed. Then push the AIL to + * clean all the remaining dirty objects and reclaim the remaining inodes. */ static void xfs_unmount_flush_inodes( @@ -621,9 +622,9 @@ xfs_unmount_flush_inodes( xfs_set_unmounting(mp); - xfs_ail_push_all_sync(mp->m_ail); xfs_inodegc_stop(mp); cancel_delayed_work_sync(&mp->m_reclaim_work); + xfs_ail_push_all_sync(mp->m_ail); xfs_reclaim_inodes(mp); xfs_health_unmount(mp); xfs_healthmon_unmount(mp);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 61c7112..ddd4028 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h
@@ -345,7 +345,7 @@ typedef struct xfs_mount { struct xfs_hooks m_dir_update_hooks; /* Private data referring to a health monitor object. */ - struct xfs_healthmon *m_healthmon; + struct xfs_healthmon __rcu *m_healthmon; } xfs_mount_t; #define M_IGEO(mp) (&(mp)->m_ino_geo)
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c index 6be19fa..64c8afb 100644 --- a/fs/xfs/xfs_notify_failure.c +++ b/fs/xfs/xfs_notify_failure.c
@@ -304,7 +304,7 @@ xfs_dax_notify_dev_failure( error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp); if (error) { - xfs_perag_put(pag); + xfs_perag_rele(pag); break; } @@ -340,7 +340,7 @@ xfs_dax_notify_dev_failure( if (rtg) xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); if (error) { - xfs_group_put(xg); + xfs_group_rele(xg); break; } }
diff --git a/fs/xfs/xfs_platform.h b/fs/xfs/xfs_platform.h index 1e59bf9..59a33c6 100644 --- a/fs/xfs/xfs_platform.h +++ b/fs/xfs/xfs_platform.h
@@ -235,6 +235,10 @@ int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count, #ifdef XFS_WARN +/* + * Note that this ASSERT does not kill the kernel by itself. It will, however, + * if the kernel has panic_on_warn set. + */ #define ASSERT(expr) \ (likely(expr) ? (void)0 : asswarn(NULL, #expr, __FILE__, __LINE__)) @@ -245,6 +249,11 @@ int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count, #endif /* XFS_WARN */ #endif /* DEBUG */ +/* + * Use this to catch metadata corruptions that are not caught by block or + * structure verifiers, since those verifiers only check for corruption within + * the scope of the object being verified. + */ #define XFS_IS_CORRUPT(mp, expr) \ (unlikely(expr) ? xfs_corruption_error(#expr, XFS_ERRLEVEL_LOW, (mp), \ NULL, 0, __FILE__, __LINE__, \
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 90a94a5..153f3c3 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c
@@ -112,6 +112,10 @@ xfs_rtcopy_summary( error = xfs_rtget_summary(oargs, log, bbno, &sum); if (error) goto out; + if (XFS_IS_CORRUPT(oargs->mp, sum < 0)) { + error = -EFSCORRUPTED; + goto out; + } if (sum == 0) continue; error = xfs_rtmodify_summary(oargs, log, bbno, -sum); @@ -120,7 +124,6 @@ xfs_rtcopy_summary( error = xfs_rtmodify_summary(nargs, log, bbno, sum); if (error) goto out; - ASSERT(sum > 0); } } error = 0; @@ -1047,6 +1050,15 @@ xfs_growfs_rt_bmblock( */ xfs_trans_resv_calc(mp, &mp->m_resv); + /* + * Sync the sb counters now to reflect the updated values. Lazy counters + * are not always kept up to date, so sync them here to avoid + * inconsistencies between frextents and rtextents. + */ + + if (xfs_has_lazysbcount(mp)) + xfs_log_sb(args.tp); + error = xfs_trans_commit(args.tp); if (error) goto out_free; @@ -1079,17 +1091,27 @@ xfs_last_rtgroup_extents( } /* - * Calculate the last rbmblock currently used. + * This will return the bitmap block number (0-based) that will be + * extended/modified. There are 2 cases here: + * 1. The size of the rtg is such that it is a multiple of + * xfs_rtbitmap_rtx_per_rbmblock(), i.e., an integral number of bitmap blocks + * are completely filled up. In this case, we should return + * 1 + (the last used bitmap block number). + * 2. The size of the rtg is not a multiple of xfs_rtbitmap_rtx_per_rbmblock(). + * Here we will return the block number of the last used bitmap block. In this + * case, we will modify the last used bitmap block to extend the size of the + * rtgroup. * * This also deals with the case where there were no rtextents before. */ static xfs_fileoff_t -xfs_last_rt_bmblock( +xfs_last_rt_bmblock_to_extend( struct xfs_rtgroup *rtg) { struct xfs_mount *mp = rtg_mount(rtg); xfs_rgnumber_t rgno = rtg_rgno(rtg); xfs_fileoff_t bmbno = 0; + unsigned int mod = 0; ASSERT(!mp->m_sb.sb_rgcount || rgno >= mp->m_sb.sb_rgcount - 1); @@ -1097,9 +1119,16 @@ xfs_last_rt_bmblock( xfs_rtxnum_t nrext = xfs_last_rtgroup_extents(mp); /* Also fill up the previous block if not entirely full. */ - bmbno = xfs_rtbitmap_blockcount_len(mp, nrext); - if (xfs_rtx_to_rbmword(mp, nrext) != 0) - bmbno--; + /* Subtract 1 to convert it to a 0-based index */ + bmbno = xfs_rtbitmap_blockcount_len(mp, nrext) - 1; + div_u64_rem(nrext, xfs_rtbitmap_rtx_per_rbmblock(mp), &mod); + /* + * mod = 0 means that all the current blocks are full. So + * return the next block number to be used for the rtgroup + * growth. + */ + if (mod == 0) + bmbno++; } return bmbno; @@ -1204,7 +1233,8 @@ xfs_growfs_rtg( goto out_rele; } - for (bmbno = xfs_last_rt_bmblock(rtg); bmbno < bmblocks; bmbno++) { + for (bmbno = xfs_last_rt_bmblock_to_extend(rtg); bmbno < bmblocks; + bmbno++) { error = xfs_growfs_rt_bmblock(rtg, nrblocks, rextsize, bmbno); if (error) goto out_error;
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index 017db03..c13d600 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c
@@ -42,7 +42,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) { "xstrat", xfsstats_offset(xs_write_calls) }, { "rw", xfsstats_offset(xs_attr_get) }, { "attr", xfsstats_offset(xs_iflush_count)}, - { "icluster", xfsstats_offset(vn_active) }, + { "icluster", xfsstats_offset(xs_inodes_active) }, { "vnodes", xfsstats_offset(xb_get) }, { "buf", xfsstats_offset(xs_abtb_2) }, { "abtb2", xfsstats_offset(xs_abtc_2) }, @@ -59,7 +59,8 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) { "rtrefcntbt", xfsstats_offset(xs_qm_dqreclaims)}, /* we print both series of quota information together */ { "qm", xfsstats_offset(xs_gc_read_calls)}, - { "zoned", xfsstats_offset(__pad1)}, + { "zoned", xfsstats_offset(xs_inodes_meta)}, + { "metafile", xfsstats_offset(xs_xstrat_bytes)}, }; /* Loop over all stats groups */ @@ -99,16 +100,20 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) void xfs_stats_clearall(struct xfsstats __percpu *stats) { + uint32_t xs_inodes_active, xs_inodes_meta; int c; - uint32_t vn_active; xfs_notice(NULL, "Clearing xfsstats"); for_each_possible_cpu(c) { preempt_disable(); - /* save vn_active, it's a universal truth! */ - vn_active = per_cpu_ptr(stats, c)->s.vn_active; + /* + * Save the active / meta inode counters, as they are stateful. + */ + xs_inodes_active = per_cpu_ptr(stats, c)->s.xs_inodes_active; + xs_inodes_meta = per_cpu_ptr(stats, c)->s.xs_inodes_meta; memset(per_cpu_ptr(stats, c), 0, sizeof(*stats)); - per_cpu_ptr(stats, c)->s.vn_active = vn_active; + per_cpu_ptr(stats, c)->s.xs_inodes_active = xs_inodes_active; + per_cpu_ptr(stats, c)->s.xs_inodes_meta = xs_inodes_meta; preempt_enable(); } }
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index 153d238..57c32b8 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h
@@ -100,14 +100,14 @@ struct __xfsstats { uint32_t xs_iflush_count; uint32_t xs_icluster_flushcnt; uint32_t xs_icluster_flushinode; - uint32_t vn_active; /* # vnodes not on free lists */ - uint32_t vn_alloc; /* # times vn_alloc called */ - uint32_t vn_get; /* # times vn_get called */ - uint32_t vn_hold; /* # times vn_hold called */ - uint32_t vn_rele; /* # times vn_rele called */ - uint32_t vn_reclaim; /* # times vn_reclaim called */ - uint32_t vn_remove; /* # times vn_remove called */ - uint32_t vn_free; /* # times vn_free called */ + uint32_t xs_inodes_active; + uint32_t __unused_vn_alloc; + uint32_t __unused_vn_get; + uint32_t __unused_vn_hold; + uint32_t xs_inode_destroy; + uint32_t xs_inode_destroy2; /* same as xs_inode_destroy */ + uint32_t xs_inode_mark_reclaimable; + uint32_t __unused_vn_free; uint32_t xb_get; uint32_t xb_create; uint32_t xb_get_locked; @@ -142,7 +142,8 @@ struct __xfsstats { uint32_t xs_gc_read_calls; uint32_t xs_gc_write_calls; uint32_t xs_gc_zone_reset_calls; - uint32_t __pad1; +/* Metafile counters */ + uint32_t xs_inodes_meta; /* Extra precision counters */ uint64_t xs_xstrat_bytes; uint64_t xs_write_bytes;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index abc45f8..f8de444 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c
@@ -712,8 +712,8 @@ xfs_fs_destroy_inode( trace_xfs_destroy_inode(ip); ASSERT(!rwsem_is_locked(&inode->i_rwsem)); - XFS_STATS_INC(ip->i_mount, vn_rele); - XFS_STATS_INC(ip->i_mount, vn_remove); + XFS_STATS_INC(ip->i_mount, xs_inode_destroy); + XFS_STATS_INC(ip->i_mount, xs_inode_destroy2); xfs_inode_mark_reclaimable(ip); }
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 813e5a9..5e8190f 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h
@@ -56,6 +56,7 @@ #include <linux/tracepoint.h> struct xfs_agf; +struct xfs_ail; struct xfs_alloc_arg; struct xfs_attr_list_context; struct xfs_buf_log_item; @@ -1650,16 +1651,43 @@ TRACE_EVENT(xfs_log_force, DEFINE_EVENT(xfs_log_item_class, name, \ TP_PROTO(struct xfs_log_item *lip), \ TP_ARGS(lip)) -DEFINE_LOG_ITEM_EVENT(xfs_ail_push); -DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned); -DEFINE_LOG_ITEM_EVENT(xfs_ail_locked); -DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin); DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort); DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort); +DECLARE_EVENT_CLASS(xfs_ail_push_class, + TP_PROTO(struct xfs_ail *ailp, uint type, unsigned long flags, xfs_lsn_t lsn), + TP_ARGS(ailp, type, flags, lsn), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(uint, type) + __field(unsigned long, flags) + __field(xfs_lsn_t, lsn) + ), + TP_fast_assign( + __entry->dev = ailp->ail_log->l_mp->m_super->s_dev; + __entry->type = type; + __entry->flags = flags; + __entry->lsn = lsn; + ), + TP_printk("dev %d:%d lsn %d/%d type %s flags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn), + __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), + __print_flags(__entry->flags, "|", XFS_LI_FLAGS)) +) + +#define DEFINE_AIL_PUSH_EVENT(name) \ +DEFINE_EVENT(xfs_ail_push_class, name, \ + TP_PROTO(struct xfs_ail *ailp, uint type, unsigned long flags, xfs_lsn_t lsn), \ + TP_ARGS(ailp, type, flags, lsn)) +DEFINE_AIL_PUSH_EVENT(xfs_ail_push); +DEFINE_AIL_PUSH_EVENT(xfs_ail_pinned); +DEFINE_AIL_PUSH_EVENT(xfs_ail_locked); +DEFINE_AIL_PUSH_EVENT(xfs_ail_flushing); + DECLARE_EVENT_CLASS(xfs_ail_class, TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), TP_ARGS(lip, old_lsn, new_lsn), @@ -5091,23 +5119,16 @@ TRACE_EVENT(xmbuf_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char *path; struct file *file = btp->bt_file; __entry->dev = btp->bt_mount->m_super->s_dev; __entry->ino = file_inode(file)->i_ino; - path = file_path(file, __entry->pathname, MAXNAMELEN); - if (IS_ERR(path)) - strncpy(__entry->pathname, "(unknown)", - sizeof(__entry->pathname)); ), - TP_printk("dev %d:%d xmino 0x%lx path '%s'", + TP_printk("dev %d:%d xmino 0x%lx", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->pathname) + __entry->ino) ); TRACE_EVENT(xmbuf_free,
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 923729a..99a9bf3 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c
@@ -365,6 +365,12 @@ xfsaild_resubmit_item( return XFS_ITEM_SUCCESS; } +/* + * Push a single log item from the AIL. + * + * @lip may have been released and freed by the time this function returns, + * so callers must not dereference the log item afterwards. + */ static inline uint xfsaild_push_item( struct xfs_ail *ailp, @@ -458,6 +464,74 @@ xfs_ail_calc_push_target( return target_lsn; } +static void +xfsaild_process_logitem( + struct xfs_ail *ailp, + struct xfs_log_item *lip, + int *stuck, + int *flushing) +{ + struct xfs_mount *mp = ailp->ail_log->l_mp; + uint type = lip->li_type; + unsigned long flags = lip->li_flags; + xfs_lsn_t item_lsn = lip->li_lsn; + int lock_result; + + /* + * Note that iop_push may unlock and reacquire the AIL lock. We + * rely on the AIL cursor implementation to be able to deal with + * the dropped lock. + * + * The log item may have been freed by the push, so it must not + * be accessed or dereferenced below this line. + */ + lock_result = xfsaild_push_item(ailp, lip); + switch (lock_result) { + case XFS_ITEM_SUCCESS: + XFS_STATS_INC(mp, xs_push_ail_success); + trace_xfs_ail_push(ailp, type, flags, item_lsn); + + ailp->ail_last_pushed_lsn = item_lsn; + break; + + case XFS_ITEM_FLUSHING: + /* + * The item or its backing buffer is already being + * flushed. The typical reason for that is that an + * inode buffer is locked because we already pushed the + * updates to it as part of inode clustering. + * + * We do not want to stop flushing just because lots + * of items are already being flushed, but we need to + * re-try the flushing relatively soon if most of the + * AIL is being flushed. + */ + XFS_STATS_INC(mp, xs_push_ail_flushing); + trace_xfs_ail_flushing(ailp, type, flags, item_lsn); + + (*flushing)++; + ailp->ail_last_pushed_lsn = item_lsn; + break; + + case XFS_ITEM_PINNED: + XFS_STATS_INC(mp, xs_push_ail_pinned); + trace_xfs_ail_pinned(ailp, type, flags, item_lsn); + + (*stuck)++; + ailp->ail_log_flush++; + break; + case XFS_ITEM_LOCKED: + XFS_STATS_INC(mp, xs_push_ail_locked); + trace_xfs_ail_locked(ailp, type, flags, item_lsn); + + (*stuck)++; + break; + default: + ASSERT(0); + break; + } +} + static long xfsaild_push( struct xfs_ail *ailp) @@ -505,62 +579,11 @@ xfsaild_push( lsn = lip->li_lsn; while ((XFS_LSN_CMP(lip->li_lsn, ailp->ail_target) <= 0)) { - int lock_result; if (test_bit(XFS_LI_FLUSHING, &lip->li_flags)) goto next_item; - /* - * Note that iop_push may unlock and reacquire the AIL lock. We - * rely on the AIL cursor implementation to be able to deal with - * the dropped lock. - */ - lock_result = xfsaild_push_item(ailp, lip); - switch (lock_result) { - case XFS_ITEM_SUCCESS: - XFS_STATS_INC(mp, xs_push_ail_success); - trace_xfs_ail_push(lip); - - ailp->ail_last_pushed_lsn = lsn; - break; - - case XFS_ITEM_FLUSHING: - /* - * The item or its backing buffer is already being - * flushed. The typical reason for that is that an - * inode buffer is locked because we already pushed the - * updates to it as part of inode clustering. - * - * We do not want to stop flushing just because lots - * of items are already being flushed, but we need to - * re-try the flushing relatively soon if most of the - * AIL is being flushed. 
- */ - XFS_STATS_INC(mp, xs_push_ail_flushing); - trace_xfs_ail_flushing(lip); - - flushing++; - ailp->ail_last_pushed_lsn = lsn; - break; - - case XFS_ITEM_PINNED: - XFS_STATS_INC(mp, xs_push_ail_pinned); - trace_xfs_ail_pinned(lip); - - stuck++; - ailp->ail_log_flush++; - break; - case XFS_ITEM_LOCKED: - XFS_STATS_INC(mp, xs_push_ail_locked); - trace_xfs_ail_locked(lip); - - stuck++; - break; - default: - ASSERT(0); - break; - } - + xfsaild_process_logitem(ailp, lip, &stuck, &flushing); count++; /*
diff --git a/fs/xfs/xfs_verify_media.c b/fs/xfs/xfs_verify_media.c index 069cd37..5ead397 100644 --- a/fs/xfs/xfs_verify_media.c +++ b/fs/xfs/xfs_verify_media.c
@@ -122,7 +122,7 @@ xfs_verify_report_losses( error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp); if (error) { - xfs_perag_put(pag); + xfs_perag_rele(pag); break; } @@ -158,7 +158,7 @@ xfs_verify_report_losses( if (rtg) xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); if (error) { - xfs_group_put(xg); + xfs_group_rele(xg); break; } } @@ -183,10 +183,9 @@ xfs_verify_iosize( min_not_zero(SZ_1M, me->me_max_io_size); BUILD_BUG_ON(BBSHIFT != SECTOR_SHIFT); - ASSERT(BBTOB(bbcount) >= bdev_logical_block_size(btp->bt_bdev)); + ASSERT(BBTOB(bbcount) >= btp->bt_logical_sectorsize); - return clamp(iosize, bdev_logical_block_size(btp->bt_bdev), - BBTOB(bbcount)); + return clamp(iosize, btp->bt_logical_sectorsize, BBTOB(bbcount)); } /* Allocate as much memory as we can get for verification buffer. */ @@ -218,8 +217,8 @@ xfs_verify_media_error( unsigned int bio_bbcount, blk_status_t bio_status) { - trace_xfs_verify_media_error(mp, me, btp->bt_bdev->bd_dev, daddr, - bio_bbcount, bio_status); + trace_xfs_verify_media_error(mp, me, btp->bt_dev, daddr, bio_bbcount, + bio_status); /* * Pass any error, I/O or otherwise, up to the caller if we didn't @@ -280,7 +279,7 @@ xfs_verify_media( btp = mp->m_ddev_targp; break; case XFS_DEV_LOG: - if (mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev) + if (mp->m_logdev_targp != mp->m_ddev_targp) btp = mp->m_logdev_targp; break; case XFS_DEV_RT: @@ -299,7 +298,7 @@ xfs_verify_media( /* start and end have to be aligned to the lba size */ if (!IS_ALIGNED(BBTOB(me->me_start_daddr | me->me_end_daddr), - bdev_logical_block_size(btp->bt_bdev))) + btp->bt_logical_sectorsize)) return -EINVAL; /* @@ -331,8 +330,7 @@ xfs_verify_media( if (!folio) return -ENOMEM; - trace_xfs_verify_media(mp, me, btp->bt_bdev->bd_dev, daddr, bbcount, - folio); + trace_xfs_verify_media(mp, me, btp->bt_dev, daddr, bbcount, folio); bio = bio_alloc(btp->bt_bdev, 1, REQ_OP_READ, GFP_KERNEL); if (!bio) { @@ -400,7 +398,7 @@ xfs_verify_media( * an operational error. */ me->me_start_daddr = daddr; - trace_xfs_verify_media_end(mp, me, btp->bt_bdev->bd_dev); + trace_xfs_verify_media_end(mp, me, btp->bt_dev); return 0; }
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index a735f16..5442130 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c
@@ -332,8 +332,8 @@ xfs_vn_listxattr( memset(&context, 0, sizeof(context)); context.dp = XFS_I(inode); context.resynch = 1; - context.buffer = size ? data : NULL; context.bufsize = size; + context.buffer = size ? data : NULL; context.firstu = context.bufsize; context.put_listent = xfs_xattr_put_listent;
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 67e0c8f..e3d19b6 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c
@@ -78,7 +78,7 @@ xfs_zone_account_reclaimable( struct xfs_rtgroup *rtg, uint32_t freed) { - struct xfs_group *xg = &rtg->rtg_group; + struct xfs_group *xg = rtg_group(rtg); struct xfs_mount *mp = rtg_mount(rtg); struct xfs_zone_info *zi = mp->m_zone_info; uint32_t used = rtg_rmap(rtg)->i_used_blocks; @@ -759,7 +759,7 @@ xfs_zone_alloc_blocks( trace_xfs_zone_alloc_blocks(oz, allocated, count_fsb); - *sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0); + *sector = xfs_gbno_to_daddr(rtg_group(rtg), 0); *is_seq = bdev_zone_is_seq(mp->m_rtdev_targp->bt_bdev, *sector); if (!*is_seq) *sector += XFS_FSB_TO_BB(mp, allocated); @@ -1080,7 +1080,7 @@ xfs_init_zone( if (write_pointer == 0) { /* zone is empty */ atomic_inc(&zi->zi_nr_free_zones); - xfs_group_set_mark(&rtg->rtg_group, XFS_RTG_FREE); + xfs_group_set_mark(rtg_group(rtg), XFS_RTG_FREE); iz->available += rtg_blocks(rtg); } else if (write_pointer < rtg_blocks(rtg)) { /* zone is open */
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 48c6cf5..309f700 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c
@@ -96,7 +96,6 @@ struct xfs_gc_bio { */ xfs_fsblock_t old_startblock; xfs_daddr_t new_daddr; - struct xfs_zone_scratch *scratch; /* Are we writing to a sequential write required zone? */ bool is_seq; @@ -627,7 +626,7 @@ xfs_zone_gc_alloc_blocks( if (!*count_fsb) return NULL; - *daddr = xfs_gbno_to_daddr(&oz->oz_rtg->rtg_group, 0); + *daddr = xfs_gbno_to_daddr(rtg_group(oz->oz_rtg), 0); *is_seq = bdev_zone_is_seq(mp->m_rtdev_targp->bt_bdev, *daddr); if (!*is_seq) *daddr += XFS_FSB_TO_BB(mp, oz->oz_allocated); @@ -702,7 +701,7 @@ xfs_zone_gc_start_chunk( chunk->data = data; chunk->oz = oz; chunk->victim_rtg = iter->victim_rtg; - atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref); + atomic_inc(&rtg_group(chunk->victim_rtg)->xg_active_ref); atomic_inc(&chunk->victim_rtg->rtg_gccount); bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock); @@ -779,7 +778,6 @@ xfs_zone_gc_split_write( ihold(VFS_I(chunk->ip)); split_chunk->ip = chunk->ip; split_chunk->is_seq = chunk->is_seq; - split_chunk->scratch = chunk->scratch; split_chunk->offset = chunk->offset; split_chunk->len = split_len; split_chunk->old_startblock = chunk->old_startblock; @@ -788,7 +786,7 @@ xfs_zone_gc_split_write( atomic_inc(&chunk->oz->oz_ref); split_chunk->victim_rtg = chunk->victim_rtg; - atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref); + atomic_inc(&rtg_group(chunk->victim_rtg)->xg_active_ref); atomic_inc(&chunk->victim_rtg->rtg_gccount); chunk->offset += split_len; @@ -888,7 +886,7 @@ xfs_zone_gc_finish_reset( goto out; } - xfs_group_set_mark(&rtg->rtg_group, XFS_RTG_FREE); + xfs_group_set_mark(rtg_group(rtg), XFS_RTG_FREE); atomic_inc(&zi->zi_nr_free_zones); xfs_zoned_add_available(mp, rtg_blocks(rtg)); @@ -917,7 +915,7 @@ xfs_submit_zone_reset_bio( XFS_STATS_INC(mp, xs_gc_zone_reset_calls); - bio->bi_iter.bi_sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0); + bio->bi_iter.bi_sector = xfs_gbno_to_daddr(rtg_group(rtg), 0); if (!bdev_zone_is_seq(bio->bi_bdev, bio->bi_iter.bi_sector)) { /* * Also use the bio to drive the state machine when neither
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index eeb070f..1e1580f 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h
@@ -848,12 +848,14 @@ /* Required sections not related to debugging. */ #define ELF_DETAILS \ - .modinfo : { *(.modinfo) . = ALIGN(8); } \ .comment 0 : { *(.comment) } \ .symtab 0 : { *(.symtab) } \ .strtab 0 : { *(.strtab) } \ .shstrtab 0 : { *(.shstrtab) } +#define MODINFO \ + .modinfo : { *(.modinfo) . = ALIGN(8); } + #ifdef CONFIG_GENERIC_BUG #define BUG_TABLE \ . = ALIGN(8); \
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 107b797..0cc8fa7 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h
@@ -230,9 +230,8 @@ static inline bool af_alg_readable(struct sock *sk) return PAGE_SIZE <= af_alg_rcvbuf(sk); } -unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset); -void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst, - size_t dst_offset); +unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes); +void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst); void af_alg_wmem_wakeup(struct sock *sk); int af_alg_wait_for_data(struct sock *sk, unsigned flags, unsigned min); int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h index e4eebab..8b15d3e 100644 --- a/include/drm/display/drm_dp.h +++ b/include/drm/display/drm_dp.h
@@ -571,6 +571,8 @@ # define DP_PANEL_REPLAY_LINK_OFF_SUPPORTED_IN_PR_AFTER_ADAPTIVE_SYNC_SDP (1 << 7) #define DP_PANEL_REPLAY_CAP_X_GRANULARITY 0xb2 +# define DP_PANEL_REPLAY_FULL_LINE_GRANULARITY 0xffff + #define DP_PANEL_REPLAY_CAP_Y_GRANULARITY 0xb4 /* Link Configuration */
diff --git a/include/dt-bindings/reset/spacemit,k3-resets.h b/include/dt-bindings/reset/spacemit,k3-resets.h index 79ac1c2..dc1ef00 100644 --- a/include/dt-bindings/reset/spacemit,k3-resets.h +++ b/include/dt-bindings/reset/spacemit,k3-resets.h
@@ -97,11 +97,11 @@ #define RESET_APMU_SDH0 13 #define RESET_APMU_SDH1 14 #define RESET_APMU_SDH2 15 -#define RESET_APMU_USB2 16 -#define RESET_APMU_USB3_PORTA 17 -#define RESET_APMU_USB3_PORTB 18 -#define RESET_APMU_USB3_PORTC 19 -#define RESET_APMU_USB3_PORTD 20 +#define RESET_APMU_USB2_AHB 16 +#define RESET_APMU_USB2_VCC 17 +#define RESET_APMU_USB2_PHY 18 +#define RESET_APMU_USB3_A_AHB 19 +#define RESET_APMU_USB3_A_VCC 20 #define RESET_APMU_QSPI 21 #define RESET_APMU_QSPI_BUS 22 #define RESET_APMU_DMA 23 @@ -132,8 +132,8 @@ #define RESET_APMU_CPU7_SW 48 #define RESET_APMU_C1_MPSUB_SW 49 #define RESET_APMU_MPSUB_DBG 50 -#define RESET_APMU_UCIE 51 -#define RESET_APMU_RCPU 52 +#define RESET_APMU_USB3_A_PHY 51 /* USB3 A */ +#define RESET_APMU_USB3_B_AHB 52 #define RESET_APMU_DSI4LN2_ESCCLK 53 #define RESET_APMU_DSI4LN2_LCD_SW 54 #define RESET_APMU_DSI4LN2_LCD_MCLK 55 @@ -143,16 +143,40 @@ #define RESET_APMU_UFS_ACLK 59 #define RESET_APMU_EDP0 60 #define RESET_APMU_EDP1 61 -#define RESET_APMU_PCIE_PORTA 62 -#define RESET_APMU_PCIE_PORTB 63 -#define RESET_APMU_PCIE_PORTC 64 -#define RESET_APMU_PCIE_PORTD 65 -#define RESET_APMU_PCIE_PORTE 66 +#define RESET_APMU_USB3_B_VCC 62 /* USB3 B */ +#define RESET_APMU_USB3_B_PHY 63 +#define RESET_APMU_USB3_C_AHB 64 +#define RESET_APMU_USB3_C_VCC 65 +#define RESET_APMU_USB3_C_PHY 66 #define RESET_APMU_EMAC0 67 #define RESET_APMU_EMAC1 68 #define RESET_APMU_EMAC2 69 #define RESET_APMU_ESPI_MCLK 70 #define RESET_APMU_ESPI_SCLK 71 +#define RESET_APMU_USB3_D_AHB 72 /* USB3 D */ +#define RESET_APMU_USB3_D_VCC 73 +#define RESET_APMU_USB3_D_PHY 74 +#define RESET_APMU_UCIE_IP 75 +#define RESET_APMU_UCIE_HOT 76 +#define RESET_APMU_UCIE_MON 77 +#define RESET_APMU_RCPU_AUDIO_SYS 78 +#define RESET_APMU_RCPU_MCU_CORE 79 +#define RESET_APMU_RCPU_AUDIO_APMU 80 +#define RESET_APMU_PCIE_A_DBI 81 +#define RESET_APMU_PCIE_A_SLAVE 82 +#define RESET_APMU_PCIE_A_MASTER 83 +#define RESET_APMU_PCIE_B_DBI 84 +#define RESET_APMU_PCIE_B_SLAVE 85 +#define RESET_APMU_PCIE_B_MASTER 86 +#define RESET_APMU_PCIE_C_DBI 87 +#define RESET_APMU_PCIE_C_SLAVE 88 +#define RESET_APMU_PCIE_C_MASTER 89 +#define RESET_APMU_PCIE_D_DBI 90 +#define RESET_APMU_PCIE_D_SLAVE 91 +#define RESET_APMU_PCIE_D_MASTER 92 +#define RESET_APMU_PCIE_E_DBI 93 +#define RESET_APMU_PCIE_E_SLAVE 94 +#define RESET_APMU_PCIE_E_MASTER 95 /* DCIU resets*/ #define RESET_DCIU_HDMA 0
diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index 056ef7b..f9600f8 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h
@@ -477,7 +477,6 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */ #define HVCALL_NOTIFY_PARTITION_EVENT 0x0087 #define HVCALL_ENTER_SLEEP_STATE 0x0084 #define HVCALL_NOTIFY_PORT_RING_EMPTY 0x008b -#define HVCALL_SCRUB_PARTITION 0x008d #define HVCALL_REGISTER_INTERCEPT_RESULT 0x0091 #define HVCALL_ASSERT_VIRTUAL_INTERRUPT 0x0094 #define HVCALL_CREATE_PORT 0x0095 @@ -1121,6 +1120,8 @@ enum hv_register_name { HV_X64_REGISTER_MSR_MTRR_FIX4KF8000 = 0x0008007A, HV_X64_REGISTER_REG_PAGE = 0x0009001C, +#elif defined(CONFIG_ARM64) + HV_ARM64_REGISTER_SINT_RESERVED_INTERRUPT_ID = 0x00070001, #endif }; @@ -1532,4 +1533,10 @@ struct hv_mmio_write_input { u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH]; } __packed; +enum hv_intercept_access_type { + HV_INTERCEPT_ACCESS_READ = 0, + HV_INTERCEPT_ACCESS_WRITE = 1, + HV_INTERCEPT_ACCESS_EXECUTE = 2 +}; + #endif /* _HV_HVGDK_MINI_H */
diff --git a/include/hyperv/hvhdk.h b/include/hyperv/hvhdk.h index 245f3db..5e83d371 100644 --- a/include/hyperv/hvhdk.h +++ b/include/hyperv/hvhdk.h
@@ -779,7 +779,7 @@ struct hv_x64_intercept_message_header { u32 vp_index; u8 instruction_length:4; u8 cr8:4; /* Only set for exo partitions */ - u8 intercept_access_type; + u8 intercept_access_type; /* enum hv_intercept_access_type */ union hv_x64_vp_execution_state execution_state; struct hv_x64_segment_register cs_segment; u64 rip; @@ -825,7 +825,7 @@ union hv_arm64_vp_execution_state { struct hv_arm64_intercept_message_header { u32 vp_index; u8 instruction_length; - u8 intercept_access_type; + u8 intercept_access_type; /* enum hv_intercept_access_type */ union hv_arm64_vp_execution_state execution_state; u64 pc; u64 cpsr;
diff --git a/include/kunit/run-in-irq-context.h b/include/kunit/run-in-irq-context.h index c89b1b1..bfe60d6 100644 --- a/include/kunit/run-in-irq-context.h +++ b/include/kunit/run-in-irq-context.h
@@ -12,16 +12,16 @@ #include <linux/hrtimer.h> #include <linux/workqueue.h> -#define KUNIT_IRQ_TEST_HRTIMER_INTERVAL us_to_ktime(5) - struct kunit_irq_test_state { bool (*func)(void *test_specific_state); void *test_specific_state; bool task_func_reported_failure; bool hardirq_func_reported_failure; bool softirq_func_reported_failure; + atomic_t task_func_calls; atomic_t hardirq_func_calls; atomic_t softirq_func_calls; + ktime_t interval; struct hrtimer timer; struct work_struct bh_work; }; @@ -30,14 +30,25 @@ static enum hrtimer_restart kunit_irq_test_timer_func(struct hrtimer *timer) { struct kunit_irq_test_state *state = container_of(timer, typeof(*state), timer); + int task_calls, hardirq_calls, softirq_calls; WARN_ON_ONCE(!in_hardirq()); - atomic_inc(&state->hardirq_func_calls); + task_calls = atomic_read(&state->task_func_calls); + hardirq_calls = atomic_inc_return(&state->hardirq_func_calls); + softirq_calls = atomic_read(&state->softirq_func_calls); + + /* + * If the timer is firing too often for the softirq or task to ever have + * a chance to run, increase the timer interval. This is needed on very + * slow systems. + */ + if (hardirq_calls >= 20 && (softirq_calls == 0 || task_calls == 0)) + state->interval = ktime_add_ns(state->interval, 250); if (!state->func(state->test_specific_state)) state->hardirq_func_reported_failure = true; - hrtimer_forward_now(&state->timer, KUNIT_IRQ_TEST_HRTIMER_INTERVAL); + hrtimer_forward_now(&state->timer, state->interval); queue_work(system_bh_wq, &state->bh_work); return HRTIMER_RESTART; } @@ -86,10 +97,14 @@ static inline void kunit_run_irq_test(struct kunit *test, bool (*func)(void *), struct kunit_irq_test_state state = { .func = func, .test_specific_state = test_specific_state, + /* + * Start with a 5us timer interval. If the system can't keep + * up, kunit_irq_test_timer_func() will increase it. + */ + .interval = us_to_ktime(5), }; unsigned long end_jiffies; - int hardirq_calls, softirq_calls; - bool allctx = false; + int task_calls, hardirq_calls, softirq_calls; /* * Set up a hrtimer (the way we access hardirq context) and a work @@ -104,21 +119,18 @@ static inline void kunit_run_irq_test(struct kunit *test, bool (*func)(void *), * and hardirq), or 1 second, whichever comes first. */ end_jiffies = jiffies + HZ; - hrtimer_start(&state.timer, KUNIT_IRQ_TEST_HRTIMER_INTERVAL, - HRTIMER_MODE_REL_HARD); - for (int task_calls = 0, calls = 0; - ((calls < max_iterations) || !allctx) && - !time_after(jiffies, end_jiffies); - task_calls++) { + hrtimer_start(&state.timer, state.interval, HRTIMER_MODE_REL_HARD); + do { if (!func(test_specific_state)) state.task_func_reported_failure = true; + task_calls = atomic_inc_return(&state.task_func_calls); hardirq_calls = atomic_read(&state.hardirq_func_calls); softirq_calls = atomic_read(&state.softirq_func_calls); - calls = task_calls + hardirq_calls + softirq_calls; - allctx = (task_calls > 0) && (hardirq_calls > 0) && - (softirq_calls > 0); - } + } while ((task_calls + hardirq_calls + softirq_calls < max_iterations || + (task_calls == 0 || hardirq_calls == 0 || + softirq_calls == 0)) && + !time_after(jiffies, end_jiffies)); /* Cancel the timer and work. */ hrtimer_cancel(&state.timer);
diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h index 407f700..8bcb9b72 100644 --- a/include/linux/auxvec.h +++ b/include/linux/auxvec.h
@@ -4,6 +4,6 @@ #include <uapi/linux/auxvec.h> -#define AT_VECTOR_SIZE_BASE 22 /* NEW_AUX_ENT entries in auxiliary table */ +#define AT_VECTOR_SIZE_BASE 24 /* NEW_AUX_ENT entries in auxiliary table */ /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */ #endif /* _LINUX_AUXVEC_H */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b78b531..35b1e25 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h
@@ -124,7 +124,7 @@ struct bpf_map_ops { u32 (*map_fd_sys_lookup_elem)(void *ptr); void (*map_seq_show_elem)(struct bpf_map *map, void *key, struct seq_file *m); - int (*map_check_btf)(const struct bpf_map *map, + int (*map_check_btf)(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type); @@ -656,7 +656,7 @@ static inline bool bpf_map_support_seq_show(const struct bpf_map *map) map->ops->map_seq_show_elem; } -int map_check_no_btf(const struct bpf_map *map, +int map_check_no_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type); @@ -1854,6 +1854,10 @@ struct bpf_link_ops { * target hook is sleepable, we'll go through tasks trace RCU GP and * then "classic" RCU GP; this need for chaining tasks trace and * classic RCU GPs is designated by setting bpf_link->sleepable flag + * + * For non-sleepable tracepoint links we go through an SRCU GP instead, + * since RCU is not used in that case. Sleepable tracepoints still + * follow the scheme above. */ void (*dealloc_deferred)(struct bpf_link *link); int (*detach)(struct bpf_link *link);
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 85efa97..8157e8d 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h
@@ -176,7 +176,7 @@ u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage); void bpf_local_storage_map_free(struct bpf_map *map, struct bpf_local_storage_cache *cache); -int bpf_local_storage_map_check_btf(const struct bpf_map *map, +int bpf_local_storage_map_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type);
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h index e45162e..4ce0d27 100644 --- a/include/linux/bpf_mem_alloc.h +++ b/include/linux/bpf_mem_alloc.h
@@ -14,6 +14,8 @@ struct bpf_mem_alloc { struct obj_cgroup *objcg; bool percpu; struct work_struct work; + void (*dtor_ctx_free)(void *ctx); + void *dtor_ctx; }; /* 'size != 0' is for bpf_mem_alloc which manages fixed-size objects. @@ -32,6 +34,10 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg /* The percpu allocation with a specific unit size. */ int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size); void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma); +void bpf_mem_alloc_set_dtor(struct bpf_mem_alloc *ma, + void (*dtor)(void *obj, void *ctx), + void (*dtor_ctx_free)(void *ctx), + void *ctx); /* Check the allocation size for kmalloc equivalent allocator */ int bpf_mem_alloc_check_size(bool percpu, size_t size);
diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index 2cfbb4c..d3dc5dc 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h
@@ -32,7 +32,8 @@ /** * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied * error message. - * @condition: the condition which the compiler should know is false. + * @cond: the condition which the compiler should know is false. + * @msg: build-time error message * * See BUILD_BUG_ON for description. */ @@ -60,6 +61,7 @@ /** * static_assert - check integer constant expression at build time + * @expr: expression to be checked * * static_assert() is a wrapper for the C11 _Static_assert, with a * little macro magic to make the message optional (defaulting to the
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index bb92f5c..7f87399 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h
@@ -609,6 +609,9 @@ struct cgroup { /* used to wait for offlining of csses */ wait_queue_head_t offline_waitq; + /* used by cgroup_rmdir() to wait for dying tasks to leave */ + wait_queue_head_t dying_populated_waitq; + /* used to schedule release agent */ struct work_struct release_agent_work;
diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 13b3563..d5ca855 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h
@@ -160,6 +160,7 @@ struct vc_data { struct uni_pagedict **uni_pagedict_loc; /* [!] Location of uni_pagedict variable for this console */ u32 **vc_uni_lines; /* unicode screen content */ u16 *vc_saved_screen; + u32 **vc_saved_uni_lines; unsigned int vc_saved_cols; unsigned int vc_saved_rows; /* additional information is in vt_kern.h */
diff --git a/include/linux/damon.h b/include/linux/damon.h index a4fea23..be3d198 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h
@@ -810,6 +810,12 @@ struct damon_ctx { struct damos_walk_control *walk_control; struct mutex walk_control_lock; + /* + * Indicates whether this context may be corrupted. Currently this is + * set only for damon_commit_ctx() failure. + */ + bool maybe_corrupted; + /* Working thread of the given DAMON context */ struct task_struct *kdamond; /* Protects @kdamond field access */
diff --git a/include/linux/device.h b/include/linux/device.h index 0be9529..e65d564f 100644 --- a/include/linux/device.h +++ b/include/linux/device.h
@@ -483,6 +483,8 @@ struct device_physical_location { * on. This shrinks the "Board Support Packages" (BSPs) and * minimizes board-specific #ifdefs in drivers. * @driver_data: Private pointer for driver specific info. + * @driver_override: Driver name to force a match. Do not touch directly; use + * device_set_driver_override() instead. * @links: Links to suppliers and consumers of this device. * @power: For device power management. * See Documentation/driver-api/pm/devices.rst for details. @@ -576,6 +578,10 @@ struct device { core doesn't touch it */ void *driver_data; /* Driver data, set and get with dev_set_drvdata/dev_get_drvdata */ + struct { + const char *name; + spinlock_t lock; + } driver_override; struct mutex mutex; /* mutex to synchronize calls to * its driver. */ @@ -701,6 +707,54 @@ struct device_link { #define kobj_to_dev(__kobj) container_of_const(__kobj, struct device, kobj) +int __device_set_driver_override(struct device *dev, const char *s, size_t len); + +/** + * device_set_driver_override() - Helper to set or clear driver override. + * @dev: Device to change + * @s: NUL-terminated string, new driver name to force a match, pass empty + * string to clear it ("" or "\n", where the latter is only for sysfs + * interface). + * + * Helper to set or clear driver override of a device. + * + * Returns: 0 on success or a negative error code on failure. + */ +static inline int device_set_driver_override(struct device *dev, const char *s) +{ + return __device_set_driver_override(dev, s, s ? strlen(s) : 0); +} + +/** + * device_has_driver_override() - Check if a driver override has been set. + * @dev: device to check + * + * Returns true if a driver override has been set for this device. + */ +static inline bool device_has_driver_override(struct device *dev) +{ + guard(spinlock)(&dev->driver_override.lock); + return !!dev->driver_override.name; +} + +/** + * device_match_driver_override() - Match a driver against the device's driver_override. + * @dev: device to check + * @drv: driver to match against + * + * Returns > 0 if a driver override is set and matches the given driver, 0 if a + * driver override is set but does not match, or < 0 if a driver override is not + * set at all. + */ +static inline int device_match_driver_override(struct device *dev, + const struct device_driver *drv) +{ + guard(spinlock)(&dev->driver_override.lock); + if (dev->driver_override.name) + return !strcmp(dev->driver_override.name, drv->name); + return -1; +} + /** * device_iommu_mapped - Returns true when the device DMA is translated * by an IOMMU
diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 99c3c83..c1b463c 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h
@@ -35,6 +35,8 @@ struct fwnode_handle; * otherwise. It may also return error code if determining that * the driver supports the device is not possible. In case of * -EPROBE_DEFER it will queue the device for deferred probing. + * Note: This callback may be invoked with or without the device + * lock held. * @uevent: Called when a device is added, removed, or a few other things * that generate uevents to add the environment variables. * @probe: Called when a new device or driver add to this bus, and callback @@ -63,6 +65,9 @@ struct fwnode_handle; * this bus. * @pm: Power management operations of this bus, callback the specific * device driver's pm-ops. + * @driver_override: Set to true if this bus supports the driver_override + * mechanism, which allows userspace to force a specific + * driver to bind to a device via a sysfs attribute. * @need_parent_lock: When probing or removing a device on this bus, the * device core should lock the device's parent. * @@ -104,6 +109,7 @@ struct bus_type { const struct dev_pm_ops *pm; + bool driver_override; bool need_parent_lock; };
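Taken together, the device.h and bus.h hunks above move the driver_override machinery into the driver core: the string and its lock now live in struct device, and a bus opts in by setting .driver_override = true. A minimal sketch of how a bus match() callback might consume the new helpers; the bus name and the ID-table fallback are hypothetical:

/*
 * Hypothetical bus match() built on the helpers above. With an override
 * set, only the named driver may bind; otherwise fall back to ID matching.
 */
static int mybus_match(struct device *dev, const struct device_driver *drv)
{
	int ret = device_match_driver_override(dev, drv);

	if (ret >= 0)		/* override set: 1 = forced match, 0 = reject */
		return ret;

	return mybus_id_match(dev, drv);	/* hypothetical ID-table lookup */
}

static const struct bus_type mybus_bus_type = {
	.name			= "mybus",
	.match			= mybus_match,
	.driver_override	= true,	/* expose driver_override in sysfs */
};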
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 29973ba..99ef042 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h
@@ -80,11 +80,18 @@ #define DMA_ATTR_MMIO (1UL << 10) /* - * DMA_ATTR_CPU_CACHE_CLEAN: Indicates the CPU will not dirty any cacheline - * overlapping this buffer while it is mapped for DMA. All mappings sharing - * a cacheline must have this attribute for this to be considered safe. + * DMA_ATTR_DEBUGGING_IGNORE_CACHELINES: Indicates that mappings may overlap + * within a CPU cacheline. All mappings sharing a cacheline must have this + * attribute for this to be considered safe. */ -#define DMA_ATTR_CPU_CACHE_CLEAN (1UL << 11) +#define DMA_ATTR_DEBUGGING_IGNORE_CACHELINES (1UL << 11) + +/* + * DMA_ATTR_REQUIRE_COHERENT: Indicates that DMA coherency is required. + * Mappings that carry this attribute cannot be bounced through SWIOTLB or + * rely on CPU cache flushing. + */ +#define DMA_ATTR_REQUIRE_COHERENT (1UL << 12) /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can @@ -248,8 +255,8 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size, { return NULL; } -static void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_handle, unsigned long attrs) +static inline void dma_free_attrs(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) { } static inline void *dmam_alloc_attrs(struct device *dev, size_t size,
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 9a1eacf..df8f88f 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h
@@ -42,7 +42,8 @@ extern const struct header_ops eth_header_ops; int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned len); -int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); +int eth_header_parse(const struct sk_buff *skb, const struct net_device *dev, + unsigned char *haddr); int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev,
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index ccb478e..ea9ca0e 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h
@@ -82,11 +82,14 @@ static inline struct epoll_event __user * epoll_put_uevent(__poll_t revents, __u64 data, struct epoll_event __user *uevent) { - if (__put_user(revents, &uevent->events) || - __put_user(data, &uevent->data)) - return NULL; - + scoped_user_write_access_size(uevent, sizeof(*uevent), efault) { + unsafe_put_user(revents, &uevent->events, efault); + unsafe_put_user(data, &uevent->data, efault); + } return uevent+1; + +efault: + return NULL; } #endif
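The epoll hunk above replaces two standalone __put_user() calls, each of which opens and closes its own user-access window, with one scoped section covering both stores. The same pattern generalizes to any small copy-out; a sketch assuming the scoped_user_write_access_size()/unsafe_put_user() helpers used above, with a made-up record type:

/* Hypothetical two-field copy-out using a single user-access window. */
struct krec { u32 flags; u64 cookie; };

static struct krec __user *put_krec(u32 flags, u64 cookie,
				    struct krec __user *urec)
{
	scoped_user_write_access_size(urec, sizeof(*urec), efault) {
		unsafe_put_user(flags, &urec->flags, efault);
		unsafe_put_user(cookie, &urec->cookie, efault);
	}
	return urec + 1;

efault:
	return NULL;
}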
diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index d290060..9101316 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h
@@ -68,12 +68,12 @@ * timeout value used in Stratix10 FPGA manager driver. * timeout value used in RSU driver */ -#define SVC_RECONFIG_REQUEST_TIMEOUT_MS 300 -#define SVC_RECONFIG_BUFFER_TIMEOUT_MS 720 -#define SVC_RSU_REQUEST_TIMEOUT_MS 300 +#define SVC_RECONFIG_REQUEST_TIMEOUT_MS 5000 +#define SVC_RECONFIG_BUFFER_TIMEOUT_MS 5000 +#define SVC_RSU_REQUEST_TIMEOUT_MS 2000 #define SVC_FCS_REQUEST_TIMEOUT_MS 2000 #define SVC_COMPLETED_TIMEOUT_MS 30000 -#define SVC_HWMON_REQUEST_TIMEOUT_MS 300 +#define SVC_HWMON_REQUEST_TIMEOUT_MS 2000 struct stratix10_svc_chan;
diff --git a/include/linux/fs/super_types.h b/include/linux/fs/super_types.h index fa7638b..383050e 100644 --- a/include/linux/fs/super_types.h +++ b/include/linux/fs/super_types.h
@@ -338,5 +338,6 @@ struct super_block { #define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ #define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */ #define SB_I_ALLOW_HSM 0x00004000 /* Allow HSM events on this superblock */ +#define SB_I_NO_DATA_INTEGRITY 0x00008000 /* fs cannot guarantee data persistence on sync */ #endif /* _LINUX_FS_SUPER_TYPES_H */
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 28a9cb1..079c18b 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h
@@ -495,19 +495,6 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) fsnotify_dentry(dentry, mask); } -static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode, - int error) -{ - struct fs_error_report report = { - .error = error, - .inode = inode, - .sb = sb, - }; - - return fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, - NULL, NULL, NULL, 0); -} - static inline void fsnotify_mnt_attach(struct mnt_namespace *ns, struct vfsmount *mnt) { fsnotify_mnt(FS_MNT_ATTACH, ns, mnt);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2b30a05..51ef13e 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h
@@ -14,8 +14,8 @@ struct vm_area_struct; struct mempolicy; /* Helper macro to avoid gfp flags if they are the default one */ -#define __default_gfp(a,...) a -#define default_gfp(...) __default_gfp(__VA_ARGS__ __VA_OPT__(,) GFP_KERNEL) +#define __default_gfp(a,b,...) b +#define default_gfp(...) __default_gfp(,##__VA_ARGS__,GFP_KERNEL) /* Convert GFP flags to their corresponding migrate type */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) @@ -339,8 +339,11 @@ static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int orde { return folio_alloc_noprof(gfp, order); } -#define vma_alloc_folio_noprof(gfp, order, vma, addr) \ - folio_alloc_noprof(gfp, order) +static inline struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, + struct vm_area_struct *vma, unsigned long addr) +{ + return folio_alloc_noprof(gfp, order); +} #endif #define alloc_pages(...) alloc_hooks(alloc_pages_noprof(__VA_ARGS__))
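The reworked default_gfp() is the usual GNU comma-deletion trick for an optional macro argument: with no caller argument, ,##__VA_ARGS__ swallows the leading comma and GFP_KERNEL lands in the second parameter slot; with one argument, the caller's flags take that slot. A sketch tracing the expansion; my_alloc() is a hypothetical wrapper:

/* Hypothetical wrapper with an optional gfp argument. */
#define my_alloc(...)	kmalloc(64, default_gfp(__VA_ARGS__))

/*
 * my_alloc()           -> __default_gfp(, GFP_KERNEL)             -> GFP_KERNEL
 * my_alloc(GFP_ATOMIC) -> __default_gfp(, GFP_ATOMIC, GFP_KERNEL) -> GFP_ATOMIC
 */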
diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 814bb28..6c75df3 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h
@@ -139,6 +139,8 @@ enum { * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. * * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension. + * mark_obj_codetag_empty() should be called upon freeing for objects allocated + * with this flag to indicate that their NULL tags are expected and normal. */ #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE)
diff --git a/include/linux/gpio/gpio-nomadik.h b/include/linux/gpio/gpio-nomadik.h index 592a774..8061b98 100644 --- a/include/linux/gpio/gpio-nomadik.h +++ b/include/linux/gpio/gpio-nomadik.h
@@ -114,8 +114,7 @@ struct nmk_gpio_chip { } /** - * enum prcm_gpiocr_reg_index - * Used to reference an PRCM GPIOCR register address. + * enum prcm_gpiocr_reg_index - Used to reference a PRCM GPIOCR register address. */ enum prcm_gpiocr_reg_index { PRCM_IDX_GPIOCR1, @@ -123,8 +122,7 @@ enum prcm_gpiocr_reg_index { PRCM_IDX_GPIOCR3 }; /** - * enum prcm_gpiocr_altcx_index - * Used to reference an Other alternate-C function. + * enum prcm_gpiocr_altcx_index - Used to reference an Other alternate-C function. */ enum prcm_gpiocr_altcx_index { PRCM_IDX_GPIOCR_ALTC1, @@ -135,7 +133,7 @@ enum prcm_gpiocr_altcx_index { }; /** - * struct prcm_gpio_altcx - Other alternate-C function + * struct prcm_gpiocr_altcx - Other alternate-C function * @used: other alternate-C function availability * @reg_index: PRCM GPIOCR register index used to control the function * @control_bit: PRCM GPIOCR bit used to control the function @@ -147,7 +145,7 @@ struct prcm_gpiocr_altcx { } __packed; /** - * struct prcm_gpio_altcx_pin_desc - Other alternate-C pin + * struct prcm_gpiocr_altcx_pin_desc - Other alternate-C pin * @pin: The pin number * @altcx: array of other alternate-C[1-4] functions */ @@ -193,7 +191,7 @@ struct nmk_pingroup { * numbering. * @npins: The number of entries in @pins. * @functions: The functions supported on this SoC. - * @nfunction: The number of entries in @functions. + * @nfunctions: The number of entries in @functions. * @groups: An array describing all pin groups the pin SoC supports. * @ngroups: The number of entries in @groups. * @altcx_pins: The pins that support Other alternate-C function on this SoC
diff --git a/include/linux/hid.h b/include/linux/hid.h index dce862c..3132460 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h
@@ -682,6 +682,7 @@ struct hid_device { __s32 battery_charge_status; enum hid_battery_status battery_status; bool battery_avoid_query; + bool battery_present; ktime_t battery_ratelimit_time; #endif @@ -836,6 +837,12 @@ struct hid_usage_id { * raw_event and event should return negative on error, any other value will * pass the event on to .event() typically return 0 for success. * + * report_fixup must return a report descriptor pointer whose lifetime is at + * least that of the input rdesc. This is usually done by mutating the input + * rdesc and returning it or a sub-portion of it. In case a new buffer is + * allocated and returned, the implementation of report_fixup is responsible for + * freeing it later. + * * input_mapping shall return a negative value to completely ignore this usage * (e.g. doubled or invalid usage), zero to continue with parsing of this * usage by generic code (no special handling needed) or positive to skip
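The report_fixup lifetime rule documented above is easiest to honor by patching the descriptor in place and returning the same pointer. A minimal sketch; the offset and values are invented, and the const-returning signature follows the form current drivers use:

/* Hypothetical fixup: patch a byte in place, so no buffer outlives rdesc. */
static const __u8 *my_report_fixup(struct hid_device *hdev, __u8 *rdesc,
				   unsigned int *rsize)
{
	if (*rsize >= 4 && rdesc[3] == 0x01) {
		hid_info(hdev, "fixing up report descriptor\n");
		rdesc[3] = 0x02;
	}
	return rdesc;		/* no allocation, nothing to free later */
}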
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 61b7335..ca9afa8 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h
@@ -40,7 +40,8 @@ static inline struct ethhdr *inner_eth_hdr(const struct sk_buff *skb) return (struct ethhdr *)skb_inner_mac_header(skb); } -int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); +int eth_header_parse(const struct sk_buff *skb, const struct net_device *dev, + unsigned char *haddr); extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len);
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index a9ecff1..2c91b76 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h
@@ -931,6 +931,18 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) #define IIO_DECLARE_DMA_BUFFER_WITH_TS(type, name, count) \ __IIO_DECLARE_BUFFER_WITH_TS(type, name, count) __aligned(IIO_DMA_MINALIGN) +/** + * IIO_DECLARE_QUATERNION() - Declare a quaternion element + * @type: element type of the individual vectors + * @name: identifier name + * + * Quaternions are a vector composed of 4 elements (W, X, Y, Z). Use this macro + * to declare a quaternion element in a struct to ensure proper alignment in + * an IIO buffer. + */ +#define IIO_DECLARE_QUATERNION(type, name) \ + type name[4] __aligned(sizeof(type) * 4) + struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv); /* The information at the returned address is guaranteed to be cacheline aligned */
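The 4 * sizeof(type) alignment from IIO_DECLARE_QUATERNION() keeps the vector naturally aligned inside a scan buffer, in the same spirit as the buffer-with-timestamp macros above it. A sketch of a scan layout for a hypothetical rotation sensor:

/* Hypothetical scan element: s16 quaternion (8-byte aligned) + timestamp. */
struct rot_scan {
	IIO_DECLARE_QUATERNION(s16, rot);	/* W, X, Y, Z */
	s64 timestamp __aligned(8);
};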
diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index 35227d4..dc272b5 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h
@@ -16,22 +16,26 @@ */ #define INDIRECT_CALL_1(f, f1, ...) \ ({ \ - likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__); \ + typeof(f) __f1 = (f); \ + likely(__f1 == f1) ? f1(__VA_ARGS__) : __f1(__VA_ARGS__); \ }) #define INDIRECT_CALL_2(f, f2, f1, ...) \ ({ \ - likely(f == f2) ? f2(__VA_ARGS__) : \ - INDIRECT_CALL_1(f, f1, __VA_ARGS__); \ + typeof(f) __f2 = (f); \ + likely(__f2 == f2) ? f2(__VA_ARGS__) : \ + INDIRECT_CALL_1(__f2, f1, __VA_ARGS__); \ }) #define INDIRECT_CALL_3(f, f3, f2, f1, ...) \ ({ \ - likely(f == f3) ? f3(__VA_ARGS__) : \ - INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__); \ + typeof(f) __f3 = (f); \ + likely(__f3 == f3) ? f3(__VA_ARGS__) : \ + INDIRECT_CALL_2(__f3, f2, f1, __VA_ARGS__); \ }) #define INDIRECT_CALL_4(f, f4, f3, f2, f1, ...) \ ({ \ - likely(f == f4) ? f4(__VA_ARGS__) : \ - INDIRECT_CALL_3(f, f3, f2, f1, __VA_ARGS__); \ + typeof(f) __f4 = (f); \ + likely(__f4 == f4) ? f4(__VA_ARGS__) : \ + INDIRECT_CALL_3(__f4, f3, f2, f1, __VA_ARGS__); \ }) #define INDIRECT_CALLABLE_DECLARE(f) f
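The INDIRECT_CALL_n() rewrite reads the function pointer exactly once, so a pointer that changes concurrently can no longer be compared against one value and then called through another. Call sites are unchanged; a sketch with hypothetical ops and handlers:

/*
 * One load of ops->complete is compared against both likely targets and,
 * on a miss, called indirectly -- always through the same loaded value.
 */
static int dispatch_complete(const struct my_ops *ops, struct sk_buff *skb)
{
	return INDIRECT_CALL_2(ops->complete,
			       complete_v6, complete_v4, skb);
}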
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 7a15160..e19872e3 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h
@@ -53,7 +53,7 @@ struct iommu_flush_ops { * tables. * @ias: Input address (iova) size, in bits. * @oas: Output address (paddr) size, in bits. - * @coherent_walk A flag to indicate whether or not page table walks made + * @coherent_walk: A flag to indicate whether or not page table walks made * by the IOMMU are coherent with the CPU caches. * @tlb: TLB management callbacks for this set of tables. * @iommu_dev: The device representing the DMA configuration for the @@ -136,6 +136,7 @@ struct io_pgtable_cfg { void (*free)(void *cookie, void *pages, size_t size); /* Low-level data specific to the table format */ + /* private: */ union { struct { u64 ttbr; @@ -203,6 +204,9 @@ struct arm_lpae_io_pgtable_walk_data { * @unmap_pages: Unmap a range of virtually contiguous pages of the same size. * @iova_to_phys: Translate iova to physical address. * @pgtable_walk: (optional) Perform a page table walk for a given iova. + * @read_and_clear_dirty: Record dirty info per IOVA. If an IOVA is dirty, + * clear its dirty state from the PTE unless the + * IOMMU_DIRTY_NO_CLEAR flag is passed in. * * These functions map directly onto the iommu_ops member functions with * the same names. @@ -231,7 +235,9 @@ struct io_pgtable_ops { * the configuration actually provided by the allocator (e.g. the * pgsize_bitmap may be restricted). * @cookie: An opaque token provided by the IOMMU driver and passed back to - * the callback routines in cfg->tlb. + * the callback routines. + * + * Returns: Pointer to the &struct io_pgtable_ops for this set of page tables. */ struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, struct io_pgtable_cfg *cfg,
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 3e4a82a..214fdbd 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h
@@ -388,6 +388,7 @@ struct io_ring_ctx { * regularly bounce b/w CPUs. */ struct { + struct io_rings __rcu *rings_rcu; struct llist_head work_llist; struct llist_head retry_llist; unsigned long check_cq; @@ -540,6 +541,7 @@ enum { REQ_F_BL_NO_RECYCLE_BIT, REQ_F_BUFFERS_COMMIT_BIT, REQ_F_BUF_NODE_BIT, + REQ_F_BUF_MORE_BIT, REQ_F_HAS_METADATA_BIT, REQ_F_IMPORT_BUFFER_BIT, REQ_F_SQE_COPIED_BIT, @@ -625,6 +627,8 @@ enum { REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT), /* buf node is valid */ REQ_F_BUF_NODE = IO_REQ_FLAG(REQ_F_BUF_NODE_BIT), + /* incremental buffer consumption, more space available */ + REQ_F_BUF_MORE = IO_REQ_FLAG(REQ_F_BUF_MORE_BIT), /* request has read/write metadata assigned */ REQ_F_HAS_METADATA = IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT), /*
diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 54b8b48..555597b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h
@@ -980,7 +980,8 @@ static inline void iommu_flush_iotlb_all(struct iommu_domain *domain) static inline void iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather) { - if (domain->ops->iotlb_sync) + if (domain->ops->iotlb_sync && + likely(iotlb_gather->start < iotlb_gather->end)) domain->ops->iotlb_sync(domain, iotlb_gather); iommu_iotlb_gather_init(iotlb_gather);
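The added range check exploits the gather's empty state: iommu_iotlb_gather_init() sets start to ULONG_MAX and end to 0, so start < end only becomes true once something was actually gathered, and an empty sync no longer pays for a driver callback. Roughly, for some existing struct iommu_domain *domain:

struct iommu_iotlb_gather gather;

iommu_iotlb_gather_init(&gather);	/* start = ULONG_MAX, end = 0 */
iommu_iotlb_sync(domain, &gather);	/* empty: skips ->iotlb_sync() */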
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 443053a..a742138 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h
@@ -333,7 +333,12 @@ struct tcp6_timewait_sock { }; #if IS_ENABLED(CONFIG_IPV6) -bool ipv6_mod_enabled(void); +extern int disable_ipv6_mod; + +static inline bool ipv6_mod_enabled(void) +{ + return disable_ipv6_mod == 0; +} static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk) {
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 70c0948..0225121 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h
@@ -394,6 +394,7 @@ #define GITS_TYPER_VLPIS (1UL << 1) #define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 #define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4) +#define GITS_TYPER_IDBITS GENMASK_ULL(12, 8) #define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS GENMASK_ULL(17, 13)
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index fdef2c1..d1c3d49 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h
@@ -434,8 +434,44 @@ extern unsigned long preset_lpj; /* * Convert various time units to each other: */ -extern unsigned int jiffies_to_msecs(const unsigned long j); -extern unsigned int jiffies_to_usecs(const unsigned long j); + +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) +/** + * jiffies_to_msecs - Convert jiffies to milliseconds + * @j: jiffies value + * + * This inline version handles the common HZ values (100, 250, 1000) for + * which MSEC_PER_SEC is a multiple of HZ. + * + * Return: milliseconds value + */ +static inline unsigned int jiffies_to_msecs(const unsigned long j) +{ + return (MSEC_PER_SEC / HZ) * j; +} +#else +unsigned int jiffies_to_msecs(const unsigned long j); +#endif + +#if !(USEC_PER_SEC % HZ) +/** + * jiffies_to_usecs - Convert jiffies to microseconds + * @j: jiffies value + * + * Return: microseconds value + */ +static inline unsigned int jiffies_to_usecs(const unsigned long j) +{ + /* + * HZ usually doesn't go much beyond MSEC_PER_SEC; + * jiffies_to_usecs() and usecs_to_jiffies() depend on that. + */ + BUILD_BUG_ON(HZ > USEC_PER_SEC); + + return (USEC_PER_SEC / HZ) * j; +} +#else +unsigned int jiffies_to_usecs(const unsigned long j); +#endif /** * jiffies_to_nsecs - Convert jiffies to nanoseconds
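With the inline form the conversion folds to a single constant multiply. For instance, at HZ=250 the factor MSEC_PER_SEC / HZ is 4:

/* HZ = 250: jiffies_to_msecs(j) compiles to 4 * j. */
unsigned long deadline = jiffies + HZ;			/* one second from now */
unsigned int budget_ms = jiffies_to_msecs(deadline - jiffies);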
diff --git a/include/linux/kthread.h b/include/linux/kthread.h index c92c114..a01a474 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h
@@ -7,6 +7,24 @@ struct mm_struct; +/* opaque kthread data */ +struct kthread; + +/* + * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will + * always remain a kthread. For kthreads p->worker_private always + * points to a struct kthread. For tasks that are not kthreads + * p->worker_private is used to point to other things. + * + * Return NULL for any task that is not a kthread. + */ +static inline struct kthread *tsk_is_kthread(struct task_struct *p) +{ + if (p->flags & PF_KTHREAD) + return p->worker_private; + return NULL; +} + __printf(4, 5) struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, @@ -98,9 +116,10 @@ void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); -void kthread_exit(long result) __noreturn; +#define kthread_exit(result) do_exit(result) void kthread_complete_and_exit(struct completion *, long) __noreturn; int kthreads_update_housekeeping(void); +void kthread_do_exit(struct kthread *, long); int kthreadd(void *unused); extern struct task_struct *kthreadd_task;
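tsk_is_kthread() collapses the usual two-step test (check PF_KTHREAD, then interpret worker_private) into one NULL-checkable call. The intended pattern, with illustrative branch bodies:

struct kthread *kt = tsk_is_kthread(current);

if (kt) {
	/* a kthread, permanently: kt points to its struct kthread */
} else {
	/* a user task: worker_private means something else entirely */
}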
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index dde605cb..6b76e7a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h
@@ -253,7 +253,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif -#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER union kvm_mmu_notifier_arg { unsigned long attributes; }; @@ -275,7 +274,6 @@ struct kvm_gfn_range { bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); -#endif enum { OUTSIDE_GUEST_MODE, @@ -849,13 +847,12 @@ struct kvm { struct hlist_head irq_ack_notifier_list; #endif -#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER struct mmu_notifier mmu_notifier; unsigned long mmu_invalidate_seq; long mmu_invalidate_in_progress; gfn_t mmu_invalidate_range_start; gfn_t mmu_invalidate_range_end; -#endif + struct list_head devices; u64 manual_dirty_log_protect; struct dentry *debugfs_dentry; @@ -1943,56 +1940,43 @@ enum kvm_stat_kind { struct kvm_stat_data { struct kvm *kvm; - const struct _kvm_stats_desc *desc; + const struct kvm_stats_desc *desc; enum kvm_stat_kind kind; }; -struct _kvm_stats_desc { - struct kvm_stats_desc desc; - char name[KVM_STATS_NAME_SIZE]; -}; - -#define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz) \ - .flags = type | unit | base | \ - BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) | \ - BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) | \ - BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK), \ - .exponent = exp, \ - .size = sz, \ .bucket_size = bsz -#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ - { \ - { \ - STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ - .offset = offsetof(struct kvm_vm_stat, generic.stat) \ - }, \ - .name = #stat, \ - } -#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ - { \ - { \ - STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ - .offset = offsetof(struct kvm_vcpu_stat, generic.stat) \ - }, \ - .name = #stat, \ - } -#define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ - { \ - { \ - STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ - .offset = offsetof(struct kvm_vm_stat, stat) \ - }, \ - .name = #stat, \ - } -#define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ - { \ - { \ - STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ - .offset = offsetof(struct kvm_vcpu_stat, stat) \ - }, \ - .name = #stat, \ - } +#define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz) \ + .flags = type | unit | base | \ + BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) | \ + BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) | \ + BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK), \ + .exponent = exp, \ + .size = sz, \ .bucket_size = bsz +#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ +{ \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = offsetof(struct kvm_vm_stat, generic.stat), \ + .name = #stat, \ +} +#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ +{ \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = offsetof(struct kvm_vcpu_stat, generic.stat), \ + .name = #stat, \ +} +#define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ +{ \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = offsetof(struct kvm_vm_stat, stat), \ + .name = #stat, \ +} +#define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ +{ \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = offsetof(struct kvm_vcpu_stat, stat), \ + .name = #stat, \ +} /* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */ #define STATS_DESC(SCOPE, stat, type, unit, base, exp, sz, bsz) \ SCOPE##_STATS_DESC(stat, type, unit, base, exp, sz, bsz) @@ -2069,7 +2053,7 @@ struct _kvm_stats_desc { STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking) ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header, - const struct _kvm_stats_desc *desc, + const struct kvm_stats_desc *desc, void *stats, size_t size_stats, char __user *user_buffer, size_t size, loff_t *offset); @@ -2114,11 +2098,10 @@ static inline void kvm_stats_log_hist_update(u64 *data, size_t size, u64 value) extern const struct kvm_stats_header kvm_vm_stats_header; -extern const struct _kvm_stats_desc kvm_vm_stats_desc[]; +extern const struct kvm_stats_desc kvm_vm_stats_desc[]; extern const struct kvm_stats_header kvm_vcpu_stats_header; -extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[]; +extern const struct kvm_stats_desc kvm_vcpu_stats_desc[]; -#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) { if (unlikely(kvm->mmu_invalidate_in_progress)) @@ -2196,7 +2179,6 @@ static inline bool mmu_invalidate_retry_gfn_unsafe(struct kvm *kvm, return READ_ONCE(kvm->mmu_invalidate_seq) != mmu_seq; } -#endif #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
diff --git a/include/linux/leafops.h b/include/linux/leafops.h index a9ff94b..05673d3 100644 --- a/include/linux/leafops.h +++ b/include/linux/leafops.h
@@ -363,6 +363,23 @@ static inline unsigned long softleaf_to_pfn(softleaf_t entry) return swp_offset(entry) & SWP_PFN_MASK; } +static inline void softleaf_migration_sync(softleaf_t entry, + struct folio *folio) +{ + /* + * Ensure we do not race with a folio split, which might turn tail pages + * into new folios and thus result in observing an unlocked folio. + * This matches the write barrier in __split_folio_to_order(). + */ + smp_rmb(); + + /* + * Any use of migration entries may only occur while the + * corresponding page is locked. + */ + VM_WARN_ON_ONCE(!folio_test_locked(folio)); +} + /** * softleaf_to_page() - Obtains struct page for PFN encoded within leaf entry. * @entry: Leaf entry, softleaf_has_pfn(@entry) must return true. @@ -374,11 +391,8 @@ static inline struct page *softleaf_to_page(softleaf_t entry) struct page *page = pfn_to_page(softleaf_to_pfn(entry)); VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); - /* - * Any use of migration entries may only occur while the - * corresponding page is locked - */ - VM_WARN_ON_ONCE(softleaf_is_migration(entry) && !PageLocked(page)); + if (softleaf_is_migration(entry)) + softleaf_migration_sync(entry, page_folio(page)); return page; } @@ -394,12 +408,8 @@ static inline struct folio *softleaf_to_folio(softleaf_t entry) struct folio *folio = pfn_folio(softleaf_to_pfn(entry)); VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); - /* - * Any use of migration entries may only occur while the - * corresponding folio is locked. - */ - VM_WARN_ON_ONCE(softleaf_is_migration(entry) && - !folio_test_locked(folio)); + if (softleaf_is_migration(entry)) + softleaf_migration_sync(entry, folio); return folio; }
diff --git a/include/linux/lis3lv02d.h b/include/linux/lis3lv02d.h index b72b8cd..feb60ba 100644 --- a/include/linux/lis3lv02d.h +++ b/include/linux/lis3lv02d.h
@@ -30,8 +30,8 @@ * @default_rate: Default sampling rate. 0 means reset default * @setup_resources: Interrupt line setup call back function * @release_resources: Interrupt line release call back function - * @st_min_limits[3]: Selftest acceptance minimum values - * @st_max_limits[3]: Selftest acceptance maximum values + * @st_min_limits: Selftest acceptance minimum values (x, y, z) + * @st_max_limits: Selftest acceptance maximum values (x, y, z) * @irq2: Irq line 2 number * * Platform data is used to setup the sensor chip. Meaning of the different
diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h index fe82a6c..dd11fdc 100644 --- a/include/linux/liveupdate.h +++ b/include/linux/liveupdate.h
@@ -23,8 +23,11 @@ struct file; /** * struct liveupdate_file_op_args - Arguments for file operation callbacks. * @handler: The file handler being called. - * @retrieved: The retrieve status for the 'can_finish / finish' - * operation. + * @retrieve_status: The retrieve status for the 'can_finish / finish' + * operation. A value of 0 means the retrieve has not been + * attempted, a positive value means the retrieve was + * successful, and a negative value means the retrieve failed, + * and the value is the error code of the call. * @file: The file object. For retrieve: [OUT] The callback sets * this to the new file. For other ops: [IN] The caller sets * this to the file being operated on. @@ -40,7 +43,7 @@ struct file; */ struct liveupdate_file_op_args { struct liveupdate_file_handler *handler; - bool retrieved; + int retrieve_status; struct file *file; u64 serialized_data; void *private_data;
diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index eff711b..234be7f 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h
@@ -315,7 +315,7 @@ do { \ #endif /* CONFIG_PREEMPT_RT */ -#if defined(WARN_CONTEXT_ANALYSIS) +#if defined(WARN_CONTEXT_ANALYSIS) && !defined(__CHECKER__) /* * Because the compiler only knows about the base per-CPU variable, use this * helper function to make the compiler think we lock/unlock the @base variable,
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 0fe96f3..65c732d 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h
@@ -55,6 +55,7 @@ struct mempolicy { nodemask_t cpuset_mems_allowed; /* relative to these nodes */ nodemask_t user_nodemask; /* nodemask passed by user */ } w; + struct rcu_head rcu; }; /*
diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 26ca00c..d5af2b7 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h
@@ -65,7 +65,7 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); -void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) +void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) __releases(ptl); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, @@ -97,6 +97,14 @@ static inline int set_movable_ops(const struct movable_operations *ops, enum pag return -ENOSYS; } +static inline void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) + __releases(ptl) +{ + WARN_ON_ONCE(1); + + spin_unlock(ptl); +} + #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_NUMA_BALANCING
diff --git a/include/linux/mm.h b/include/linux/mm.h index 5be3d8a..abb4963 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h
@@ -3514,26 +3514,21 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} #endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */ -static inline unsigned long ptdesc_nr_pages(const struct ptdesc *ptdesc) -{ - return compound_nr(ptdesc_page(ptdesc)); -} - static inline void __pagetable_ctor(struct ptdesc *ptdesc) { - pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags)); + struct folio *folio = ptdesc_folio(ptdesc); - __SetPageTable(ptdesc_page(ptdesc)); - mod_node_page_state(pgdat, NR_PAGETABLE, ptdesc_nr_pages(ptdesc)); + __folio_set_pgtable(folio); + lruvec_stat_add_folio(folio, NR_PAGETABLE); } static inline void pagetable_dtor(struct ptdesc *ptdesc) { - pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags)); + struct folio *folio = ptdesc_folio(ptdesc); ptlock_free(ptdesc); - __ClearPageTable(ptdesc_page(ptdesc)); - mod_node_page_state(pgdat, NR_PAGETABLE, -ptdesc_nr_pages(ptdesc)); + __folio_clear_pgtable(folio); + lruvec_stat_sub_folio(folio, NR_PAGETABLE); } static inline void pagetable_dtor_free(struct ptdesc *ptdesc)
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index e0e2c26..ba84f02 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h
@@ -486,14 +486,12 @@ struct mmc_host { struct mmc_ios ios; /* current io bus settings */ + bool claimed; /* host exclusively claimed */ + /* group bitfields together to minimize padding */ unsigned int use_spi_crc:1; - unsigned int claimed:1; /* host exclusively claimed */ unsigned int doing_init_tune:1; /* initial tuning in progress */ - unsigned int can_retune:1; /* re-tuning can be used */ unsigned int doing_retune:1; /* re-tuning in progress */ - unsigned int retune_now:1; /* do re-tuning at next req */ - unsigned int retune_paused:1; /* re-tuning is temporarily disabled */ unsigned int retune_crc_disable:1; /* don't trigger retune upon crc */ unsigned int can_dma_map_merge:1; /* merging can be used */ unsigned int vqmmc_enabled:1; /* vqmmc regulator is enabled */ @@ -508,6 +506,9 @@ struct mmc_host { int rescan_disable; /* disable card detection */ int rescan_entered; /* used with nonremovable devices */ + bool can_retune; /* re-tuning can be used */ + bool retune_now; /* do re-tuning at next req */ + bool retune_paused; /* re-tuning is temporarily disabled */ int need_retune; /* re-tuning is needed */ int hold_retune; /* hold off re-tuning */ unsigned int retune_period; /* re-tuning period in secs */
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 07a2bba..8450e18 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h
@@ -234,7 +234,7 @@ struct mmu_notifier { }; /** - * struct mmu_interval_notifier_ops + * struct mmu_interval_notifier_ops - callback for range notification * @invalidate: Upon return the caller must stop using any SPTEs within this * range. This function can sleep. Return false only if sleeping * was required but mmu_notifier_range_blockable(range) is false. @@ -309,8 +309,8 @@ void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub); /** * mmu_interval_set_seq - Save the invalidation sequence - * @interval_sub - The subscription passed to invalidate - * @cur_seq - The cur_seq passed to the invalidate() callback + * @interval_sub: The subscription passed to invalidate + * @cur_seq: The cur_seq passed to the invalidate() callback * * This must be called unconditionally from the invalidate callback of a * struct mmu_interval_notifier_ops under the same lock that is used to call @@ -329,8 +329,8 @@ mmu_interval_set_seq(struct mmu_interval_notifier *interval_sub, /** * mmu_interval_read_retry - End a read side critical section against a VA range - * interval_sub: The subscription - * seq: The return of the paired mmu_interval_read_begin() + * @interval_sub: The subscription + * @seq: The return of the paired mmu_interval_read_begin() * * This MUST be called under a user provided lock that is also held * unconditionally by op->invalidate() when it calls mmu_interval_set_seq(). @@ -338,7 +338,7 @@ mmu_interval_set_seq(struct mmu_interval_notifier *interval_sub, * Each call should be paired with a single mmu_interval_read_begin() and * should be used to conclude the read side. * - * Returns true if an invalidation collided with this critical section, and + * Returns: true if an invalidation collided with this critical section, and * the caller should retry. */ static inline bool @@ -350,20 +350,21 @@ mmu_interval_read_retry(struct mmu_interval_notifier *interval_sub, /** * mmu_interval_check_retry - Test if a collision has occurred - * interval_sub: The subscription - * seq: The return of the matching mmu_interval_read_begin() + * @interval_sub: The subscription + * @seq: The return of the matching mmu_interval_read_begin() * * This can be used in the critical section between mmu_interval_read_begin() - * and mmu_interval_read_retry(). A return of true indicates an invalidation - * has collided with this critical region and a future - * mmu_interval_read_retry() will return true. - * - * False is not reliable and only suggests a collision may not have - * occurred. It can be called many times and does not have to hold the user - * provided lock. + * and mmu_interval_read_retry(). * * This call can be used as part of loops and other expensive operations to * expedite a retry. + * It can be called many times and does not have to hold the user + * provided lock. + * + * Returns: true indicates an invalidation has collided with this critical + * region and a future mmu_interval_read_retry() will return true. + * False is not reliable and only suggests a collision may not have + * occurred. */ static inline bool mmu_interval_check_retry(struct mmu_interval_notifier *interval_sub,
diff --git a/include/linux/mpage.h b/include/linux/mpage.h index 1bdc39d..3589469 100644 --- a/include/linux/mpage.h +++ b/include/linux/mpage.h
@@ -17,7 +17,14 @@ struct readahead_control; void mpage_readahead(struct readahead_control *, get_block_t get_block); int mpage_read_folio(struct folio *folio, get_block_t get_block); -int mpage_writepages(struct address_space *mapping, - struct writeback_control *wbc, get_block_t get_block); +int __mpage_writepages(struct address_space *mapping, + struct writeback_control *wbc, get_block_t get_block, + int (*write_folio)(struct folio *folio, + struct writeback_control *wbc)); +static inline int mpage_writepages(struct address_space *mapping, + struct writeback_control *wbc, get_block_t get_block) +{ + return __mpage_writepages(mapping, wbc, get_block, NULL); +} #endif
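mpage_writepages() is now a thin wrapper passing a NULL write_folio, so existing callers keep their behavior while a filesystem can route folios through its own writer via __mpage_writepages(). A sketch; the myfs_* names are hypothetical:

/* Hypothetical: supply a per-folio writer instead of the default path. */
static int myfs_write_folio(struct folio *folio, struct writeback_control *wbc)
{
	return myfs_do_write_folio(folio, wbc);	/* hypothetical helper */
}

static int myfs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	return __mpage_writepages(mapping, wbc, myfs_get_block,
				  myfs_write_folio);
}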
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d4e6e00..7ca01eb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h
@@ -311,7 +311,9 @@ struct header_ops { int (*create) (struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len); - int (*parse)(const struct sk_buff *skb, unsigned char *haddr); + int (*parse)(const struct sk_buff *skb, + const struct net_device *dev, + unsigned char *haddr); int (*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); void (*cache_update)(struct hh_cache *hh, const struct net_device *dev, @@ -2155,6 +2157,7 @@ struct net_device { unsigned long state; unsigned int flags; unsigned short hard_header_len; + enum netdev_stat_type pcpu_stat_type:8; netdev_features_t features; struct inet6_dev __rcu *ip6_ptr; __cacheline_group_end(net_device_read_txrx); @@ -2404,8 +2407,6 @@ struct net_device { void *ml_priv; enum netdev_ml_priv_type ml_priv_type; - enum netdev_stat_type pcpu_stat_type:8; - #if IS_ENABLED(CONFIG_GARP) struct garp_port __rcu *garp_port; #endif @@ -3446,7 +3447,7 @@ static inline int dev_parse_header(const struct sk_buff *skb, if (!dev->header_ops || !dev->header_ops->parse) return 0; - return dev->header_ops->parse(skb, haddr); + return dev->header_ops->parse(skb, dev, haddr); } static inline __be16 dev_parse_header_protocol(const struct sk_buff *skb) @@ -3576,17 +3577,49 @@ struct page_pool_bh { }; DECLARE_PER_CPU(struct page_pool_bh, system_page_pool); +#define XMIT_RECURSION_LIMIT 8 + #ifndef CONFIG_PREEMPT_RT static inline int dev_recursion_level(void) { return this_cpu_read(softnet_data.xmit.recursion); } + +static inline bool dev_xmit_recursion(void) +{ + return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > + XMIT_RECURSION_LIMIT); +} + +static inline void dev_xmit_recursion_inc(void) +{ + __this_cpu_inc(softnet_data.xmit.recursion); +} + +static inline void dev_xmit_recursion_dec(void) +{ + __this_cpu_dec(softnet_data.xmit.recursion); +} #else static inline int dev_recursion_level(void) { return current->net_xmit.recursion; } +static inline bool dev_xmit_recursion(void) +{ + return unlikely(current->net_xmit.recursion > XMIT_RECURSION_LIMIT); +} + +static inline void dev_xmit_recursion_inc(void) +{ + current->net_xmit.recursion++; +} + +static inline void dev_xmit_recursion_dec(void) +{ + current->net_xmit.recursion--; +} #endif void __netif_schedule(struct Qdisc *q); @@ -4711,7 +4744,7 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) { spin_lock(&txq->_xmit_lock); - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, cpu); } @@ -4729,7 +4762,7 @@ static inline void __netif_tx_release(struct netdev_queue *txq) static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } @@ -4738,7 +4771,7 @@ static inline bool __netif_tx_trylock(struct netdev_queue *txq) bool ok = spin_trylock(&txq->_xmit_lock); if (likely(ok)) { - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } return ok; @@ -4746,14 +4779,14 @@ static inline bool __netif_tx_trylock(struct netdev_queue *txq) static inline void __netif_tx_unlock(struct netdev_queue *txq) { - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock(&txq->_xmit_lock); } static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) { - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock_bh(&txq->_xmit_lock); } @@ -4846,6 +4879,23 @@ static inline void netif_tx_disable(struct net_device *dev) local_bh_enable(); } +#ifndef CONFIG_PREEMPT_RT +static inline bool netif_tx_owned(struct netdev_queue *txq, unsigned int cpu) +{ + /* Other cpus might concurrently change txq->xmit_lock_owner + * to -1 or to their cpu id, but not to our id. + */ + return READ_ONCE(txq->xmit_lock_owner) == cpu; +} + +#else +static inline bool netif_tx_owned(struct netdev_queue *txq, unsigned int cpu) +{ + return rt_mutex_owner(&txq->_xmit_lock.lock) == current; +} + +#endif + static inline void netif_addr_lock(struct net_device *dev) { unsigned char nest_level = 0;
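The dev_xmit_recursion*() helpers hoisted into this header hide the depth counter's location (per-CPU softnet_data normally, per-task on PREEMPT_RT) behind one interface. A sketch of the canonical guard a stacked device places around a nested transmit; the function name is hypothetical:

/* Bound nested transmits instead of recursing until stack overflow. */
static int stacked_dev_xmit(struct sk_buff *skb)
{
	int ret;

	if (dev_xmit_recursion()) {	/* deeper than XMIT_RECURSION_LIMIT */
		kfree_skb(skb);
		return NET_XMIT_DROP;
	}

	dev_xmit_recursion_inc();
	ret = dev_queue_xmit(skb);	/* may re-enter this path */
	dev_xmit_recursion_dec();

	return ret;
}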
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index e9f4f84..b983315 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h
@@ -309,7 +309,7 @@ enum { /* register and unregister set references */ extern ip_set_id_t ip_set_get_byname(struct net *net, - const char *name, struct ip_set **set); + const struct nlattr *name, struct ip_set **set); extern void ip_set_put_byindex(struct net *net, ip_set_id_t index); extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name); extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 72ee7d2..ba17ac5b 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h
@@ -140,7 +140,6 @@ struct netfs_io_stream { void (*issue_write)(struct netfs_io_subrequest *subreq); /* Collection tracking */ struct list_head subrequests; /* Contributory I/O operations */ - struct netfs_io_subrequest *front; /* Op being collected */ unsigned long long collected_to; /* Position we've collected results to */ size_t transferred; /* The amount transferred from this stream */ unsigned short error; /* Aggregate error for the stream */
diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 825f586..c8e227a 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h
@@ -55,6 +55,8 @@ static __always_inline bool is_ns_init_id(const struct ns_common *ns) #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns))) +bool may_see_all_namespaces(void); + static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns) { return atomic_read(&ns->__ns_ref_active);
diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h index 60e069a..e75c29c 100644 --- a/include/linux/nvme-auth.h +++ b/include/linux/nvme-auth.h
@@ -11,7 +11,7 @@ struct nvme_dhchap_key { size_t len; u8 hash; - u8 key[]; + u8 key[] __counted_by(len); }; u32 nvme_auth_get_seqnum(void);
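With __counted_by(len), FORTIFY_SOURCE and UBSAN bounds checks validate key[] accesses against the runtime value of len, so the counter must be written before the array is touched. Roughly, for a hypothetical allocation path:

struct nvme_dhchap_key *k = kzalloc(struct_size(k, key, key_len), GFP_KERNEL);

if (!k)
	return NULL;
k->len = key_len;		/* set the counter before indexing key[] */
memcpy(k->key, secret, key_len);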
diff --git a/include/linux/overflow.h b/include/linux/overflow.h index eddd987..a8cb631 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h
@@ -42,7 +42,7 @@ * both the type-agnostic benefits of the macros while also being able to * enforce that the return value is, in fact, checked. */ -static inline bool __must_check __must_check_overflow(bool overflow) +static __always_inline bool __must_check __must_check_overflow(bool overflow) { return unlikely(overflow); } @@ -327,7 +327,7 @@ static inline bool __must_check __must_check_overflow(bool overflow) * with any overflow causing the return value to be SIZE_MAX. The * lvalue must be size_t to avoid implicit type conversion. */ -static inline size_t __must_check size_mul(size_t factor1, size_t factor2) +static __always_inline size_t __must_check size_mul(size_t factor1, size_t factor2) { size_t bytes; @@ -346,7 +346,7 @@ static inline size_t __must_check size_mul(size_t factor1, size_t factor2) * with any overflow causing the return value to be SIZE_MAX. The * lvalue must be size_t to avoid implicit type conversion. */ -static inline size_t __must_check size_add(size_t addend1, size_t addend2) +static __always_inline size_t __must_check size_add(size_t addend1, size_t addend2) { size_t bytes; @@ -367,7 +367,7 @@ static inline size_t __must_check size_add(size_t addend1, size_t addend2) * argument may be SIZE_MAX (or the result with be forced to SIZE_MAX). * The lvalue must be size_t to avoid implicit type conversion. */ -static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) +static __always_inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) { size_t bytes;
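__always_inline ensures the saturating size_*() helpers fold to straight-line arithmetic even in unoptimized or instrumented builds. They compose as before; on overflow the result pins to SIZE_MAX and the allocator rejects it. The struct names here are placeholders:

/* n * sizeof(elem) + header, saturating to SIZE_MAX on overflow. */
size_t bytes = size_add(size_mul(n, sizeof(struct my_elem)),
			sizeof(struct my_hdr));
void *buf = kmalloc(bytes, GFP_KERNEL);	/* a SIZE_MAX request simply fails */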
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ec442af..31a8484 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h
@@ -210,7 +210,6 @@ enum mapping_flags { AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9, AS_KERNEL_FILE = 10, /* mapping for a fake kernel file that shouldn't account usage to user cgroups */ - AS_NO_DATA_INTEGRITY = 11, /* no data integrity guarantees */ /* Bits 16-25 are used for FOLIO_ORDER */ AS_FOLIO_ORDER_BITS = 5, AS_FOLIO_ORDER_MIN = 16, @@ -346,16 +345,6 @@ static inline bool mapping_writeback_may_deadlock_on_reclaim(const struct addres return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); } -static inline void mapping_set_no_data_integrity(struct address_space *mapping) -{ - set_bit(AS_NO_DATA_INTEGRITY, &mapping->flags); -} - -static inline bool mapping_no_data_integrity(const struct address_space *mapping) -{ - return test_bit(AS_NO_DATA_INTEGRITY, &mapping->flags); -} - static inline gfp_t mapping_gfp_mask(const struct address_space *mapping) { return mapping->gfp_mask;
diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index f6cca7a..50b6be5 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h
@@ -13,10 +13,10 @@ /** * enum mlxreg_wdt_type - type of HW watchdog * - * TYPE1 HW watchdog implementation exist in old systems. - * All new systems have TYPE2 HW watchdog. - * TYPE3 HW watchdog can exist on all systems with new CPLD. - * TYPE3 is selected by WD capability bit. + * @MLX_WDT_TYPE1: HW watchdog implementation in old systems. + * @MLX_WDT_TYPE2: All new systems have TYPE2 HW watchdog. + * @MLX_WDT_TYPE3: HW watchdog that can exist on all systems with new CPLD. + * TYPE3 is selected by WD capability bit. */ enum mlxreg_wdt_type { MLX_WDT_TYPE1, @@ -35,7 +35,7 @@ enum mlxreg_wdt_type { * @MLXREG_HOTPLUG_LC_SYNCED: entry for line card synchronization events, coming * after hardware-firmware synchronization handshake; * @MLXREG_HOTPLUG_LC_READY: entry for line card ready events, indicating line card - PHYs ready / unready state; + * PHYs ready / unready state; * @MLXREG_HOTPLUG_LC_ACTIVE: entry for line card active events, indicating firmware * availability / unavailability for the ports on line card; * @MLXREG_HOTPLUG_LC_THERMAL: entry for line card thermal shutdown events, positive @@ -123,8 +123,8 @@ struct mlxreg_hotplug_device { * @reg_pwr: attribute power register; * @reg_ena: attribute enable register; * @mode: access mode; - * @np - pointer to node platform associated with attribute; - * @hpdev - hotplug device data; + * @np: pointer to node platform associated with attribute; + * @hpdev: hotplug device data; * @notifier: pointer to event notifier block; * @health_cntr: dynamic device health indication counter; * @attached: true if device has been attached after good health indication;
diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index b1b8375..dbe745d 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h
@@ -26,6 +26,7 @@ #define INT3472_GPIO_TYPE_POWER_ENABLE 0x0b #define INT3472_GPIO_TYPE_CLK_ENABLE 0x0c #define INT3472_GPIO_TYPE_PRIVACY_LED 0x0d +#define INT3472_GPIO_TYPE_DOVDD 0x10 #define INT3472_GPIO_TYPE_HANDSHAKE 0x12 #define INT3472_GPIO_TYPE_HOTPLUG_DETECT 0x13 @@ -33,8 +34,8 @@ #define INT3472_MAX_SENSOR_GPIOS 3 #define INT3472_MAX_REGULATORS 3 -/* E.g. "avdd\0" */ -#define GPIO_SUPPLY_NAME_LENGTH 5 +/* E.g. "dovdd\0" */ +#define GPIO_SUPPLY_NAME_LENGTH 6 /* 12 chars for acpi_dev_name() + "-", e.g. "ABCD1234:00-" */ #define GPIO_REGULATOR_NAME_LENGTH (12 + GPIO_SUPPLY_NAME_LENGTH) /* lower- and upper-case mapping */
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 813da10..ed1d50d 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h
@@ -31,11 +31,6 @@ struct platform_device { struct resource *resource; const struct platform_device_id *id_entry; - /* - * Driver name to force a match. Do not set directly, because core - * frees it. Use driver_set_override() to set or clear it. - */ - const char *driver_override; /* MFD cell pointer */ struct mfd_cell *mfd_cell;
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 41037c5..64921b1 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h
@@ -545,22 +545,10 @@ static inline int pm_runtime_resume_and_get(struct device *dev) * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_idle(). - * - * Return: - * * 1: Success. Usage counter dropped to zero, but device was already suspended. - * * 0: Success. - * * -EINVAL: Runtime PM error. - * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status - * change ongoing. - * * -EBUSY: Runtime PM child_count non-zero. - * * -EPERM: Device PM QoS resume latency 0. - * * -EINPROGRESS: Suspend already in progress. - * * -ENOSYS: CONFIG_PM not enabled. */ -static inline int pm_runtime_put(struct device *dev) +static inline void pm_runtime_put(struct device *dev) { - return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); + __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } /**
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 876358c..d862fa6 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h
@@ -248,6 +248,7 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node); int ring_buffer_map(struct trace_buffer *buffer, int cpu, struct vm_area_struct *vma); +void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu); int ring_buffer_unmap(struct trace_buffer *buffer, int cpu); int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu); #endif /* _LINUX_RING_BUFFER_H */
diff --git a/include/linux/rseq.h b/include/linux/rseq.h index 7a01a07..b9d62fc 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h
@@ -146,6 +146,18 @@ static inline void rseq_fork(struct task_struct *t, u64 clone_flags) t->rseq = current->rseq; } +/* + * Value returned by getauxval(AT_RSEQ_ALIGN) and expected by rseq + * registration. This is the active rseq area size rounded up to the next + * power of 2, which guarantees that the rseq structure will always be + * aligned on the nearest power of two large enough to contain it, even + * as it grows. + */ +static inline unsigned int rseq_alloc_align(void) +{ + return 1U << get_count_order(offsetof(struct rseq, end)); +} + #else /* CONFIG_RSEQ */ static inline void rseq_handle_slowpath(struct pt_regs *regs) { } static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h index cbc4a79..c6831c9 100644 --- a/include/linux/rseq_entry.h +++ b/include/linux/rseq_entry.h
@@ -216,10 +216,10 @@ static __always_inline bool rseq_grant_slice_extension(bool work_pending) } #else /* CONFIG_RSEQ_SLICE_EXTENSION */ -static inline bool rseq_slice_extension_enabled(void) { return false; } -static inline bool rseq_arm_slice_extension_timer(void) { return false; } -static inline void rseq_slice_clear_grant(struct task_struct *t) { } -static inline bool rseq_grant_slice_extension(bool work_pending) { return false; } +static __always_inline bool rseq_slice_extension_enabled(void) { return false; } +static __always_inline bool rseq_arm_slice_extension_timer(void) { return false; } +static __always_inline void rseq_slice_clear_grant(struct task_struct *t) { } +static __always_inline bool rseq_grant_slice_extension(bool work_pending) { return false; } #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */ bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h index da5fa6f..0b42045 100644 --- a/include/linux/rseq_types.h +++ b/include/linux/rseq_types.h
@@ -133,10 +133,12 @@ struct rseq_data { }; * @active: MM CID is active for the task * @cid: The CID associated to the task either permanently or * borrowed from the CPU + * @node: Queued in the per MM MMCID list */ struct sched_mm_cid { unsigned int active; unsigned int cid; + struct hlist_node node; }; /** @@ -157,6 +159,7 @@ struct mm_cid_pcpu { * @work: Regular work to handle the affinity mode change case * @lock: Spinlock to protect against affinity setting which can't take @mutex * @mutex: Mutex to serialize forks and exits related to this mm + * @user_list: List of the MM CID users of a MM * @nr_cpus_allowed: The number of CPUs in the per MM allowed CPUs map. The map * is growth only. * @users: The number of tasks sharing this MM. Separate from mm::mm_users @@ -177,13 +180,14 @@ struct mm_mm_cid { raw_spinlock_t lock; struct mutex mutex; + struct hlist_head user_list; /* Low frequency modified */ unsigned int nr_cpus_allowed; unsigned int users; unsigned int pcpu_thrs; unsigned int update_deferred; -}____cacheline_aligned_in_smp; +} ____cacheline_aligned; #else /* CONFIG_SCHED_MM_CID */ struct mm_mm_cid { }; struct sched_mm_cid { };
diff --git a/include/linux/sched.h b/include/linux/sched.h index 074ad4e..5a5d3db 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h
@@ -579,6 +579,7 @@ struct sched_entity { u64 deadline; u64 min_vruntime; u64 min_slice; + u64 max_slice; struct list_head group_node; unsigned char on_rq; @@ -2353,7 +2354,6 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo #ifdef CONFIG_SCHED_MM_CID void sched_mm_cid_before_execve(struct task_struct *t); void sched_mm_cid_after_execve(struct task_struct *t); -void sched_mm_cid_fork(struct task_struct *t); void sched_mm_cid_exit(struct task_struct *t); static __always_inline int task_mm_cid(struct task_struct *t) { @@ -2362,7 +2362,6 @@ static __always_inline int task_mm_cid(struct task_struct *t) #else static inline void sched_mm_cid_before_execve(struct task_struct *t) { } static inline void sched_mm_cid_after_execve(struct task_struct *t) { } -static inline void sched_mm_cid_fork(struct task_struct *t) { } static inline void sched_mm_cid_exit(struct task_struct *t) { } static __always_inline int task_mm_cid(struct task_struct *t) {
diff --git a/include/linux/security.h b/include/linux/security.h index 83a646d..ee88dd2d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h
@@ -145,6 +145,7 @@ enum lockdown_reason { LOCKDOWN_BPF_WRITE_USER, LOCKDOWN_DBG_WRITE_KERNEL, LOCKDOWN_RTAS_ERROR_INJECTION, + LOCKDOWN_XEN_USER_ACTIONS, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_KCORE, LOCKDOWN_KPROBES,
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 01efdce..a95b2d1 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h
@@ -195,6 +195,7 @@ void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl); void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud, unsigned int quot); int fsl8250_handle_irq(struct uart_port *port); +void serial8250_handle_irq_locked(struct uart_port *port, unsigned int iir); int serial8250_handle_irq(struct uart_port *port, unsigned int iir); u16 serial8250_rx_chars(struct uart_8250_port *up, u16 lsr); void serial8250_read_char(struct uart_8250_port *up, u16 lsr);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index daa4e49..2f278ce 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h
@@ -5097,6 +5097,7 @@ static inline bool skb_has_extensions(struct sk_buff *skb) return unlikely(skb->active_extensions); } #else +static inline void __skb_ext_put(struct skb_ext *ext) {} static inline void skb_ext_put(struct sk_buff *skb) {} static inline void skb_ext_reset(struct sk_buff *skb) {} static inline void skb_ext_del(struct sk_buff *skb, int unused) {}
diff --git a/include/linux/slab.h b/include/linux/slab.h index a5a5e41..15a60b5 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h
@@ -517,18 +517,6 @@ void kfree_sensitive(const void *objp); DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T)) DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T)) -/** - * ksize - Report actual allocation size of associated object - * - * @objp: Pointer returned from a prior kmalloc()-family allocation. - * - * This should not be used for writing beyond the originally requested - * allocation size. Either use krealloc() or round up the allocation size - * with kmalloc_size_roundup() prior to allocation. If this is used to - * access beyond the originally requested allocation size, UBSAN_BOUNDS - * and/or FORTIFY_SOURCE may trip, since they only know about the - * originally allocated size via the __alloc_size attribute. - */ size_t ksize(const void *objp); #ifdef CONFIG_PRINTK
diff --git a/include/linux/soc/qcom/pdr.h b/include/linux/soc/qcom/pdr.h index 83a8ea6..2b7691e 100644 --- a/include/linux/soc/qcom/pdr.h +++ b/include/linux/soc/qcom/pdr.h
@@ -5,6 +5,7 @@ #include <linux/soc/qcom/qmi.h> #define SERVREG_NAME_LENGTH 64 +#define SERVREG_PFR_LENGTH 256 struct pdr_service; struct pdr_handle;
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index af7cfee..0dc671c 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h
@@ -159,10 +159,6 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * @modalias: Name of the driver to use with this device, or an alias * for that name. This appears in the sysfs "modalias" attribute * for driver coldplugging, and in uevents used for hotplugging - * @driver_override: If the name of a driver is written to this attribute, then - * the device will bind to the named driver and only the named driver. - * Do not set directly, because core frees it; use driver_set_override() to - * set or clear it. * @pcpu_statistics: statistics for the spi_device * @word_delay: delay to be inserted between consecutive * words of a transfer @@ -224,7 +220,6 @@ struct spi_device { void *controller_state; void *controller_data; char modalias[SPI_NAME_SIZE]; - const char *driver_override; /* The statistics */ struct spi_statistics __percpu *pcpu_statistics;
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index dec7cbe..905b629 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h
@@ -11,6 +11,7 @@ #ifndef _LINUX_SRCU_TINY_H #define _LINUX_SRCU_TINY_H +#include <linux/irq_work_types.h> #include <linux/swait.h> struct srcu_struct { @@ -24,18 +25,21 @@ struct srcu_struct { struct rcu_head *srcu_cb_head; /* Pending callbacks: Head. */ struct rcu_head **srcu_cb_tail; /* Pending callbacks: Tail. */ struct work_struct srcu_work; /* For driving grace periods. */ + struct irq_work srcu_irq_work; /* Defer schedule_work() to irq work. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ }; void srcu_drive_gp(struct work_struct *wp); +void srcu_tiny_irq_work(struct irq_work *irq_work); #define __SRCU_STRUCT_INIT(name, __ignored, ___ignored, ____ignored) \ { \ .srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \ .srcu_cb_tail = &name.srcu_cb_head, \ .srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp), \ + .srcu_irq_work = { .func = srcu_tiny_irq_work }, \ __SRCU_DEP_MAP_INIT(name) \ }
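The new srcu_irq_work member lets Tiny SRCU bounce schedule_work() through irq_work when the caller's context cannot safely take the workqueue locks directly. A minimal sketch of that deferral pattern, outside of SRCU itself ('struct foo' and the foo_* names are illustrative, not from this patch):

#include <linux/irq_work.h>
#include <linux/workqueue.h>

struct foo {
        struct work_struct work;        /* does the real processing */
        struct irq_work irq_work;       /* trampoline that queues @work */
};

static void foo_work(struct work_struct *work)
{
        /* real processing, runs in process context */
}

/* irq_work callback: runs later, in a context where queueing work is safe. */
static void foo_irq_work(struct irq_work *iwp)
{
        struct foo *f = container_of(iwp, struct foo, irq_work);

        schedule_work(&f->work);
}

static void foo_init(struct foo *f)
{
        INIT_WORK(&f->work, foo_work);
        init_irq_work(&f->irq_work, foo_irq_work);
}

/* Called from a context where invoking schedule_work() directly is unsafe. */
static void foo_kick(struct foo *f)
{
        irq_work_queue(&f->irq_work);
}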
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 958cb7e..be76fa4 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h
@@ -34,7 +34,7 @@ struct srcu_data { /* Values: SRCU_READ_FLAVOR_.* */ /* Update-side state. */ - spinlock_t __private lock ____cacheline_internodealigned_in_smp; + raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp; struct rcu_segcblist srcu_cblist; /* List of callbacks.*/ unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */ unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ @@ -55,7 +55,7 @@ struct srcu_data { * Node in SRCU combining tree, similar in function to rcu_data. */ struct srcu_node { - spinlock_t __private lock; + raw_spinlock_t __private lock; unsigned long srcu_have_cbs[4]; /* GP seq for children having CBs, but only */ /* if greater than ->srcu_gp_seq. */ unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs have CBs for given GP? */ @@ -74,7 +74,7 @@ struct srcu_usage { /* First node at each level. */ int srcu_size_state; /* Small-to-big transition state. */ struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ - spinlock_t __private lock; /* Protect counters and size state. */ + raw_spinlock_t __private lock; /* Protect counters and size state. */ struct mutex srcu_gp_mutex; /* Serialize GP work. */ unsigned long srcu_gp_seq; /* Grace-period seq #. */ unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */ @@ -95,6 +95,7 @@ struct srcu_usage { unsigned long reschedule_jiffies; unsigned long reschedule_count; struct delayed_work work; + struct irq_work irq_work; struct srcu_struct *srcu_ssp; }; @@ -156,7 +157,7 @@ struct srcu_struct { #define __SRCU_USAGE_INIT(name) \ { \ - .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ .srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL, \ .srcu_gp_seq_needed = SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE, \ .srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL, \
diff --git a/include/linux/timb_gpio.h b/include/linux/timb_gpio.h index 3faf5a6..74f5e73b 100644 --- a/include/linux/timb_gpio.h +++ b/include/linux/timb_gpio.h
@@ -9,10 +9,10 @@ /** * struct timbgpio_platform_data - Platform data of the Timberdale GPIO driver - * @gpio_base The number of the first GPIO pin, set to -1 for + * @gpio_base: The number of the first GPIO pin, set to -1 for * dynamic number allocation. - * @nr_pins Number of pins that is supported by the hardware (1-32) - * @irq_base If IRQ is supported by the hardware, this is the base + * @nr_pins: Number of pins supported by the hardware (1-32) + * @irq_base: If IRQ is supported by the hardware, this is the base * number of IRQs. One IRQ per pin will be used. Set to * -1 if IRQs are not supported. */
diff --git a/include/linux/tnum.h b/include/linux/tnum.h index fa4654f..ca2cfec 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h
@@ -131,4 +131,7 @@ static inline bool tnum_subreg_is_const(struct tnum a) return !(tnum_subreg(a)).mask; } +/* Returns the smallest member of t larger than z */ +u64 tnum_step(struct tnum t, u64 z); + #endif /* _LINUX_TNUM_H */
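A worked example, reading the tnum in its usual (value, mask) form where the mask bits are unknown: t = {.value = 4, .mask = 3} denotes the set {4, 5, 6, 7}, so tnum_step(t, 4) would yield 5 and tnum_step(t, 5) would yield 6. This is an interpretation of the one-line comment above, not behavior quoted from the implementation.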
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 22ca1c8..1d7f29f 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h
@@ -122,6 +122,22 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp) { return tp->ext && tp->ext->faultable; } +/* + * Run RCU callback with the appropriate grace period wait for non-faultable + * tracepoints, e.g., those used in atomic context. + */ +static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func) +{ + call_srcu(&tracepoint_srcu, rcu, func); +} +/* + * Run RCU callback with the appropriate grace period wait for faultable + * tracepoints, e.g., those used in syscall context. + */ +static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func) +{ + call_rcu_tasks_trace(rcu, func); +} #else static inline void tracepoint_synchronize_unregister(void) { } @@ -129,6 +145,10 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp) { return false; } +static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func) +{ } +static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func) +{ } #endif #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
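Both helpers take a standard rcu_callback_t, so an unregister path only has to pick the flavor matching the tracepoint. A minimal caller sketch, with an illustrative 'struct tp_probe' payload that is not part of this patch:

#include <linux/slab.h>
#include <linux/tracepoint.h>

struct tp_probe {
        struct rcu_head rcu;
        /* probe-private payload */
};

static void tp_probe_free(struct rcu_head *rcu)
{
        kfree(container_of(rcu, struct tp_probe, rcu));
}

/* Route the free through the grace period matching the tracepoint flavor. */
static void tp_probe_release(struct tracepoint *tp, struct tp_probe *p)
{
        if (tracepoint_is_faultable(tp))
                call_tracepoint_unregister_syscall(&p->rcu, tp_probe_free);
        else
                call_tracepoint_unregister_atomic(&p->rcu, tp_probe_free);
}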
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 1f38042..4fe6316 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h
@@ -647,36 +647,22 @@ static inline void user_access_restore(unsigned long flags) { } /* Define RW variant so the below _mode macro expansion works */ #define masked_user_rw_access_begin(u) masked_user_access_begin(u) #define user_rw_access_begin(u, s) user_access_begin(u, s) -#define user_rw_access_end() user_access_end() /* Scoped user access */ -#define USER_ACCESS_GUARD(_mode) \ -static __always_inline void __user * \ -class_user_##_mode##_begin(void __user *ptr) \ -{ \ - return ptr; \ -} \ - \ -static __always_inline void \ -class_user_##_mode##_end(void __user *ptr) \ -{ \ - user_##_mode##_access_end(); \ -} \ - \ -DEFINE_CLASS(user_ ##_mode## _access, void __user *, \ - class_user_##_mode##_end(_T), \ - class_user_##_mode##_begin(ptr), void __user *ptr) \ - \ -static __always_inline class_user_##_mode##_access_t \ -class_user_##_mode##_access_ptr(void __user *scope) \ -{ \ - return scope; \ -} -USER_ACCESS_GUARD(read) -USER_ACCESS_GUARD(write) -USER_ACCESS_GUARD(rw) -#undef USER_ACCESS_GUARD +/* Cleanup wrapper functions */ +static __always_inline void __scoped_user_read_access_end(const void *p) +{ + user_read_access_end(); +}; +static __always_inline void __scoped_user_write_access_end(const void *p) +{ + user_write_access_end(); +}; +static __always_inline void __scoped_user_rw_access_end(const void *p) +{ + user_access_end(); +}; /** * __scoped_user_access_begin - Start a scoped user access @@ -750,13 +736,13 @@ USER_ACCESS_GUARD(rw) * * Don't use directly. Use scoped_masked_user_$MODE_access() instead. */ -#define __scoped_user_access(mode, uptr, size, elbl) \ -for (bool done = false; !done; done = true) \ - for (void __user *_tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl); \ - !done; done = true) \ - for (CLASS(user_##mode##_access, scope)(_tmpptr); !done; done = true) \ - /* Force modified pointer usage within the scope */ \ - for (const typeof(uptr) uptr = _tmpptr; !done; done = true) +#define __scoped_user_access(mode, uptr, size, elbl) \ +for (bool done = false; !done; done = true) \ + for (auto _tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl); \ + !done; done = true) \ + /* Force modified pointer usage within the scope */ \ + for (const auto uptr __cleanup(__scoped_user_##mode##_access_end) = \ + _tmpptr; !done; done = true) /** * scoped_user_read_access_size - Start a scoped user read access with given size @@ -806,7 +792,7 @@ for (bool done = false; !done; done = true) \ /** * scoped_user_rw_access_size - Start a scoped user read/write access with given size - * @uptr Pointer to the user space address to read from and write to + * @uptr: Pointer to the user space address to read from and write to * @size: Size of the access starting from @uptr * @elbl: Error label to goto when the access region is rejected * @@ -817,7 +803,7 @@ for (bool done = false; !done; done = true) \ /** * scoped_user_rw_access - Start a scoped user read/write access - * @uptr Pointer to the user space address to read from and write to + * @uptr: Pointer to the user space address to read from and write to * @elbl: Error label to goto when the access region is rejected * * The size of the access starting from @uptr is determined via sizeof(*@uptr)).
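The visible macro surface is unchanged; only the guard plumbing moves from DEFINE_CLASS() to plain __cleanup() wrappers. A minimal usage sketch, assuming the documented scoped_user_read_access(uptr, elbl) form together with the existing unsafe_get_user() accessor (the wrapper function here is illustrative):

static int read_u32_from_user(u32 __user *uptr, u32 *val)
{
        scoped_user_read_access(uptr, efault) {
                /* uptr here is the masked copy enforced by the scope */
                unsafe_get_user(*val, uptr, efault);
        }
        return 0;
efault:
        return -EFAULT;
}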
diff --git a/include/linux/usb.h b/include/linux/usb.h index fbfcc70..4aab200 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h
@@ -21,6 +21,7 @@ #include <linux/completion.h> /* for struct completion */ #include <linux/sched.h> /* for current && schedule_timeout */ #include <linux/mutex.h> /* for struct mutex */ +#include <linux/spinlock.h> /* for spinlock_t */ #include <linux/pm_runtime.h> /* for runtime PM */ struct usb_device; @@ -636,8 +637,9 @@ struct usb3_lpm_parameters { * @do_remote_wakeup: remote wakeup should be enabled * @reset_resume: needs reset instead of resume * @port_is_suspended: the upstream port is suspended (L2 or U3) - * @offload_at_suspend: offload activities during suspend is enabled. + * @offload_pm_locked: prevents offload_usage changes during PM transitions. * @offload_usage: number of offload activities happening on this usb device. + * @offload_lock: protects offload_usage and offload_pm_locked * @slot_id: Slot ID assigned by xHCI * @l1_params: best effort service latency for USB2 L1 LPM state, and L1 timeout. * @u1_params: exit latencies for USB3 U1 LPM state, and hub-initiated timeout. @@ -726,8 +728,9 @@ struct usb_device { unsigned do_remote_wakeup:1; unsigned reset_resume:1; unsigned port_is_suspended:1; - unsigned offload_at_suspend:1; + unsigned offload_pm_locked:1; int offload_usage; + spinlock_t offload_lock; enum usb_link_tunnel_mode tunnel_mode; struct device_link *usb4_link; @@ -849,6 +852,7 @@ static inline void usb_mark_last_busy(struct usb_device *udev) int usb_offload_get(struct usb_device *udev); int usb_offload_put(struct usb_device *udev); bool usb_offload_check(struct usb_device *udev); +void usb_offload_set_pm_locked(struct usb_device *udev, bool locked); #else static inline int usb_offload_get(struct usb_device *udev) @@ -857,6 +861,8 @@ static inline int usb_offload_put(struct usb_device *udev) { return 0; } static inline bool usb_offload_check(struct usb_device *udev) { return false; } +static inline void usb_offload_set_pm_locked(struct usb_device *udev, bool locked) +{ } #endif extern int usb_disable_lpm(struct usb_device *udev); @@ -1862,14 +1868,18 @@ void usb_free_noncoherent(struct usb_device *dev, size_t size, * SYNCHRONOUS CALL SUPPORT * *-------------------------------------------------------------------*/ +/* Maximum value allowed for timeout in synchronous routines below */ +#define USB_MAX_SYNCHRONOUS_TIMEOUT 60000 /* ms */ + extern int usb_control_msg(struct usb_device *dev, unsigned int pipe, __u8 request, __u8 requesttype, __u16 value, __u16 index, void *data, __u16 size, int timeout); extern int usb_interrupt_msg(struct usb_device *usb_dev, unsigned int pipe, void *data, int len, int *actual_length, int timeout); extern int usb_bulk_msg(struct usb_device *usb_dev, unsigned int pipe, - void *data, int len, int *actual_length, - int timeout); + void *data, int len, int *actual_length, int timeout); +extern int usb_bulk_msg_killable(struct usb_device *usb_dev, unsigned int pipe, + void *data, int len, int *actual_length, int timeout); /* wrappers around usb_control_msg() for the most common standard requests */ int usb_control_msg_send(struct usb_device *dev, __u8 endpoint, __u8 request,
diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 2f7bd2f..b3cc7be 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h
@@ -78,4 +78,7 @@ /* skip BOS descriptor request */ #define USB_QUIRK_NO_BOS BIT(17) +/* Device claims zero configurations, forcing to 1 */ +#define USB_QUIRK_FORCE_ONE_CONFIG BIT(18) + #endif /* __LINUX_USB_QUIRKS_H */
diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h index 2ca6082..1502b2a 100644 --- a/include/linux/usb/r8152.h +++ b/include/linux/usb/r8152.h
@@ -32,6 +32,7 @@ #define VENDOR_ID_DLINK 0x2001 #define VENDOR_ID_DELL 0x413c #define VENDOR_ID_ASUS 0x0b05 +#define VENDOR_ID_TRENDNET 0x20f4 #if IS_REACHABLE(CONFIG_USB_RTL8152) extern u8 rtl8152_get_version(struct usb_interface *intf);
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index b0e8489..bbf799c 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h
@@ -132,6 +132,7 @@ struct driver_info { #define FLAG_MULTI_PACKET 0x2000 #define FLAG_RX_ASSEMBLE 0x4000 /* rx packets may span >1 frames */ #define FLAG_NOARP 0x8000 /* device can't do ARP */ +#define FLAG_NOMAXMTU 0x10000 /* allow max_mtu above hard_mtu */ /* init device ... can sleep, or cause probe() failure */ int (*bind)(struct usbnet *, struct usb_interface *);
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 75dabb7..f36d21b 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h
@@ -207,6 +207,39 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, return __virtio_net_hdr_to_skb(skb, hdr, little_endian, hdr->gso_type); } +/* This function must be called after virtio_net_hdr_from_skb(). */ +static inline void __virtio_net_set_hdrlen(const struct sk_buff *skb, + struct virtio_net_hdr *hdr, + bool little_endian) +{ + u16 hdr_len; + + hdr_len = skb_transport_offset(skb); + + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4) + hdr_len += sizeof(struct udphdr); + else + hdr_len += tcp_hdrlen(skb); + + hdr->hdr_len = __cpu_to_virtio16(little_endian, hdr_len); +} + +/* This function must be called after virtio_net_hdr_from_skb(). */ +static inline void __virtio_net_set_tnl_hdrlen(const struct sk_buff *skb, + struct virtio_net_hdr *hdr) +{ + u16 hdr_len; + + hdr_len = skb_inner_transport_offset(skb); + + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP_L4) + hdr_len += sizeof(struct udphdr); + else + hdr_len += inner_tcp_hdrlen(skb); + + hdr->hdr_len = __cpu_to_virtio16(true, hdr_len); +} + static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, bool little_endian, @@ -385,7 +418,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, bool tnl_hdr_negotiated, bool little_endian, int vlan_hlen, - bool has_data_valid) + bool has_data_valid, + bool feature_hdrlen) { struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)vhdr; unsigned int inner_nh, outer_th; @@ -394,9 +428,17 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, tnl_gso_type = skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM); - if (!tnl_gso_type) - return virtio_net_hdr_from_skb(skb, hdr, little_endian, - has_data_valid, vlan_hlen); + if (!tnl_gso_type) { + ret = virtio_net_hdr_from_skb(skb, hdr, little_endian, + has_data_valid, vlan_hlen); + if (ret) + return ret; + + if (feature_hdrlen && hdr->hdr_len) + __virtio_net_set_hdrlen(skb, hdr, little_endian); + + return ret; + } /* Tunnel support not negotiated but the skb asks for it. */ if (!tnl_hdr_negotiated) @@ -414,6 +456,9 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, if (ret) return ret; + if (feature_hdrlen && hdr->hdr_len) + __virtio_net_set_tnl_hdrlen(skb, hdr); + if (skb->protocol == htons(ETH_P_IPV6)) hdr->gso_type |= VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6; else
diff --git a/include/net/act_api.h b/include/net/act_api.h index e1e8f0f..d11b791 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h
@@ -70,6 +70,7 @@ struct tc_action { #define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2)) #define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3)) #define TCA_ACT_FLAGS_AT_INGRESS (1U << (TCA_ACT_FLAGS_USER_BITS + 4)) +#define TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT (1U << (TCA_ACT_FLAGS_USER_BITS + 5)) /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice.
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index d3ff48a..533d8e7 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h
@@ -276,10 +276,19 @@ static inline bool vsock_net_mode_global(struct vsock_sock *vsk) return vsock_net_mode(sock_net(sk_vsock(vsk))) == VSOCK_NET_MODE_GLOBAL; } -static inline void vsock_net_set_child_mode(struct net *net, +static inline bool vsock_net_set_child_mode(struct net *net, enum vsock_net_mode mode) { - WRITE_ONCE(net->vsock.child_ns_mode, mode); + int new_locked = mode + 1; + int old_locked = 0; /* unlocked */ + + if (try_cmpxchg(&net->vsock.child_ns_mode_locked, + &old_locked, new_locked)) { + WRITE_ONCE(net->vsock.child_ns_mode, mode); + return true; + } + + return old_locked == new_locked; } static inline enum vsock_net_mode vsock_net_child_mode(struct net *net)
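The try_cmpxchg() makes the child mode write-once: the first caller latches it, and later callers succeed only if they request the already-latched mode. An illustrative call sequence (the return-value annotations follow from the code above; VSOCK_NET_MODE_LOCAL is assumed here as the counterpart of the VSOCK_NET_MODE_GLOBAL value used elsewhere in this file):

bool ok;

ok = vsock_net_set_child_mode(net, VSOCK_NET_MODE_LOCAL);  /* true: mode latched */
ok = vsock_net_set_child_mode(net, VSOCK_NET_MODE_LOCAL);  /* true: same mode */
ok = vsock_net_set_child_mode(net, VSOCK_NET_MODE_GLOBAL); /* false: conflicts */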
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index ec3af01..5172afe 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h
@@ -284,9 +284,9 @@ struct l2cap_conn_rsp { #define L2CAP_CR_LE_BAD_KEY_SIZE 0x0007 #define L2CAP_CR_LE_ENCRYPTION 0x0008 #define L2CAP_CR_LE_INVALID_SCID 0x0009 -#define L2CAP_CR_LE_SCID_IN_USE 0X000A -#define L2CAP_CR_LE_UNACCEPT_PARAMS 0X000B -#define L2CAP_CR_LE_INVALID_PARAMS 0X000C +#define L2CAP_CR_LE_SCID_IN_USE 0x000A +#define L2CAP_CR_LE_UNACCEPT_PARAMS 0x000B +#define L2CAP_CR_LE_INVALID_PARAMS 0x000C /* connect/create channel status */ #define L2CAP_CS_NO_INFO 0x0000 @@ -493,6 +493,8 @@ struct l2cap_ecred_reconf_req { #define L2CAP_RECONF_SUCCESS 0x0000 #define L2CAP_RECONF_INVALID_MTU 0x0001 #define L2CAP_RECONF_INVALID_MPS 0x0002 +#define L2CAP_RECONF_INVALID_CID 0x0003 +#define L2CAP_RECONF_INVALID_PARAMS 0x0004 struct l2cap_ecred_reconf_rsp { __le16 result; @@ -656,6 +658,7 @@ struct l2cap_conn { struct sk_buff *rx_skb; __u32 rx_len; struct ida tx_ida; + __u8 tx_ident; struct sk_buff_head pending_rx; struct work_struct pending_rx_work;
diff --git a/include/net/bonding.h b/include/net/bonding.h index 4ad5521..395c6e2 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h
@@ -699,6 +699,7 @@ void bond_debug_register(struct bonding *bond); void bond_debug_unregister(struct bonding *bond); void bond_debug_reregister(struct bonding *bond); const char *bond_mode_name(int mode); +bool __bond_xdp_check(int mode, int xmit_policy); bool bond_xdp_check(struct bonding *bond, int mode); void bond_setup(struct net_device *bond_dev); unsigned int bond_get_num_tx_queues(void);
diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h index 78a27ac..b2c359c 100644 --- a/include/net/codel_impl.h +++ b/include/net/codel_impl.h
@@ -158,6 +158,7 @@ static struct sk_buff *codel_dequeue(void *ctx, bool drop; if (!skb) { + vars->first_above_time = 0; vars->dropping = false; return skb; }
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 282e292..c16de5b 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h
@@ -175,7 +175,7 @@ static inline bool inet6_match(const struct net *net, const struct sock *sk, { if (!net_eq(sock_net(sk), net) || sk->sk_family != AF_INET6 || - sk->sk_portpair != ports || + READ_ONCE(sk->sk_portpair) != ports || !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) || !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return false;
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ecb3620..5cb3056 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h
@@ -42,7 +42,9 @@ struct inet_connection_sock_af_ops { struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req); + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)); u16 net_header_len; int (*setsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen);
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ac05a52..6d936e9 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h
@@ -264,6 +264,20 @@ inet_bhashfn_portaddr(const struct inet_hashinfo *hinfo, const struct sock *sk, return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; } +static inline bool inet_use_hash2_on_bind(const struct sock *sk) +{ +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) + return false; + + if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + return true; + } +#endif + return sk->sk_rcv_saddr != htonl(INADDR_ANY); +} + struct inet_bind_hashbucket * inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port); @@ -345,7 +359,7 @@ static inline bool inet_match(const struct net *net, const struct sock *sk, int dif, int sdif) { if (!net_eq(sock_net(sk), net) || - sk->sk_portpair != ports || + READ_ONCE(sk->sk_portpair) != ports || sk->sk_addrpair != cookie) return false;
diff --git a/include/net/ip.h b/include/net/ip.h index 69d5cef..7f9abd4 100644 --- a/include/net/ip.h +++ b/include/net/ip.h
@@ -101,7 +101,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; - ipcm->protocol = inet->inet_num; + ipcm->protocol = READ_ONCE(inet->inet_num); } #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 88b0dd4..9f8b681 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h
@@ -507,12 +507,14 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt, void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); +void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, + unsigned long now); void fib6_run_gc(unsigned long expires, struct net *net, bool force); - void fib6_gc_cleanup(void); int fib6_init(void); +#if IS_ENABLED(CONFIG_IPV6) /* Add the route to the gc list if it is not already there * * The callers should hold f6i->fib6_table->tb6_lock. @@ -545,6 +547,23 @@ static inline void fib6_remove_gc_list(struct fib6_info *f6i) hlist_del_init(&f6i->gc_link); } +static inline void fib6_may_remove_gc_list(struct net *net, + struct fib6_info *f6i) +{ + struct fib6_gc_args gc_args; + + if (hlist_unhashed(&f6i->gc_link)) + return; + + gc_args.timeout = READ_ONCE(net->ipv6.sysctl.ip6_rt_gc_interval); + gc_args.more = 0; + + rcu_read_lock(); + fib6_age_exceptions(f6i, &gc_args, jiffies); + rcu_read_unlock(); +} +#endif + struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w;
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 120db28..359b595 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h
@@ -156,6 +156,18 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, { int pkt_len, err; + if (unlikely(dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT)) { + if (dev) { + net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", + dev->name); + DEV_STATS_INC(dev, tx_errors); + } + kfree_skb(skb); + return; + } + + dev_xmit_recursion_inc(); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); IP6CB(skb)->flags = ip6cb_flags; pkt_len = skb->len - skb_inner_network_offset(skb); @@ -166,6 +178,8 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, pkt_len = -1; iptunnel_xmit_stats(dev, pkt_len); } + + dev_xmit_recursion_dec(); } #endif #endif
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b4495c3..3185937 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h
@@ -559,7 +559,7 @@ static inline u32 fib_multipath_hash_from_keys(const struct net *net, siphash_aligned_key_t hash_key; u32 mp_seed; - mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed; + mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed); fib_multipath_hash_construct_key(&hash_key, mp_seed); return flow_hash_from_keys_seed(keys, &hash_key);
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 4021e6a..1f577a4 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h
@@ -27,6 +27,13 @@ #include <net/ip6_route.h> #endif +/* Recursion limit for tunnel xmit to detect routing loops. + * Unlike XMIT_RECURSION_LIMIT (8) used in the no-qdisc path, tunnel + * recursion involves route lookups and full IP output, consuming much + * more stack per level, so a lower limit is needed. + */ +#define IP_TUNNEL_RECURSION_LIMIT 4 + /* Keep error state on tunnel for 30 sec */ #define IPTUNNEL_ERR_TIMEO (30*HZ) @@ -658,13 +665,29 @@ static inline int iptunnel_pull_offloads(struct sk_buff *skb) static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) { if (pkt_len > 0) { - struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); + if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_DSTATS) { + struct pcpu_dstats *dstats = get_cpu_ptr(dev->dstats); - u64_stats_update_begin(&tstats->syncp); - u64_stats_add(&tstats->tx_bytes, pkt_len); - u64_stats_inc(&tstats->tx_packets); - u64_stats_update_end(&tstats->syncp); - put_cpu_ptr(tstats); + u64_stats_update_begin(&dstats->syncp); + u64_stats_add(&dstats->tx_bytes, pkt_len); + u64_stats_inc(&dstats->tx_packets); + u64_stats_update_end(&dstats->syncp); + put_cpu_ptr(dstats); + return; + } + if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_TSTATS) { + struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + u64_stats_add(&tstats->tx_bytes, pkt_len); + u64_stats_inc(&tstats->tx_packets); + u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); + return; + } + pr_err_once("iptunnel_xmit_stats pcpu_stat_type=%d\n", + dev->pcpu_stat_type); + WARN_ON_ONCE(1); return; }
diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h index 481a7b28e..82b5d21 100644 --- a/include/net/libeth/xsk.h +++ b/include/net/libeth/xsk.h
@@ -597,6 +597,7 @@ __libeth_xsk_run_pass(struct libeth_xdp_buff *xdp, * @pending: current number of XSkFQEs to refill * @thresh: threshold below which the queue is refilled * @buf_len: HW-writeable length per each buffer + * @truesize: step between consecutive buffers, 0 if none exists * @nid: ID of the closest NUMA node with memory */ struct libeth_xskfq { @@ -614,6 +615,8 @@ struct libeth_xskfq { u32 thresh; u32 buf_len; + u32 truesize; + int nid; };
diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7f9d969..adce214 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h
@@ -7407,7 +7407,9 @@ void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif, * @band: the band to transmit on * @sta: optional pointer to get the station to send the frame to * - * Return: %true if the skb was prepared, %false otherwise + * Return: %true if the skb was prepared, %false otherwise. + * On failure, the skb is freed by this function; callers must not + * free it again. * * Note: must be called under RCU lock */
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 3384859..8883575 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h
@@ -83,6 +83,11 @@ void nf_conntrack_lock(spinlock_t *lock); extern spinlock_t nf_conntrack_expect_lock; +static inline void lockdep_nfct_expect_lock_held(void) +{ + lockdep_assert_held(&nf_conntrack_expect_lock); +} + /* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 165e7a0..e9a8350 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -22,10 +22,16 @@ struct nf_conntrack_expect { /* Hash member */ struct hlist_node hnode; + /* Network namespace */ + possible_net_t net; + /* We expect this tuple, with the following mask */ struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_mask mask; +#ifdef CONFIG_NF_CONNTRACK_ZONES + struct nf_conntrack_zone zone; +#endif /* Usage count. */ refcount_t use; @@ -40,7 +46,7 @@ struct nf_conntrack_expect { struct nf_conntrack_expect *this); /* Helper to assign to new connection */ - struct nf_conntrack_helper *helper; + struct nf_conntrack_helper __rcu *helper; /* The conntrack of the master connection */ struct nf_conn *master; @@ -62,7 +68,17 @@ struct nf_conntrack_expect { static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) { - return nf_ct_net(exp->master); + return read_pnet(&exp->net); +} + +static inline bool nf_ct_exp_zone_equal_any(const struct nf_conntrack_expect *a, + const struct nf_conntrack_zone *b) +{ +#ifdef CONFIG_NF_CONNTRACK_ZONES + return a->zone.id == b->id; +#else + return true; +#endif } #define NF_CT_EXP_POLICY_NAME_LEN 16
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 426534a..ec8a8ec 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h
@@ -277,8 +277,6 @@ struct nft_userdata { unsigned char data[]; }; -#define NFT_SET_ELEM_INTERNAL_LAST 0x1 - /* placeholder structure for opaque set element backend representation. */ struct nft_elem_priv { }; @@ -288,7 +286,6 @@ struct nft_elem_priv { }; * @key: element key * @key_end: closing element key * @data: element data - * @flags: flags * @priv: element private data and extensions */ struct nft_set_elem { @@ -304,7 +301,6 @@ struct nft_set_elem { u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; struct nft_data val; } data; - u32 flags; struct nft_elem_priv *priv; }; @@ -320,11 +316,13 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv) * @NFT_ITER_UNSPEC: unspecified, to catch errors * @NFT_ITER_READ: read-only iteration over set elements * @NFT_ITER_UPDATE: iteration under mutex to update set element state + * @NFT_ITER_UPDATE_CLONE: clone set before iteration under mutex to update element */ enum nft_iter_type { NFT_ITER_UNSPEC, NFT_ITER_READ, NFT_ITER_UPDATE, + NFT_ITER_UPDATE_CLONE, }; struct nft_set; @@ -876,6 +874,8 @@ struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set, u64 timeout, u64 expiration, gfp_t gfp); int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, struct nft_expr *expr_array[]); +void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, + struct nft_set_elem_expr *elem_expr); void nft_set_elem_destroy(const struct nft_set *set, const struct nft_elem_priv *elem_priv, bool destroy_expr); @@ -1861,6 +1861,11 @@ struct nft_trans_gc { struct rcu_head rcu; }; +static inline int nft_trans_gc_space(const struct nft_trans_gc *trans) +{ + return NFT_TRANS_GC_BATCHCOUNT - trans->count; +} + static inline void nft_ctx_update(struct nft_ctx *ctx, const struct nft_trans *trans) {
diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index 6682e51..2073cba 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h
@@ -17,6 +17,7 @@ struct netns_mpls { size_t platform_labels; struct mpls_route __rcu * __rcu *platform_label; struct mutex platform_mutex; + seqcount_mutex_t platform_label_seq; struct ctl_table_header *ctl; };
diff --git a/include/net/netns/vsock.h b/include/net/netns/vsock.h index b34d69a..dc8cbe4 100644 --- a/include/net/netns/vsock.h +++ b/include/net/netns/vsock.h
@@ -17,5 +17,8 @@ struct netns_vsock { enum vsock_net_mode mode; enum vsock_net_mode child_ns_mode; + + /* 0 = unlocked, 1 = locked to global, 2 = locked to local */ + int child_ns_mode_locked; }; #endif /* __NET_NET_NAMESPACE_VSOCK_H */
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 23dd647..b73983a 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h
@@ -59,7 +59,7 @@ struct netns_xfrm { struct list_head inexact_bins; - struct sock *nlsk; + struct sock __rcu *nlsk; struct sock *nlsk_stash; u32 sysctl_aevent_etime;
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 0d45348..cdd9547 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h
@@ -247,7 +247,7 @@ struct page_pool { /* User-facing fields, protected by page_pools_lock */ struct { struct hlist_node list; - u64 detach_time; + ktime_t detach_time; u32 id; } user; };
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c3a7268..c3d6573 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h
@@ -716,6 +716,34 @@ void qdisc_destroy(struct Qdisc *qdisc); void qdisc_put(struct Qdisc *qdisc); void qdisc_put_unlocked(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len); + +static inline void dev_reset_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + struct Qdisc *qdisc; + bool nolock; + + qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); + if (!qdisc) + return; + + nolock = qdisc->flags & TCQ_F_NOLOCK; + + if (nolock) + spin_lock_bh(&qdisc->seqlock); + spin_lock_bh(qdisc_lock(qdisc)); + + qdisc_reset(qdisc); + + spin_unlock_bh(qdisc_lock(qdisc)); + if (nolock) { + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); + spin_unlock_bh(&qdisc->seqlock); + } +} + #ifdef CONFIG_NET_SCHED int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type, void *type_data); @@ -778,13 +806,23 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb) static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) { struct Qdisc *qdisc; + bool nolock; for (; i < dev->num_tx_queues; i++) { qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); if (qdisc) { + nolock = qdisc->flags & TCQ_F_NOLOCK; + + if (nolock) + spin_lock_bh(&qdisc->seqlock); spin_lock_bh(qdisc_lock(qdisc)); qdisc_reset(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); + if (nolock) { + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); + spin_unlock_bh(&qdisc->seqlock); + } } } } @@ -1419,6 +1457,11 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, struct tcf_block *block); +static inline bool mini_qdisc_pair_inited(struct mini_Qdisc_pair *miniqp) +{ + return !!miniqp->p_miniq; +} + void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx); int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb));
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index cddebaf..6f99622 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h
@@ -5,16 +5,47 @@ #include <linux/types.h> struct net; +extern struct net init_net; + +union tcp_seq_and_ts_off { + struct { + u32 seq; + u32 ts_off; + }; + u64 hash64; +}; u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); -u32 secure_tcp_seq(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); -u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr); -u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport); -u32 secure_tcpv6_ts_off(const struct net *net, - const __be32 *saddr, const __be32 *daddr); +union tcp_seq_and_ts_off +secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); +static inline u32 secure_tcp_seq(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + union tcp_seq_and_ts_off ts; + + ts = secure_tcp_seq_and_ts_off(&init_net, saddr, daddr, + sport, dport); + + return ts.seq; +} + +union tcp_seq_and_ts_off +secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr, + const __be32 *daddr, + __be16 sport, __be16 dport); + +static inline u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport) +{ + union tcp_seq_and_ts_off ts; + + ts = secure_tcpv6_seq_and_ts_off(&init_net, saddr, daddr, + sport, dport); + + return ts.seq; +} #endif /* _NET_SECURE_SEQ */
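The union lets a single hash computation yield both the ISN material and the timestamp offset, while the static inline wrappers keep the old one-value API as a view onto the same result. A kernel-context sketch of a caller that wants both halves (the wrapper function name is illustrative):

static void get_isn_and_ts_off(const struct net *net,
                               __be32 saddr, __be32 daddr,
                               __be16 sport, __be16 dport,
                               u32 *isn, u32 *ts_off)
{
        union tcp_seq_and_ts_off v;

        v = secure_tcp_seq_and_ts_off(net, saddr, daddr, sport, dport);
        *isn = v.seq;           /* initial sequence number material */
        *ts_off = v.ts_off;     /* per-connection timestamp offset */
}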
diff --git a/include/net/sock.h b/include/net/sock.h index 66b5628..6c9a8301 100644 --- a/include/net/sock.h +++ b/include/net/sock.h
@@ -2098,7 +2098,7 @@ static inline int sk_rx_queue_get(const struct sock *sk) static inline void sk_set_socket(struct sock *sk, struct socket *sock) { - sk->sk_socket = sock; + WRITE_ONCE(sk->sk_socket, sock); if (sock) { WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid); WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino);
diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h index b147a3b..e0fded1 100644 --- a/include/net/tc_act/tc_gate.h +++ b/include/net/tc_act/tc_gate.h
@@ -32,6 +32,7 @@ struct tcf_gate_params { s32 tcfg_clockid; size_t num_entries; struct list_head entries; + struct rcu_head rcu; }; #define GATE_ACT_GATE_OPEN BIT(0) @@ -39,7 +40,7 @@ struct tcf_gate_params { struct tcf_gate { struct tc_action common; - struct tcf_gate_params param; + struct tcf_gate_params __rcu *param; u8 current_gate_status; ktime_t current_close_time; u32 current_entry_octets; @@ -51,47 +52,65 @@ struct tcf_gate { #define to_gate(a) ((struct tcf_gate *)a) +static inline struct tcf_gate_params *tcf_gate_params_locked(const struct tc_action *a) +{ + struct tcf_gate *gact = to_gate(a); + + return rcu_dereference_protected(gact->param, + lockdep_is_held(&gact->tcf_lock)); +} + static inline s32 tcf_gate_prio(const struct tc_action *a) { + struct tcf_gate_params *p; s32 tcfg_prio; - tcfg_prio = to_gate(a)->param.tcfg_priority; + p = tcf_gate_params_locked(a); + tcfg_prio = p->tcfg_priority; return tcfg_prio; } static inline u64 tcf_gate_basetime(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_basetime; - tcfg_basetime = to_gate(a)->param.tcfg_basetime; + p = tcf_gate_params_locked(a); + tcfg_basetime = p->tcfg_basetime; return tcfg_basetime; } static inline u64 tcf_gate_cycletime(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_cycletime; - tcfg_cycletime = to_gate(a)->param.tcfg_cycletime; + p = tcf_gate_params_locked(a); + tcfg_cycletime = p->tcfg_cycletime; return tcfg_cycletime; } static inline u64 tcf_gate_cycletimeext(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_cycletimeext; - tcfg_cycletimeext = to_gate(a)->param.tcfg_cycletime_ext; + p = tcf_gate_params_locked(a); + tcfg_cycletimeext = p->tcfg_cycletime_ext; return tcfg_cycletimeext; } static inline u32 tcf_gate_num_entries(const struct tc_action *a) { + struct tcf_gate_params *p; u32 num_entries; - num_entries = to_gate(a)->param.num_entries; + p = tcf_gate_params_locked(a); + num_entries = p->num_entries; return num_entries; } @@ -105,7 +124,7 @@ static inline struct action_gate_entry u32 num_entries; int i = 0; - p = &to_gate(a)->param; + p = tcf_gate_params_locked(a); num_entries = p->num_entries; list_for_each_entry(entry, &p->entries, list)
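With param now annotated __rcu, the getters above go through tcf_gate_params_locked() and rely on tcf_lock via lockdep_is_held(); a datapath reader would instead dereference under RCU. A hedged sketch of that reader side, which this hunk does not show:

static s32 gate_prio_rcu(const struct tc_action *a)
{
        const struct tcf_gate_params *p;
        s32 prio;

        rcu_read_lock();
        p = rcu_dereference(to_gate(a)->param);
        prio = p->tcfg_priority;
        rcu_read_unlock();

        return prio;
}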
diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index c7f24a2..24d4d5a 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h
@@ -13,15 +13,13 @@ struct tcf_ife_params { u8 eth_src[ETH_ALEN]; u16 eth_type; u16 flags; - + struct list_head metalist; struct rcu_head rcu; }; struct tcf_ife_info { struct tc_action common; struct tcf_ife_params __rcu *params; - /* list of metaids allowed */ - struct list_head metalist; }; #define to_ife(a) ((struct tcf_ife_info *)a)
diff --git a/include/net/tcp.h b/include/net/tcp.h index 40e72b9c..978eea2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h
@@ -43,6 +43,7 @@ #include <net/dst.h> #include <net/mptcp.h> #include <net/xfrm.h> +#include <net/secure_seq.h> #include <linux/seq_file.h> #include <linux/memcontrol.h> @@ -544,7 +545,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req); + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)); int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); int tcp_v4_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); int tcp_connect(struct sock *sk); @@ -2462,8 +2465,9 @@ struct tcp_request_sock_ops { struct flowi *fl, struct request_sock *req, u32 tw_isn); - u32 (*init_seq)(const struct sk_buff *skb); - u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb); + union tcp_seq_and_ts_off (*init_seq_and_ts_off)( + const struct net *net, + const struct sk_buff *skb); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc,
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index d9c6d04..fc1fc43 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h
@@ -52,7 +52,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { - return 0; + return -EPFNOSUPPORT; } #endif
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 242e34f..6b9ebae 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h
@@ -51,6 +51,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool); } +static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool) +{ + return pool->unaligned ? 0 : xsk_pool_get_chunk_size(pool); +} + static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { @@ -122,7 +127,7 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) goto out; list_for_each_entry_safe(pos, tmp, xskb_list, list_node) { - list_del(&pos->list_node); + list_del_init(&pos->list_node); xp_free(pos); } @@ -157,7 +162,7 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first) frag = list_first_entry_or_null(&xskb->pool->xskb_list, struct xdp_buff_xsk, list_node); if (frag) { - list_del(&frag->list_node); + list_del_init(&frag->list_node); ret = &frag->xdp; } @@ -168,7 +173,7 @@ static inline void xsk_buff_del_frag(struct xdp_buff *xdp) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); - list_del(&xskb->list_node); + list_del_init(&xskb->list_node); } static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first) @@ -337,6 +342,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) return 0; } +static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool) +{ + return 0; +} + static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) {
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 6de6fd8..d639ff8 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h
@@ -181,7 +181,7 @@ void rdma_destroy_id(struct rdma_cm_id *id); * * It needs to be called before the RDMA identifier is bound * to a device, which means it should be called before - * rdma_bind_addr(), rdma_bind_addr() and rdma_listen(). + * rdma_bind_addr(), rdma_resolve_addr() and rdma_listen(). */ int rdma_restrict_node_type(struct rdma_cm_id *id, u8 node_type);
diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index ae1e148..28f9f59 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h
@@ -406,6 +406,7 @@ extern const char * const cs35l56_cal_set_status_text[3]; extern const char * const cs35l56_tx_input_texts[CS35L56_NUM_INPUT_SRC]; extern const unsigned int cs35l56_tx_input_values[CS35L56_NUM_INPUT_SRC]; +int cs35l56_set_asp_patch(struct cs35l56_base *cs35l56_base); int cs35l56_set_patch(struct cs35l56_base *cs35l56_base); int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command); int cs35l56_firmware_shutdown(struct cs35l56_base *cs35l56_base);
diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h index 79bd5a7..0e871c78 100644 --- a/include/sound/sdca_function.h +++ b/include/sound/sdca_function.h
@@ -27,11 +27,6 @@ struct sdca_function_desc; #define SDCA_MAX_ENTITY_COUNT 128 /* - * Sanity check on number of initialization writes, can be expanded if needed. - */ -#define SDCA_MAX_INIT_COUNT 2048 - -/* * The Cluster IDs are 16-bit, so a maximum of 65535 Clusters per * function can be represented, however limit this to a slightly * more reasonable value. Can be expanded if needed.
diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index 7c03bdc..e847cf5 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h
@@ -151,6 +151,7 @@ struct tasdevice { struct bulk_reg_val *cali_data_backup; struct bulk_reg_val alp_cali_bckp; struct tasdevice_fw *cali_data_fmw; + void *cali_specific; unsigned int dev_addr; unsigned int err_code; unsigned char cur_book;
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 125bdc1..0864700 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h
@@ -769,12 +769,15 @@ TRACE_EVENT(btrfs_sync_file, ), TP_fast_assign( - const struct dentry *dentry = file->f_path.dentry; - const struct inode *inode = d_inode(dentry); + struct dentry *dentry = file_dentry(file); + struct inode *inode = file_inode(file); + struct dentry *parent = dget_parent(dentry); + struct inode *parent_inode = d_inode(parent); - TP_fast_assign_fsid(btrfs_sb(file->f_path.dentry->d_sb)); + dput(parent); + TP_fast_assign_fsid(btrfs_sb(inode->i_sb)); __entry->ino = btrfs_ino(BTRFS_I(inode)); - __entry->parent = btrfs_ino(BTRFS_I(d_inode(dentry->d_parent))); + __entry->parent = btrfs_ino(BTRFS_I(parent_inode)); __entry->datasync = datasync; __entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root); ),
diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index 33e99e79..63597b0 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h
@@ -32,7 +32,9 @@ TRACE_DEFINE_ENUM(DMA_NONE); { DMA_ATTR_ALLOC_SINGLE_PAGES, "ALLOC_SINGLE_PAGES" }, \ { DMA_ATTR_NO_WARN, "NO_WARN" }, \ { DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \ - { DMA_ATTR_MMIO, "MMIO" }) + { DMA_ATTR_MMIO, "MMIO" }, \ + { DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" }, \ + { DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" }) DECLARE_EVENT_CLASS(dma_map, TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 7f93e75..cd7920c 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h
@@ -440,7 +440,13 @@ TRACE_EVENT(rss_stat, TP_fast_assign( __entry->mm_id = mm_ptr_to_hash(mm); - __entry->curr = !!(current->mm == mm); + /* + * curr is true if the mm matches the current task's mm_struct. + * Since kthreads (PF_KTHREAD) have no mm_struct of their own + * but can borrow one via kthread_use_mm(), we must filter them + * out to avoid incorrectly attributing the RSS update to them. + */ + __entry->curr = current->mm == mm && !(current->flags & PF_KTHREAD); __entry->member = member; __entry->size = (percpu_counter_sum_positive(&mm->rss_stat[member]) << PAGE_SHIFT);
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 64a382f..cbe2821 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h
@@ -57,6 +57,7 @@ EM(netfs_rreq_trace_done, "DONE ") \ EM(netfs_rreq_trace_end_copy_to_cache, "END-C2C") \ EM(netfs_rreq_trace_free, "FREE ") \ + EM(netfs_rreq_trace_intr, "INTR ") \ EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \ EM(netfs_rreq_trace_recollect, "RECLLCT") \ EM(netfs_rreq_trace_redirty, "REDIRTY") \ @@ -169,7 +170,8 @@ EM(netfs_sreq_trace_put_oom, "PUT OOM ") \ EM(netfs_sreq_trace_put_wip, "PUT WIP ") \ EM(netfs_sreq_trace_put_work, "PUT WORK ") \ - E_(netfs_sreq_trace_put_terminated, "PUT TERM ") + EM(netfs_sreq_trace_put_terminated, "PUT TERM ") \ + E_(netfs_sreq_trace_see_failed, "SEE FAILED ") #define netfs_folio_traces \ EM(netfs_folio_is_uptodate, "mod-uptodate") \ @@ -738,19 +740,19 @@ TRACE_EVENT(netfs_collect_stream, __field(unsigned int, wreq) __field(unsigned char, stream) __field(unsigned long long, collected_to) - __field(unsigned long long, front) + __field(unsigned long long, issued_to) ), TP_fast_assign( __entry->wreq = wreq->debug_id; __entry->stream = stream->stream_nr; __entry->collected_to = stream->collected_to; - __entry->front = stream->front ? stream->front->start : UINT_MAX; + __entry->issued_to = atomic64_read(&wreq->issued_to); ), - TP_printk("R=%08x[%x:] cto=%llx frn=%llx", + TP_printk("R=%08x[%x:] cto=%llx ito=%llx", __entry->wreq, __entry->stream, - __entry->collected_to, __entry->front) + __entry->collected_to, __entry->issued_to) ); TRACE_EVENT(netfs_folioq,
diff --git a/include/trace/events/task.h b/include/trace/events/task.h index 4f07596..b9a129e 100644 --- a/include/trace/events/task.h +++ b/include/trace/events/task.h
@@ -38,19 +38,22 @@ TRACE_EVENT(task_rename, TP_ARGS(task, comm), TP_STRUCT__entry( + __field( pid_t, pid) __array( char, oldcomm, TASK_COMM_LEN) __array( char, newcomm, TASK_COMM_LEN) __field( short, oom_score_adj) ), TP_fast_assign( + __entry->pid = task->pid; memcpy(entry->oldcomm, task->comm, TASK_COMM_LEN); strscpy(entry->newcomm, comm, TASK_COMM_LEN); __entry->oom_score_adj = task->signal->oom_score_adj; ), - TP_printk("oldcomm=%s newcomm=%s oom_score_adj=%hd", - __entry->oldcomm, __entry->newcomm, __entry->oom_score_adj) + TP_printk("pid=%d oldcomm=%s newcomm=%s oom_score_adj=%hd", + __entry->pid, __entry->oldcomm, + __entry->newcomm, __entry->oom_score_adj) ); /**
diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index e527b24..c89aede 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h
@@ -401,8 +401,8 @@ extern "C" { * implementation can multiply the values by 2^6=64. For that reason the padding * must only contain zeros. * index 0 = Y plane, [15:0] z:Y [6:10] little endian - * index 1 = Cr plane, [15:0] z:Cr [6:10] little endian - * index 2 = Cb plane, [15:0] z:Cb [6:10] little endian + * index 1 = Cb plane, [15:0] z:Cb [6:10] little endian + * index 2 = Cr plane, [15:0] z:Cr [6:10] little endian */ #define DRM_FORMAT_S010 fourcc_code('S', '0', '1', '0') /* 2x2 subsampled Cb (1) and Cr (2) planes 10 bits per channel */ #define DRM_FORMAT_S210 fourcc_code('S', '2', '1', '0') /* 2x1 subsampled Cb (1) and Cr (2) planes 10 bits per channel */ @@ -414,8 +414,8 @@ extern "C" { * implementation can multiply the values by 2^4=16. For that reason the padding * must only contain zeros. * index 0 = Y plane, [15:0] z:Y [4:12] little endian - * index 1 = Cr plane, [15:0] z:Cr [4:12] little endian - * index 2 = Cb plane, [15:0] z:Cb [4:12] little endian + * index 1 = Cb plane, [15:0] z:Cb [4:12] little endian + * index 2 = Cr plane, [15:0] z:Cr [4:12] little endian */ #define DRM_FORMAT_S012 fourcc_code('S', '0', '1', '2') /* 2x2 subsampled Cb (1) and Cr (2) planes 12 bits per channel */ #define DRM_FORMAT_S212 fourcc_code('S', '2', '1', '2') /* 2x1 subsampled Cb (1) and Cr (2) planes 12 bits per channel */ @@ -424,8 +424,8 @@ extern "C" { /* * 3 plane YCbCr * index 0 = Y plane, [15:0] Y little endian - * index 1 = Cr plane, [15:0] Cr little endian - * index 2 = Cb plane, [15:0] Cb little endian + * index 1 = Cb plane, [15:0] Cb little endian + * index 2 = Cr plane, [15:0] Cr little endian */ #define DRM_FORMAT_S016 fourcc_code('S', '0', '1', '6') /* 2x2 subsampled Cb (1) and Cr (2) planes 16 bits per channel */ #define DRM_FORMAT_S216 fourcc_code('S', '2', '1', '6') /* 2x1 subsampled Cb (1) and Cr (2) planes 16 bits per channel */
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index 5a6fda6..e827c9d 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h
@@ -20,6 +20,7 @@ #ifndef _DMA_BUF_UAPI_H_ #define _DMA_BUF_UAPI_H_ +#include <linux/ioctl.h> #include <linux/types.h> /**
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6750c38..1ff1614 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h
@@ -188,7 +188,8 @@ enum io_uring_sqe_flags_bit { /* * If COOP_TASKRUN is set, get notified if task work is available for * running and a kernel transition would be needed to run it. This sets - * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN. + * IORING_SQ_TASKRUN in the sq ring flags. Not valid without COOP_TASKRUN + * or DEFER_TASKRUN. */ #define IORING_SETUP_TASKRUN_FLAG (1U << 9) #define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */
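A userspace sketch of a setup call that satisfies the clarified rule by combining the flag with COOP_TASKRUN (queue depth and error handling elided; liburing users would pass the same flags via io_uring_queue_init_params()):

#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>

static int setup_ring(void)
{
        struct io_uring_params p = {
                .flags = IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG,
        };

        /* The kernel will report pending task work by setting
         * IORING_SQ_TASKRUN in the mmap'ed SQ ring flags word. */
        return syscall(__NR_io_uring_setup, 64, &p);
}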
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 65500f5..80364d4 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h
@@ -14,6 +14,10 @@ #include <linux/ioctl.h> #include <asm/kvm.h> +#ifdef __KERNEL__ +#include <linux/kvm_types.h> +#endif + #define KVM_API_VERSION 12 /* @@ -1601,7 +1605,11 @@ struct kvm_stats_desc { __u16 size; __u32 offset; __u32 bucket_size; +#ifdef __KERNEL__ + char name[KVM_STATS_NAME_SIZE]; +#else char name[]; +#endif }; #define KVM_GET_STATS_FD _IO(KVMIO, 0xce)
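The __KERNEL__-only fixed-size name[] does not change the binary stats ABI: userspace still computes the descriptor stride from the header's name_size rather than from sizeof() alone. A hedged reader sketch (fd acquisition elided; field names follow the binary stats header):

#include <unistd.h>
#include <linux/kvm.h>

static size_t stats_desc_stride(int stats_fd)
{
        struct kvm_stats_header hdr;

        if (pread(stats_fd, &hdr, sizeof(hdr), 0) != (ssize_t)sizeof(hdr))
                return 0;

        /* Each descriptor is the fixed part plus a name_size tail. */
        return sizeof(struct kvm_stats_desc) + hdr.name_size;
}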
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 2607102..56b6b60 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -159,5 +159,9 @@ enum ip_conntrack_expect_events { #define NF_CT_EXPECT_INACTIVE 0x2 #define NF_CT_EXPECT_USERSPACE 0x4 +#ifdef __KERNEL__ +#define NF_CT_EXPECT_MASK (NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE | \ + NF_CT_EXPECT_USERSPACE) +#endif #endif /* _UAPI_NF_CONNTRACK_COMMON_H */
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index ec1c54b..14f634a 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h
@@ -712,7 +712,7 @@ #define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */ #define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */ #define PCI_EXP_LNKSTA2_FLIT 0x0400 /* Flit Mode Status */ -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x34 /* end of v2 EPs w/ link */ #define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */ #define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ #define PCI_EXP_SLTCTL2 0x38 /* Slot Control 2 */
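The arithmetic behind the one-byte fix, as compile-time checks (a sketch): Link Status 2 is a 16-bit register at 0x32, so a v2 endpoint with a link extends through 0x33, and the end-of-capability marker must equal 0x34, where Slot Capabilities 2 begins.

#include <linux/pci_regs.h>

_Static_assert(PCI_EXP_LNKSTA2 + 2 == PCI_EXP_SLTCAP2,
               "Link Status 2 is 2 bytes wide");
_Static_assert(PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 == PCI_EXP_SLTCAP2,
               "v2 EPs w/ link end where Slot Capabilities 2 begins");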
diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index 863c4a0..f69344f 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h
@@ -87,10 +87,17 @@ struct rseq_slice_ctrl { }; /* - * struct rseq is aligned on 4 * 8 bytes to ensure it is always - * contained within a single cache-line. + * The original size and alignment of the allocation for struct rseq is + * 32 bytes. * - * A single struct rseq per thread is allowed. + * The allocation size needs to be greater or equal to + * max(getauxval(AT_RSEQ_FEATURE_SIZE), 32), and the allocation needs to + * be aligned on max(getauxval(AT_RSEQ_ALIGN), 32). + * + * As an alternative, userspace is allowed to use both the original size + * and alignment of 32 bytes for backward compatibility. + * + * A single active struct rseq registration per thread is allowed. */ struct rseq { /* @@ -181,9 +188,20 @@ struct rseq { struct rseq_slice_ctrl slice_ctrl; /* + * Before rseq became extensible, its original size was 32 bytes even + * though the active rseq area was only 20 bytes. + * Exposing a 32 bytes feature size would make life needlessly painful + * for userspace. Therefore, add a reserved byte after byte 32 + * to bump the rseq feature size from 32 to 33. + * The next field to be added to the rseq area will be larger + * than one byte, and will replace this reserved byte. + */ + __u8 __reserved; + + /* * Flexible array member at end of structure, after last feature field. */ char end[]; -} __attribute__((aligned(4 * sizeof(__u64)))); +} __attribute__((aligned(32))); #endif /* _UAPI_LINUX_RSEQ_H */
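The sizing rule from the updated comment, as a userspace sketch (assumes a libc new enough to define AT_RSEQ_FEATURE_SIZE and AT_RSEQ_ALIGN; otherwise pull them from <linux/auxvec.h>):

#include <stdlib.h>
#include <sys/auxv.h>

/* getauxval() returns 0 for unknown keys, which the max-with-32 covers */
static void *alloc_rseq_area(void)
{
        unsigned long size  = getauxval(AT_RSEQ_FEATURE_SIZE);
        unsigned long align = getauxval(AT_RSEQ_ALIGN);

        if (size < 32)
                size = 32;
        if (align < 32)
                align = 32;
        /* aligned_alloc() requires size to be a multiple of align */
        size = (size + align - 1) & ~(align - 1);
        return aligned_alloc(align, size);
}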
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index c94caf8..8ca1574 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h
@@ -80,6 +80,7 @@ struct xenbus_device { const char *devicetype; const char *nodename; const char *otherend; + bool vanished; int otherend_id; struct xenbus_watch otherend_watch; struct device dev; @@ -228,7 +229,8 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr); int xenbus_alloc_evtchn(struct xenbus_device *dev, evtchn_port_t *port); int xenbus_free_evtchn(struct xenbus_device *dev, evtchn_port_t port); -enum xenbus_state xenbus_read_driver_state(const char *path); +enum xenbus_state xenbus_read_driver_state(const struct xenbus_device *dev, + const char *path); __printf(3, 4) void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...);
diff --git a/init/Kconfig b/init/Kconfig index c25869c..7484cd7 100644 --- a/init/Kconfig +++ b/init/Kconfig
@@ -146,14 +146,14 @@ config CC_HAS_COUNTED_BY_PTR bool # supported since clang 22 - default y if CC_IS_CLANG && CLANG_VERSION >= 220000 + default y if CC_IS_CLANG && CLANG_VERSION >= 220100 # supported since gcc 16.0.0 default y if CC_IS_GCC && GCC_VERSION >= 160000 config CC_HAS_BROKEN_COUNTED_BY_REF bool # https://github.com/llvm/llvm-project/issues/182575 - default y if CC_IS_CLANG && CLANG_VERSION < 220000 + default y if CC_IS_CLANG && CLANG_VERSION < 220100 config CC_HAS_MULTIDIMENSIONAL_NONSTRING def_bool $(success,echo 'char tag[][4] __attribute__((__nonstring__)) = { };' | $(CC) $(CLANG_FLAGS) -x c - -c -o /dev/null -Werror) @@ -1902,7 +1902,7 @@ default n depends on IO_URING help - Enable mock files for io_uring subststem testing. The ABI might + Enable mock files for io_uring subsystem testing. The ABI might still change, so it's still experimental and should only be enabled for specific test purposes.
diff --git a/io_uring/bpf_filter.c b/io_uring/bpf_filter.c index 6a98750..c003763 100644 --- a/io_uring/bpf_filter.c +++ b/io_uring/bpf_filter.c
@@ -85,7 +85,7 @@ int __io_uring_run_bpf_filters(struct io_bpf_filter __rcu **filters, do { if (filter == &dummy_filter) return -EACCES; - ret = bpf_prog_run(filter->prog, &bpf_ctx); + ret = bpf_prog_run_pin_on_cpu(filter->prog, &bpf_ctx); if (!ret) return -EACCES; filter = filter->next;
diff --git a/io_uring/cmd_net.c b/io_uring/cmd_net.c index 57ddaf8..125a81c 100644 --- a/io_uring/cmd_net.c +++ b/io_uring/cmd_net.c
@@ -146,7 +146,7 @@ static int io_uring_cmd_getsockname(struct socket *sock, return -EINVAL; uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr)); - ulen = u64_to_user_ptr(sqe->addr3); + ulen = u64_to_user_ptr(READ_ONCE(sqe->addr3)); peer = READ_ONCE(sqe->optlen); if (peer > 1) return -EINVAL;
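The rule behind this one-liner, which also motivates the READ_ONCE() additions in io_uring/timeout.c and io_uring/zcrx.c further down: SQE fields live in memory shared with userspace, so the kernel must snapshot each field exactly once,

        u64 addr3 = READ_ONCE(sqe->addr3);   /* single snapshot ... */
        ulen = u64_to_user_ptr(addr3);       /* ... reused afterwards */

otherwise the compiler is free to reload the field, letting a racing userspace flip the value between validation and use.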
diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c index cbea1c2..7482a7d 100644 --- a/io_uring/eventfd.c +++ b/io_uring/eventfd.c
@@ -76,11 +76,15 @@ void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event) { bool skip = false; struct io_ev_fd *ev_fd; - - if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED) - return; + struct io_rings *rings; guard(rcu)(); + + rings = rcu_dereference(ctx->rings_rcu); + if (!rings) + return; + if (READ_ONCE(rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED) + return; ev_fd = rcu_dereference(ctx->io_ev_fd); /* * Check again if ev_fd exists in case an io_eventfd_unregister call
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 80178b6..c2d3e45 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c
@@ -119,12 +119,14 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) sq_idx); break; } - if ((++sq_head & sq_mask) == 0) { + if (sq_idx == sq_mask) { seq_printf(m, "%5u: corrupted sqe, wrapping 128B entry\n", sq_idx); break; } + sq_head++; + i++; sqe128 = true; } seq_printf(m, "%5u: opcode:%s, fd:%d, flags:%x, off:%llu, "
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index aa95703..4d7bcbb 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c
@@ -1745,7 +1745,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, * well as 2 contiguous entries. */ if (!(ctx->flags & IORING_SETUP_SQE_MIXED) || *left < 2 || - !(ctx->cached_sq_head & (ctx->sq_entries - 1))) + (unsigned)(sqe - ctx->sq_sqes) >= ctx->sq_entries - 1) return io_init_fail_req(req, -EINVAL); /* * A 128b operation on a mixed SQ uses two entries, so we have @@ -2015,7 +2015,7 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) if (ctx->flags & IORING_SETUP_SQ_REWIND) entries = ctx->sq_entries; else - entries = io_sqring_entries(ctx); + entries = __io_sqring_entries(ctx); entries = min(nr, entries); if (unlikely(!entries)) @@ -2066,6 +2066,7 @@ static void io_rings_free(struct io_ring_ctx *ctx) io_free_region(ctx->user, &ctx->sq_region); io_free_region(ctx->user, &ctx->ring_region); ctx->rings = NULL; + RCU_INIT_POINTER(ctx->rings_rcu, NULL); ctx->sq_sqes = NULL; } @@ -2249,7 +2250,9 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) */ poll_wait(file, &ctx->poll_wq, wait); - if (!io_sqring_full(ctx)) + rcu_read_lock(); + + if (!__io_sqring_full(ctx)) mask |= EPOLLOUT | EPOLLWRNORM; /* @@ -2269,6 +2272,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) if (__io_cqring_events_user(ctx) || io_has_work(ctx)) mask |= EPOLLIN | EPOLLRDNORM; + rcu_read_unlock(); return mask; } @@ -2703,6 +2707,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx, if (ret) return ret; ctx->rings = rings = io_region_get_ptr(&ctx->ring_region); + rcu_assign_pointer(ctx->rings_rcu, rings); if (!(ctx->flags & IORING_SETUP_NO_SQARRAY)) ctx->sq_array = (u32 *)((char *)rings + rl->sq_array_offset);
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 0fa844f..ee24bc5 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h
@@ -142,16 +142,28 @@ struct io_wait_queue { #endif }; +static inline struct io_rings *io_get_rings(struct io_ring_ctx *ctx) +{ + return rcu_dereference_check(ctx->rings_rcu, + lockdep_is_held(&ctx->uring_lock) || + lockdep_is_held(&ctx->completion_lock)); +} + static inline bool io_should_wake(struct io_wait_queue *iowq) { struct io_ring_ctx *ctx = iowq->ctx; - int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail; + struct io_rings *rings; + int dist; + + guard(rcu)(); + rings = io_get_rings(ctx); /* * Wake up if we have enough events, or if a timeout occurred since we * started waiting. For timeouts, we always want to return to userspace, * regardless of event count. */ + dist = READ_ONCE(rings->cq.tail) - (int) iowq->cq_tail; return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; } @@ -431,9 +443,9 @@ static inline void io_cqring_wake(struct io_ring_ctx *ctx) __io_wq_wake(&ctx->cq_wait); } -static inline bool io_sqring_full(struct io_ring_ctx *ctx) +static inline bool __io_sqring_full(struct io_ring_ctx *ctx) { - struct io_rings *r = ctx->rings; + struct io_rings *r = io_get_rings(ctx); /* * SQPOLL must use the actual sqring head, as using the cached_sq_head @@ -445,9 +457,15 @@ static inline bool io_sqring_full(struct io_ring_ctx *ctx) return READ_ONCE(r->sq.tail) - READ_ONCE(r->sq.head) == ctx->sq_entries; } -static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) +static inline bool io_sqring_full(struct io_ring_ctx *ctx) { - struct io_rings *rings = ctx->rings; + guard(rcu)(); + return __io_sqring_full(ctx); +} + +static inline unsigned int __io_sqring_entries(struct io_ring_ctx *ctx) +{ + struct io_rings *rings = io_get_rings(ctx); unsigned int entries; /* make sure SQ entry isn't read before tail */ @@ -455,6 +473,12 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) return min(entries, ctx->sq_entries); } +static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) +{ + guard(rcu)(); + return __io_sqring_entries(ctx); +} + /* * Don't complete immediately but use deferred completion infrastructure. * Protected by ->uring_lock and can only be used either with
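Reduced to its skeleton, the conversion looks like this (a kernel-context sketch built on <linux/rcupdate.h> and <linux/cleanup.h>; the types and names below are illustrative, not the io_uring ones): the pointer is published with rcu_assign_pointer(), readers take an RCU guard, and paths that can race with a ring resize tolerate NULL.

struct thing { u32 flags; };
struct owner { struct thing __rcu *thing_rcu; };

static bool owner_flag_set(struct owner *o, u32 flag)
{
        struct thing *t;

        guard(rcu)();                   /* rcu_read_unlock() on return */
        t = rcu_dereference(o->thing_rcu);
        return t && (READ_ONCE(t->flags) & flag);
}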
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 2ffa95b1..5257b3a 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c
@@ -34,6 +34,10 @@ struct io_provide_buf { static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len) { + /* No data consumed, return false early to avoid consuming the buffer */ + if (!len) + return false; + while (len) { struct io_uring_buf *buf; u32 buf_len, this_len; @@ -111,9 +115,18 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) buf = req->kbuf; bl = io_buffer_get_list(ctx, buf->bgid); - list_add(&buf->list, &bl->buf_list); - bl->nbufs++; + /* + * If the buffer list was upgraded to a ring-based one, or removed, + * while the request was in-flight in io-wq, drop it. + */ + if (bl && !(bl->flags & IOBL_BUF_RING)) { + list_add(&buf->list, &bl->buf_list); + bl->nbufs++; + } else { + kfree(buf); + } req->flags &= ~REQ_F_BUFFER_SELECTED; + req->kbuf = NULL; io_ring_submit_unlock(ctx, issue_flags); return true; @@ -203,7 +216,8 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, sel.addr = u64_to_user_ptr(READ_ONCE(buf->addr)); if (io_should_commit(req, issue_flags)) { - io_kbuf_commit(req, sel.buf_list, *len, 1); + if (!io_kbuf_commit(req, sel.buf_list, *len, 1)) + req->flags |= REQ_F_BUF_MORE; sel.buf_list = NULL; } return sel; @@ -336,7 +350,8 @@ int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg, */ if (ret > 0) { req->flags |= REQ_F_BUFFERS_COMMIT | REQ_F_BL_NO_RECYCLE; - io_kbuf_commit(req, sel->buf_list, arg->out_len, ret); + if (!io_kbuf_commit(req, sel->buf_list, arg->out_len, ret)) + req->flags |= REQ_F_BUF_MORE; } } else { ret = io_provided_buffers_select(req, &arg->out_len, sel->buf_list, arg->iovs); @@ -382,8 +397,10 @@ static inline bool __io_put_kbuf_ring(struct io_kiocb *req, if (bl) ret = io_kbuf_commit(req, bl, len, nr); + if (ret && (req->flags & REQ_F_BUF_MORE)) + ret = false; - req->flags &= ~REQ_F_BUFFER_RING; + req->flags &= ~(REQ_F_BUFFER_RING | REQ_F_BUF_MORE); return ret; }
diff --git a/io_uring/net.c b/io_uring/net.c index 8576c6c..8885d94 100644 --- a/io_uring/net.c +++ b/io_uring/net.c
@@ -375,6 +375,8 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) kmsg->msg.msg_namelen = addr_len; } if (sr->flags & IORING_RECVSEND_FIXED_BUF) { + if (sr->flags & IORING_SEND_VECTORIZED) + return -EINVAL; req->flags |= REQ_F_IMPORT_BUFFER; return 0; } @@ -419,6 +421,8 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) sr->done_io = 0; sr->len = READ_ONCE(sqe->len); + if (unlikely(sr->len < 0)) + return -EINVAL; sr->flags = READ_ONCE(sqe->ioprio); if (sr->flags & ~SENDMSG_FLAGS) return -EINVAL; @@ -789,6 +793,8 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); + if (unlikely(sr->len < 0)) + return -EINVAL; sr->flags = READ_ONCE(sqe->ioprio); if (sr->flags & ~RECVMSG_FLAGS) return -EINVAL;
diff --git a/io_uring/poll.c b/io_uring/poll.c index b671b84..2e9ee47 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c
@@ -272,6 +272,7 @@ static int io_poll_check_events(struct io_kiocb *req, io_tw_token_t tw) atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs); v &= ~IO_POLL_RETRY_FLAG; } + v &= IO_POLL_REF_MASK; } /* the mask was stashed in __io_poll_execute */ @@ -304,8 +305,13 @@ static int io_poll_check_events(struct io_kiocb *req, io_tw_token_t tw) return IOU_POLL_REMOVE_POLL_USE_RES; } } else { - int ret = io_poll_issue(req, tw); + int ret; + /* multiple refs and HUP, ensure we loop once more */ + if ((req->cqe.res & (POLLHUP | POLLRDHUP)) && v != 1) + v--; + + ret = io_poll_issue(req, tw); if (ret == IOU_COMPLETE) return IOU_POLL_REMOVE_POLL_USE_RES; else if (ret == IOU_REQUEUE) @@ -321,7 +327,6 @@ static int io_poll_check_events(struct io_kiocb *req, io_tw_token_t tw) * Release all references, retry if someone tried to restart * task_work while we were executing it. */ - v &= IO_POLL_REF_MASK; } while (atomic_sub_return(v, &req->poll_refs) & IO_POLL_REF_MASK); io_napi_add(req);
diff --git a/io_uring/register.c b/io_uring/register.c index 6015a3e..05362fe 100644 --- a/io_uring/register.c +++ b/io_uring/register.c
@@ -178,9 +178,17 @@ static __cold int io_register_restrictions(struct io_ring_ctx *ctx, return -EBUSY; ret = io_parse_restrictions(arg, nr_args, &ctx->restrictions); - /* Reset all restrictions if an error happened */ + /* + * Reset all restrictions if an error happened, but retain any COW'ed + * settings. + */ if (ret < 0) { + struct io_bpf_filters *bpf = ctx->restrictions.bpf_filters; + bool cowed = ctx->restrictions.bpf_filters_cow; + memset(&ctx->restrictions, 0, sizeof(ctx->restrictions)); + ctx->restrictions.bpf_filters = bpf; + ctx->restrictions.bpf_filters_cow = cowed; return ret; } if (ctx->restrictions.op_registered) @@ -202,7 +210,7 @@ static int io_register_restrictions_task(void __user *arg, unsigned int nr_args) return -EPERM; /* * Similar to seccomp, disallow setting a filter if task_no_new_privs - * is true and we're not CAP_SYS_ADMIN. + * is false and we're not CAP_SYS_ADMIN. */ if (!task_no_new_privs(current) && !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN)) @@ -238,7 +246,7 @@ static int io_register_bpf_filter_task(void __user *arg, unsigned int nr_args) /* * Similar to seccomp, disallow setting a filter if task_no_new_privs - * is true and we're not CAP_SYS_ADMIN. + * is false and we're not CAP_SYS_ADMIN. */ if (!task_no_new_privs(current) && !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN)) @@ -633,7 +641,15 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) ctx->sq_entries = p->sq_entries; ctx->cq_entries = p->cq_entries; + /* + * Just mark any flag we may have missed and that the application + * should act on unconditionally. Worst case it'll be an extra + * syscall. + */ + atomic_or(IORING_SQ_TASKRUN | IORING_SQ_NEED_WAKEUP, &n.rings->sq_flags); ctx->rings = n.rings; + rcu_assign_pointer(ctx->rings_rcu, n.rings); + ctx->sq_sqes = n.sq_sqes; swap_old(ctx, o, n, ring_region); swap_old(ctx, o, n, sq_region); @@ -642,6 +658,9 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) out: spin_unlock(&ctx->completion_lock); mutex_unlock(&ctx->mmap_lock); + /* Wait for concurrent io_ctx_mark_taskrun() */ + if (to_free == &o) + synchronize_rcu_expedited(); io_register_free_rings(ctx, to_free); if (ctx->sq_data)
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 4fa59bf..1b96ab5 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c
@@ -1061,6 +1061,10 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, return ret; if (!(imu->dir & (1 << ddir))) return -EFAULT; + if (unlikely(!len)) { + iov_iter_bvec(iter, ddir, NULL, 0, 0); + return 0; + } offset = buf_addr - imu->ubuf;
diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 84dda24..cb61d48 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c
@@ -462,7 +462,7 @@ int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) tr->ltimeout = true; if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS)) return -EINVAL; - if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2))) + if (get_timespec64(&tr->ts, u64_to_user_ptr(READ_ONCE(sqe->addr2)))) return -EFAULT; if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0) return -EINVAL; @@ -557,7 +557,7 @@ static int __io_timeout_prep(struct io_kiocb *req, data->req = req; data->flags = flags; - if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr))) + if (get_timespec64(&data->ts, u64_to_user_ptr(READ_ONCE(sqe->addr)))) return -EFAULT; if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
diff --git a/io_uring/tw.c b/io_uring/tw.c index 1ee2b8a..2f2b4ac 100644 --- a/io_uring/tw.c +++ b/io_uring/tw.c
@@ -152,6 +152,21 @@ void tctx_task_work(struct callback_head *cb) WARN_ON_ONCE(ret); } +/* + * Sets IORING_SQ_TASKRUN in the sq_flags shared with userspace, using the + * RCU protected rings pointer to be safe against concurrent ring resizing. + */ +static void io_ctx_mark_taskrun(struct io_ring_ctx *ctx) +{ + lockdep_assert_in_rcu_read_lock(); + + if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) { + struct io_rings *rings = rcu_dereference(ctx->rings_rcu); + + atomic_or(IORING_SQ_TASKRUN, &rings->sq_flags); + } +} + void io_req_local_work_add(struct io_kiocb *req, unsigned flags) { struct io_ring_ctx *ctx = req->ctx; @@ -206,8 +221,7 @@ void io_req_local_work_add(struct io_kiocb *req, unsigned flags) */ if (!head) { - if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) - atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); + io_ctx_mark_taskrun(ctx); if (ctx->has_evfd) io_eventfd_signal(ctx, false); } @@ -231,6 +245,10 @@ void io_req_normal_work_add(struct io_kiocb *req) if (!llist_add(&req->io_task_work.node, &tctx->task_list)) return; + /* + * Doesn't need to use ->rings_rcu, as resizing isn't supported for + * !DEFER_TASKRUN. + */ if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
diff --git a/io_uring/wait.c b/io_uring/wait.c index 0581cad..91df86c 100644 --- a/io_uring/wait.c +++ b/io_uring/wait.c
@@ -79,12 +79,15 @@ static enum hrtimer_restart io_cqring_min_timer_wakeup(struct hrtimer *timer) if (io_has_work(ctx)) goto out_wake; /* got events since we started waiting, min timeout is done */ - if (iowq->cq_min_tail != READ_ONCE(ctx->rings->cq.tail)) - goto out_wake; - /* if we have any events and min timeout expired, we're done */ - if (io_cqring_events(ctx)) - goto out_wake; + scoped_guard(rcu) { + struct io_rings *rings = io_get_rings(ctx); + if (iowq->cq_min_tail != READ_ONCE(rings->cq.tail)) + goto out_wake; + /* if we have any events and min timeout expired, we're done */ + if (io_cqring_events(ctx)) + goto out_wake; + } /* * If using deferred task_work running and application is waiting on * more than one request, ensure we reset it now where we are switching @@ -186,9 +189,9 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, struct ext_arg *ext_arg) { struct io_wait_queue iowq; - struct io_rings *rings = ctx->rings; + struct io_rings *rings; ktime_t start_time; - int ret; + int ret, nr_wait; min_events = min_t(int, min_events, ctx->cq_entries); @@ -201,15 +204,23 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, if (unlikely(test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq))) io_cqring_do_overflow_flush(ctx); - if (__io_cqring_events_user(ctx) >= min_events) + + rcu_read_lock(); + rings = io_get_rings(ctx); + if (__io_cqring_events_user(ctx) >= min_events) { + rcu_read_unlock(); return 0; + } init_waitqueue_func_entry(&iowq.wq, io_wake_function); iowq.wq.private = current; INIT_LIST_HEAD(&iowq.wq.entry); iowq.ctx = ctx; - iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events; - iowq.cq_min_tail = READ_ONCE(ctx->rings->cq.tail); + iowq.cq_tail = READ_ONCE(rings->cq.head) + min_events; + iowq.cq_min_tail = READ_ONCE(rings->cq.tail); + nr_wait = (int) iowq.cq_tail - READ_ONCE(rings->cq.tail); + rcu_read_unlock(); + rings = NULL; iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); iowq.hit_timeout = 0; iowq.min_timeout = ext_arg->min_time; @@ -240,14 +251,6 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, trace_io_uring_cqring_wait(ctx, min_events); do { unsigned long check_cq; - int nr_wait; - - /* if min timeout has been hit, don't reset wait count */ - if (!iowq.hit_timeout) - nr_wait = (int) iowq.cq_tail - - READ_ONCE(ctx->rings->cq.tail); - else - nr_wait = 1; if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) { atomic_set(&ctx->cq_wait_nr, nr_wait); @@ -298,11 +301,20 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, break; } cond_resched(); + + /* if min timeout has been hit, don't reset wait count */ + if (!iowq.hit_timeout) + scoped_guard(rcu) + nr_wait = (int) iowq.cq_tail - + READ_ONCE(io_get_rings(ctx)->cq.tail); + else + nr_wait = 1; } while (1); if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN)) finish_wait(&ctx->cq_wait, &iowq.wq); restore_saved_sigmask_unless(ret == -EINTR); - return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0; + guard(rcu)(); + return READ_ONCE(io_get_rings(ctx)->cq.head) == READ_ONCE(io_get_rings(ctx)->cq.tail) ? ret : 0; }
diff --git a/io_uring/wait.h b/io_uring/wait.h index 5e236f7..3a145fcf 100644 --- a/io_uring/wait.h +++ b/io_uring/wait.h
@@ -28,12 +28,15 @@ void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx); static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx) { - return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head); + struct io_rings *rings = io_get_rings(ctx); + return ctx->cached_cq_tail - READ_ONCE(rings->cq.head); } static inline unsigned int __io_cqring_events_user(struct io_ring_ctx *ctx) { - return READ_ONCE(ctx->rings->cq.tail) - READ_ONCE(ctx->rings->cq.head); + struct io_rings *rings = io_get_rings(ctx); + + return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head); } /*
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index cf5bec0..62d6932 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c
@@ -837,7 +837,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, if (ret) goto netdev_put_unlock; - mp_param.rx_page_size = 1U << ifq->niov_shift; + if (reg.rx_buf_len) + mp_param.rx_page_size = 1U << ifq->niov_shift; mp_param.mp_ops = &io_uring_pp_zc_ops; mp_param.mp_priv = ifq; ret = __net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param, NULL); @@ -926,11 +927,12 @@ static inline bool io_parse_rqe(struct io_uring_zcrx_rqe *rqe, struct io_zcrx_ifq *ifq, struct net_iov **ret_niov) { + __u64 off = READ_ONCE(rqe->off); unsigned niov_idx, area_idx; struct io_zcrx_area *area; - area_idx = rqe->off >> IORING_ZCRX_AREA_SHIFT; - niov_idx = (rqe->off & ~IORING_ZCRX_AREA_MASK) >> ifq->niov_shift; + area_idx = off >> IORING_ZCRX_AREA_SHIFT; + niov_idx = (off & ~IORING_ZCRX_AREA_MASK) >> ifq->niov_shift; if (unlikely(rqe->__pad || area_idx)) return false;
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 144f30e..f355cf1 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c
@@ -303,7 +303,7 @@ static long arena_map_update_elem(struct bpf_map *map, void *key, return -EOPNOTSUPP; } -static int arena_map_check_btf(const struct bpf_map *map, const struct btf *btf, +static int arena_map_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) { return 0;
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 26763df..33de68c 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c
@@ -548,7 +548,7 @@ static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key, rcu_read_unlock(); } -static int array_map_check_btf(const struct bpf_map *map, +static int array_map_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type)
diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index 35e1ddc..b73336c 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c
@@ -180,7 +180,7 @@ static long bloom_map_update_elem(struct bpf_map *map, void *key, return -EINVAL; } -static int bloom_map_check_btf(const struct bpf_map *map, +static int bloom_map_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type)
diff --git a/kernel/bpf/bpf_insn_array.c b/kernel/bpf/bpf_insn_array.c index c0286f25..a2f84afe 100644 --- a/kernel/bpf/bpf_insn_array.c +++ b/kernel/bpf/bpf_insn_array.c
@@ -98,7 +98,7 @@ static long insn_array_delete_elem(struct bpf_map *map, void *key) return -EINVAL; } -static int insn_array_check_btf(const struct bpf_map *map, +static int insn_array_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type)
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index b28f07d..9c96a44 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c
@@ -107,14 +107,12 @@ static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) { struct bpf_local_storage *local_storage; - /* If RCU Tasks Trace grace period implies RCU grace period, do - * kfree(), else do kfree_rcu(). + /* + * RCU Tasks Trace grace period implies RCU grace period, do + * kfree() directly. */ local_storage = container_of(rcu, struct bpf_local_storage, rcu); - if (rcu_trace_implies_rcu_gp()) - kfree(local_storage); - else - kfree_rcu(local_storage, rcu); + kfree(local_storage); } /* Handle use_kmalloc_nolock == false */ @@ -138,10 +136,11 @@ static void bpf_local_storage_free_rcu(struct rcu_head *rcu) static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) { - if (rcu_trace_implies_rcu_gp()) - bpf_local_storage_free_rcu(rcu); - else - call_rcu(rcu, bpf_local_storage_free_rcu); + /* + * RCU Tasks Trace grace period implies RCU grace period, do + * kfree() directly. + */ + bpf_local_storage_free_rcu(rcu); } static void bpf_local_storage_free(struct bpf_local_storage *local_storage, @@ -164,16 +163,29 @@ static void bpf_local_storage_free(struct bpf_local_storage *local_storage, bpf_local_storage_free_trace_rcu); } +/* rcu callback for use_kmalloc_nolock == false */ +static void __bpf_selem_free_rcu(struct rcu_head *rcu) +{ + struct bpf_local_storage_elem *selem; + struct bpf_local_storage_map *smap; + + selem = container_of(rcu, struct bpf_local_storage_elem, rcu); + /* bpf_selem_unlink_nofail may have already cleared smap and freed fields. */ + smap = rcu_dereference_check(SDATA(selem)->smap, 1); + + if (smap) + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); + kfree(selem); +} + /* rcu tasks trace callback for use_kmalloc_nolock == false */ static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu) { - struct bpf_local_storage_elem *selem; - - selem = container_of(rcu, struct bpf_local_storage_elem, rcu); - if (rcu_trace_implies_rcu_gp()) - kfree(selem); - else - kfree_rcu(selem, rcu); + /* + * RCU Tasks Trace grace period implies RCU grace period, do + * kfree() directly. + */ + __bpf_selem_free_rcu(rcu); } /* Handle use_kmalloc_nolock == false */ @@ -181,7 +193,7 @@ static void __bpf_selem_free(struct bpf_local_storage_elem *selem, bool vanilla_rcu) { if (vanilla_rcu) - kfree_rcu(selem, rcu); + call_rcu(&selem->rcu, __bpf_selem_free_rcu); else call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu); } @@ -195,37 +207,29 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu) /* The bpf_local_storage_map_free will wait for rcu_barrier */ smap = rcu_dereference_check(SDATA(selem)->smap, 1); - if (smap) { - migrate_disable(); + if (smap) bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); - migrate_enable(); - } kfree_nolock(selem); } static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) { - if (rcu_trace_implies_rcu_gp()) - bpf_selem_free_rcu(rcu); - else - call_rcu(rcu, bpf_selem_free_rcu); + /* + * RCU Tasks Trace grace period implies RCU grace period, do + * kfree() directly. + */ + bpf_selem_free_rcu(rcu); } void bpf_selem_free(struct bpf_local_storage_elem *selem, bool reuse_now) { - struct bpf_local_storage_map *smap; - - smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); - if (!selem->use_kmalloc_nolock) { /* * No uptr will be unpin even when reuse_now == false since uptr * is only supported in task local storage, where * smap->use_kmalloc_nolock == true. 
*/ - if (smap) - bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); __bpf_selem_free(selem, reuse_now); return; } @@ -797,7 +801,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr) return 0; } -int bpf_local_storage_map_check_btf(const struct bpf_map *map, +int bpf_local_storage_map_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) @@ -958,10 +962,9 @@ void bpf_local_storage_map_free(struct bpf_map *map, */ synchronize_rcu(); - if (smap->use_kmalloc_nolock) { - rcu_barrier_tasks_trace(); - rcu_barrier(); - } + /* smap remains in use regardless of kmalloc_nolock, so wait unconditionally. */ + rcu_barrier_tasks_trace(); + rcu_barrier(); kvfree(smap->buckets); bpf_map_area_free(smap); }
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 4872d2a..71f9143 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c
@@ -1787,7 +1787,16 @@ static void btf_free_id(struct btf *btf) * of the _bh() version. */ spin_lock_irqsave(&btf_idr_lock, flags); - idr_remove(&btf_idr, btf->id); + if (btf->id) { + idr_remove(&btf_idr, btf->id); + /* + * Clear the id here to make this function idempotent, since it will get + * called a couple of times for module BTFs: on module unload, and then + * the final btf_put(). btf_alloc_id() starts IDs with 1, so we can use + * 0 as sentinel value. + */ + WRITE_ONCE(btf->id, 0); + } spin_unlock_irqrestore(&btf_idr_lock, flags); } @@ -8115,7 +8124,7 @@ static void bpf_btf_show_fdinfo(struct seq_file *m, struct file *filp) { const struct btf *btf = filp->private_data; - seq_printf(m, "btf_id:\t%u\n", btf->id); + seq_printf(m, "btf_id:\t%u\n", READ_ONCE(btf->id)); } #endif @@ -8197,7 +8206,7 @@ int btf_get_info_by_fd(const struct btf *btf, if (copy_from_user(&info, uinfo, info_copy)) return -EFAULT; - info.id = btf->id; + info.id = READ_ONCE(btf->id); ubtf = u64_to_user_ptr(info.btf); btf_copy = min_t(u32, btf->data_size, info.btf_size); if (copy_to_user(ubtf, btf->data, btf_copy)) @@ -8260,7 +8269,7 @@ int btf_get_fd_by_id(u32 id) u32 btf_obj_id(const struct btf *btf) { - return btf->id; + return READ_ONCE(btf->id); } bool btf_is_kernel(const struct btf *btf) @@ -8382,6 +8391,13 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, if (btf_mod->module != module) continue; + /* + * For modules, we do the freeing of BTF IDR as soon as + * module goes away to disable BTF discovery, since the + * btf_try_get_module() on such BTFs will fail. This may + * be called again on btf_put(), but it's ok to do so. + */ + btf_free_id(btf_mod->btf); list_del(&btf_mod->list); if (btf_mod->sysfs_attr) sysfs_remove_bin_file(btf_kobj, btf_mod->sysfs_attr);
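The pattern generalizes: IDs handed out by the idr start at 1 here, so 0 doubles as an already-removed sentinel and teardown may safely run from both the module notifier and the final btf_put(). A kernel-context sketch of the shape (the helper name is ours):

static void free_id_once(struct idr *idr, spinlock_t *lock, u32 *idp)
{
        unsigned long flags;

        spin_lock_irqsave(lock, flags);
        if (*idp) {
                idr_remove(idr, *idp);
                WRITE_ONCE(*idp, 0);    /* a second call becomes a no-op */
        }
        spin_unlock_irqrestore(lock, flags);
}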
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 3ece2da..7b675a45 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c
@@ -1422,6 +1422,27 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); *to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off); break; + + case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: + case BPF_ST | BPF_PROBE_MEM32 | BPF_W: + case BPF_ST | BPF_PROBE_MEM32 | BPF_H: + case BPF_ST | BPF_PROBE_MEM32 | BPF_B: + *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ + from->imm); + *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + /* + * Cannot use BPF_STX_MEM() macro here as it + * hardcodes BPF_MEM mode, losing PROBE_MEM32 + * and breaking arena addressing in the JIT. + */ + *to++ = (struct bpf_insn) { + .code = BPF_STX | BPF_PROBE_MEM32 | + BPF_SIZE(from->code), + .dst_reg = from->dst_reg, + .src_reg = BPF_REG_AX, + .off = from->off, + }; + break; } out: return to - to_buff; @@ -1736,6 +1757,12 @@ bool bpf_opcode_in_insntable(u8 code) } #ifndef CONFIG_BPF_JIT_ALWAYS_ON +/* Absolute value of s32 without undefined behavior for S32_MIN */ +static u32 abs_s32(s32 x) +{ + return x >= 0 ? (u32)x : -(u32)x; +} + /** * ___bpf_prog_run - run eBPF program on a given context * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers @@ -1900,8 +1927,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) DST = do_div(AX, (u32) SRC); break; case 1: - AX = abs((s32)DST); - AX = do_div(AX, abs((s32)SRC)); + AX = abs_s32((s32)DST); + AX = do_div(AX, abs_s32((s32)SRC)); if ((s32)DST < 0) DST = (u32)-AX; else @@ -1928,8 +1955,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) DST = do_div(AX, (u32) IMM); break; case 1: - AX = abs((s32)DST); - AX = do_div(AX, abs((s32)IMM)); + AX = abs_s32((s32)DST); + AX = do_div(AX, abs_s32((s32)IMM)); if ((s32)DST < 0) DST = (u32)-AX; else @@ -1955,8 +1982,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) DST = (u32) AX; break; case 1: - AX = abs((s32)DST); - do_div(AX, abs((s32)SRC)); + AX = abs_s32((s32)DST); + do_div(AX, abs_s32((s32)SRC)); if (((s32)DST < 0) == ((s32)SRC < 0)) DST = (u32)AX; else @@ -1982,8 +2009,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) DST = (u32) AX; break; case 1: - AX = abs((s32)DST); - do_div(AX, abs((s32)IMM)); + AX = abs_s32((s32)DST); + do_div(AX, abs_s32((s32)IMM)); if (((s32)DST < 0) == ((s32)IMM < 0)) DST = (u32)AX; else
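Why the helper is needed: abs() on S32_MIN is undefined behavior, since the positive result does not fit in a signed 32-bit integer, whereas negating through u32 is fully defined. A userspace demonstration:

#include <stdint.h>
#include <stdio.h>

static uint32_t abs_s32(int32_t x)
{
        return x >= 0 ? (uint32_t)x : -(uint32_t)x;
}

int main(void)
{
        /* abs(INT32_MIN) would be UB; this reliably prints 2147483648 */
        printf("%u\n", abs_s32(INT32_MIN));
        return 0;
}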
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 04171fb..32b43cb 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c
@@ -29,6 +29,7 @@ #include <linux/sched.h> #include <linux/workqueue.h> #include <linux/kthread.h> +#include <linux/local_lock.h> #include <linux/completion.h> #include <trace/events/xdp.h> #include <linux/btf_ids.h> @@ -52,6 +53,7 @@ struct xdp_bulk_queue { struct list_head flush_node; struct bpf_cpu_map_entry *obj; unsigned int count; + local_lock_t bq_lock; }; /* Struct for every remote "destination" CPU in map */ @@ -451,6 +453,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value, for_each_possible_cpu(i) { bq = per_cpu_ptr(rcpu->bulkq, i); bq->obj = rcpu; + local_lock_init(&bq->bq_lock); } /* Alloc queue */ @@ -722,6 +725,8 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq) struct ptr_ring *q; int i; + lockdep_assert_held(&bq->bq_lock); + if (unlikely(!bq->count)) return; @@ -749,11 +754,15 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq) } /* Runs under RCU-read-side, plus in softirq under NAPI protection. - * Thus, safe percpu variable access. + * Thus, safe percpu variable access. PREEMPT_RT relies on + * local_lock_nested_bh() to serialise access to the per-CPU bq. */ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) { - struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq); + struct xdp_bulk_queue *bq; + + local_lock_nested_bh(&rcpu->bulkq->bq_lock); + bq = this_cpu_ptr(rcpu->bulkq); if (unlikely(bq->count == CPU_MAP_BULK_SIZE)) bq_flush_to_queue(bq); @@ -774,6 +783,8 @@ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) list_add(&bq->flush_node, flush_list); } + + local_unlock_nested_bh(&rcpu->bulkq->bq_lock); } int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, @@ -810,7 +821,9 @@ void __cpu_map_flush(struct list_head *flush_list) struct xdp_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { + local_lock_nested_bh(&bq->obj->bulkq->bq_lock); bq_flush_to_queue(bq); + local_unlock_nested_bh(&bq->obj->bulkq->bq_lock); /* If already running, costs spin_lock_irqsave + smb_mb */ wake_up_process(bq->obj->kthread);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 2625601..3d619d0 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c
@@ -45,6 +45,7 @@ * types of devmap; only the lookup and insertion is different. */ #include <linux/bpf.h> +#include <linux/local_lock.h> #include <net/xdp.h> #include <linux/filter.h> #include <trace/events/xdp.h> @@ -60,6 +61,7 @@ struct xdp_dev_bulk_queue { struct net_device *dev_rx; struct bpf_prog *xdp_prog; unsigned int count; + local_lock_t bq_lock; }; struct bpf_dtab_netdev { @@ -381,6 +383,8 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags) int to_send = cnt; int i; + lockdep_assert_held(&bq->bq_lock); + if (unlikely(!cnt)) return; @@ -425,10 +429,12 @@ void __dev_flush(struct list_head *flush_list) struct xdp_dev_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { + local_lock_nested_bh(&bq->dev->xdp_bulkq->bq_lock); bq_xmit_all(bq, XDP_XMIT_FLUSH); bq->dev_rx = NULL; bq->xdp_prog = NULL; __list_del_clearprev(&bq->flush_node); + local_unlock_nested_bh(&bq->dev->xdp_bulkq->bq_lock); } } @@ -451,12 +457,16 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key) /* Runs in NAPI, i.e., softirq under local_bh_disable(). Thus, safe percpu * variable access, and map elements stick around. See comment above - * xdp_do_flush() in filter.c. + * xdp_do_flush() in filter.c. PREEMPT_RT relies on local_lock_nested_bh() + * to serialise access to the per-CPU bq. */ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_prog *xdp_prog) { - struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq); + struct xdp_dev_bulk_queue *bq; + + local_lock_nested_bh(&dev->xdp_bulkq->bq_lock); + bq = this_cpu_ptr(dev->xdp_bulkq); if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) bq_xmit_all(bq, 0); @@ -477,6 +487,8 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, } bq->q[bq->count++] = xdpf; + + local_unlock_nested_bh(&dev->xdp_bulkq->bq_lock); } static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, @@ -588,18 +600,22 @@ static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifin } /* Get ifindex of each upper device. 'indexes' must be able to hold at - * least MAX_NEST_DEV elements. - * Returns the number of ifindexes added. + * least 'max' elements. + * Returns the number of ifindexes added, or -EOVERFLOW if there are too + * many upper devices. 
*/ -static int get_upper_ifindexes(struct net_device *dev, int *indexes) +static int get_upper_ifindexes(struct net_device *dev, int *indexes, int max) { struct net_device *upper; struct list_head *iter; int n = 0; netdev_for_each_upper_dev_rcu(dev, upper, iter) { + if (n >= max) + return -EOVERFLOW; indexes[n++] = upper->ifindex; } + return n; } @@ -615,7 +631,11 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, int err; if (exclude_ingress) { - num_excluded = get_upper_ifindexes(dev_rx, excluded_devices); + num_excluded = get_upper_ifindexes(dev_rx, excluded_devices, + ARRAY_SIZE(excluded_devices) - 1); + if (num_excluded < 0) + return num_excluded; + excluded_devices[num_excluded++] = dev_rx->ifindex; } @@ -733,7 +753,11 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, int err; if (exclude_ingress) { - num_excluded = get_upper_ifindexes(dev, excluded_devices); + num_excluded = get_upper_ifindexes(dev, excluded_devices, + ARRAY_SIZE(excluded_devices) - 1); + if (num_excluded < 0) + return num_excluded; + excluded_devices[num_excluded++] = dev->ifindex; } @@ -1115,8 +1139,13 @@ static int dev_map_notification(struct notifier_block *notifier, if (!netdev->xdp_bulkq) return NOTIFY_BAD; - for_each_possible_cpu(cpu) - per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev; + for_each_possible_cpu(cpu) { + struct xdp_dev_bulk_queue *bq; + + bq = per_cpu_ptr(netdev->xdp_bulkq, cpu); + bq->dev = netdev; + local_lock_init(&bq->bq_lock); + } break; case NETDEV_UNREGISTER: /* This rcu_read_lock/unlock pair is needed because
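cpumap above and devmap here now share the same PREEMPT_RT-safe shape around their per-CPU bulk queues; a kernel-context sketch with hypothetical types (on !PREEMPT_RT, local_lock_nested_bh() reduces to a lockdep annotation, so the fast path is unchanged):

struct bulkq {
        void            *q[8];
        unsigned int    count;
        local_lock_t    lock;
};

static void bq_enqueue_sketch(struct bulkq __percpu *pcpu, void *item)
{
        struct bulkq *bq;

        local_lock_nested_bh(&pcpu->lock);
        bq = this_cpu_ptr(pcpu);
        if (bq->count == ARRAY_SIZE(bq->q))
                bq->count = 0;          /* stand-in for the real flush */
        bq->q[bq->count++] = item;
        local_unlock_nested_bh(&pcpu->lock);
}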
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 3b9d297..bc6bc8b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c
@@ -125,6 +125,11 @@ struct htab_elem { char key[] __aligned(8); }; +struct htab_btf_record { + struct btf_record *record; + u32 key_size; +}; + static inline bool htab_is_prealloc(const struct bpf_htab *htab) { return !(htab->map.map_flags & BPF_F_NO_PREALLOC); @@ -457,6 +462,83 @@ static int htab_map_alloc_check(union bpf_attr *attr) return 0; } +static void htab_mem_dtor(void *obj, void *ctx) +{ + struct htab_btf_record *hrec = ctx; + struct htab_elem *elem = obj; + void *map_value; + + if (IS_ERR_OR_NULL(hrec->record)) + return; + + map_value = htab_elem_value(elem, hrec->key_size); + bpf_obj_free_fields(hrec->record, map_value); +} + +static void htab_pcpu_mem_dtor(void *obj, void *ctx) +{ + void __percpu *pptr = *(void __percpu **)obj; + struct htab_btf_record *hrec = ctx; + int cpu; + + if (IS_ERR_OR_NULL(hrec->record)) + return; + + for_each_possible_cpu(cpu) + bpf_obj_free_fields(hrec->record, per_cpu_ptr(pptr, cpu)); +} + +static void htab_dtor_ctx_free(void *ctx) +{ + struct htab_btf_record *hrec = ctx; + + btf_record_free(hrec->record); + kfree(ctx); +} + +static int htab_set_dtor(struct bpf_htab *htab, void (*dtor)(void *, void *)) +{ + u32 key_size = htab->map.key_size; + struct bpf_mem_alloc *ma; + struct htab_btf_record *hrec; + int err; + + /* No need for dtors. */ + if (IS_ERR_OR_NULL(htab->map.record)) + return 0; + + hrec = kzalloc(sizeof(*hrec), GFP_KERNEL); + if (!hrec) + return -ENOMEM; + hrec->key_size = key_size; + hrec->record = btf_record_dup(htab->map.record); + if (IS_ERR(hrec->record)) { + err = PTR_ERR(hrec->record); + kfree(hrec); + return err; + } + ma = htab_is_percpu(htab) ? &htab->pcpu_ma : &htab->ma; + bpf_mem_alloc_set_dtor(ma, dtor, htab_dtor_ctx_free, hrec); + return 0; +} + +static int htab_map_check_btf(struct bpf_map *map, const struct btf *btf, + const struct btf_type *key_type, const struct btf_type *value_type) +{ + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + + if (htab_is_prealloc(htab)) + return 0; + /* + * We must set the dtor using this callback, as map's BTF record is not + * populated in htab_map_alloc(), so it will always appear as NULL. 
+ */ + if (htab_is_percpu(htab)) + return htab_set_dtor(htab, htab_pcpu_mem_dtor); + else + return htab_set_dtor(htab, htab_mem_dtor); +} + static struct bpf_map *htab_map_alloc(union bpf_attr *attr) { bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH || @@ -2281,6 +2363,7 @@ const struct bpf_map_ops htab_map_ops = { .map_seq_show_elem = htab_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, + .map_check_btf = htab_map_check_btf, .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab), .map_btf_id = &htab_map_btf_ids[0], @@ -2303,6 +2386,7 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_seq_show_elem = htab_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, + .map_check_btf = htab_map_check_btf, .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab_lru), .map_btf_id = &htab_map_btf_ids[0], @@ -2482,6 +2566,7 @@ const struct bpf_map_ops htab_percpu_map_ops = { .map_seq_show_elem = htab_percpu_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, + .map_check_btf = htab_map_check_btf, .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab_percpu), .map_btf_id = &htab_map_btf_ids[0], @@ -2502,6 +2587,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = { .map_seq_show_elem = htab_percpu_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, + .map_check_btf = htab_map_check_btf, .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab_lru_percpu), .map_btf_id = &htab_map_btf_ids[0],
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 1ccbf28..8fca0c6 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c
@@ -364,7 +364,7 @@ static long cgroup_storage_delete_elem(struct bpf_map *map, void *key) return -EINVAL; } -static int cgroup_storage_check_btf(const struct bpf_map *map, +static int cgroup_storage_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 1adeb4d..0f57608 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c
@@ -751,7 +751,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) return err; } -static int trie_check_btf(const struct bpf_map *map, +static int trie_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type)
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index bd45dda..682a9f3 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c
@@ -102,6 +102,8 @@ struct bpf_mem_cache { int percpu_size; bool draining; struct bpf_mem_cache *tgt; + void (*dtor)(void *obj, void *ctx); + void *dtor_ctx; /* list of objects to be freed after RCU GP */ struct llist_head free_by_rcu; @@ -260,12 +262,14 @@ static void free_one(void *obj, bool percpu) kfree(obj); } -static int free_all(struct llist_node *llnode, bool percpu) +static int free_all(struct bpf_mem_cache *c, struct llist_node *llnode, bool percpu) { struct llist_node *pos, *t; int cnt = 0; llist_for_each_safe(pos, t, llnode) { + if (c->dtor) + c->dtor((void *)pos + LLIST_NODE_SZ, c->dtor_ctx); free_one(pos, percpu); cnt++; } @@ -276,7 +280,7 @@ static void __free_rcu(struct rcu_head *head) { struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu_ttrace); - free_all(llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size); + free_all(c, llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size); atomic_set(&c->call_rcu_ttrace_in_progress, 0); } @@ -308,7 +312,7 @@ static void do_call_rcu_ttrace(struct bpf_mem_cache *c) if (atomic_xchg(&c->call_rcu_ttrace_in_progress, 1)) { if (unlikely(READ_ONCE(c->draining))) { llnode = llist_del_all(&c->free_by_rcu_ttrace); - free_all(llnode, !!c->percpu_size); + free_all(c, llnode, !!c->percpu_size); } return; } @@ -417,7 +421,7 @@ static void check_free_by_rcu(struct bpf_mem_cache *c) dec_active(c, &flags); if (unlikely(READ_ONCE(c->draining))) { - free_all(llist_del_all(&c->waiting_for_gp), !!c->percpu_size); + free_all(c, llist_del_all(&c->waiting_for_gp), !!c->percpu_size); atomic_set(&c->call_rcu_in_progress, 0); } else { call_rcu_hurry(&c->rcu, __free_by_rcu); @@ -635,13 +639,13 @@ static void drain_mem_cache(struct bpf_mem_cache *c) * Except for waiting_for_gp_ttrace list, there are no concurrent operations * on these lists, so it is safe to use __llist_del_all(). */ - free_all(llist_del_all(&c->free_by_rcu_ttrace), percpu); - free_all(llist_del_all(&c->waiting_for_gp_ttrace), percpu); - free_all(__llist_del_all(&c->free_llist), percpu); - free_all(__llist_del_all(&c->free_llist_extra), percpu); - free_all(__llist_del_all(&c->free_by_rcu), percpu); - free_all(__llist_del_all(&c->free_llist_extra_rcu), percpu); - free_all(llist_del_all(&c->waiting_for_gp), percpu); + free_all(c, llist_del_all(&c->free_by_rcu_ttrace), percpu); + free_all(c, llist_del_all(&c->waiting_for_gp_ttrace), percpu); + free_all(c, __llist_del_all(&c->free_llist), percpu); + free_all(c, __llist_del_all(&c->free_llist_extra), percpu); + free_all(c, __llist_del_all(&c->free_by_rcu), percpu); + free_all(c, __llist_del_all(&c->free_llist_extra_rcu), percpu); + free_all(c, llist_del_all(&c->waiting_for_gp), percpu); } static void check_mem_cache(struct bpf_mem_cache *c) @@ -680,6 +684,9 @@ static void check_leaked_objs(struct bpf_mem_alloc *ma) static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma) { + /* We can free dtor ctx only once all callbacks are done using it. 
*/ + if (ma->dtor_ctx_free) + ma->dtor_ctx_free(ma->dtor_ctx); check_leaked_objs(ma); free_percpu(ma->cache); free_percpu(ma->caches); @@ -1014,3 +1021,32 @@ int bpf_mem_alloc_check_size(bool percpu, size_t size) return 0; } + +void bpf_mem_alloc_set_dtor(struct bpf_mem_alloc *ma, void (*dtor)(void *obj, void *ctx), + void (*dtor_ctx_free)(void *ctx), void *ctx) +{ + struct bpf_mem_caches *cc; + struct bpf_mem_cache *c; + int cpu, i; + + ma->dtor_ctx_free = dtor_ctx_free; + ma->dtor_ctx = ctx; + + if (ma->cache) { + for_each_possible_cpu(cpu) { + c = per_cpu_ptr(ma->cache, cpu); + c->dtor = dtor; + c->dtor_ctx = ctx; + } + } + if (ma->caches) { + for_each_possible_cpu(cpu) { + cc = per_cpu_ptr(ma->caches, cpu); + for (i = 0; i < NUM_CACHES; i++) { + c = &cc->cache[i]; + c->dtor = dtor; + c->dtor_ctx = ctx; + } + } + } +}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0378e83..70093878 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c
@@ -1234,7 +1234,7 @@ int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size) } EXPORT_SYMBOL_GPL(bpf_obj_name_cpy); -int map_check_no_btf(const struct bpf_map *map, +int map_check_no_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) @@ -3261,6 +3261,18 @@ static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu) bpf_link_dealloc(link); } +static bool bpf_link_is_tracepoint(struct bpf_link *link) +{ + /* + * Only these combinations support a tracepoint bpf_link. + * BPF_LINK_TYPE_TRACING raw_tp progs are hardcoded to use + * bpf_raw_tp_link_lops and thus dealloc_deferred(), see + * bpf_raw_tp_link_attach(). + */ + return link->type == BPF_LINK_TYPE_RAW_TRACEPOINT || + (link->type == BPF_LINK_TYPE_TRACING && link->attach_type == BPF_TRACE_RAW_TP); +} + static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) { if (rcu_trace_implies_rcu_gp()) @@ -3279,16 +3291,25 @@ static void bpf_link_free(struct bpf_link *link) if (link->prog) ops->release(link); if (ops->dealloc_deferred) { - /* Schedule BPF link deallocation, which will only then + /* + * Schedule BPF link deallocation, which will only then * trigger putting BPF program refcount. * If underlying BPF program is sleepable or BPF link's target * attach hookpoint is sleepable or otherwise requires RCU GPs * to ensure link and its underlying BPF program is not * reachable anymore, we need to first wait for RCU tasks - * trace sync, and then go through "classic" RCU grace period + * trace sync, and then go through "classic" RCU grace period. + * + * For tracepoint BPF links, we need to go through SRCU grace + * period wait instead when non-faultable tracepoint is used. We + * don't need to chain SRCU grace period waits, however, for the + * faultable case, since it exclusively uses RCU Tasks Trace. */ if (link->sleepable || (link->prog && link->prog->sleepable)) call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); + /* We need to do a SRCU grace period wait for non-faultable tracepoint BPF links. */ + else if (bpf_link_is_tracepoint(link)) + call_tracepoint_unregister_atomic(&link->rcu, bpf_link_defer_dealloc_rcu_gp); else call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); } else if (ops->dealloc) {
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c index 26fbfbb..4abc359 100644 --- a/kernel/bpf/tnum.c +++ b/kernel/bpf/tnum.c
@@ -269,3 +269,59 @@ struct tnum tnum_bswap64(struct tnum a) { return TNUM(swab64(a.value), swab64(a.mask)); } + +/* Given tnum t, and a number z such that tmin <= z < tmax, where tmin + * is the smallest member of the t (= t.value) and tmax is the largest + * member of t (= t.value | t.mask), returns the smallest member of t + * larger than z. + * + * For example, + * t = x11100x0 + * z = 11110001 (241) + * result = 11110010 (242) + * + * Note: if this function is called with z >= tmax, it just returns + * early with tmax; if this function is called with z < tmin, the + * algorithm already returns tmin. + */ +u64 tnum_step(struct tnum t, u64 z) +{ + u64 tmax, j, p, q, r, s, v, u, w, res; + u8 k; + + tmax = t.value | t.mask; + + /* if z >= largest member of t, return largest member of t */ + if (z >= tmax) + return tmax; + + /* if z < smallest member of t, return smallest member of t */ + if (z < t.value) + return t.value; + + /* keep t's known bits, and match all unknown bits to z */ + j = t.value | (z & t.mask); + + if (j > z) { + p = ~z & t.value & ~t.mask; + k = fls64(p); /* k is the most-significant 0-to-1 flip */ + q = U64_MAX << k; + r = q & z; /* positions > k matched to z */ + s = ~q & t.value; /* positions <= k matched to t.value */ + v = r | s; + res = v; + } else { + p = z & ~t.value & ~t.mask; + k = fls64(p); /* k is the most-significant 1-to-0 flip */ + q = U64_MAX << k; + r = q & t.mask & z; /* unknown positions > k, matched to z */ + s = q & ~t.mask; /* known positions > k, set to 1 */ + v = r | s; + /* add 1 to unknown positions > k to make value greater than z */ + u = v + (1ULL << k); + /* extract bits in unknown positions > k from u, rest from t.value */ + w = (u & t.mask) | t.value; + res = w; + } + return res; +}
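A brute-force reference for differential testing (userspace sketch; names are ours): enumerate the members of t, i.e. value | s for every submask s of mask, in increasing order, and return the first one above z. The (s - mask) & mask step is the standard increasing-order submask walk; the tnum invariant value & mask == 0 makes value | s increase with s.

#include <stdint.h>
#include <stdio.h>

static uint64_t tnum_step_ref(uint64_t value, uint64_t mask, uint64_t z)
{
        uint64_t tmax = value | mask;
        uint64_t s = 0;

        if (z >= tmax)
                return tmax;
        if (z < value)
                return value;
        do {
                if ((value | s) > z)
                        return value | s;
                s = (s - mask) & mask;  /* next submask, ascending */
        } while (s);
        return tmax;
}

int main(void)
{
        /* the example from the comment: t = x11100x0, z = 241 -> 242 */
        printf("%llu\n",
               (unsigned long long)tnum_step_ref(0x70, 0x82, 241));
        return 0;
}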
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 84db9e6..f02254a 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c
@@ -1002,10 +1002,8 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, mutex_lock(&tr->mutex); shim_link = cgroup_shim_find(tr, bpf_func); - if (shim_link) { + if (shim_link && !IS_ERR(bpf_link_inc_not_zero(&shim_link->link.link))) { /* Reusing existing shim attached by the other program. */ - bpf_link_inc(&shim_link->link.link); - mutex_unlock(&tr->mutex); bpf_trampoline_put(tr); /* bpf_trampoline_get above */ return 0;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bb12ba0..e381415 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c
@@ -617,6 +617,13 @@ static bool is_atomic_load_insn(const struct bpf_insn *insn) insn->imm == BPF_LOAD_ACQ; } +static bool is_atomic_fetch_insn(const struct bpf_insn *insn) +{ + return BPF_CLASS(insn->code) == BPF_STX && + BPF_MODE(insn->code) == BPF_ATOMIC && + (insn->imm & BPF_FETCH); +} + static int __get_spi(s32 off) { return (-off - 1) / BPF_REG_SIZE; @@ -2379,6 +2386,9 @@ static void __update_reg32_bounds(struct bpf_reg_state *reg) static void __update_reg64_bounds(struct bpf_reg_state *reg) { + u64 tnum_next, tmax; + bool umin_in_tnum; + /* min signed is max(sign bit) | min(other bits) */ reg->smin_value = max_t(s64, reg->smin_value, reg->var_off.value | (reg->var_off.mask & S64_MIN)); @@ -2388,6 +2398,33 @@ static void __update_reg64_bounds(struct bpf_reg_state *reg) reg->umin_value = max(reg->umin_value, reg->var_off.value); reg->umax_value = min(reg->umax_value, reg->var_off.value | reg->var_off.mask); + + /* Check if u64 and tnum overlap in a single value */ + tnum_next = tnum_step(reg->var_off, reg->umin_value); + umin_in_tnum = (reg->umin_value & ~reg->var_off.mask) == reg->var_off.value; + tmax = reg->var_off.value | reg->var_off.mask; + if (umin_in_tnum && tnum_next > reg->umax_value) { + /* The u64 range and the tnum only overlap in umin. + * u64: ---[xxxxxx]----- + * tnum: --xx----------x- + */ + ___mark_reg_known(reg, reg->umin_value); + } else if (!umin_in_tnum && tnum_next == tmax) { + /* The u64 range and the tnum only overlap in the maximum value + * represented by the tnum, called tmax. + * u64: ---[xxxxxx]----- + * tnum: xx-----x-------- + */ + ___mark_reg_known(reg, tmax); + } else if (!umin_in_tnum && tnum_next <= reg->umax_value && + tnum_step(reg->var_off, tnum_next) > reg->umax_value) { + /* The u64 range and the tnum only overlap in between umin + * (excluded) and umax. + * u64: ---[xxxxxx]----- + * tnum: xx----x-------x- + */ + ___mark_reg_known(reg, tnum_next); + } } static void __update_reg_bounds(struct bpf_reg_state *reg) @@ -2481,6 +2518,30 @@ static void __reg32_deduce_bounds(struct bpf_reg_state *reg) if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) { reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value); reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value); + } else { + if (reg->u32_max_value < (u32)reg->s32_min_value) { + /* See __reg64_deduce_bounds() for detailed explanation. 
+ * Refine ranges in the following situation: + * + * 0 U32_MAX + * | [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx] | + * |----------------------------|----------------------------| + * |xxxxx s32 range xxxxxxxxx] [xxxxxxx| + * 0 S32_MAX S32_MIN -1 + */ + reg->s32_min_value = (s32)reg->u32_min_value; + reg->u32_max_value = min_t(u32, reg->u32_max_value, reg->s32_max_value); + } else if ((u32)reg->s32_max_value < reg->u32_min_value) { + /* + * 0 U32_MAX + * | [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx] | + * |----------------------------|----------------------------| + * |xxxxxxxxx] [xxxxxxxxxxxx s32 range | + * 0 S32_MAX S32_MIN -1 + */ + reg->s32_max_value = (s32)reg->u32_max_value; + reg->u32_min_value = max_t(u32, reg->u32_min_value, reg->s32_min_value); + } } } @@ -4393,10 +4454,24 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, * dreg still needs precision before this insn */ } - } else if (class == BPF_LDX || is_atomic_load_insn(insn)) { - if (!bt_is_reg_set(bt, dreg)) + } else if (class == BPF_LDX || + is_atomic_load_insn(insn) || + is_atomic_fetch_insn(insn)) { + u32 load_reg = dreg; + + /* + * Atomic fetch operation writes the old value into + * a register (sreg or r0) and if it was tracked for + * precision, propagate to the stack slot like we do + * in regular ldx. + */ + if (is_atomic_fetch_insn(insn)) + load_reg = insn->imm == BPF_CMPXCHG ? + BPF_REG_0 : sreg; + + if (!bt_is_reg_set(bt, load_reg)) return 0; - bt_clear_reg(bt, dreg); + bt_clear_reg(bt, load_reg); /* scalars can only be spilled into stack w/o losing precision. * Load from any other memory can be zero extended. @@ -7851,7 +7926,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } else if (reg->type == CONST_PTR_TO_MAP) { err = check_ptr_to_map_access(env, regs, regno, off, size, t, value_regno); - } else if (base_type(reg->type) == PTR_TO_BUF) { + } else if (base_type(reg->type) == PTR_TO_BUF && + !type_may_be_null(reg->type)) { bool rdonly_mem = type_is_rdonly_mem(reg->type); u32 *max_access; @@ -15856,6 +15932,13 @@ static void scalar_byte_swap(struct bpf_reg_state *dst_reg, struct bpf_insn *ins /* Apply bswap if alu64 or switch between big-endian and little-endian machines */ bool need_bswap = alu64 || (to_le == is_big_endian); + /* + * If the register is mutated, manually reset its scalar ID to break + * any existing ties and avoid incorrect bounds propagation. + */ + if (need_bswap || insn->imm == 16 || insn->imm == 32) + dst_reg->id = 0; + if (need_bswap) { if (insn->imm == 16) dst_reg->var_off = tnum_bswap16(dst_reg->var_off); @@ -15938,7 +16021,7 @@ static int maybe_fork_scalars(struct bpf_verifier_env *env, struct bpf_insn *ins else return 0; - branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false); + branch = push_stack(env, env->insn_idx, env->insn_idx, false); if (IS_ERR(branch)) return PTR_ERR(branch); @@ -17305,17 +17388,24 @@ static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_st * in verifier state, save R in linked_regs if R->id == id. * If there are too many Rs sharing same id, reset id for leftover Rs. 
*/ -static void collect_linked_regs(struct bpf_verifier_state *vstate, u32 id, +static void collect_linked_regs(struct bpf_verifier_env *env, + struct bpf_verifier_state *vstate, + u32 id, struct linked_regs *linked_regs) { + struct bpf_insn_aux_data *aux = env->insn_aux_data; struct bpf_func_state *func; struct bpf_reg_state *reg; + u16 live_regs; int i, j; id = id & ~BPF_ADD_CONST; for (i = vstate->curframe; i >= 0; i--) { + live_regs = aux[frame_insn_idx(vstate, i)].live_regs_before; func = vstate->frame[i]; for (j = 0; j < BPF_REG_FP; j++) { + if (!(live_regs & BIT(j))) + continue; reg = &func->regs[j]; __collect_linked_regs(linked_regs, reg, id, i, j, true); } @@ -17347,6 +17437,12 @@ static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_s continue; if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST)) continue; + /* + * Skip mixed 32/64-bit links: the delta relationship doesn't + * hold across different ALU widths. + */ + if (((reg->id ^ known_reg->id) & BPF_ADD_CONST) == BPF_ADD_CONST) + continue; if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) || reg->off == known_reg->off) { s32 saved_subreg_def = reg->subreg_def; @@ -17374,7 +17470,7 @@ static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_s scalar32_min_max_add(reg, &fake_reg); scalar_min_max_add(reg, &fake_reg); reg->var_off = tnum_add(reg->var_off, fake_reg.var_off); - if (known_reg->id & BPF_ADD_CONST32) + if ((reg->id | known_reg->id) & BPF_ADD_CONST32) zext_32_to_64(reg); reg_bounds_sync(reg); } @@ -17530,9 +17626,9 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, * if parent state is created. */ if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id) - collect_linked_regs(this_branch, src_reg->id, &linked_regs); + collect_linked_regs(env, this_branch, src_reg->id, &linked_regs); if (dst_reg->type == SCALAR_VALUE && dst_reg->id) - collect_linked_regs(this_branch, dst_reg->id, &linked_regs); + collect_linked_regs(env, this_branch, dst_reg->id, &linked_regs); if (linked_regs.cnt > 1) { err = push_jmp_history(env, this_branch, 0, linked_regs_pack(&linked_regs)); if (err) @@ -19802,11 +19898,14 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, * Also verify that new value satisfies old value range knowledge. */ - /* ADD_CONST mismatch: different linking semantics */ - if ((rold->id & BPF_ADD_CONST) && !(rcur->id & BPF_ADD_CONST)) - return false; - - if (rold->id && !(rold->id & BPF_ADD_CONST) && (rcur->id & BPF_ADD_CONST)) + /* + * ADD_CONST flags must match exactly: BPF_ADD_CONST32 and + * BPF_ADD_CONST64 have different linking semantics in + * sync_linked_regs() (alu32 zero-extends, alu64 does not), + * so pruning across different flag types is unsafe. + */ + if (rold->id && + (rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST)) return false; /* Both have offset linkage: offsets must match */ @@ -19838,8 +19937,13 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, * since someone could have accessed through (ptr - k), or * even done ptr -= k in a register, to get a safe access. */ - if (rold->range > rcur->range) + if (rold->range < 0 || rcur->range < 0) { + /* special case for [BEYOND|AT]_PKT_END */ + if (rold->range != rcur->range) + return false; + } else if (rold->range > rcur->range) { return false; + } /* If the offsets don't match, we can't trust our alignment; * nor can we be sure that we won't fall out of range. 
*/ @@ -20843,7 +20947,8 @@ static int process_bpf_exit_full(struct bpf_verifier_env *env, * state when it exits. */ int err = check_resource_leak(env, exception_exit, - !env->cur_state->curframe, + exception_exit || !env->cur_state->curframe, + exception_exit ? "bpf_throw" : "BPF_EXIT instruction in main prog"); if (err) return err; @@ -25231,7 +25336,6 @@ BTF_ID(func, __x64_sys_exit_group) BTF_ID(func, do_exit) BTF_ID(func, do_group_exit) BTF_ID(func, kthread_complete_and_exit) -BTF_ID(func, kthread_exit) BTF_ID(func, make_task_dead) BTF_SET_END(noreturn_deny)
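[Editor's note: the new __update_reg64_bounds() cases rest on two primitives: a membership test (is a given u64 a member of the tnum?) and tnum_step() from the tnum.c hunk above. The membership test is one line; here is a sketch using the same struct shape, where tnum_contains() is an illustrative name rather than a kernel helper.]

#include <stdbool.h>
#include <stdint.h>

struct tnum { uint64_t value; uint64_t mask; };

/* x is representable by the tnum iff all of its known (unmasked) bits
 * agree with value; the unknown (masked) bits may be anything. */
static bool tnum_contains(struct tnum t, uint64_t x)
{
	return (x & ~t.mask) == t.value;
}

This is exactly the umin_in_tnum expression in the hunk: when it holds and tnum_step(var_off, umin) already exceeds umax, the [umin, umax] range and the tnum intersect in the single value umin, and the register can be marked known.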
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index c22cda7..4ca3cb9 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c
@@ -2126,6 +2126,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) #endif init_waitqueue_head(&cgrp->offline_waitq); + init_waitqueue_head(&cgrp->dying_populated_waitq); INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); } @@ -2608,6 +2609,7 @@ static void cgroup_migrate_add_task(struct task_struct *task, mgctx->tset.nr_tasks++; + css_set_skip_task_iters(cset, task); list_move_tail(&task->cg_list, &cset->mg_tasks); if (list_empty(&cset->mg_node)) list_add_tail(&cset->mg_node, @@ -5108,6 +5110,12 @@ static void css_task_iter_advance(struct css_task_iter *it) return; task = list_entry(it->task_pos, struct task_struct, cg_list); + /* + * Hide tasks that are exiting but not yet removed. Keep zombie + * leaders with live threads visible. + */ + if ((task->flags & PF_EXITING) && !atomic_read(&task->signal->live)) + goto repeat; if (it->flags & CSS_TASK_ITER_PROCS) { /* if PROCS, skip over tasks which aren't group leaders */ @@ -6217,6 +6225,78 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) return 0; }; +/** + * cgroup_drain_dying - wait for dying tasks to leave before rmdir + * @cgrp: the cgroup being removed + * + * cgroup.procs and cgroup.threads use css_task_iter which filters out + * PF_EXITING tasks so that userspace doesn't see tasks that have already been + * reaped via waitpid(). However, cgroup_has_tasks() - which tests whether the + * cgroup has non-empty css_sets - is only updated when dying tasks pass through + * cgroup_task_dead() in finish_task_switch(). This creates a window where + * cgroup.procs reads empty but cgroup_has_tasks() is still true, making rmdir + * fail with -EBUSY from cgroup_destroy_locked() even though userspace sees no + * tasks. + * + * This function aligns cgroup_has_tasks() with what userspace can observe. If + * cgroup_has_tasks() returns true but the task iterator sees nothing (all remaining tasks are + * PF_EXITING), we wait for cgroup_task_dead() to finish processing them. As the + * window between PF_EXITING and cgroup_task_dead() is short, the wait is brief. + * + * This function only concerns itself with this cgroup's own dying tasks. + * Whether the cgroup has children is cgroup_destroy_locked()'s problem. + * + * Each cgroup_task_dead() kicks the waitqueue via cset->cgrp_links, and we + * retry the full check from scratch. + * + * Must be called with cgroup_mutex held. + */ +static int cgroup_drain_dying(struct cgroup *cgrp) + __releases(&cgroup_mutex) __acquires(&cgroup_mutex) +{ + struct css_task_iter it; + struct task_struct *task; + DEFINE_WAIT(wait); + + lockdep_assert_held(&cgroup_mutex); +retry: + if (!cgroup_has_tasks(cgrp)) + return 0; + + /* Same iterator as cgroup.threads - if any task is visible, it's busy */ + css_task_iter_start(&cgrp->self, 0, &it); + task = css_task_iter_next(&it); + css_task_iter_end(&it); + + if (task) + return -EBUSY; + + /* + * All remaining tasks are PF_EXITING and will pass through + * cgroup_task_dead() shortly. Wait for a kick and retry. + * + * cgroup_has_tasks() can't transition from false to true while we're + * holding cgroup_mutex, but the true to false transition happens + * under css_set_lock (via cgroup_task_dead()). We must retest and + * prepare_to_wait() under css_set_lock. Otherwise, the transition + * can happen between our first test and prepare_to_wait(), and we + * sleep with no one to wake us.
+ */ + spin_lock_irq(&css_set_lock); + if (!cgroup_has_tasks(cgrp)) { + spin_unlock_irq(&css_set_lock); + return 0; + } + prepare_to_wait(&cgrp->dying_populated_waitq, &wait, + TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&css_set_lock); + mutex_unlock(&cgroup_mutex); + schedule(); + finish_wait(&cgrp->dying_populated_waitq, &wait); + mutex_lock(&cgroup_mutex); + goto retry; +} + int cgroup_rmdir(struct kernfs_node *kn) { struct cgroup *cgrp; @@ -6226,9 +6306,12 @@ int cgroup_rmdir(struct kernfs_node *kn) if (!cgrp) return 0; - ret = cgroup_destroy_locked(cgrp); - if (!ret) - TRACE_CGROUP_PATH(rmdir, cgrp); + ret = cgroup_drain_dying(cgrp); + if (!ret) { + ret = cgroup_destroy_locked(cgrp); + if (!ret) + TRACE_CGROUP_PATH(rmdir, cgrp); + } cgroup_kn_unlock(kn); return ret; @@ -6988,6 +7071,7 @@ void cgroup_task_exit(struct task_struct *tsk) static void do_cgroup_task_dead(struct task_struct *tsk) { + struct cgrp_cset_link *link; struct css_set *cset; unsigned long flags; @@ -7001,6 +7085,11 @@ static void do_cgroup_task_dead(struct task_struct *tsk) if (thread_group_leader(tsk) && atomic_read(&tsk->signal->live)) list_add_tail(&tsk->cg_list, &cset->dying_tasks); + /* kick cgroup_drain_dying() waiters, see cgroup_rmdir() */ + list_for_each_entry(link, &cset->cgrp_links, cgrp_link) + if (waitqueue_active(&link->cgrp->dying_populated_waitq)) + wake_up(&link->cgrp->dying_populated_waitq); + if (dl_task(tsk)) dec_dl_tasks_cs(tsk);
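[Editor's note: the comment block in cgroup_drain_dying() spells out the classic lost-wakeup rule: the sleep predicate must be retested under the same lock the waker uses to flip it. A userspace analogue with POSIX condition variables; illustrative only, since the kernel code uses waitqueues and css_set_lock instead.]

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t drained = PTHREAD_COND_INITIALIZER;
static bool has_tasks = true;	/* stands in for cgroup_has_tasks() */

static void *waiter(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	/* Retest under the lock: if the flag were checked before taking
	 * it, the waker could clear it and signal in the gap, leaving
	 * this thread asleep with no one to wake it. */
	while (has_tasks)
		pthread_cond_wait(&drained, &lock);
	pthread_mutex_unlock(&lock);
	return NULL;
}

static void waker(void)
{
	pthread_mutex_lock(&lock);	/* the transition happens under the lock */
	has_tasks = false;
	pthread_mutex_unlock(&lock);
	pthread_cond_signal(&drained);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waiter, NULL);
	waker();
	pthread_join(t, NULL);
	return 0;
}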
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 9faf343..1335e437 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c
@@ -62,6 +62,75 @@ static const char * const perr_strings[] = { }; /* + * CPUSET Locking Convention + * ------------------------- + * + * Below are the four global/local locks guarding cpuset structures in lock + * acquisition order: + * - cpuset_top_mutex + * - cpu_hotplug_lock (cpus_read_lock/cpus_write_lock) + * - cpuset_mutex + * - callback_lock (raw spinlock) + * + * As cpuset will now indirectly flush a number of different workqueues in + * housekeeping_update() to update housekeeping cpumasks when the set of + * isolated CPUs is going to be changed, it may be vulnerable to deadlock + * if we hold cpus_read_lock while calling into housekeeping_update(). + * + * The first lock, cpuset_top_mutex, will be held except when calling into + * cpuset_handle_hotplug() from the CPU hotplug code where cpus_write_lock + * and cpuset_mutex will be held instead. The main purpose of this mutex + * is to prevent regular cpuset control file write actions from interfering + * with the call to housekeeping_update(), though CPU hotplug operation can + * still happen in parallel. This mutex also provides protection for some + * internal variables. + * + * A task must hold all the remaining three locks to modify externally visible + * or used fields of cpusets, though some of the internally used cpuset fields + * and internal variables can be modified without holding callback_lock. If only + * reliable read access of the externally used fields is needed, a task can + * hold either cpuset_mutex or callback_lock which are exposed to other + * external subsystems. + * + * If a task holds cpu_hotplug_lock and cpuset_mutex, it blocks others, + * ensuring that it is the only task able to also acquire callback_lock and + * be able to modify cpusets. It can perform various checks on the cpuset + * structure first, knowing nothing will change. It can also allocate memory + * without holding callback_lock. While it is performing these checks, various + * callback routines can briefly acquire callback_lock to query cpusets. Once + * it is ready to make the changes, it takes callback_lock, blocking everyone + * else. + * + * Calls to the kernel memory allocator cannot be made while holding + * callback_lock which is a spinlock, as the memory allocator may sleep or + * call back into cpuset code and acquire callback_lock. + * + * Now, the task_struct fields mems_allowed and mempolicy may be changed + * by another task; we use alloc_lock in the task_struct to protect + * them. + * + * The cpuset_common_seq_show() handlers only hold callback_lock across + * small pieces of code, such as when reading out possibly multi-word + * cpumasks and nodemasks. + */ + +static DEFINE_MUTEX(cpuset_top_mutex); +static DEFINE_MUTEX(cpuset_mutex); + +/* + * File level internal variables below follow one of the following exclusion + * rules. + * + * RWCS: Read/write-able by holding either cpus_write_lock (and optionally + * cpuset_mutex) or both cpus_read_lock and cpuset_mutex. + * + * CSCB: Readable by holding either cpuset_mutex or callback_lock. Writable + * by holding both cpuset_mutex and callback_lock. + * + * T: Read/write-able by holding the cpuset_top_mutex. + */ + +/* * For local partitions, update to subpartitions_cpus & isolated_cpus is done * in update_parent_effective_cpumask(). For remote partitions, it is done in * the remote_partition_*() and remote_cpus_update() helpers.
@@ -70,19 +139,22 @@ static const char * const perr_strings[] = { * Exclusive CPUs distributed out to local or remote sub-partitions of * top_cpuset */ -static cpumask_var_t subpartitions_cpus; +static cpumask_var_t subpartitions_cpus; /* RWCS */ /* - * Exclusive CPUs in isolated partitions + * Exclusive CPUs in isolated partitions (shown in cpuset.cpus.isolated) */ -static cpumask_var_t isolated_cpus; +static cpumask_var_t isolated_cpus; /* CSCB */ /* - * isolated_cpus updating flag (protected by cpuset_mutex) - * Set if isolated_cpus is going to be updated in the current - * cpuset_mutex crtical section. + * Set if housekeeping cpumasks are to be updated. */ -static bool isolated_cpus_updating; +static bool update_housekeeping; /* RWCS */ + +/* + * Copy of isolated_cpus to be passed to housekeeping_update() + */ +static cpumask_var_t isolated_hk_cpus; /* T */ /* * A flag to force sched domain rebuild at the end of an operation. @@ -98,7 +170,7 @@ static bool isolated_cpus_updating; * Note that update_relax_domain_level() in cpuset-v1.c can still call * rebuild_sched_domains_locked() directly without using this flag. */ -static bool force_sd_rebuild; +static bool force_sd_rebuild; /* RWCS */ /* * Partition root states: @@ -218,42 +290,6 @@ struct cpuset top_cpuset = { .partition_root_state = PRS_ROOT, }; -/* - * There are two global locks guarding cpuset structures - cpuset_mutex and - * callback_lock. The cpuset code uses only cpuset_mutex. Other kernel - * subsystems can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset - * structures. Note that cpuset_mutex needs to be a mutex as it is used in - * paths that rely on priority inheritance (e.g. scheduler - on RT) for - * correctness. - * - * A task must hold both locks to modify cpusets. If a task holds - * cpuset_mutex, it blocks others, ensuring that it is the only task able to - * also acquire callback_lock and be able to modify cpusets. It can perform - * various checks on the cpuset structure first, knowing nothing will change. - * It can also allocate memory while just holding cpuset_mutex. While it is - * performing these checks, various callback routines can briefly acquire - * callback_lock to query cpusets. Once it is ready to make the changes, it - * takes callback_lock, blocking everyone else. - * - * Calls to the kernel memory allocator can not be made while holding - * callback_lock, as that would risk double tripping on callback_lock - * from one of the callbacks into the cpuset code from within - * __alloc_pages(). - * - * If a task is only holding callback_lock, then it has read-only - * access to cpusets. - * - * Now, the task_struct fields mems_allowed and mempolicy may be changed - * by other task, we use alloc_lock in the task_struct fields to protect - * them. - * - * The cpuset_common_seq_show() handlers only hold callback_lock across - * small pieces of code, such as when reading out possibly multi-word - * cpumasks and nodemasks. 
- */ - -static DEFINE_MUTEX(cpuset_mutex); - /** * cpuset_lock - Acquire the global cpuset mutex * @@ -283,6 +319,7 @@ void lockdep_assert_cpuset_lock_held(void) */ void cpuset_full_lock(void) { + mutex_lock(&cpuset_top_mutex); cpus_read_lock(); mutex_lock(&cpuset_mutex); } @@ -291,12 +328,14 @@ void cpuset_full_unlock(void) { mutex_unlock(&cpuset_mutex); cpus_read_unlock(); + mutex_unlock(&cpuset_top_mutex); } #ifdef CONFIG_LOCKDEP bool lockdep_is_cpuset_held(void) { - return lockdep_is_held(&cpuset_mutex); + return lockdep_is_held(&cpuset_mutex) || + lockdep_is_held(&cpuset_top_mutex); } #endif @@ -840,7 +879,7 @@ static int generate_sched_domains(cpumask_var_t **domains, /* * Cgroup v2 doesn't support domain attributes, just set all of them * to SD_ATTR_INIT. Also non-isolating partition root CPUs are a - * subset of HK_TYPE_DOMAIN housekeeping CPUs. + * subset of HK_TYPE_DOMAIN_BOOT housekeeping CPUs. */ for (i = 0; i < ndoms; i++) { /* @@ -849,7 +888,7 @@ static int generate_sched_domains(cpumask_var_t **domains, */ if (!csa || csa[i] == &top_cpuset) cpumask_and(doms[i], top_cpuset.effective_cpus, - housekeeping_cpumask(HK_TYPE_DOMAIN)); + housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT)); else cpumask_copy(doms[i], csa[i]->effective_cpus); if (dattr) @@ -961,7 +1000,7 @@ void rebuild_sched_domains_locked(void) * offline CPUs, a warning is emitted and we return directly to * prevent the panic. */ - for (i = 0; i < ndoms; ++i) { + for (i = 0; doms && i < ndoms; i++) { if (WARN_ON_ONCE(!cpumask_subset(doms[i], cpu_active_mask))) return; } @@ -1161,12 +1200,18 @@ static void reset_partition_data(struct cpuset *cs) static void isolated_cpus_update(int old_prs, int new_prs, struct cpumask *xcpus) { WARN_ON_ONCE(old_prs == new_prs); - if (new_prs == PRS_ISOLATED) + lockdep_assert_held(&callback_lock); + lockdep_assert_held(&cpuset_mutex); + if (new_prs == PRS_ISOLATED) { + if (cpumask_subset(xcpus, isolated_cpus)) + return; cpumask_or(isolated_cpus, isolated_cpus, xcpus); - else + } else { + if (!cpumask_intersects(xcpus, isolated_cpus)) + return; cpumask_andnot(isolated_cpus, isolated_cpus, xcpus); - - isolated_cpus_updating = true; + } + update_housekeeping = true; } /* @@ -1219,8 +1264,8 @@ static void partition_xcpus_del(int old_prs, struct cpuset *parent, isolated_cpus_update(old_prs, parent->partition_root_state, xcpus); - cpumask_and(xcpus, xcpus, cpu_active_mask); cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus); + cpumask_and(parent->effective_cpus, parent->effective_cpus, cpu_active_mask); } /* @@ -1284,22 +1329,45 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus) } /* - * update_isolation_cpumasks - Update external isolation related CPU masks + * cpuset_update_sd_hk_unlock - Rebuild sched domains, update HK & unlock * - * The following external CPU masks will be updated if necessary: - * - workqueue unbound cpumask + * Update housekeeping cpumasks and rebuild sched domains if necessary and + * then do a cpuset_full_unlock(). + * This should be called at the end of cpuset operation. 
*/ -static void update_isolation_cpumasks(void) +static void cpuset_update_sd_hk_unlock(void) + __releases(&cpuset_mutex) + __releases(&cpuset_top_mutex) { - int ret; + /* force_sd_rebuild will be cleared in rebuild_sched_domains_locked() */ + if (force_sd_rebuild) + rebuild_sched_domains_locked(); - if (!isolated_cpus_updating) - return; + if (update_housekeeping) { + update_housekeeping = false; + cpumask_copy(isolated_hk_cpus, isolated_cpus); - ret = housekeeping_update(isolated_cpus); - WARN_ON_ONCE(ret < 0); + /* + * housekeeping_update() is now called without holding + * cpus_read_lock and cpuset_mutex. Only cpuset_top_mutex + * is still being held for mutual exclusion. + */ + mutex_unlock(&cpuset_mutex); + cpus_read_unlock(); + WARN_ON_ONCE(housekeeping_update(isolated_hk_cpus)); + mutex_unlock(&cpuset_top_mutex); + } else { + cpuset_full_unlock(); + } +} - isolated_cpus_updating = false; +/* + * Work function to invoke cpuset_update_sd_hk_unlock() + */ +static void hk_sd_workfn(struct work_struct *work) +{ + cpuset_full_lock(); + cpuset_update_sd_hk_unlock(); } /** @@ -1450,7 +1518,6 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs, cs->remote_partition = true; cpumask_copy(cs->effective_xcpus, tmp->new_cpus); spin_unlock_irq(&callback_lock); - update_isolation_cpumasks(); cpuset_force_rebuild(); cs->prs_err = 0; @@ -1495,7 +1562,6 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp) compute_excpus(cs, cs->effective_xcpus); reset_partition_data(cs); spin_unlock_irq(&callback_lock); - update_isolation_cpumasks(); cpuset_force_rebuild(); /* @@ -1566,7 +1632,6 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *xcpus, if (xcpus) cpumask_copy(cs->exclusive_cpus, xcpus); spin_unlock_irq(&callback_lock); - update_isolation_cpumasks(); if (adding || deleting) cpuset_force_rebuild(); @@ -1910,7 +1975,6 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, partition_xcpus_add(new_prs, parent, tmp->delmask); spin_unlock_irq(&callback_lock); - update_isolation_cpumasks(); if ((old_prs != new_prs) && (cmd == partcmd_update)) update_partition_exclusive_flag(cs, new_prs); @@ -2155,7 +2219,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, WARN_ON(!is_in_v2_mode() && !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); - cpuset_update_tasks_cpumask(cp, cp->effective_cpus); + cpuset_update_tasks_cpumask(cp, tmp->new_cpus); /* * On default hierarchy, inherit the CS_SCHED_LOAD_BALANCE @@ -2878,7 +2942,6 @@ static int update_prstate(struct cpuset *cs, int new_prs) else if (isolcpus_updated) isolated_cpus_update(old_prs, new_prs, cs->effective_xcpus); spin_unlock_irq(&callback_lock); - update_isolation_cpumasks(); /* Force update if switching back to member & update effective_xcpus */ update_cpumasks_hier(cs, &tmpmask, !new_prs); @@ -2925,7 +2988,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) struct cgroup_subsys_state *css; struct cpuset *cs, *oldcs; struct task_struct *task; - bool cpus_updated, mems_updated; + bool setsched_check; int ret; /* used later by cpuset_attach() */ @@ -2940,20 +3003,31 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) if (ret) goto out_unlock; - cpus_updated = !cpumask_equal(cs->effective_cpus, oldcs->effective_cpus); - mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems); + /* + * Skip the per-task setscheduler permission check in v2 when nothing + * changes; migration permission derives from hierarchy ownership in + *
cgroup_procs_write_permission(). + */ + setsched_check = !cpuset_v2() || + !cpumask_equal(cs->effective_cpus, oldcs->effective_cpus) || + !nodes_equal(cs->effective_mems, oldcs->effective_mems); + + /* + * A v1 cpuset with tasks will have no CPU left only when CPU hotplug + * brings the last online CPU offline as users are not allowed to empty + * cpuset.cpus when there are active tasks inside. When that happens, + * we should allow tasks to migrate out without a security check to make + * sure they will be able to run after migration. + */ + if (!is_in_v2_mode() && cpumask_empty(oldcs->effective_cpus)) + setsched_check = false; cgroup_taskset_for_each(task, css, tset) { ret = task_can_attach(task); if (ret) goto out_unlock; - /* - * Skip rights over task check in v2 when nothing changes, - * migration permission derives from hierarchy ownership in - * cgroup_procs_write_permission()). - */ - if (!cpuset_v2() || (cpus_updated || mems_updated)) { + if (setsched_check) { ret = security_task_setscheduler(task); if (ret) goto out_unlock; @@ -3168,10 +3242,8 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of, } free_cpuset(trialcs); - if (force_sd_rebuild) - rebuild_sched_domains_locked(); out_unlock: - cpuset_full_unlock(); + cpuset_update_sd_hk_unlock(); if (of_cft(of)->private == FILE_MEMLIST) schedule_flush_migrate_mm(); return retval ?: nbytes; @@ -3278,7 +3350,7 @@ static ssize_t cpuset_partition_write(struct kernfs_open_file *of, char *buf, cpuset_full_lock(); if (is_cpuset_online(cs)) retval = update_prstate(cs, val); - cpuset_full_unlock(); + cpuset_update_sd_hk_unlock(); return retval ?: nbytes; } @@ -3452,7 +3524,7 @@ static void cpuset_css_killed(struct cgroup_subsys_state *css) /* Reset valid partition back to member */ if (is_partition_valid(cs)) update_prstate(cs, PRS_MEMBER); - cpuset_full_unlock(); + cpuset_update_sd_hk_unlock(); } static void cpuset_css_free(struct cgroup_subsys_state *css) @@ -3607,6 +3679,7 @@ int __init cpuset_init(void) BUG_ON(!alloc_cpumask_var(&top_cpuset.exclusive_cpus, GFP_KERNEL)); BUG_ON(!zalloc_cpumask_var(&subpartitions_cpus, GFP_KERNEL)); BUG_ON(!zalloc_cpumask_var(&isolated_cpus, GFP_KERNEL)); + BUG_ON(!zalloc_cpumask_var(&isolated_hk_cpus, GFP_KERNEL)); cpumask_setall(top_cpuset.cpus_allowed); nodes_setall(top_cpuset.mems_allowed); @@ -3778,6 +3851,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) */ static void cpuset_handle_hotplug(void) { + static DECLARE_WORK(hk_sd_work, hk_sd_workfn); static cpumask_t new_cpus; static nodemask_t new_mems; bool cpus_updated, mems_updated; @@ -3859,9 +3933,25 @@ static void cpuset_handle_hotplug(void) rcu_read_unlock(); } - /* rebuild sched domains if necessary */ + /* + * rebuild_sched_domains() will always be called directly if needed + * to make sure that a newly added or removed CPU will be reflected in + * the sched domains. However, if isolated partition invalidation + * or recreation is being done (update_housekeeping set), a work item + * will be queued to call housekeeping_update() to update the + * corresponding housekeeping cpumasks after some slight delay. + * + * We rely on WORK_STRUCT_PENDING_BIT to not requeue a work item that + * is still pending. Before the pending bit is cleared, the work data + * is copied out and the work item dequeued. So it is possible to queue + * the work again before the hk_sd_workfn() is invoked to process the + * previously queued work. Since hk_sd_workfn() doesn't use the work + * item at all, this is not a problem.
+ */ if (force_sd_rebuild) rebuild_sched_domains_cpuslocked(); + if (update_housekeeping) + queue_work(system_dfl_wq, &hk_sd_work); free_tmpmasks(ptmp); }
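[Editor's note: cpuset_update_sd_hk_unlock() above follows a common shape: snapshot the state the slow operation needs while the inner locks are held, drop them, then run the slow operation (here housekeeping_update(), which flushes workqueues) under only the outer mutex. A pthread sketch of that shape, with all names illustrative.]

#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER;	/* ~ cpuset_top_mutex */
static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER;	/* ~ cpuset_mutex */
static uint64_t isolated;		/* live state, protected by inner */
static uint64_t isolated_snapshot;	/* copy for the slow path, protected by outer */

/* Stand-in for housekeeping_update(): may block, so it must not be
 * called with the inner lock held. */
static void slow_update(uint64_t mask)
{
	(void)mask;
}

/* Caller holds outer and inner, like cpuset_update_sd_hk_unlock(). */
static void update_and_unlock(void)
{
	isolated_snapshot = isolated;	/* snapshot under the inner lock */
	pthread_mutex_unlock(&inner);	/* drop the lock the slow path could deadlock on */
	slow_update(isolated_snapshot);	/* only outer is held here */
	pthread_mutex_unlock(&outer);
}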
diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config index 774702591..307c97a 100644 --- a/kernel/configs/debug.config +++ b/kernel/configs/debug.config
@@ -29,7 +29,6 @@ # CONFIG_UBSAN_ALIGNMENT is not set # CONFIG_UBSAN_DIV_ZERO is not set # CONFIG_UBSAN_TRAP is not set -# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set CONFIG_DEBUG_FS=y CONFIG_DEBUG_FS_ALLOW_ALL=y CONFIG_DEBUG_IRQFLAGS=y
diff --git a/kernel/crash_dump_dm_crypt.c b/kernel/crash_dump_dm_crypt.c index 1f4067f..a20d409 100644 --- a/kernel/crash_dump_dm_crypt.c +++ b/kernel/crash_dump_dm_crypt.c
@@ -168,8 +168,8 @@ static int read_key_from_user_keying(struct dm_crypt_key *dm_key) memcpy(dm_key->data, ukp->data, ukp->datalen); dm_key->key_size = ukp->datalen; - kexec_dprintk("Get dm crypt key (size=%u) %s: %8ph\n", dm_key->key_size, - dm_key->key_desc, dm_key->data); + kexec_dprintk("Get dm crypt key (size=%u) %s\n", dm_key->key_size, + dm_key->key_desc); out: up_read(&key->sem);
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 86f87e4..0677918 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c
@@ -453,7 +453,7 @@ static int active_cacheline_set_overlap(phys_addr_t cln, int overlap) return overlap; } -static void active_cacheline_inc_overlap(phys_addr_t cln) +static void active_cacheline_inc_overlap(phys_addr_t cln, bool is_cache_clean) { int overlap = active_cacheline_read_overlap(cln); @@ -462,7 +462,7 @@ static void active_cacheline_inc_overlap(phys_addr_t cln) /* If we overflowed the overlap counter then we're potentially * leaking dma-mappings. */ - WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP, + WARN_ONCE(!is_cache_clean && overlap > ACTIVE_CACHELINE_MAX_OVERLAP, pr_fmt("exceeded %d overlapping mappings of cacheline %pa\n"), ACTIVE_CACHELINE_MAX_OVERLAP, &cln); } @@ -495,7 +495,7 @@ static int active_cacheline_insert(struct dma_debug_entry *entry, if (rc == -EEXIST) { struct dma_debug_entry *existing; - active_cacheline_inc_overlap(cln); + active_cacheline_inc_overlap(cln, entry->is_cache_clean); existing = radix_tree_lookup(&dma_active_cacheline, cln); /* A lookup failure here after we got -EEXIST is unexpected. */ WARN_ON(!existing); @@ -601,7 +601,8 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs) unsigned long flags; int rc; - entry->is_cache_clean = !!(attrs & DMA_ATTR_CPU_CACHE_CLEAN); + entry->is_cache_clean = attrs & (DMA_ATTR_DEBUGGING_IGNORE_CACHELINES | + DMA_ATTR_REQUIRE_COHERENT); bucket = get_hash_bucket(entry, &flags); hash_bucket_add(bucket, entry);
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index f476c63..6184ff3 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h
@@ -84,8 +84,8 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev, dma_addr_t dma_addr; if (is_swiotlb_force_bounce(dev)) { - if (attrs & DMA_ATTR_MMIO) - goto err_overflow; + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) + return DMA_MAPPING_ERROR; return swiotlb_map(dev, phys, size, dir, attrs); } @@ -98,7 +98,8 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev, dma_addr = phys_to_dma(dev, phys); if (unlikely(!dma_capable(dev, dma_addr, size, true)) || dma_kmalloc_needs_bounce(dev, size, dir)) { - if (is_swiotlb_active(dev)) + if (is_swiotlb_active(dev) && + !(attrs & DMA_ATTR_REQUIRE_COHERENT)) return swiotlb_map(dev, phys, size, dir, attrs); goto err_overflow; @@ -123,7 +124,7 @@ static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr, { phys_addr_t phys; - if (attrs & DMA_ATTR_MMIO) + if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) /* nothing to do: uncached and no swiotlb */ return;
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 3928a50..6d3dd0b 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c
@@ -164,6 +164,9 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, if (WARN_ON_ONCE(!dev->dma_mask)) return DMA_MAPPING_ERROR; + if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT)) + return DMA_MAPPING_ERROR; + if (dma_map_direct(dev, ops) || (!is_mmio && arch_dma_map_phys_direct(dev, phys + size))) addr = dma_direct_map_phys(dev, phys, size, dir, attrs); @@ -235,6 +238,9 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); + if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT)) + return -EOPNOTSUPP; + if (WARN_ON_ONCE(!dev->dma_mask)) return 0;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index d8e6f1d..9fd7370 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c
@@ -30,6 +30,7 @@ #include <linux/gfp.h> #include <linux/highmem.h> #include <linux/io.h> +#include <linux/kmsan-checks.h> #include <linux/iommu-helper.h> #include <linux/init.h> #include <linux/memblock.h> @@ -901,10 +902,19 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size local_irq_save(flags); page = pfn_to_page(pfn); - if (dir == DMA_TO_DEVICE) + if (dir == DMA_TO_DEVICE) { + /* + * Ideally, kmsan_check_highmem_page() + * could be used here to detect infoleaks, + * but callers may map uninitialized buffers + * that will be written by the device, + * causing false positives. + */ memcpy_from_page(vaddr, page, offset, sz); - else + } else { + kmsan_unpoison_memory(vaddr, sz); memcpy_to_page(page, offset, vaddr, sz); + } local_irq_restore(flags); size -= sz; @@ -913,8 +923,15 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size offset = 0; } } else if (dir == DMA_TO_DEVICE) { + /* + * Ideally, kmsan_check_memory() could be used here to detect + * infoleaks (uninitialized data being sent to device), but + * callers may map uninitialized buffers that will be written + * by the device, causing false positives. + */ memcpy(vaddr, phys_to_virt(orig_addr), size); } else { + kmsan_unpoison_memory(vaddr, size); memcpy(phys_to_virt(orig_addr), vaddr, size); } }
diff --git a/kernel/events/core.c b/kernel/events/core.c index ac70d68..89b40e4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c
@@ -4138,7 +4138,8 @@ static int merge_sched_in(struct perf_event *event, void *data) if (*perf_event_fasync(event)) event->pending_kill = POLL_ERR; - perf_event_wakeup(event); + event->pending_wakeup = 1; + irq_work_queue(&event->pending_irq); } else { struct perf_cpu_pmu_context *cpc = this_cpc(event->pmu_ctx->pmu); @@ -4812,7 +4813,7 @@ static void __perf_event_read(void *info) struct perf_event *sub, *event = data->event; struct perf_event_context *ctx = event->ctx; struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); - struct pmu *pmu = event->pmu; + struct pmu *pmu; /* * If this is a task context, we need to check whether it is @@ -4824,7 +4825,7 @@ static void __perf_event_read(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - raw_spin_lock(&ctx->lock); + guard(raw_spinlock)(&ctx->lock); ctx_time_update_event(ctx, event); perf_event_update_time(event); @@ -4832,25 +4833,22 @@ static void __perf_event_read(void *info) perf_event_update_sibling_time(event); if (event->state != PERF_EVENT_STATE_ACTIVE) - goto unlock; + return; if (!data->group) { - pmu->read(event); + perf_pmu_read(event); data->ret = 0; - goto unlock; + return; } + pmu = event->pmu_ctx->pmu; pmu->start_txn(pmu, PERF_PMU_TXN_READ); - pmu->read(event); - + perf_pmu_read(event); for_each_sibling_event(sub, event) perf_pmu_read(sub); data->ret = pmu->commit_txn(pmu); - -unlock: - raw_spin_unlock(&ctx->lock); } static inline u64 perf_event_count(struct perf_event *event, bool self) @@ -7464,29 +7462,29 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) ret = perf_mmap_aux(vma, event, nr_pages); if (ret) return ret; + + /* + * Since pinned accounting is per vm we cannot allow fork() to copy our + * vma. + */ + vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP); + vma->vm_ops = &perf_mmap_vmops; + + mapped = get_mapped(event, event_mapped); + if (mapped) + mapped(event, vma->vm_mm); + + /* + * Try to map it into the page table. On fail, invoke + * perf_mmap_close() to undo the above, as the callsite expects + * full cleanup in this case and therefore does not invoke + * vmops::close(). + */ + ret = map_range(event->rb, vma); + if (ret) + perf_mmap_close(vma); } - /* - * Since pinned accounting is per vm we cannot allow fork() to copy our - * vma. - */ - vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP); - vma->vm_ops = &perf_mmap_vmops; - - mapped = get_mapped(event, event_mapped); - if (mapped) - mapped(event, vma->vm_mm); - - /* - * Try to map it into the page table. On fail, invoke - * perf_mmap_close() to undo the above, as the callsite expects - * full cleanup in this case and therefore does not invoke - * vmops::close(). - */ - ret = map_range(event->rb, vma); - if (ret) - perf_mmap_close(vma); - return ret; } @@ -10776,6 +10774,13 @@ int perf_event_overflow(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { + /* + * Entry point from hardware PMI, interrupts should be disabled here. + * This serializes us against perf_event_remove_from_context() in + * things like perf_event_release_kernel(). 
+ */ + lockdep_assert_irqs_disabled(); + return __perf_event_overflow(event, 1, data, regs); } @@ -10852,6 +10857,19 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, { struct hw_perf_event *hwc = &event->hw; + /* + * This is: + * - software preempt + * - tracepoint preempt + * - tp_target_task irq (ctx->lock) + * - uprobes preempt/irq + * - kprobes preempt/irq + * - hw_breakpoint irq + * + * Any of these are sufficient to hold off RCU and thus ensure @event + * exists. + */ + lockdep_assert_preemption_disabled(); local64_add(nr, &event->count); if (!regs) @@ -10860,6 +10878,16 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, if (!is_sampling_event(event)) return; + /* + * Serialize against event_function_call() IPIs like normal overflow + * event handling. Specifically, must not allow + * perf_event_release_kernel() -> perf_remove_from_context() to make + * progress and 'release' the event from under us. + */ + guard(irqsave)(); + if (event->state != PERF_EVENT_STATE_ACTIVE) + return; + if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) { data->period = nr; return perf_swevent_overflow(event, 1, data, regs); @@ -11358,6 +11386,11 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct perf_sample_data data; struct perf_event *event; + /* + * Per being a tracepoint, this runs with preemption disabled. + */ + lockdep_assert_preemption_disabled(); + struct perf_raw_record raw = { .frag = { .size = entry_size, @@ -11690,6 +11723,11 @@ void perf_bp_event(struct perf_event *bp, void *data) struct perf_sample_data sample; struct pt_regs *regs = data; + /* + * Exception context, will have interrupts disabled. + */ + lockdep_assert_irqs_disabled(); + perf_sample_data_init(&sample, bp->attr.bp_addr, 0); if (!bp->hw.state && !perf_exclude_event(bp, regs)) @@ -12154,7 +12192,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) if (regs && !perf_exclude_event(event, regs)) { if (!(event->attr.exclude_idle && is_idle_task(current))) - if (__perf_event_overflow(event, 1, &data, regs)) + if (perf_event_overflow(event, &data, regs)) ret = HRTIMER_NORESTART; } @@ -14703,7 +14741,7 @@ inherit_event(struct perf_event *parent_event, get_ctx(child_ctx); child_event->ctx = child_ctx; - pmu_ctx = find_get_pmu_context(child_event->pmu, child_ctx, child_event); + pmu_ctx = find_get_pmu_context(parent_event->pmu_ctx->pmu, child_ctx, child_event); if (IS_ERR(pmu_ctx)) { free_event(child_event); return ERR_CAST(pmu_ctx);
diff --git a/kernel/exit.c b/kernel/exit.c index 8a87021..ede3117 100644 --- a/kernel/exit.c +++ b/kernel/exit.c
@@ -896,11 +896,16 @@ static void synchronize_group_exit(struct task_struct *tsk, long code) void __noreturn do_exit(long code) { struct task_struct *tsk = current; + struct kthread *kthread; int group_dead; WARN_ON(irqs_disabled()); WARN_ON(tsk->plug); + kthread = tsk_is_kthread(tsk); + if (unlikely(kthread)) + kthread_do_exit(kthread, code); + kcov_task_exit(tsk); kmsan_task_exit(tsk); @@ -1013,6 +1018,7 @@ void __noreturn do_exit(long code) lockdep_free_task(tsk); do_task_dead(); } +EXPORT_SYMBOL(do_exit); void __noreturn make_task_dead(int signr) {
diff --git a/kernel/fork.c b/kernel/fork.c index e832da9..bc2bf58 100644 --- a/kernel/fork.c +++ b/kernel/fork.c
@@ -1000,6 +1000,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) #ifdef CONFIG_SCHED_MM_CID tsk->mm_cid.cid = MM_CID_UNSET; tsk->mm_cid.active = 0; + INIT_HLIST_NODE(&tsk->mm_cid.node); #endif return tsk; @@ -1586,7 +1587,6 @@ static int copy_mm(u64 clone_flags, struct task_struct *tsk) tsk->mm = mm; tsk->active_mm = mm; - sched_mm_cid_fork(tsk); return 0; } @@ -2498,7 +2498,6 @@ __latent_entropy struct task_struct *copy_process( exit_nsproxy_namespaces(p); bad_fork_cleanup_mm: if (p->mm) { - sched_mm_cid_exit(p); mm_clear_owner(p->mm, p); mmput(p->mm); } @@ -3085,7 +3084,7 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) return 0; /* don't need lock here; in the worst case we'll do useless copy */ - if (fs->users == 1) + if (!(unshare_flags & CLONE_NEWNS) && fs->users == 1) return 0; *new_fsp = copy_fs_struct(fs);
diff --git a/kernel/futex/core.c b/kernel/futex/core.c index cf7e610..31e83a0 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c
@@ -342,7 +342,7 @@ static int __futex_key_to_node(struct mm_struct *mm, unsigned long addr) if (!vma) return FUTEX_NO_NODE; - mpol = vma_policy(vma); + mpol = READ_ONCE(vma->vm_policy); if (!mpol) return FUTEX_NO_NODE;
diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index bc1f7e8..7808068 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c
@@ -918,7 +918,7 @@ int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked) int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to; - struct task_struct *exiting = NULL; + struct task_struct *exiting; struct rt_mutex_waiter rt_waiter; struct futex_q q = futex_q_init; DEFINE_WAKE_Q(wake_q); @@ -933,6 +933,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl to = futex_setup_timer(time, &timeout, flags, 0); retry: + exiting = NULL; ret = get_futex_key(uaddr, flags, &q.key, FUTEX_WRITE); if (unlikely(ret != 0)) goto out;
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 743c7a7..77ad969 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c
@@ -459,6 +459,14 @@ SYSCALL_DEFINE4(futex_requeue, if (ret) return ret; + /* + * For now mandate both flags are identical, like the sys_futex() + * interface has. If/when we merge the variable sized futex support, + * that patch can modify this test to allow a difference in size. + */ + if (futexes[0].w.flags != futexes[1].w.flags) + return -EINVAL; + cmpval = futexes[0].w.val; return futex_requeue(u64_to_user_ptr(futexes[0].w.uaddr), futexes[0].w.flags,
diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index 79e655e..ae75815 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c
@@ -168,7 +168,7 @@ static bool __report_matches(const struct expect_report *r) if (!report_available()) return false; - expect = kmalloc_obj(observed.lines); + expect = (typeof(expect))kmalloc_obj(observed.lines); if (WARN_ON(!expect)) return false;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ab25b4a..bfc8908 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c
@@ -1144,12 +1144,12 @@ static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops, lockdep_assert_held(&kprobe_mutex); ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0); - if (WARN_ONCE(ret < 0, "Failed to arm kprobe-ftrace at %pS (error %d)\n", p->addr, ret)) + if (ret < 0) return ret; if (*cnt == 0) { ret = register_ftrace_function(ops); - if (WARN(ret < 0, "Failed to register kprobe-ftrace (error %d)\n", ret)) { + if (ret < 0) { /* * At this point, since ops is not registered, we should be safe from * registering an empty filter. @@ -1178,6 +1178,10 @@ static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops, int ret; lockdep_assert_held(&kprobe_mutex); + if (unlikely(kprobe_ftrace_disabled)) { + /* Now ftrace is disabled forever, disarm is already done. */ + return 0; + } if (*cnt == 1) { ret = unregister_ftrace_function(ops);
diff --git a/kernel/kthread.c b/kernel/kthread.c index 20451b6..791210d 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c
@@ -85,24 +85,6 @@ static inline struct kthread *to_kthread(struct task_struct *k) return k->worker_private; } -/* - * Variant of to_kthread() that doesn't assume @p is a kthread. - * - * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will - * always remain a kthread. For kthreads p->worker_private always - * points to a struct kthread. For tasks that are not kthreads - * p->worker_private is used to point to other things. - * - * Return NULL for any task that is not a kthread. - */ -static inline struct kthread *__to_kthread(struct task_struct *p) -{ - void *kthread = p->worker_private; - if (kthread && !(p->flags & PF_KTHREAD)) - kthread = NULL; - return kthread; -} - void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk) { struct kthread *kthread = to_kthread(tsk); @@ -193,7 +175,7 @@ EXPORT_SYMBOL_GPL(kthread_should_park); bool kthread_should_stop_or_park(void) { - struct kthread *kthread = __to_kthread(current); + struct kthread *kthread = tsk_is_kthread(current); if (!kthread) return false; @@ -234,7 +216,7 @@ EXPORT_SYMBOL_GPL(kthread_freezable_should_stop); */ void *kthread_func(struct task_struct *task) { - struct kthread *kthread = __to_kthread(task); + struct kthread *kthread = tsk_is_kthread(task); if (kthread) return kthread->threadfn; return NULL; @@ -266,7 +248,7 @@ EXPORT_SYMBOL_GPL(kthread_data); */ void *kthread_probe_data(struct task_struct *task) { - struct kthread *kthread = __to_kthread(task); + struct kthread *kthread = tsk_is_kthread(task); void *data = NULL; if (kthread) @@ -309,19 +291,8 @@ void kthread_parkme(void) } EXPORT_SYMBOL_GPL(kthread_parkme); -/** - * kthread_exit - Cause the current kthread return @result to kthread_stop(). - * @result: The integer value to return to kthread_stop(). - * - * While kthread_exit can be called directly, it exists so that - * functions which do some additional work in non-modular code such as - * module_put_and_kthread_exit can be implemented. - * - * Does not return. - */ -void __noreturn kthread_exit(long result) +void kthread_do_exit(struct kthread *kthread, long result) { - struct kthread *kthread = to_kthread(current); kthread->result = result; if (!list_empty(&kthread->affinity_node)) { mutex_lock(&kthread_affinity_lock); @@ -333,9 +304,7 @@ void __noreturn kthread_exit(long result) kthread->preferred_affinity = NULL; } } - do_exit(0); } -EXPORT_SYMBOL(kthread_exit); /** * kthread_complete_and_exit - Exit the current kthread. @@ -683,7 +652,7 @@ void kthread_set_per_cpu(struct task_struct *k, int cpu) bool kthread_is_per_cpu(struct task_struct *p) { - struct kthread *kthread = __to_kthread(p); + struct kthread *kthread = tsk_is_kthread(p); if (!kthread) return false;
diff --git a/kernel/liveupdate/luo_file.c b/kernel/liveupdate/luo_file.c index 8c79058..5acee41 100644 --- a/kernel/liveupdate/luo_file.c +++ b/kernel/liveupdate/luo_file.c
@@ -134,9 +134,12 @@ static LIST_HEAD(luo_file_handler_list); * state that is not preserved. Set by the handler's .preserve() * callback, and must be freed in the handler's .unpreserve() * callback. - * @retrieved: A flag indicating whether a user/kernel in the new kernel has + * @retrieve_status: Status code indicating whether a user/kernel in the new kernel has * successfully called retrieve() on this file. This prevents - * multiple retrieval attempts. + * multiple retrieval attempts. A value of 0 means a retrieve() + * has not been attempted, a positive value means the retrieve() + * was successful, and a negative value means the retrieve() + * failed, in which case the value is the error code of the call. * @mutex: A mutex that protects the fields of this specific instance * (e.g., @retrieve_status, @file), ensuring that operations like * retrieving or finishing a file are atomic. @@ -161,7 +164,7 @@ struct luo_file { struct file *file; u64 serialized_data; void *private_data; - bool retrieved; + int retrieve_status; struct mutex mutex; struct list_head list; u64 token; @@ -298,7 +301,6 @@ int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd) luo_file->file = file; luo_file->fh = fh; luo_file->token = token; - luo_file->retrieved = false; mutex_init(&luo_file->mutex); args.handler = fh; @@ -577,7 +579,12 @@ int luo_retrieve_file(struct luo_file_set *file_set, u64 token, return -ENOENT; guard(mutex)(&luo_file->mutex); - if (luo_file->retrieved) { + if (luo_file->retrieve_status < 0) { + /* Retrieve was attempted and it failed. Return the error code. */ + return luo_file->retrieve_status; + } + + if (luo_file->retrieve_status > 0) { /* * Someone is asking for this file again, so get a reference * for them. @@ -590,16 +597,19 @@ int luo_retrieve_file(struct luo_file_set *file_set, u64 token, args.handler = luo_file->fh; args.serialized_data = luo_file->serialized_data; err = luo_file->fh->ops->retrieve(&args); - if (!err) { - luo_file->file = args.file; - - /* Get reference so we can keep this file in LUO until finish */ - get_file(luo_file->file); - *filep = luo_file->file; - luo_file->retrieved = true; + if (err) { + /* Keep the error code for later use. */ + luo_file->retrieve_status = err; + return err; } - return err; + luo_file->file = args.file; + /* Get reference so we can keep this file in LUO until finish */ + get_file(luo_file->file); + *filep = luo_file->file; + luo_file->retrieve_status = 1; + + return 0; } static int luo_file_can_finish_one(struct luo_file_set *file_set, @@ -615,7 +625,7 @@ static int luo_file_can_finish_one(struct luo_file_set *file_set, args.handler = luo_file->fh; args.file = luo_file->file; args.serialized_data = luo_file->serialized_data; - args.retrieved = luo_file->retrieved; + args.retrieve_status = luo_file->retrieve_status; can_finish = luo_file->fh->ops->can_finish(&args); } @@ -632,7 +642,7 @@ static void luo_file_finish_one(struct luo_file_set *file_set, args.handler = luo_file->fh; args.file = luo_file->file; args.serialized_data = luo_file->serialized_data; - args.retrieved = luo_file->retrieved; + args.retrieve_status = luo_file->retrieve_status; luo_file->fh->ops->finish(&args); luo_flb_file_finish(luo_file->fh); @@ -788,7 +798,6 @@ int luo_file_deserialize(struct luo_file_set *file_set, luo_file->file = NULL; luo_file->serialized_data = file_ser[i].data; luo_file->token = file_ser[i].token; - luo_file->retrieved = false; mutex_init(&luo_file->mutex); list_add_tail(&luo_file->list, &file_set->files_list); }
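[Editor's note: the retrieve_status tri-state above (0 = never attempted, positive = retrieved, negative = the recorded error) makes repeated retrieval calls idempotent: a failure is replayed rather than retried. A compact sketch of the dispatch, with do_retrieve() as an illustrative stand-in for the handler's .retrieve() callback.]

#include <errno.h>

/* Stand-in backend: returns 0 on success or a negative errno. */
static int do_retrieve(void)
{
	return -ENOENT;
}

/* status: 0 = never attempted, >0 = already retrieved, <0 = recorded -errno */
static int retrieve_once(int *status)
{
	int err;

	if (*status < 0)
		return *status;		/* replay the original failure */
	if (*status > 0)
		return 0;		/* already retrieved: just take another reference */

	err = do_retrieve();
	*status = err ? err : 1;	/* latch the outcome for later callers */
	return err;
}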
diff --git a/kernel/liveupdate/luo_session.c b/kernel/liveupdate/luo_session.c index 7836772..25ae704 100644 --- a/kernel/liveupdate/luo_session.c +++ b/kernel/liveupdate/luo_session.c
@@ -558,8 +558,13 @@ int luo_session_deserialize(void) } scoped_guard(mutex, &session->mutex) { - luo_file_deserialize(&session->file_set, - &sh->ser[i].file_set_ser); + err = luo_file_deserialize(&session->file_set, + &sh->ser[i].file_set_ser); + } + if (err) { + pr_warn("Failed to deserialize files for session [%s] %pe\n", + session->name, ERR_PTR(err)); + return err; } }
diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index be74917..43b1bb0 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig
@@ -169,9 +169,10 @@ make them incompatible with the kernel you are running. If unsure, say N. +if MODVERSIONS + choice prompt "Module versioning implementation" - depends on MODVERSIONS help Select the tool used to calculate symbol versions for modules. @@ -206,7 +207,7 @@ config ASM_MODVERSIONS bool - default HAVE_ASM_MODVERSIONS && MODVERSIONS + default HAVE_ASM_MODVERSIONS help This enables module versioning for exported symbols also from assembly. This can be enabled only when the target architecture @@ -214,7 +215,6 @@ config EXTENDED_MODVERSIONS bool "Extended Module Versioning Support" - depends on MODVERSIONS help This enables extended MODVERSIONs support, allowing long symbol names to be versioned. @@ -224,7 +224,6 @@ config BASIC_MODVERSIONS bool "Basic Module Versioning Support" - depends on MODVERSIONS default y help This enables basic MODVERSIONS support, allowing older tools or @@ -237,6 +236,8 @@ This is enabled by default when MODVERSIONS are enabled. If unsure, say Y. +endif # MODVERSIONS + config MODULE_SRCVERSION_ALL bool "Source checksum for all modules" help @@ -277,10 +278,11 @@ Reject unsigned modules or signed modules for which we don't have a key. Without this, such modules will simply taint the kernel. +if MODULE_SIG || IMA_APPRAISE_MODSIG + config MODULE_SIG_ALL bool "Automatically sign all modules" default y - depends on MODULE_SIG || IMA_APPRAISE_MODSIG help Sign all modules during make modules_install. Without this option, modules must be signed manually, using the scripts/sign-file tool. @@ -290,7 +292,6 @@ choice prompt "Hash algorithm to sign modules" - depends on MODULE_SIG || IMA_APPRAISE_MODSIG default MODULE_SIG_SHA512 help This determines which sort of hashing algorithm will be used during @@ -327,7 +328,6 @@ config MODULE_SIG_HASH string - depends on MODULE_SIG || IMA_APPRAISE_MODSIG default "sha256" if MODULE_SIG_SHA256 default "sha384" if MODULE_SIG_SHA384 default "sha512" if MODULE_SIG_SHA512 @@ -335,6 +335,8 @@ default "sha3-384" if MODULE_SIG_SHA3_384 default "sha3-512" if MODULE_SIG_SHA3_512 +endif # MODULE_SIG || IMA_APPRAISE_MODSIG + config MODULE_COMPRESS bool "Module compression" help @@ -350,9 +352,10 @@ If unsure, say N. +if MODULE_COMPRESS + choice prompt "Module compression type" - depends on MODULE_COMPRESS help Choose the supported algorithm for module compression. @@ -379,7 +382,6 @@ config MODULE_COMPRESS_ALL bool "Automatically compress all modules" default y - depends on MODULE_COMPRESS help Compress all modules during 'make modules_install'. @@ -389,7 +391,6 @@ config MODULE_DECOMPRESS bool "Support in-kernel module decompression" - depends on MODULE_COMPRESS select ZLIB_INFLATE if MODULE_COMPRESS_GZIP select XZ_DEC if MODULE_COMPRESS_XZ select ZSTD_DECOMPRESS if MODULE_COMPRESS_ZSTD @@ -400,6 +401,8 @@ If unsure, say N. +endif # MODULE_COMPRESS + config MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS bool "Allow loading of modules with missing namespace imports" help
diff --git a/kernel/module/main.c b/kernel/module/main.c index 2bac4c7..c3ce106 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c
@@ -1568,6 +1568,13 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) break; default: + if (sym[i].st_shndx >= info->hdr->e_shnum) { + pr_err("%s: Symbol %s has an invalid section index %u (max %u)\n", + mod->name, name, sym[i].st_shndx, info->hdr->e_shnum - 1); + ret = -ENOEXEC; + break; + } + /* Divert to percpu allocation if a percpu var. */ if (sym[i].st_shndx == info->index.pcpu) secbase = (unsigned long)mod_percpu(mod); @@ -3544,12 +3551,6 @@ static int load_module(struct load_info *info, const char __user *uargs, mutex_unlock(&module_mutex); free_module: mod_stat_bump_invalid(info, flags); - /* Free lock-classes; relies on the preceding sync_rcu() */ - for_class_mod_mem_type(type, core_data) { - lockdep_free_key_range(mod->mem[type].base, - mod->mem[type].size); - } - module_memory_restore_rox(mod); module_deallocate(mod, info); free_copy:
diff --git a/kernel/nscommon.c b/kernel/nscommon.c index bdc3c86..3166c1f 100644 --- a/kernel/nscommon.c +++ b/kernel/nscommon.c
@@ -309,3 +309,9 @@ void __ns_ref_active_get(struct ns_common *ns) return; } } + +bool may_see_all_namespaces(void) +{ + return (task_active_pid_ns(current) == &init_pid_ns) && + ns_capable_noaudit(init_pid_ns.user_ns, CAP_SYS_ADMIN); +}
diff --git a/kernel/nstree.c b/kernel/nstree.c index f36c59e..6d12e59 100644 --- a/kernel/nstree.c +++ b/kernel/nstree.c
@@ -515,32 +515,11 @@ static inline bool __must_check ns_requested(const struct klistns *kls, static inline bool __must_check may_list_ns(const struct klistns *kls, struct ns_common *ns) { - if (kls->user_ns) { - if (kls->userns_capable) - return true; - } else { - struct ns_common *owner; - struct user_namespace *user_ns; - - owner = ns_owner(ns); - if (owner) - user_ns = to_user_ns(owner); - else - user_ns = &init_user_ns; - if (ns_capable_noaudit(user_ns, CAP_SYS_ADMIN)) - return true; - } - + if (kls->user_ns && kls->userns_capable) + return true; if (is_current_namespace(ns)) return true; - - if (ns->ns_type != CLONE_NEWUSER) - return false; - - if (ns_capable_noaudit(to_user_ns(ns), CAP_SYS_ADMIN)) - return true; - - return false; + return may_see_all_namespaces(); } static inline void ns_put(struct ns_common *ns) @@ -600,7 +579,7 @@ static ssize_t do_listns_userns(struct klistns *kls) ret = 0; head = &to_ns_common(kls->user_ns)->ns_owner_root.ns_list_head; - kls->userns_capable = ns_capable_noaudit(kls->user_ns, CAP_SYS_ADMIN); + kls->userns_capable = may_see_all_namespaces(); rcu_read_lock();
diff --git a/kernel/power/em_netlink.c b/kernel/power/em_netlink.c index 5a611d3..4d4fd29 100644 --- a/kernel/power/em_netlink.c +++ b/kernel/power/em_netlink.c
@@ -109,6 +109,8 @@ int dev_energymodel_nl_get_perf_domains_doit(struct sk_buff *skb, id = nla_get_u32(info->attrs[DEV_ENERGYMODEL_A_PERF_DOMAIN_PERF_DOMAIN_ID]); pd = em_perf_domain_get_by_id(id); + if (!pd) + return -EINVAL; __em_nl_get_pd_size(pd, &msg_sz); msg = genlmsg_new(msg_sz, GFP_KERNEL);
diff --git a/kernel/power/main.c b/kernel/power/main.c index 5f8c9e1..5429e9f 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c
@@ -40,7 +40,7 @@ void pm_restore_gfp_mask(void) { WARN_ON(!mutex_is_locked(&system_transition_mutex)); - if (WARN_ON(!saved_gfp_count) || --saved_gfp_count) + if (!saved_gfp_count || --saved_gfp_count) return; gfp_allowed_mask = saved_gfp_mask;
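For context (illustrative, not from the patch): saved_gfp_count implements plain save/restore nesting, and the removed WARN_ON() fired on a now-legitimate unbalanced restore. A freestanding sketch of the pattern, with hypothetical names, shows why only the outermost restore writes the mask back:

static unsigned int save_depth;
static unsigned int saved_mask;
static unsigned int allowed_mask = ~0u;  /* stand-in for gfp_allowed_mask */

static void mask_save(unsigned int newmask)
{
        if (!save_depth++)
                saved_mask = allowed_mask;
        allowed_mask = newmask;
}

static void mask_restore(void)
{
        /* Unbalanced restore or still nested: nothing to do. */
        if (!save_depth || --save_depth)
                return;
        allowed_mask = saved_mask;
}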
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 6e1321837..a564650 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c
@@ -2855,6 +2855,17 @@ int snapshot_write_finalize(struct snapshot_handle *handle) { int error; + /* + * Call snapshot_write_next() to drain any trailing zero pages, + * but make sure we're in the data page region first. + * This function can return PAGE_SIZE if the kernel was expecting + * another copy page. Return -ENODATA in that situation. + */ + if (handle->cur > nr_meta_pages + 1) { + error = snapshot_write_next(handle); + if (error) + return error > 0 ? -ENODATA : error; + } copy_last_highmem_page(); error = hibernate_restore_protect_page(handle->buffer); /* Do that only if we have loaded the image entirely */
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index dc5d614..9b10b57 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h
@@ -502,6 +502,15 @@ do { \ ___locked; \ }) +#define raw_spin_trylock_irqsave_rcu_node(p, flags) \ +({ \ + bool ___locked = raw_spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ + \ + if (___locked) \ + smp_mb__after_unlock_lock(); \ + ___locked; \ +}) + #define raw_lockdep_assert_held_rcu_node(p) \ lockdep_assert_held(&ACCESS_PRIVATE(p, lock))
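Usage mirrors the existing wrappers; a short sketch follows (do_something_locked() is hypothetical). On a successful trylock the smp_mb__after_unlock_lock() has already executed, so the critical section gets the same full ordering as with the unconditional raw_spin_lock_irqsave_rcu_node():

        unsigned long flags;

        if (raw_spin_trylock_irqsave_rcu_node(p, flags)) {
                do_something_locked(p);         /* fully ordered, as usual */
                raw_spin_unlock_irqrestore_rcu_node(p, flags);
        } else {
                /* Contended: record it, or fall back to the blocking lock. */
                raw_spin_lock_irqsave_rcu_node(p, flags);
                do_something_locked(p);
                raw_spin_unlock_irqrestore_rcu_node(p, flags);
        }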
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 3450c37..a2e2d51 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c
@@ -9,6 +9,7 @@ */ #include <linux/export.h> +#include <linux/irq_work.h> #include <linux/mutex.h> #include <linux/preempt.h> #include <linux/rcupdate_wait.h> @@ -41,6 +42,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp) ssp->srcu_idx_max = 0; INIT_WORK(&ssp->srcu_work, srcu_drive_gp); INIT_LIST_HEAD(&ssp->srcu_work.entry); + init_irq_work(&ssp->srcu_irq_work, srcu_tiny_irq_work); return 0; } @@ -84,6 +86,7 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); void cleanup_srcu_struct(struct srcu_struct *ssp) { WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]); + irq_work_sync(&ssp->srcu_irq_work); flush_work(&ssp->srcu_work); WARN_ON(ssp->srcu_gp_running); WARN_ON(ssp->srcu_gp_waiting); @@ -177,6 +180,20 @@ void srcu_drive_gp(struct work_struct *wp) } EXPORT_SYMBOL_GPL(srcu_drive_gp); +/* + * Use an irq_work to defer schedule_work() to avoid acquiring the workqueue + * pool->lock while the caller might hold scheduler locks, causing lockdep + * splats due to workqueue_init() doing a wakeup. + */ +void srcu_tiny_irq_work(struct irq_work *irq_work) +{ + struct srcu_struct *ssp; + + ssp = container_of(irq_work, struct srcu_struct, srcu_irq_work); + schedule_work(&ssp->srcu_work); +} +EXPORT_SYMBOL_GPL(srcu_tiny_irq_work); + static void srcu_gp_start_if_needed(struct srcu_struct *ssp) { unsigned long cookie; @@ -189,7 +206,7 @@ static void srcu_gp_start_if_needed(struct srcu_struct *ssp) WRITE_ONCE(ssp->srcu_idx_max, cookie); if (!READ_ONCE(ssp->srcu_gp_running)) { if (likely(srcu_init_done)) - schedule_work(&ssp->srcu_work); + irq_work_queue(&ssp->srcu_irq_work); else if (list_empty(&ssp->srcu_work.entry)) list_add(&ssp->srcu_work.entry, &srcu_boot_list); }
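The shape of the fix generalizes: a path that may hold scheduler locks must not call schedule_work() directly, so it queues an irq_work and lets the hard-IRQ callback do the queueing. A minimal sketch of the pattern (structure and names hypothetical):

#include <linux/irq_work.h>
#include <linux/workqueue.h>

struct deferred_kick {
        struct irq_work iw;     /* init with init_irq_work(&dk->iw, fn) */
        struct work_struct wk;
};

static void deferred_kick_fn(struct irq_work *iw)
{
        struct deferred_kick *dk = container_of(iw, struct deferred_kick, iw);

        /* Hard-IRQ context, no scheduler locks held: taking the workqueue
         * pool->lock (and the wakeup it may trigger) is safe here. */
        schedule_work(&dk->wk);
}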
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index aef8e91..0d01cd8 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c
@@ -19,6 +19,7 @@ #include <linux/mutex.h> #include <linux/percpu.h> #include <linux/preempt.h> +#include <linux/irq_work.h> #include <linux/rcupdate_wait.h> #include <linux/sched.h> #include <linux/smp.h> @@ -75,44 +76,9 @@ static bool __read_mostly srcu_init_done; static void srcu_invoke_callbacks(struct work_struct *work); static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay); static void process_srcu(struct work_struct *work); +static void srcu_irq_work(struct irq_work *work); static void srcu_delay_timer(struct timer_list *t); -/* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */ -#define spin_lock_rcu_node(p) \ -do { \ - spin_lock(&ACCESS_PRIVATE(p, lock)); \ - smp_mb__after_unlock_lock(); \ -} while (0) - -#define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock)) - -#define spin_lock_irq_rcu_node(p) \ -do { \ - spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \ - smp_mb__after_unlock_lock(); \ -} while (0) - -#define spin_unlock_irq_rcu_node(p) \ - spin_unlock_irq(&ACCESS_PRIVATE(p, lock)) - -#define spin_lock_irqsave_rcu_node(p, flags) \ -do { \ - spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ - smp_mb__after_unlock_lock(); \ -} while (0) - -#define spin_trylock_irqsave_rcu_node(p, flags) \ -({ \ - bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ - \ - if (___locked) \ - smp_mb__after_unlock_lock(); \ - ___locked; \ -}) - -#define spin_unlock_irqrestore_rcu_node(p, flags) \ - spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \ - /* * Initialize SRCU per-CPU data. Note that statically allocated * srcu_struct structures might already have srcu_read_lock() and @@ -131,7 +97,7 @@ static void init_srcu_struct_data(struct srcu_struct *ssp) */ for_each_possible_cpu(cpu) { sdp = per_cpu_ptr(ssp->sda, cpu); - spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); + raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); rcu_segcblist_init(&sdp->srcu_cblist); sdp->srcu_cblist_invoking = false; sdp->srcu_gp_seq_needed = ssp->srcu_sup->srcu_gp_seq; @@ -186,7 +152,7 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) /* Each pass through this loop initializes one srcu_node structure. 
*/ srcu_for_each_node_breadth_first(ssp, snp) { - spin_lock_init(&ACCESS_PRIVATE(snp, lock)); + raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock)); BUILD_BUG_ON(ARRAY_SIZE(snp->srcu_have_cbs) != ARRAY_SIZE(snp->srcu_data_have_cbs)); for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) { @@ -242,7 +208,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) if (!ssp->srcu_sup) return -ENOMEM; if (!is_static) - spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); + raw_spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL; ssp->srcu_sup->node = NULL; mutex_init(&ssp->srcu_sup->srcu_cb_mutex); @@ -252,6 +218,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) mutex_init(&ssp->srcu_sup->srcu_barrier_mutex); atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0); INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu); + init_irq_work(&ssp->srcu_sup->irq_work, srcu_irq_work); ssp->srcu_sup->sda_is_static = is_static; if (!is_static) { ssp->sda = alloc_percpu(struct srcu_data); @@ -263,9 +230,12 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) ssp->srcu_sup->srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL; ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns(); if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) { - if (!init_srcu_struct_nodes(ssp, is_static ? GFP_ATOMIC : GFP_KERNEL)) + if (!preemptible()) + WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_ALLOC); + else if (init_srcu_struct_nodes(ssp, GFP_KERNEL)) + WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG); + else goto err_free_sda; - WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG); } ssp->srcu_sup->srcu_ssp = ssp; smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed, @@ -394,20 +364,20 @@ static void srcu_transition_to_big(struct srcu_struct *ssp) /* Double-checked locking on ->srcu_size-state. */ if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) return; - spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); + raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) { - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); return; } __srcu_transition_to_big(ssp); - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } /* * Check to see if the just-encountered contention event justifies * a transition to SRCU_SIZE_BIG. */ -static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp) +static void raw_spin_lock_irqsave_check_contention(struct srcu_struct *ssp) { unsigned long j; @@ -429,16 +399,16 @@ static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp) * to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module * parameter permits this. 
*/ -static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags) +static void raw_spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags) { struct srcu_struct *ssp = sdp->ssp; - if (spin_trylock_irqsave_rcu_node(sdp, *flags)) + if (raw_spin_trylock_irqsave_rcu_node(sdp, *flags)) return; - spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); - spin_lock_irqsave_check_contention(ssp); - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags); - spin_lock_irqsave_rcu_node(sdp, *flags); + raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); + raw_spin_lock_irqsave_check_contention(ssp); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags); + raw_spin_lock_irqsave_rcu_node(sdp, *flags); } /* @@ -447,12 +417,12 @@ static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned lon * to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module * parameter permits this. */ -static void spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags) +static void raw_spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags) { - if (spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags)) + if (raw_spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags)) return; - spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); - spin_lock_irqsave_check_contention(ssp); + raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); + raw_spin_lock_irqsave_check_contention(ssp); } /* @@ -470,13 +440,13 @@ static void check_init_srcu_struct(struct srcu_struct *ssp) /* The smp_load_acquire() pairs with the smp_store_release(). */ if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed))) /*^^^*/ return; /* Already initialized. */ - spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); + raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq_needed)) { - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); return; } init_srcu_struct_fields(ssp, true); - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } /* @@ -742,13 +712,15 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) unsigned long delay; struct srcu_usage *sup = ssp->srcu_sup; - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); delay = srcu_get_delay(ssp); - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); if (WARN_ON(!delay)) return; /* Just leak it! */ if (WARN_ON(srcu_readers_active(ssp))) return; /* Just leak it! */ + /* Wait for irq_work to finish first as it may queue a new work. */ + irq_work_sync(&sup->irq_work); flush_delayed_work(&sup->work); for_each_possible_cpu(cpu) { struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu); @@ -960,7 +932,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) mutex_lock(&sup->srcu_cb_mutex); /* End the current grace period. */ - spin_lock_irq_rcu_node(sup); + raw_spin_lock_irq_rcu_node(sup); idx = rcu_seq_state(sup->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); if (srcu_gp_is_expedited(ssp)) @@ -971,7 +943,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) gpseq = rcu_seq_current(&sup->srcu_gp_seq); if (ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, gpseq)) WRITE_ONCE(sup->srcu_gp_seq_needed_exp, gpseq); - spin_unlock_irq_rcu_node(sup); + raw_spin_unlock_irq_rcu_node(sup); mutex_unlock(&sup->srcu_gp_mutex); /* A new grace period can start at this point. 
But only one. */ @@ -983,7 +955,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) } else { idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); srcu_for_each_node_breadth_first(ssp, snp) { - spin_lock_irq_rcu_node(snp); + raw_spin_lock_irq_rcu_node(snp); cbs = false; last_lvl = snp >= sup->level[rcu_num_lvls - 1]; if (last_lvl) @@ -998,7 +970,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) else mask = snp->srcu_data_have_cbs[idx]; snp->srcu_data_have_cbs[idx] = 0; - spin_unlock_irq_rcu_node(snp); + raw_spin_unlock_irq_rcu_node(snp); if (cbs) srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay); } @@ -1008,27 +980,27 @@ static void srcu_gp_end(struct srcu_struct *ssp) if (!(gpseq & counter_wrap_check)) for_each_possible_cpu(cpu) { sdp = per_cpu_ptr(ssp->sda, cpu); - spin_lock_irq_rcu_node(sdp); + raw_spin_lock_irq_rcu_node(sdp); if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100)) sdp->srcu_gp_seq_needed = gpseq; if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed_exp + 100)) sdp->srcu_gp_seq_needed_exp = gpseq; - spin_unlock_irq_rcu_node(sdp); + raw_spin_unlock_irq_rcu_node(sdp); } /* Callback initiation done, allow grace periods after next. */ mutex_unlock(&sup->srcu_cb_mutex); /* Start a new grace period if needed. */ - spin_lock_irq_rcu_node(sup); + raw_spin_lock_irq_rcu_node(sup); gpseq = rcu_seq_current(&sup->srcu_gp_seq); if (!rcu_seq_state(gpseq) && ULONG_CMP_LT(gpseq, sup->srcu_gp_seq_needed)) { srcu_gp_start(ssp); - spin_unlock_irq_rcu_node(sup); + raw_spin_unlock_irq_rcu_node(sup); srcu_reschedule(ssp, 0); } else { - spin_unlock_irq_rcu_node(sup); + raw_spin_unlock_irq_rcu_node(sup); } /* Transition to big if needed. */ @@ -1059,19 +1031,19 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) || (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s))) return; - spin_lock_irqsave_rcu_node(snp, flags); + raw_spin_lock_irqsave_rcu_node(snp, flags); sgsne = snp->srcu_gp_seq_needed_exp; if (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)) { - spin_unlock_irqrestore_rcu_node(snp, flags); + raw_spin_unlock_irqrestore_rcu_node(snp, flags); return; } WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); - spin_unlock_irqrestore_rcu_node(snp, flags); + raw_spin_unlock_irqrestore_rcu_node(snp, flags); } - spin_lock_irqsave_ssp_contention(ssp, &flags); + raw_spin_lock_irqsave_ssp_contention(ssp, &flags); if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s)) WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s); - spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } /* @@ -1109,12 +1081,12 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) { if (WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) && snp != snp_leaf) return; /* GP already done and CBs recorded. */ - spin_lock_irqsave_rcu_node(snp, flags); + raw_spin_lock_irqsave_rcu_node(snp, flags); snp_seq = snp->srcu_have_cbs[idx]; if (!srcu_invl_snp_seq(snp_seq) && ULONG_CMP_GE(snp_seq, s)) { if (snp == snp_leaf && snp_seq == s) snp->srcu_data_have_cbs[idx] |= sdp->grpmask; - spin_unlock_irqrestore_rcu_node(snp, flags); + raw_spin_unlock_irqrestore_rcu_node(snp, flags); if (snp == snp_leaf && snp_seq != s) { srcu_schedule_cbs_sdp(sdp, do_norm ? 
SRCU_INTERVAL : 0); return; @@ -1129,11 +1101,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, sgsne = snp->srcu_gp_seq_needed_exp; if (!do_norm && (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, s))) WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); - spin_unlock_irqrestore_rcu_node(snp, flags); + raw_spin_unlock_irqrestore_rcu_node(snp, flags); } /* Top of tree, must ensure the grace period will be started. */ - spin_lock_irqsave_ssp_contention(ssp, &flags); + raw_spin_lock_irqsave_ssp_contention(ssp, &flags); if (ULONG_CMP_LT(sup->srcu_gp_seq_needed, s)) { /* * Record need for grace period s. Pair with load @@ -1154,13 +1126,17 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, // it isn't. And it does not have to be. After all, it // can only be executed during early boot when there is only // the one boot CPU running with interrupts still disabled. + // + // Use an irq_work here to avoid acquiring runqueue lock with + // srcu rcu_node::lock held. BPF instrumentation could introduce the + // opposite dependency, hence we need to break the possible + // locking dependency here. if (likely(srcu_init_done)) - queue_delayed_work(rcu_gp_wq, &sup->work, - !!srcu_get_delay(ssp)); + irq_work_queue(&sup->irq_work); else if (list_empty(&sup->work.work.entry)) list_add(&sup->work.work.entry, &srcu_boot_list); } - spin_unlock_irqrestore_rcu_node(sup, flags); + raw_spin_unlock_irqrestore_rcu_node(sup, flags); } /* @@ -1172,9 +1148,9 @@ static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount) { unsigned long curdelay; - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); curdelay = !srcu_get_delay(ssp); - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); for (;;) { if (srcu_readers_active_idx_check(ssp, idx)) @@ -1285,12 +1261,12 @@ static bool srcu_should_expedite(struct srcu_struct *ssp) return false; /* If the local srcu_data structure has callbacks, not idle. */ sdp = raw_cpu_ptr(ssp->sda); - spin_lock_irqsave_rcu_node(sdp, flags); + raw_spin_lock_irqsave_rcu_node(sdp, flags); if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) { - spin_unlock_irqrestore_rcu_node(sdp, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); return false; /* Callbacks already present, so not idle. */ } - spin_unlock_irqrestore_rcu_node(sdp, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); /* * No local callbacks, so probabilistically probe global state. @@ -1350,7 +1326,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id()); else sdp = raw_cpu_ptr(ssp->sda); - spin_lock_irqsave_sdp_contention(sdp, &flags); + raw_spin_lock_irqsave_sdp_contention(sdp, &flags); if (rhp) rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp); /* @@ -1410,7 +1386,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, sdp->srcu_gp_seq_needed_exp = s; needexp = true; } - spin_unlock_irqrestore_rcu_node(sdp, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); /* Ensure that snp node tree is fully initialized before traversing it */ if (ss_state < SRCU_SIZE_WAIT_BARRIER) @@ -1522,7 +1498,7 @@ static void __synchronize_srcu(struct srcu_struct *ssp, bool do_norm) /* * Make sure that later code is ordered after the SRCU grace - * period. This pairs with the spin_lock_irq_rcu_node() + * period. This pairs with the raw_spin_lock_irq_rcu_node() * in srcu_invoke_callbacks().
Unlike Tree RCU, this is needed * because the current CPU might have been totally uninvolved with * (and thus unordered against) that grace period. @@ -1701,7 +1677,7 @@ static void srcu_barrier_cb(struct rcu_head *rhp) */ static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp) { - spin_lock_irq_rcu_node(sdp); + raw_spin_lock_irq_rcu_node(sdp); atomic_inc(&ssp->srcu_sup->srcu_barrier_cpu_cnt); sdp->srcu_barrier_head.func = srcu_barrier_cb; debug_rcu_head_queue(&sdp->srcu_barrier_head); @@ -1710,7 +1686,7 @@ static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp) debug_rcu_head_unqueue(&sdp->srcu_barrier_head); atomic_dec(&ssp->srcu_sup->srcu_barrier_cpu_cnt); } - spin_unlock_irq_rcu_node(sdp); + raw_spin_unlock_irq_rcu_node(sdp); } /** @@ -1761,7 +1737,7 @@ static void srcu_expedite_current_cb(struct rcu_head *rhp) bool needcb = false; struct srcu_data *sdp = container_of(rhp, struct srcu_data, srcu_ec_head); - spin_lock_irqsave_sdp_contention(sdp, &flags); + raw_spin_lock_irqsave_sdp_contention(sdp, &flags); if (sdp->srcu_ec_state == SRCU_EC_IDLE) { WARN_ON_ONCE(1); } else if (sdp->srcu_ec_state == SRCU_EC_PENDING) { @@ -1771,7 +1747,7 @@ static void srcu_expedite_current_cb(struct rcu_head *rhp) sdp->srcu_ec_state = SRCU_EC_PENDING; needcb = true; } - spin_unlock_irqrestore_rcu_node(sdp, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); // If needed, requeue ourselves as an expedited SRCU callback. if (needcb) __call_srcu(sdp->ssp, &sdp->srcu_ec_head, srcu_expedite_current_cb, false); @@ -1795,7 +1771,7 @@ void srcu_expedite_current(struct srcu_struct *ssp) migrate_disable(); sdp = this_cpu_ptr(ssp->sda); - spin_lock_irqsave_sdp_contention(sdp, &flags); + raw_spin_lock_irqsave_sdp_contention(sdp, &flags); if (sdp->srcu_ec_state == SRCU_EC_IDLE) { sdp->srcu_ec_state = SRCU_EC_PENDING; needcb = true; @@ -1804,7 +1780,7 @@ void srcu_expedite_current(struct srcu_struct *ssp) } else { WARN_ON_ONCE(sdp->srcu_ec_state != SRCU_EC_REPOST); } - spin_unlock_irqrestore_rcu_node(sdp, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); // If needed, queue an expedited SRCU callback. if (needcb) __call_srcu(ssp, &sdp->srcu_ec_head, srcu_expedite_current_cb, false); @@ -1848,17 +1824,17 @@ static void srcu_advance_state(struct srcu_struct *ssp) */ idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq)); /* ^^^ */ if (idx == SRCU_STATE_IDLE) { - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) { WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq)); - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); return; } idx = rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)); if (idx == SRCU_STATE_IDLE) srcu_gp_start(ssp); - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); if (idx != SRCU_STATE_IDLE) { mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); return; /* Someone else started the grace period. */ @@ -1872,10 +1848,10 @@ static void srcu_advance_state(struct srcu_struct *ssp) return; /* readers present, retry later. 
*/ } srcu_flip(ssp); - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); rcu_seq_set_state(&ssp->srcu_sup->srcu_gp_seq, SRCU_STATE_SCAN2); ssp->srcu_sup->srcu_n_exp_nodelay = 0; - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); } if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN2) { @@ -1913,7 +1889,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) ssp = sdp->ssp; rcu_cblist_init(&ready_cbs); - spin_lock_irq_rcu_node(sdp); + raw_spin_lock_irq_rcu_node(sdp); WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL)); rcu_segcblist_advance(&sdp->srcu_cblist, rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); @@ -1924,7 +1900,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) */ if (sdp->srcu_cblist_invoking || !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { - spin_unlock_irq_rcu_node(sdp); + raw_spin_unlock_irq_rcu_node(sdp); return; /* Someone else on the job or nothing to do. */ } @@ -1932,7 +1908,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) sdp->srcu_cblist_invoking = true; rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs); len = ready_cbs.len; - spin_unlock_irq_rcu_node(sdp); + raw_spin_unlock_irq_rcu_node(sdp); rhp = rcu_cblist_dequeue(&ready_cbs); for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { debug_rcu_head_unqueue(rhp); @@ -1947,11 +1923,11 @@ static void srcu_invoke_callbacks(struct work_struct *work) * Update counts, accelerate new callbacks, and if needed, * schedule another round of callback invocation. */ - spin_lock_irq_rcu_node(sdp); + raw_spin_lock_irq_rcu_node(sdp); rcu_segcblist_add_len(&sdp->srcu_cblist, -len); sdp->srcu_cblist_invoking = false; more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); - spin_unlock_irq_rcu_node(sdp); + raw_spin_unlock_irq_rcu_node(sdp); /* An SRCU barrier or callbacks from previous nesting work pending */ if (more) srcu_schedule_cbs_sdp(sdp, 0); @@ -1965,7 +1941,7 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay) { bool pushgp = true; - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) { if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq))) { /* All requests fulfilled, time to go idle. */ @@ -1975,7 +1951,7 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay) /* Outstanding request and no GP. Start one. 
*/ srcu_gp_start(ssp); } - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); if (pushgp) queue_delayed_work(rcu_gp_wq, &ssp->srcu_sup->work, delay); @@ -1995,9 +1971,9 @@ static void process_srcu(struct work_struct *work) ssp = sup->srcu_ssp; srcu_advance_state(ssp); - spin_lock_irq_rcu_node(ssp->srcu_sup); + raw_spin_lock_irq_rcu_node(ssp->srcu_sup); curdelay = srcu_get_delay(ssp); - spin_unlock_irq_rcu_node(ssp->srcu_sup); + raw_spin_unlock_irq_rcu_node(ssp->srcu_sup); if (curdelay) { WRITE_ONCE(sup->reschedule_count, 0); } else { @@ -2015,6 +1991,23 @@ static void process_srcu(struct work_struct *work) srcu_reschedule(ssp, curdelay); } +static void srcu_irq_work(struct irq_work *work) +{ + struct srcu_struct *ssp; + struct srcu_usage *sup; + unsigned long delay; + unsigned long flags; + + sup = container_of(work, struct srcu_usage, irq_work); + ssp = sup->srcu_ssp; + + raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); + delay = srcu_get_delay(ssp); + raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); + + queue_delayed_work(rcu_gp_wq, &sup->work, !!delay); +} + void srcutorture_get_gp_data(struct srcu_struct *ssp, int *flags, unsigned long *gp_seq) {
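Note the shutdown ordering the new irq_work imposes in both cleanup paths above: the irq_work callback is the only thing that can still queue the work item, so it must be quiesced first (sketch, following the hunk):

        irq_work_sync(&sup->irq_work);  /* no further queueing after this */
        flush_delayed_work(&sup->work); /* the last queued instance finishes */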
diff --git a/kernel/rseq.c b/kernel/rseq.c index b0973d1..38d3ef5 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c
@@ -80,6 +80,7 @@ #include <linux/syscalls.h> #include <linux/uaccess.h> #include <linux/types.h> +#include <linux/rseq.h> #include <asm/ptrace.h> #define CREATE_TRACE_POINTS @@ -449,13 +450,14 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 * auxiliary vector AT_RSEQ_ALIGN. If rseq_len is the original rseq * size, the required alignment is the original struct rseq alignment. * - * In order to be valid, rseq_len is either the original rseq size, or - * large enough to contain all supported fields, as communicated to + * The rseq_len is required to be greater than or equal to the original rseq + * size. In order to be valid, rseq_len is either the original rseq size, + * or large enough to contain all supported fields, as communicated to * user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE. */ if (rseq_len < ORIG_RSEQ_SIZE || (rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) || - (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) || + (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, rseq_alloc_align()) || rseq_len < offsetof(struct rseq, end)))) return -EINVAL; if (!access_ok(rseq, rseq_len))
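The combined condition is easier to read inverted. As an illustrative user-space restatement (not the kernel's code): alloc_align stands in for rseq_alloc_align(), feature_end for offsetof(struct rseq, end), and 32 is the original struct rseq size and alignment.

#include <stdbool.h>
#include <stdint.h>

#define ORIG_RSEQ_SIZE 32u

static bool rseq_args_valid(uintptr_t rseq, uint32_t rseq_len,
                            uint32_t feature_end, uint32_t alloc_align)
{
        if (rseq_len < ORIG_RSEQ_SIZE)
                return false;
        if (rseq_len == ORIG_RSEQ_SIZE)         /* original ABI */
                return (rseq % ORIG_RSEQ_SIZE) == 0;
        /* extended ABI: stricter alignment, must cover all features */
        return (rseq % alloc_align) == 0 && rseq_len >= feature_end;
}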
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7597776..496dff7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c
@@ -4729,8 +4729,11 @@ void sched_cancel_fork(struct task_struct *p) scx_cancel_fork(p); } +static void sched_mm_cid_fork(struct task_struct *t); + void sched_post_fork(struct task_struct *p) { + sched_mm_cid_fork(p); uclamp_post_fork(p); scx_post_fork(p); } @@ -6830,6 +6833,7 @@ static void __sched notrace __schedule(int sched_mode) /* SCX must consult the BPF scheduler to tell if rq is empty */ if (!rq->nr_running && !scx_enabled()) { next = prev; + rq->next_class = &idle_sched_class; goto picked; } } else if (!preempt && prev_state) { @@ -10616,13 +10620,10 @@ static inline void mm_cid_transit_to_cpu(struct task_struct *t, struct mm_cid_pc } } -static bool mm_cid_fixup_task_to_cpu(struct task_struct *t, struct mm_struct *mm) +static void mm_cid_fixup_task_to_cpu(struct task_struct *t, struct mm_struct *mm) { /* Remote access to mm::mm_cid::pcpu requires rq_lock */ guard(task_rq_lock)(t); - /* If the task is not active it is not in the users count */ - if (!t->mm_cid.active) - return false; if (cid_on_task(t->mm_cid.cid)) { /* If running on the CPU, put the CID in transit mode, otherwise drop it */ if (task_rq(t)->curr == t) @@ -10630,69 +10631,43 @@ static bool mm_cid_fixup_task_to_cpu(struct task_struct *t, struct mm_struct *mm else mm_unset_cid_on_task(t); } - return true; -} - -static void mm_cid_do_fixup_tasks_to_cpus(struct mm_struct *mm) -{ - struct task_struct *p, *t; - unsigned int users; - - /* - * This can obviously race with a concurrent affinity change, which - * increases the number of allowed CPUs for this mm, but that does - * not affect the mode and only changes the CID constraints. A - * possible switch back to per task mode happens either in the - * deferred handler function or in the next fork()/exit(). - * - * The caller has already transferred. The newly incoming task is - * already accounted for, but not yet visible. - */ - users = mm->mm_cid.users - 2; - if (!users) - return; - - guard(rcu)(); - for_other_threads(current, t) { - if (mm_cid_fixup_task_to_cpu(t, mm)) - users--; - } - - if (!users) - return; - - /* Happens only for VM_CLONE processes. */ - for_each_process_thread(p, t) { - if (t == current || t->mm != mm) - continue; - if (mm_cid_fixup_task_to_cpu(t, mm)) { - if (--users == 0) - return; - } - } } static void mm_cid_fixup_tasks_to_cpus(void) { struct mm_struct *mm = current->mm; + struct task_struct *t; - mm_cid_do_fixup_tasks_to_cpus(mm); + lockdep_assert_held(&mm->mm_cid.mutex); + + hlist_for_each_entry(t, &mm->mm_cid.user_list, mm_cid.node) { + /* Current has already transferred before invoking the fixup. 
*/ + if (t != current) + mm_cid_fixup_task_to_cpu(t, mm); + } + mm_cid_complete_transit(mm, MM_CID_ONCPU); } static bool sched_mm_cid_add_user(struct task_struct *t, struct mm_struct *mm) { + lockdep_assert_held(&mm->mm_cid.lock); + t->mm_cid.active = 1; + hlist_add_head(&t->mm_cid.node, &mm->mm_cid.user_list); mm->mm_cid.users++; return mm_update_max_cids(mm); } -void sched_mm_cid_fork(struct task_struct *t) +static void sched_mm_cid_fork(struct task_struct *t) { struct mm_struct *mm = t->mm; bool percpu; - WARN_ON_ONCE(!mm || t->mm_cid.cid != MM_CID_UNSET); + if (!mm) + return; + + WARN_ON_ONCE(t->mm_cid.cid != MM_CID_UNSET); guard(mutex)(&mm->mm_cid.mutex); scoped_guard(raw_spinlock_irq, &mm->mm_cid.lock) { @@ -10731,12 +10706,13 @@ void sched_mm_cid_fork(struct task_struct *t) static bool sched_mm_cid_remove_user(struct task_struct *t) { + lockdep_assert_held(&t->mm->mm_cid.lock); + t->mm_cid.active = 0; - scoped_guard(preempt) { - /* Clear the transition bit */ - t->mm_cid.cid = cid_from_transit_cid(t->mm_cid.cid); - mm_unset_cid_on_task(t); - } + /* Clear the transition bit */ + t->mm_cid.cid = cid_from_transit_cid(t->mm_cid.cid); + mm_unset_cid_on_task(t); + hlist_del_init(&t->mm_cid.node); t->mm->mm_cid.users--; return mm_update_max_cids(t->mm); } @@ -10879,11 +10855,13 @@ void mm_init_cid(struct mm_struct *mm, struct task_struct *p) mutex_init(&mm->mm_cid.mutex); mm->mm_cid.irq_work = IRQ_WORK_INIT_HARD(mm_cid_irq_work); INIT_WORK(&mm->mm_cid.work, mm_cid_work_fn); + INIT_HLIST_HEAD(&mm->mm_cid.user_list); cpumask_copy(mm_cpus_allowed(mm), &p->cpus_mask); bitmap_zero(mm_cidmask(mm), num_possible_cpus()); } #else /* CONFIG_SCHED_MM_CID */ static inline void mm_update_cpus_allowed(struct mm_struct *mm, const struct cpumask *affmsk) { } +static inline void sched_mm_cid_fork(struct task_struct *t) { } #endif /* !CONFIG_SCHED_MM_CID */ static DEFINE_PER_CPU(struct sched_change_ctx, sched_change_ctx);
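Structurally, the change replaces the two process-wide scans with a per-mm registry: every CID user is linked on mm->mm_cid.user_list under mm_cid.lock, so the fixup walks exactly the users. A reduced sketch of the bookkeeping (the containing structures are abridged here):

#include <linux/list.h>
#include <linux/spinlock.h>

struct mm_cid_bookkeeping {             /* abridged stand-in for mm->mm_cid */
        raw_spinlock_t lock;
        struct hlist_head user_list;
        unsigned int users;
};

static void cid_add_user(struct mm_cid_bookkeeping *mc, struct hlist_node *node)
{
        lockdep_assert_held(&mc->lock);
        hlist_add_head(node, &mc->user_list);
        mc->users++;
}

static void cid_remove_user(struct mm_cid_bookkeeping *mc, struct hlist_node *node)
{
        lockdep_assert_held(&mc->lock);
        hlist_del_init(node);
        mc->users--;
}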
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index b24f40f..15bf45b 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c
@@ -902,6 +902,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { s64 left_vruntime = -1, zero_vruntime, right_vruntime = -1, left_deadline = -1, spread; + u64 avruntime; struct sched_entity *last, *first, *root; struct rq *rq = cpu_rq(cpu); unsigned long flags; @@ -925,6 +926,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) if (last) right_vruntime = last->vruntime; zero_vruntime = cfs_rq->zero_vruntime; + avruntime = avg_vruntime(cfs_rq); raw_spin_rq_unlock_irqrestore(rq, flags); SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "left_deadline", @@ -934,7 +936,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "zero_vruntime", SPLIT_NS(zero_vruntime)); SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "avg_vruntime", - SPLIT_NS(avg_vruntime(cfs_rq))); + SPLIT_NS(avruntime)); SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "right_vruntime", SPLIT_NS(right_vruntime)); spread = right_vruntime - left_vruntime;
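The hoist is not cosmetic: with the kernel/sched/fair.c change later in this series, avg_vruntime() folds its computed delta into cfs_rq->zero_vruntime, i.e. it now writes to the cfs_rq, so it must run inside the locked sampling window rather than at SEQ_printf() time. In outline:

        raw_spin_rq_lock_irqsave(rq, flags);
        /* ... sample first/last/zero_vruntime ... */
        avruntime = avg_vruntime(cfs_rq);       /* may update zero_vruntime */
        raw_spin_rq_unlock_irqrestore(rq, flags);

        SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "avg_vruntime",
                   SPLIT_NS(avruntime));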
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 62b1f3a..064eaa7 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c
@@ -976,8 +976,12 @@ static bool scx_dsq_priq_less(struct rb_node *node_a, static void dsq_mod_nr(struct scx_dispatch_q *dsq, s32 delta) { - /* scx_bpf_dsq_nr_queued() reads ->nr without locking, use WRITE_ONCE() */ - WRITE_ONCE(dsq->nr, dsq->nr + delta); + /* + * scx_bpf_dsq_nr_queued() reads ->nr without locking. Use READ_ONCE() + * on the read side and WRITE_ONCE() on the write side to properly + * annotate the concurrent lockless access and avoid KCSAN warnings. + */ + WRITE_ONCE(dsq->nr, READ_ONCE(dsq->nr) + delta); } static void refill_task_slice_dfl(struct scx_sched *sch, struct task_struct *p) @@ -1099,22 +1103,13 @@ static void dispatch_enqueue(struct scx_sched *sch, struct scx_dispatch_q *dsq, } /* seq records the order tasks are queued, used by BPF DSQ iterator */ - dsq->seq++; + WRITE_ONCE(dsq->seq, dsq->seq + 1); p->scx.dsq_seq = dsq->seq; dsq_mod_nr(dsq, 1); p->scx.dsq = dsq; /* - * scx.ddsp_dsq_id and scx.ddsp_enq_flags are only relevant on the - * direct dispatch path, but we clear them here because the direct - * dispatch verdict may be overridden on the enqueue path during e.g. - * bypass. - */ - p->scx.ddsp_dsq_id = SCX_DSQ_INVALID; - p->scx.ddsp_enq_flags = 0; - - /* * We're transitioning out of QUEUEING or DISPATCHING. store_release to * match waiters' load_acquire. */ @@ -1279,12 +1274,34 @@ static void mark_direct_dispatch(struct scx_sched *sch, p->scx.ddsp_enq_flags = enq_flags; } +/* + * Clear @p direct dispatch state when leaving the scheduler. + * + * Direct dispatch state must be cleared in the following cases: + * - direct_dispatch(): cleared on the synchronous enqueue path, deferred + * dispatch keeps the state until consumed + * - process_ddsp_deferred_locals(): cleared after consuming deferred state, + * - do_enqueue_task(): cleared on enqueue fallbacks where the dispatch + * verdict is ignored (local/global/bypass) + * - dequeue_task_scx(): cleared after dispatch_dequeue(), covering deferred + * cancellation and holding_cpu races + * - scx_disable_task(): cleared for queued wakeup tasks, which are excluded by + * the scx_bypass() loop, so that stale state is not reused by a subsequent + * scheduler instance + */ +static inline void clear_direct_dispatch(struct task_struct *p) +{ + p->scx.ddsp_dsq_id = SCX_DSQ_INVALID; + p->scx.ddsp_enq_flags = 0; +} + static void direct_dispatch(struct scx_sched *sch, struct task_struct *p, u64 enq_flags) { struct rq *rq = task_rq(p); struct scx_dispatch_q *dsq = find_dsq_for_dispatch(sch, rq, p->scx.ddsp_dsq_id, p); + u64 ddsp_enq_flags; touch_core_sched_dispatch(rq, p); @@ -1325,8 +1342,10 @@ static void direct_dispatch(struct scx_sched *sch, struct task_struct *p, return; } - dispatch_enqueue(sch, dsq, p, - p->scx.ddsp_enq_flags | SCX_ENQ_CLEAR_OPSS); + ddsp_enq_flags = p->scx.ddsp_enq_flags; + clear_direct_dispatch(p); + + dispatch_enqueue(sch, dsq, p, ddsp_enq_flags | SCX_ENQ_CLEAR_OPSS); } static bool scx_rq_online(struct rq *rq) @@ -1435,6 +1454,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags, */ touch_core_sched(rq, p); refill_task_slice_dfl(sch, p); + clear_direct_dispatch(p); dispatch_enqueue(sch, dsq, p, enq_flags); } @@ -1466,16 +1486,15 @@ static void clr_task_runnable(struct task_struct *p, bool reset_runnable_at) p->scx.flags |= SCX_TASK_RESET_RUNNABLE_AT; } -static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags) +static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int core_enq_flags) { struct scx_sched *sch = scx_root; int 
sticky_cpu = p->scx.sticky_cpu; + u64 enq_flags = core_enq_flags | rq->scx.extra_enq_flags; if (enq_flags & ENQUEUE_WAKEUP) rq->scx.flags |= SCX_RQ_IN_WAKEUP; - enq_flags |= rq->scx.extra_enq_flags; - if (sticky_cpu >= 0) p->scx.sticky_cpu = -1; @@ -1607,6 +1626,7 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags sub_nr_running(rq, 1); dispatch_dequeue(rq, p); + clear_direct_dispatch(p); return true; } @@ -2290,13 +2310,15 @@ static void process_ddsp_deferred_locals(struct rq *rq) struct task_struct, scx.dsq_list.node))) { struct scx_sched *sch = scx_root; struct scx_dispatch_q *dsq; + u64 dsq_id = p->scx.ddsp_dsq_id; + u64 enq_flags = p->scx.ddsp_enq_flags; list_del_init(&p->scx.dsq_list.node); + clear_direct_dispatch(p); - dsq = find_dsq_for_dispatch(sch, rq, p->scx.ddsp_dsq_id, p); + dsq = find_dsq_for_dispatch(sch, rq, dsq_id, p); if (!WARN_ON_ONCE(dsq->id != SCX_DSQ_LOCAL)) - dispatch_to_local_dsq(sch, rq, dsq, p, - p->scx.ddsp_enq_flags); + dispatch_to_local_dsq(sch, rq, dsq, p, enq_flags); } } @@ -2401,7 +2423,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p, { struct scx_sched *sch = scx_root; - /* see kick_cpus_irq_workfn() */ + /* see kick_sync_wait_bal_cb() */ smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1); update_curr_scx(rq); @@ -2444,6 +2466,48 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p, switch_class(rq, next); } +static void kick_sync_wait_bal_cb(struct rq *rq) +{ + struct scx_kick_syncs __rcu *ks = __this_cpu_read(scx_kick_syncs); + unsigned long *ksyncs = rcu_dereference_sched(ks)->syncs; + bool waited; + s32 cpu; + + /* + * Drop rq lock and enable IRQs while waiting. IRQs must be enabled + * — a target CPU may be waiting for us to process an IPI (e.g. TLB + * flush) while we wait for its kick_sync to advance. + * + * Also, keep advancing our own kick_sync so that new kick_sync waits + * targeting us, which can start after we drop the lock, cannot form + * cyclic dependencies. + */ +retry: + waited = false; + for_each_cpu(cpu, rq->scx.cpus_to_sync) { + /* + * smp_load_acquire() pairs with smp_store_release() on + * kick_sync updates on the target CPUs. + */ + if (cpu == cpu_of(rq) || + smp_load_acquire(&cpu_rq(cpu)->scx.kick_sync) != ksyncs[cpu]) { + cpumask_clear_cpu(cpu, rq->scx.cpus_to_sync); + continue; + } + + raw_spin_rq_unlock_irq(rq); + while (READ_ONCE(cpu_rq(cpu)->scx.kick_sync) == ksyncs[cpu]) { + smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1); + cpu_relax(); + } + raw_spin_rq_lock_irq(rq); + waited = true; + } + + if (waited) + goto retry; +} + static struct task_struct *first_local_task(struct rq *rq) { return list_first_entry_or_null(&rq->scx.local_dsq.list, @@ -2457,10 +2521,10 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx) bool keep_prev; struct task_struct *p; - /* see kick_cpus_irq_workfn() */ + /* see kick_sync_wait_bal_cb() */ smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1); - rq->next_class = &ext_sched_class; + rq_modified_begin(rq, &ext_sched_class); rq_unpin_lock(rq, rf); balance_one(rq, prev); @@ -2468,6 +2532,17 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx) maybe_queue_balance_callback(rq); /* + * Defer to a balance callback which can drop rq lock and enable + * IRQs. Waiting directly in the pick path would deadlock against + * CPUs sending us IPIs (e.g. TLB flushes) while we wait for them. 
+ */ + if (unlikely(rq->scx.kick_sync_pending)) { + rq->scx.kick_sync_pending = false; + queue_balance_callback(rq, &rq->scx.kick_sync_bal_cb, + kick_sync_wait_bal_cb); + } + + /* * If any higher-priority sched class enqueued a runnable task on * this rq during balance_one(), abort and return RETRY_TASK, so * that the scheduler loop can restart. @@ -2475,7 +2550,7 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx) * If @force_scx is true, always try to pick a SCHED_EXT task, * regardless of any higher-priority sched classes activity. */ - if (!force_scx && sched_class_above(rq->next_class, &ext_sched_class)) + if (!force_scx && rq_modified_above(rq, &ext_sched_class)) return RETRY_TASK; keep_prev = rq->scx.flags & SCX_RQ_BAL_KEEP; @@ -2735,7 +2810,7 @@ static bool check_rq_for_timeouts(struct rq *rq) unsigned long last_runnable = p->scx.runnable_at; if (unlikely(time_after(jiffies, - last_runnable + scx_watchdog_timeout))) { + last_runnable + READ_ONCE(scx_watchdog_timeout)))) { u32 dur_ms = jiffies_to_msecs(jiffies - last_runnable); scx_exit(sch, SCX_EXIT_ERROR_STALL, 0, @@ -2763,7 +2838,7 @@ static void scx_watchdog_workfn(struct work_struct *work) cond_resched(); } queue_delayed_work(system_unbound_wq, to_delayed_work(work), - scx_watchdog_timeout / 2); + READ_ONCE(scx_watchdog_timeout) / 2); } void scx_tick(struct rq *rq) @@ -2959,6 +3034,8 @@ static void scx_disable_task(struct task_struct *p) lockdep_assert_rq_held(rq); WARN_ON_ONCE(scx_get_task_state(p) != SCX_TASK_ENABLED); + clear_direct_dispatch(p); + if (SCX_HAS_OP(sch, disable)) SCX_CALL_OP_TASK(sch, SCX_KF_REST, disable, rq, p); scx_set_task_state(p, SCX_TASK_READY); @@ -3585,7 +3662,6 @@ static int scx_cgroup_init(struct scx_sched *sch) ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, cgroup_init, NULL, css->cgroup, &args); if (ret) { - css_put(css); scx_error(sch, "ops.cgroup_init() failed (%d)", ret); return ret; } @@ -3708,7 +3784,9 @@ static void scx_kobj_release(struct kobject *kobj) static ssize_t scx_attr_ops_show(struct kobject *kobj, struct kobj_attribute *ka, char *buf) { - return sysfs_emit(buf, "%s\n", scx_root->ops.name); + struct scx_sched *sch = container_of(kobj, struct scx_sched, kobj); + + return sysfs_emit(buf, "%s\n", sch->ops.name); } SCX_ATTR(ops); @@ -3752,7 +3830,9 @@ static const struct kobj_type scx_ktype = { static int scx_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { - return add_uevent_var(env, "SCXOPS=%s", scx_root->ops.name); + const struct scx_sched *sch = container_of(kobj, struct scx_sched, kobj); + + return add_uevent_var(env, "SCXOPS=%s", sch->ops.name); } static const struct kset_uevent_ops scx_uevent_ops = { @@ -3901,8 +3981,8 @@ static u32 bypass_lb_cpu(struct scx_sched *sch, struct rq *rq, * consider offloading iff the total queued duration is over the * threshold. 
*/ - min_delta_us = scx_bypass_lb_intv_us / SCX_BYPASS_LB_MIN_DELTA_DIV; - if (delta < DIV_ROUND_UP(min_delta_us, scx_slice_bypass_us)) + min_delta_us = READ_ONCE(scx_bypass_lb_intv_us) / SCX_BYPASS_LB_MIN_DELTA_DIV; + if (delta < DIV_ROUND_UP(min_delta_us, READ_ONCE(scx_slice_bypass_us))) return 0; raw_spin_rq_lock_irq(rq); @@ -4130,7 +4210,7 @@ static void scx_bypass(bool bypass) WARN_ON_ONCE(scx_bypass_depth <= 0); if (scx_bypass_depth != 1) goto unlock; - WRITE_ONCE(scx_slice_dfl, scx_slice_bypass_us * NSEC_PER_USEC); + WRITE_ONCE(scx_slice_dfl, READ_ONCE(scx_slice_bypass_us) * NSEC_PER_USEC); bypass_timestamp = ktime_get_ns(); if (sch) scx_add_event(sch, SCX_EV_BYPASS_ACTIVATE, 1); @@ -4423,10 +4503,19 @@ static void scx_disable_workfn(struct kthread_work *work) scx_bypass(false); } +/* + * Claim the exit on @sch. The caller must ensure that the helper kthread work + * is kicked before the current task can be preempted. Once exit_kind is + * claimed, scx_error() can no longer trigger, so if the current task gets + * preempted and the BPF scheduler fails to schedule it back, the helper work + * will never be kicked and the whole system can wedge. + */ static bool scx_claim_exit(struct scx_sched *sch, enum scx_exit_kind kind) { int none = SCX_EXIT_NONE; + lockdep_assert_preemption_disabled(); + if (!atomic_try_cmpxchg(&sch->exit_kind, &none, kind)) return false; @@ -4449,6 +4538,7 @@ static void scx_disable(enum scx_exit_kind kind) rcu_read_lock(); sch = rcu_dereference(scx_root); if (sch) { + guard(preempt)(); scx_claim_exit(sch, kind); kthread_queue_work(sch->helper, &sch->disable_work); } @@ -4697,6 +4787,9 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len) if (!cpumask_empty(rq->scx.cpus_to_wait)) dump_line(&ns, " cpus_to_wait : %*pb", cpumask_pr_args(rq->scx.cpus_to_wait)); + if (!cpumask_empty(rq->scx.cpus_to_sync)) + dump_line(&ns, " cpus_to_sync : %*pb", + cpumask_pr_args(rq->scx.cpus_to_sync)); used = seq_buf_used(&ns); if (SCX_HAS_OP(sch, dump_cpu)) { @@ -4771,6 +4864,8 @@ static bool scx_vexit(struct scx_sched *sch, { struct scx_exit_info *ei = sch->exit_info; + guard(preempt)(); + if (!scx_claim_exit(sch, kind)) return false; @@ -4955,20 +5050,30 @@ static int validate_ops(struct scx_sched *sch, const struct sched_ext_ops *ops) return 0; } -static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) +/* + * scx_enable() is offloaded to a dedicated system-wide RT kthread to avoid + * starvation. During the READY -> ENABLED task switching loop, the calling + * thread's sched_class gets switched from fair to ext. As fair has higher + * priority than ext, the calling thread can be indefinitely starved under + * fair-class saturation, leading to a system hang. 
+ */ +struct scx_enable_cmd { + struct kthread_work work; + struct sched_ext_ops *ops; + int ret; +}; + +static void scx_enable_workfn(struct kthread_work *work) { + struct scx_enable_cmd *cmd = + container_of(work, struct scx_enable_cmd, work); + struct sched_ext_ops *ops = cmd->ops; struct scx_sched *sch; struct scx_task_iter sti; struct task_struct *p; unsigned long timeout; int i, cpu, ret; - if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN), - cpu_possible_mask)) { - pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n"); - return -EINVAL; - } - mutex_lock(&scx_enable_mutex); if (scx_enable_state() != SCX_DISABLED) { @@ -5060,7 +5165,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) WRITE_ONCE(scx_watchdog_timeout, timeout); WRITE_ONCE(scx_watchdog_timestamp, jiffies); queue_delayed_work(system_unbound_wq, &scx_watchdog_work, - scx_watchdog_timeout / 2); + READ_ONCE(scx_watchdog_timeout) / 2); /* * Once __scx_enabled is set, %current can be switched to SCX anytime. @@ -5185,13 +5290,15 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) atomic_long_inc(&scx_enable_seq); - return 0; + cmd->ret = 0; + return; err_free_ksyncs: free_kick_syncs(); err_unlock: mutex_unlock(&scx_enable_mutex); - return ret; + cmd->ret = ret; + return; err_disable_unlock_all: scx_cgroup_unlock(); @@ -5210,7 +5317,42 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) */ scx_error(sch, "scx_enable() failed (%d)", ret); kthread_flush_work(&sch->disable_work); - return 0; + cmd->ret = 0; +} + +static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) +{ + static struct kthread_worker *helper; + static DEFINE_MUTEX(helper_mutex); + struct scx_enable_cmd cmd; + + if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN), + cpu_possible_mask)) { + pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n"); + return -EINVAL; + } + + if (!READ_ONCE(helper)) { + mutex_lock(&helper_mutex); + if (!helper) { + struct kthread_worker *w = + kthread_run_worker(0, "scx_enable_helper"); + if (IS_ERR_OR_NULL(w)) { + mutex_unlock(&helper_mutex); + return -ENOMEM; + } + sched_set_fifo(w->task); + WRITE_ONCE(helper, w); + } + mutex_unlock(&helper_mutex); + } + + kthread_init_work(&cmd.work, scx_enable_workfn); + cmd.ops = ops; + + kthread_queue_work(READ_ONCE(helper), &cmd.work); + kthread_flush_work(&cmd.work); + return cmd.ret; } @@ -5545,11 +5687,11 @@ static bool kick_one_cpu(s32 cpu, struct rq *this_rq, unsigned long *ksyncs) if (cpumask_test_cpu(cpu, this_scx->cpus_to_wait)) { if (cur_class == &ext_sched_class) { + cpumask_set_cpu(cpu, this_scx->cpus_to_sync); ksyncs[cpu] = rq->scx.kick_sync; should_wait = true; - } else { - cpumask_clear_cpu(cpu, this_scx->cpus_to_wait); } + cpumask_clear_cpu(cpu, this_scx->cpus_to_wait); } resched_curr(rq); @@ -5604,27 +5746,15 @@ static void kick_cpus_irq_workfn(struct irq_work *irq_work) cpumask_clear_cpu(cpu, this_scx->cpus_to_kick_if_idle); } - if (!should_wait) - return; - - for_each_cpu(cpu, this_scx->cpus_to_wait) { - unsigned long *wait_kick_sync = &cpu_rq(cpu)->scx.kick_sync; - - /* - * Busy-wait until the task running at the time of kicking is no - * longer running. This can be used to implement e.g. core - * scheduling. - * - * smp_cond_load_acquire() pairs with store_releases in - * pick_task_scx() and put_prev_task_scx(). The former breaks - * the wait if SCX's scheduling path is entered even if the same - * task is picked subsequently. 
The latter is necessary to break - * the wait when $cpu is taken by a higher sched class. - */ - if (cpu != cpu_of(this_rq)) - smp_cond_load_acquire(wait_kick_sync, VAL != ksyncs[cpu]); - - cpumask_clear_cpu(cpu, this_scx->cpus_to_wait); + /* + * Can't wait in hardirq — kick_sync can't advance, deadlocking if + * CPUs wait for each other. Defer to kick_sync_wait_bal_cb(). + */ + if (should_wait) { + raw_spin_rq_lock(this_rq); + this_scx->kick_sync_pending = true; + resched_curr(this_rq); + raw_spin_rq_unlock(this_rq); } } @@ -5729,6 +5859,7 @@ void __init init_sched_ext_class(void) BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_kick_if_idle, GFP_KERNEL, n)); BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_preempt, GFP_KERNEL, n)); BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_wait, GFP_KERNEL, n)); + BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_sync, GFP_KERNEL, n)); rq->scx.deferred_irq_work = IRQ_WORK_INIT_HARD(deferred_irq_workfn); rq->scx.kick_cpus_irq_work = IRQ_WORK_INIT_HARD(kick_cpus_irq_workfn);
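Several hunks above (dsq->nr, dsq->seq, scx_watchdog_timeout, the bypass tunables) apply the same annotation discipline for data read without the lock. In isolation the pattern is (illustrative kernel-style C):

/* Writer, under the owning lock: the value cannot change beneath us,
 * but lockless readers may be looking, so mark the store. */
static void counter_add(unsigned long *ctr, long delta)
{
        WRITE_ONCE(*ctr, READ_ONCE(*ctr) + delta);
}

/* Reader, no lock held: an intentionally racy snapshot that KCSAN
 * now recognizes as such. */
static unsigned long counter_peek(const unsigned long *ctr)
{
        return READ_ONCE(*ctr);
}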
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index c5a3b0b..44c3a50 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c
@@ -543,7 +543,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, * piled up on it even if there is an idle core elsewhere on * the system. */ - waker_node = cpu_to_node(cpu); + waker_node = scx_cpu_node_if_enabled(cpu); if (!(current->flags & PF_EXITING) && cpu_rq(cpu)->scx.local_dsq.nr == 0 && (!(flags & SCX_PICK_IDLE_IN_NODE) || (waker_node == node)) && @@ -663,9 +663,8 @@ void scx_idle_init_masks(void) BUG_ON(!alloc_cpumask_var(&scx_idle_global_masks.cpu, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&scx_idle_global_masks.smt, GFP_KERNEL)); - /* Allocate per-node idle cpumasks */ - scx_idle_node_masks = kzalloc_objs(*scx_idle_node_masks, - num_possible_nodes()); + /* Allocate per-node idle cpumasks (use nr_node_ids for non-contiguous NUMA nodes) */ + scx_idle_node_masks = kzalloc_objs(*scx_idle_node_masks, nr_node_ids); BUG_ON(!scx_idle_node_masks); for_each_node(i) { @@ -861,25 +860,32 @@ static bool check_builtin_idle_enabled(struct scx_sched *sch) * code. * * We can't simply check whether @p->migration_disabled is set in a - * sched_ext callback, because migration is always disabled for the current - * task while running BPF code. + * sched_ext callback, because the BPF prolog (__bpf_prog_enter) may disable + * migration for the current task while running BPF code. * - * The prolog (__bpf_prog_enter) and epilog (__bpf_prog_exit) respectively - * disable and re-enable migration. For this reason, the current task - * inside a sched_ext callback is always a migration-disabled task. + * Since the BPF prolog calls migrate_disable() only when CONFIG_PREEMPT_RCU + * is enabled (via rcu_read_lock_dont_migrate()), migration_disabled == 1 for + * the current task is ambiguous only in that case: it could be from the BPF + * prolog rather than a real migrate_disable() call. * - * Therefore, when @p->migration_disabled == 1, check whether @p is the - * current task or not: if it is, then migration was not disabled before - * entering the callback, otherwise migration was disabled. + * Without CONFIG_PREEMPT_RCU, the BPF prolog never calls migrate_disable(), + * so migration_disabled == 1 always means the task is truly + * migration-disabled. + * + * Therefore, when migration_disabled == 1 and CONFIG_PREEMPT_RCU is enabled, + * check whether @p is the current task or not: if it is, then migration was + * not disabled before entering the callback, otherwise migration was disabled. * * Returns true if @p is migration-disabled, false otherwise. */ static bool is_bpf_migration_disabled(const struct task_struct *p) { - if (p->migration_disabled == 1) - return p != current; - else - return p->migration_disabled; + if (p->migration_disabled == 1) { + if (IS_ENABLED(CONFIG_PREEMPT_RCU)) + return p != current; + return true; + } + return p->migration_disabled; } static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
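The decision implemented by the rewritten is_bpf_migration_disabled() can be restated as a stand-alone function for inspection; preempt_rcu stands in for IS_ENABLED(CONFIG_PREEMPT_RCU) and is a parameter only so the table can be exercised in user space:

#include <stdbool.h>

static bool migration_disabled(int md_count, bool preempt_rcu, bool is_current)
{
        if (md_count == 1) {
                /* A count of 1 is ambiguous only when the BPF prolog may
                 * have taken it (rcu_read_lock_dont_migrate()). */
                if (preempt_rcu)
                        return !is_current;
                return true;
        }
        return md_count != 0;
}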
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h index 386c677..00b4505 100644 --- a/kernel/sched/ext_internal.h +++ b/kernel/sched/ext_internal.h
@@ -74,7 +74,7 @@ enum scx_exit_flags { * info communication. The following flag indicates whether ops.init() * finished successfully. */ - SCX_EFLAG_INITIALIZED, + SCX_EFLAG_INITIALIZED = 1LLU << 0, }; /* @@ -1035,26 +1035,108 @@ static const char *scx_enable_state_str[] = { }; /* - * sched_ext_entity->ops_state + * Task Ownership State Machine (sched_ext_entity->ops_state) * - * Used to track the task ownership between the SCX core and the BPF scheduler. - * State transitions look as follows: + * The sched_ext core uses this state machine to track task ownership + * between the SCX core and the BPF scheduler. This allows the BPF + * scheduler to dispatch tasks without strict ordering requirements, while + * the SCX core safely rejects invalid dispatches. * - * NONE -> QUEUEING -> QUEUED -> DISPATCHING - * ^ | | - * | v v - * \-------------------------------/ + * State Transitions * - * QUEUEING and DISPATCHING states can be waited upon. See wait_ops_state() call - * sites for explanations on the conditions being waited upon and why they are - * safe. Transitions out of them into NONE or QUEUED must store_release and the - * waiters should load_acquire. + * .------------> NONE (owned by SCX core) + * | | ^ + * | enqueue | | direct dispatch + * | v | + * | QUEUEING -------' + * | | + * | enqueue | + * | completes | + * | v + * | QUEUED (owned by BPF scheduler) + * | | + * | dispatch | + * | | + * | v + * | DISPATCHING + * | | + * | dispatch | + * | completes | + * `---------------' * - * Tracking scx_ops_state enables sched_ext core to reliably determine whether - * any given task can be dispatched by the BPF scheduler at all times and thus - * relaxes the requirements on the BPF scheduler. This allows the BPF scheduler - * to try to dispatch any task anytime regardless of its state as the SCX core - * can safely reject invalid dispatches. + * State Descriptions + * + * - %SCX_OPSS_NONE: + * Task is owned by the SCX core. It's either on a run queue, running, + * or being manipulated by the core scheduler. The BPF scheduler has no + * claim on this task. + * + * - %SCX_OPSS_QUEUEING: + * Transitional state while transferring a task from the SCX core to + * the BPF scheduler. The task's rq lock is held during this state. + * Since QUEUEING is both entered and exited under the rq lock, dequeue + * can never observe this state (it would be a BUG). When finishing a + * dispatch, if the task is still in %SCX_OPSS_QUEUEING the completion + * path busy-waits for it to leave this state (via wait_ops_state()) + * before retrying. + * + * - %SCX_OPSS_QUEUED: + * Task is owned by the BPF scheduler. It's on a DSQ (dispatch queue) + * and the BPF scheduler is responsible for dispatching it. A QSEQ + * (queue sequence number) is embedded in this state to detect + * dispatch/dequeue races: if a task is dequeued and re-enqueued, the + * QSEQ changes and any in-flight dispatch operations targeting the old + * QSEQ are safely ignored. + * + * - %SCX_OPSS_DISPATCHING: + * Transitional state while transferring a task from the BPF scheduler + * back to the SCX core. This state indicates the BPF scheduler has + * selected the task for execution. When dequeue needs to take the task + * off a DSQ and it is still in %SCX_OPSS_DISPATCHING, the dequeue path + * busy-waits for it to leave this state (via wait_ops_state()) before + * proceeding. Exits to %SCX_OPSS_NONE when dispatch completes. 
+ * + * Memory Ordering + * + * Transitions out of %SCX_OPSS_QUEUEING and %SCX_OPSS_DISPATCHING into + * %SCX_OPSS_NONE or %SCX_OPSS_QUEUED must use atomic_long_set_release() + * and waiters must use atomic_long_read_acquire(). This ensures proper + * synchronization between concurrent operations. + * + * Cross-CPU Task Migration + * + * When moving a task in the %SCX_OPSS_DISPATCHING state, we can't simply + * grab the target CPU's rq lock because a concurrent dequeue might be + * waiting on %SCX_OPSS_DISPATCHING while holding the source rq lock + * (deadlock). + * + * The sched_ext core uses a "lock dancing" protocol coordinated by + * p->scx.holding_cpu. When moving a task to a different rq: + * + * 1. Verify task can be moved (CPU affinity, migration_disabled, etc.) + * 2. Set p->scx.holding_cpu to the current CPU + * 3. Set task state to %SCX_OPSS_NONE; dequeue waits while DISPATCHING + * is set, so clearing DISPATCHING first prevents the circular wait + * (safe to lock the rq we need) + * 4. Unlock the current CPU's rq + * 5. Lock src_rq (where the task currently lives) + * 6. Verify p->scx.holding_cpu == current CPU, if not, dequeue won the + * race (dequeue clears holding_cpu to -1 when it takes the task), in + * this case migration is aborted + * 7. If src_rq == dst_rq: clear holding_cpu and enqueue directly + * into dst_rq's local DSQ (no lock swap needed) + * 8. Otherwise: call move_remote_task_to_local_dsq(), which releases + * src_rq, locks dst_rq, and performs the deactivate/activate + * migration cycle (dst_rq is held on return) + * 9. Unlock dst_rq and re-lock the current CPU's rq to restore + * the lock state expected by the caller + * + * If any verification fails, abort the migration. + * + * This state tracking allows the BPF scheduler to try to dispatch any task + * at any time regardless of its state. The SCX core can safely + * reject/ignore invalid dispatches, simplifying the BPF scheduler + * implementation. */ enum scx_ops_state { SCX_OPSS_NONE, /* owned by the SCX core */
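The release/acquire pairing called out under "Memory Ordering" above is the load-bearing part of this handoff. Below is a minimal userspace sketch of that pattern; the names and types are illustrative rather than the kernel's (atomic_long_set_release()/atomic_long_read_acquire() map onto the C11 calls used here).

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

enum opss { OPSS_NONE, OPSS_QUEUED, OPSS_DISPATCHING };

static atomic_long ops_state = OPSS_QUEUED;
static long payload;	/* data the release/acquire pair protects */

static void *dispatcher(void *arg)
{
	long expected = OPSS_QUEUED;

	(void)arg;
	/* claim the task: QUEUED -> DISPATCHING */
	if (atomic_compare_exchange_strong(&ops_state, &expected,
					   OPSS_DISPATCHING)) {
		payload = 42;	/* work done while exclusively owned */
		/* transitions out of DISPATCHING must be store-release */
		atomic_store_explicit(&ops_state, OPSS_NONE,
				      memory_order_release);
	}
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, dispatcher, NULL);

	/*
	 * The load-acquire pairs with the store-release above, so once
	 * NONE is observed, payload is guaranteed to be visible.
	 */
	while (atomic_load_explicit(&ops_state, memory_order_acquire) !=
	       OPSS_NONE)
		;	/* the kernel busy-waits like this in wait_ops_state() */

	printf("payload = %ld\n", payload);	/* 42 */
	pthread_join(t, NULL);
	return 0;
}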
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index eea99ec0..ab411471 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c
@@ -589,6 +589,21 @@ static inline bool entity_before(const struct sched_entity *a, return vruntime_cmp(a->deadline, "<", b->deadline); } +/* + * Per avg_vruntime() below, cfs_rq::zero_vruntime is only slightly stale + * and this value should be no more than two lag bounds, which puts it in the + * general order of: + * + * (slice + TICK_NSEC) << NICE_0_LOAD_SHIFT + * + * which is around 44 bits in size (on 64bit); that is 20 for + * NICE_0_LOAD_SHIFT, another 20 for NSEC_PER_MSEC and then a handful for + * however many msec the actual slice+tick ends up being. + * + * (disregarding the actual divide-by-weight part makes for the worst case + * weight of 2, which nicely cancels vs the fuzz in zero_vruntime not actually + * being the zero-lag point). + */ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se) { return vruntime_op(se->vruntime, "-", cfs_rq->zero_vruntime); } @@ -676,41 +691,65 @@ sum_w_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se) } static inline -void sum_w_vruntime_update(struct cfs_rq *cfs_rq, s64 delta) +void update_zero_vruntime(struct cfs_rq *cfs_rq, s64 delta) { /* - * v' = v + d ==> sum_w_vruntime' = sum_runtime - d*sum_weight + * v' = v + d ==> sum_w_vruntime' = sum_w_vruntime - d*sum_weight */ cfs_rq->sum_w_vruntime -= cfs_rq->sum_weight * delta; + cfs_rq->zero_vruntime += delta; } /* - * Specifically: avg_runtime() + 0 must result in entity_eligible() := true + * Specifically: avg_vruntime() + 0 must result in entity_eligible() := true * For this to be so, the result of this function must have a left bias. + * + * Called in: + * - place_entity() -- before enqueue + * - update_entity_lag() -- before dequeue + * - update_deadline() -- slice expiration + * + * This means it is one entry 'behind' but that puts it close enough to where + * the bound on entity_key() is at most two lag bounds. */ u64 avg_vruntime(struct cfs_rq *cfs_rq) { struct sched_entity *curr = cfs_rq->curr; - s64 avg = cfs_rq->sum_w_vruntime; - long load = cfs_rq->sum_weight; + long weight = cfs_rq->sum_weight; + s64 delta = 0; - if (curr && curr->on_rq) { - unsigned long weight = scale_load_down(curr->load.weight); + if (curr && !curr->on_rq) + curr = NULL; - avg += entity_key(cfs_rq, curr) * weight; - load += weight; - } + if (weight) { + s64 runtime = cfs_rq->sum_w_vruntime; - if (load) { + if (curr) { + unsigned long w = scale_load_down(curr->load.weight); + + runtime += entity_key(cfs_rq, curr) * w; + weight += w; + } + /* sign flips effective floor / ceiling */ - if (avg < 0) - avg -= (load - 1); - avg = div_s64(avg, load); + if (runtime < 0) + runtime -= (weight - 1); + + delta = div_s64(runtime, weight); + } else if (curr) { + /* + * When there is but one element, it is the average. + */ + delta = curr->vruntime - cfs_rq->zero_vruntime; } - return cfs_rq->zero_vruntime + avg; + update_zero_vruntime(cfs_rq, delta); + + return cfs_rq->zero_vruntime; } +static inline u64 cfs_rq_max_slice(struct cfs_rq *cfs_rq); + /* * lag_i = S - s_i = w_i * (V - v_i) * @@ -724,17 +763,16 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq) * EEVDF gives the following limit for a steady state system: * * -r_max < lag < max(r_max, q) - * - * XXX could add max_slice to the augmented data to track this.
*/ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) { + u64 max_slice = cfs_rq_max_slice(cfs_rq) + TICK_NSEC; s64 vlag, limit; WARN_ON_ONCE(!se->on_rq); vlag = avg_vruntime(cfs_rq) - se->vruntime; - limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); + limit = calc_delta_fair(max_slice, se); se->vlag = clamp(vlag, -limit, limit); } @@ -777,16 +815,6 @@ int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se) return vruntime_eligible(cfs_rq, se->vruntime); } -static void update_zero_vruntime(struct cfs_rq *cfs_rq) -{ - u64 vruntime = avg_vruntime(cfs_rq); - s64 delta = vruntime_op(vruntime, "-", cfs_rq->zero_vruntime); - - sum_w_vruntime_update(cfs_rq, delta); - - cfs_rq->zero_vruntime = vruntime; -} - static inline u64 cfs_rq_min_slice(struct cfs_rq *cfs_rq) { struct sched_entity *root = __pick_root_entity(cfs_rq); @@ -802,6 +830,21 @@ static inline u64 cfs_rq_min_slice(struct cfs_rq *cfs_rq) return min_slice; } +static inline u64 cfs_rq_max_slice(struct cfs_rq *cfs_rq) +{ + struct sched_entity *root = __pick_root_entity(cfs_rq); + struct sched_entity *curr = cfs_rq->curr; + u64 max_slice = 0ULL; + + if (curr && curr->on_rq) + max_slice = curr->slice; + + if (root) + max_slice = max(max_slice, root->max_slice); + + return max_slice; +} + static inline bool __entity_less(struct rb_node *a, const struct rb_node *b) { return entity_before(__node_2_se(a), __node_2_se(b)); @@ -826,6 +869,15 @@ static inline void __min_slice_update(struct sched_entity *se, struct rb_node *n } } +static inline void __max_slice_update(struct sched_entity *se, struct rb_node *node) +{ + if (node) { + struct sched_entity *rse = __node_2_se(node); + if (rse->max_slice > se->max_slice) + se->max_slice = rse->max_slice; + } +} + /* * se->min_vruntime = min(se->vruntime, {left,right}->min_vruntime) */ @@ -833,6 +885,7 @@ static inline bool min_vruntime_update(struct sched_entity *se, bool exit) { u64 old_min_vruntime = se->min_vruntime; u64 old_min_slice = se->min_slice; + u64 old_max_slice = se->max_slice; struct rb_node *node = &se->run_node; se->min_vruntime = se->vruntime; @@ -843,8 +896,13 @@ static inline bool min_vruntime_update(struct sched_entity *se, bool exit) __min_slice_update(se, node->rb_right); __min_slice_update(se, node->rb_left); + se->max_slice = se->slice; + __max_slice_update(se, node->rb_right); + __max_slice_update(se, node->rb_left); + return se->min_vruntime == old_min_vruntime && - se->min_slice == old_min_slice; + se->min_slice == old_min_slice && + se->max_slice == old_max_slice; } RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity, @@ -856,7 +914,6 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity, static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { sum_w_vruntime_add(cfs_rq, se); - update_zero_vruntime(cfs_rq); se->min_vruntime = se->vruntime; se->min_slice = se->slice; rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline, @@ -868,7 +925,6 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline, &min_vruntime_cb); sum_w_vruntime_sub(cfs_rq, se); - update_zero_vruntime(cfs_rq); } struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq) @@ -1075,6 +1131,7 @@ static bool update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se) * EEVDF: vd_i = ve_i + r_i / w_i */ se->deadline = se->vruntime + calc_delta_fair(se->slice, se); + avg_vruntime(cfs_rq); /* * The task has 
consumed its request, reschedule. @@ -3790,6 +3847,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight) { bool curr = cfs_rq->curr == se; + bool rel_vprot = false; + u64 vprot; if (se->on_rq) { /* commit outstanding execution time */ @@ -3797,6 +3856,11 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, update_entity_lag(cfs_rq, se); se->deadline -= se->vruntime; se->rel_deadline = 1; + if (curr && protect_slice(se)) { + vprot = se->vprot - se->vruntime; + rel_vprot = true; + } + cfs_rq->nr_queued--; if (!curr) __dequeue_entity(cfs_rq, se); @@ -3812,6 +3876,9 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, if (se->rel_deadline) se->deadline = div_s64(se->deadline * se->load.weight, weight); + if (rel_vprot) + vprot = div_s64(vprot * se->load.weight, weight); + update_load_set(&se->load, weight); do { @@ -3823,6 +3890,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, enqueue_load_avg(cfs_rq, se); if (se->on_rq) { place_entity(cfs_rq, se, 0); + if (rel_vprot) + se->vprot = se->vruntime + vprot; update_load_add(&cfs_rq->load, se->load.weight); if (!curr) __enqueue_entity(cfs_rq, se); @@ -5420,7 +5489,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) } static void -set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) +set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, bool first) { clear_buddies(cfs_rq, se); @@ -5435,7 +5504,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) __dequeue_entity(cfs_rq, se); update_load_avg(cfs_rq, se, UPDATE_TG); - set_protect_slice(cfs_rq, se); + if (first) + set_protect_slice(cfs_rq, se); } update_stats_curr_start(cfs_rq, se); @@ -8948,13 +9018,13 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf pse = parent_entity(pse); } if (se_depth >= pse_depth) { - set_next_entity(cfs_rq_of(se), se); + set_next_entity(cfs_rq_of(se), se, true); se = parent_entity(se); } } put_prev_entity(cfs_rq, pse); - set_next_entity(cfs_rq, se); + set_next_entity(cfs_rq, se, true); __set_next_task_fair(rq, p, true); } @@ -9054,7 +9124,7 @@ static void yield_task_fair(struct rq *rq) */ if (entity_eligible(cfs_rq, se)) { se->vruntime = se->deadline; - se->deadline += calc_delta_fair(se->slice, se); + update_deadline(cfs_rq, se); } } @@ -12908,7 +12978,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf) t0 = sched_clock_cpu(this_cpu); __sched_balance_update_blocked_averages(this_rq); - this_rq->next_class = &fair_sched_class; + rq_modified_begin(this_rq, &fair_sched_class); raw_spin_rq_unlock(this_rq); for_each_domain(this_cpu, sd) { @@ -12975,7 +13045,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf) pulled_task = 1; /* If a higher prio class was modified, restart the pick */ - if (sched_class_above(this_rq->next_class, &fair_sched_class)) + if (rq_modified_above(this_rq, &fair_sched_class)) pulled_task = -1; out: @@ -13568,7 +13638,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first) for_each_sched_entity(se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); - set_next_entity(cfs_rq, se); + set_next_entity(cfs_rq, se, first); /* ensure bandwidth has been allocated on our new cfs_rq */ account_cfs_rq_runtime(cfs_rq, 0); }
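The reworked avg_vruntime() above folds the running entity into the weighted sum and applies a sign-dependent bias so the division floors instead of truncating toward zero, giving the left bias the comment requires. A standalone sketch of just that arithmetic, with made-up weights and without the kernel's scale_load_down()/div_s64() helpers:

#include <stdio.h>

struct entity { long long vruntime; long weight; };

/*
 * zero_vruntime is a nearby reference point; keys are kept relative to it
 * so the weighted sum stays small (the ~44-bit bound discussed above).
 */
static long long avg_vruntime(const struct entity *v, int n,
			      long long zero_vruntime)
{
	long long sum_w_vruntime = 0;
	long long sum_weight = 0;
	int i;

	for (i = 0; i < n; i++) {
		sum_w_vruntime += (v[i].vruntime - zero_vruntime) * v[i].weight;
		sum_weight += v[i].weight;
	}
	if (!sum_weight)
		return zero_vruntime;

	/* bias negative sums so the division floors (left bias) */
	if (sum_w_vruntime < 0)
		sum_w_vruntime -= sum_weight - 1;

	return zero_vruntime + sum_w_vruntime / sum_weight;
}

int main(void)
{
	const struct entity rq[] = { { 100, 1024 }, { 220, 512 }, { 40, 2048 } };

	printf("V = %lld\n", avg_vruntime(rq, 3, 100));	/* V = 82 */
	return 0;
}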
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 3681b6a..a83be0c 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c
@@ -161,6 +161,14 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, return cpuidle_enter(drv, dev, next_state); } +static void idle_call_stop_or_retain_tick(bool stop_tick) +{ + if (stop_tick || tick_nohz_tick_stopped()) + tick_nohz_idle_stop_tick(); + else + tick_nohz_idle_retain_tick(); +} + /** * cpuidle_idle_call - the main idle function * @@ -170,7 +178,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, * set, and it returns with polling set. If it ever stops polling, it * must clear the polling bit. */ -static void cpuidle_idle_call(void) +static void cpuidle_idle_call(bool stop_tick) { struct cpuidle_device *dev = cpuidle_get_device(); struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); @@ -186,7 +194,7 @@ static void cpuidle_idle_call(void) } if (cpuidle_not_available(drv, dev)) { - tick_nohz_idle_stop_tick(); + idle_call_stop_or_retain_tick(stop_tick); default_idle_call(); goto exit_idle; @@ -221,24 +229,35 @@ static void cpuidle_idle_call(void) next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns); call_cpuidle(drv, dev, next_state); - } else { - bool stop_tick = true; + } else if (drv->state_count > 1) { + /* + * cpuidle governors expect stop_tick to be true by default, + * which allows them to select idle states with a target + * residency above the tick period length. + */ + stop_tick = true; /* * Ask the cpuidle framework to choose a convenient idle state. */ next_state = cpuidle_select(drv, dev, &stop_tick); - if (stop_tick || tick_nohz_tick_stopped()) - tick_nohz_idle_stop_tick(); - else - tick_nohz_idle_retain_tick(); + idle_call_stop_or_retain_tick(stop_tick); entered_state = call_cpuidle(drv, dev, next_state); /* * Give the governor an opportunity to reflect on the outcome */ cpuidle_reflect(dev, entered_state); + } else { + idle_call_stop_or_retain_tick(stop_tick); + + /* + * If there is only a single idle state (or none), there is + * nothing meaningful for the governor to choose. Skip the + * governor and always use state 0. + */ + call_cpuidle(drv, dev, 0); } exit_idle: @@ -259,6 +278,7 @@ static void cpuidle_idle_call(void) static void do_idle(void) { int cpu = smp_processor_id(); + bool got_tick = false; /* * Check if we need to update blocked load @@ -329,8 +349,9 @@ static void do_idle(void) tick_nohz_idle_restart_tick(); cpu_idle_poll(); } else { - cpuidle_idle_call(); + cpuidle_idle_call(got_tick); } + got_tick = tick_nohz_idle_got_tick(); arch_cpu_idle_exit(); }
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 3b725d3..ef152d4 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c
@@ -123,8 +123,6 @@ int housekeeping_update(struct cpumask *isol_mask) struct cpumask *trial, *old = NULL; int err; - lockdep_assert_cpus_held(); - trial = kmalloc(cpumask_size(), GFP_KERNEL); if (!trial) return -ENOMEM; @@ -136,7 +134,7 @@ int housekeeping_update(struct cpumask *isol_mask) } if (!housekeeping.flags) - static_branch_enable_cpuslocked(&housekeeping_overridden); + static_branch_enable(&housekeeping_overridden); if (housekeeping.flags & HK_FLAG_DOMAIN) old = housekeeping_cpumask_dereference(HK_TYPE_DOMAIN);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b82fb70..1ef9ba4 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h
@@ -805,9 +805,12 @@ struct scx_rq { cpumask_var_t cpus_to_kick_if_idle; cpumask_var_t cpus_to_preempt; cpumask_var_t cpus_to_wait; + cpumask_var_t cpus_to_sync; + bool kick_sync_pending; unsigned long kick_sync; local_t reenq_local_deferred; struct balance_callback deferred_bal_cb; + struct balance_callback kick_sync_bal_cb; struct irq_work deferred_irq_work; struct irq_work kick_cpus_irq_work; struct scx_dispatch_q bypass_dsq; @@ -2748,6 +2751,17 @@ static inline const struct sched_class *next_active_class(const struct sched_cla #define sched_class_above(_a, _b) ((_a) < (_b)) +static inline void rq_modified_begin(struct rq *rq, const struct sched_class *class) +{ + if (sched_class_above(rq->next_class, class)) + rq->next_class = class; +} + +static inline bool rq_modified_above(struct rq *rq, const struct sched_class *class) +{ + return sched_class_above(rq->next_class, class); +} + static inline bool sched_stop_runnable(struct rq *rq) { return rq->stop && task_on_rq_queued(rq->stop);
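rq_modified_begin()/rq_modified_above() implement a small "watch for higher-priority modifications while the rq lock is dropped" protocol. A self-contained sketch of the idea follows; the raise side (rq_modified() here) is hypothetical, standing in for whatever enqueue path updates rq->next_class in the real code.

#include <stdio.h>

/*
 * Lower rank == higher scheduling class, mirroring sched_class_above(),
 * which compares class addresses.
 */
struct sched_klass { int rank; const char *name; };

static const struct sched_klass rt_class   = { 1, "rt" };
static const struct sched_klass fair_class = { 2, "fair" };

static const struct sched_klass *next_class = &fair_class;

static int class_above(const struct sched_klass *a, const struct sched_klass *b)
{
	return a->rank < b->rank;
}

/* Arm the watch: lower the marker to our class before dropping the lock. */
static void rq_modified_begin(const struct sched_klass *class)
{
	if (class_above(next_class, class))
		next_class = class;
}

/* Hypothetical raise side, standing in for the enqueue path. */
static void rq_modified(const struct sched_klass *class)
{
	if (class_above(class, next_class))
		next_class = class;
}

/* After re-taking the lock: did anything above us change meanwhile? */
static int rq_modified_above(const struct sched_klass *class)
{
	return class_above(next_class, class);
}

int main(void)
{
	rq_modified_begin(&fair_class);	/* newidle balance, lock dropped */
	rq_modified(&rt_class);		/* an RT task was enqueued meanwhile */
	if (rq_modified_above(&fair_class))
		printf("restart the pick\n");
	return 0;
}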
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c index 6f10db36..cadb0e9 100644 --- a/kernel/sched/syscalls.c +++ b/kernel/sched/syscalls.c
@@ -284,6 +284,35 @@ static bool check_same_owner(struct task_struct *p) uid_eq(cred->euid, pcred->uid)); } +#ifdef CONFIG_RT_MUTEXES +static inline void __setscheduler_dl_pi(int newprio, int policy, + struct task_struct *p, + struct sched_change_ctx *scope) +{ + /* + * In case a DEADLINE task (either proper or boosted) gets + * setscheduled to a lower priority class, check if it needs to + * inherit parameters from a potential pi_task. In that case make + * sure replenishment happens with the next enqueue. + */ + + if (dl_prio(newprio) && !dl_policy(policy)) { + struct task_struct *pi_task = rt_mutex_get_top_task(p); + + if (pi_task) { + p->dl.pi_se = pi_task->dl.pi_se; + scope->flags |= ENQUEUE_REPLENISH; + } + } +} +#else /* !CONFIG_RT_MUTEXES */ +static inline void __setscheduler_dl_pi(int newprio, int policy, + struct task_struct *p, + struct sched_change_ctx *scope) +{ +} +#endif /* !CONFIG_RT_MUTEXES */ + #ifdef CONFIG_UCLAMP_TASK static int uclamp_validate(struct task_struct *p, @@ -655,6 +684,7 @@ int __sched_setscheduler(struct task_struct *p, __setscheduler_params(p, attr); p->sched_class = next_class; p->prio = newprio; + __setscheduler_dl_pi(newprio, policy, p, scope); } __setscheduler_uclamp(p, attr);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9d3a666f..c9efb17 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c
@@ -1118,7 +1118,7 @@ int proc_do_large_bitmap(const struct ctl_table *table, int dir, unsigned long bitmap_len = table->maxlen; unsigned long *bitmap = *(unsigned long **) table->data; unsigned long *tmp_bitmap = NULL; - char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c; + char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c = 0; if (!bitmap || !bitmap_len || !left || (*ppos && SYSCTL_KERN_TO_USER(dir))) { *lenp = 0;
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 069d93b..b64db40 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c
@@ -540,7 +540,7 @@ static s64 alarm_timer_forward(struct k_itimer *timr, ktime_t now) { struct alarm *alarm = &timr->it.alarm.alarmtimer; - return alarm_forward(alarm, timr->it_interval, now); + return alarm_forward(alarm, now, timr->it_interval); } /**
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index a5c7d15..9daf8c5 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c
@@ -256,8 +256,6 @@ EXPORT_SYMBOL(proc_dointvec_jiffies); int proc_dointvec_userhz_jiffies(const struct ctl_table *table, int dir, void *buffer, size_t *lenp, loff_t *ppos) { - if (SYSCTL_USER_TO_KERN(dir) && USER_HZ < HZ) - return -EINVAL; return proc_dointvec_conv(table, dir, buffer, lenp, ppos, do_proc_int_conv_userhz_jiffies); }
diff --git a/kernel/time/time.c b/kernel/time/time.c index 0ba8e3c..0d83231 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c
@@ -365,20 +365,16 @@ SYSCALL_DEFINE1(adjtimex_time32, struct old_timex32 __user *, utp) } #endif +#if HZ > MSEC_PER_SEC || (MSEC_PER_SEC % HZ) /** * jiffies_to_msecs - Convert jiffies to milliseconds * @j: jiffies value * - * Avoid unnecessary multiplications/divisions in the - * two most common HZ cases. - * * Return: milliseconds value */ unsigned int jiffies_to_msecs(const unsigned long j) { -#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) - return (MSEC_PER_SEC / HZ) * j; -#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) +#if HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); #else # if BITS_PER_LONG == 32 @@ -390,7 +386,9 @@ unsigned int jiffies_to_msecs(const unsigned long j) #endif } EXPORT_SYMBOL(jiffies_to_msecs); +#endif +#if (USEC_PER_SEC % HZ) /** * jiffies_to_usecs - Convert jiffies to microseconds * @j: jiffies value @@ -405,17 +403,14 @@ unsigned int jiffies_to_usecs(const unsigned long j) */ BUILD_BUG_ON(HZ > USEC_PER_SEC); -#if !(USEC_PER_SEC % HZ) - return (USEC_PER_SEC / HZ) * j; -#else -# if BITS_PER_LONG == 32 +#if BITS_PER_LONG == 32 return (HZ_TO_USEC_MUL32 * j) >> HZ_TO_USEC_SHR32; -# else +#else return (j * HZ_TO_USEC_NUM) / HZ_TO_USEC_DEN; -# endif #endif } EXPORT_SYMBOL(jiffies_to_usecs); +#endif /** * mktime64 - Converts date to seconds. @@ -702,7 +697,7 @@ EXPORT_SYMBOL(clock_t_to_jiffies); * * Return: jiffies_64 value converted to 64-bit "clock_t" (CLOCKS_PER_SEC) */ -u64 jiffies_64_to_clock_t(u64 x) +notrace u64 jiffies_64_to_clock_t(u64 x) { #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 # if HZ < USER_HZ
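The new #if guards compile the out-of-line jiffies_to_msecs() only when the trivial constant-multiply case does not apply (presumably that case now resolves to an inline elsewhere). A runtime sketch of the same case analysis, with HZ as a parameter instead of a compile-time constant; the generic fallback here uses a plain multiply/divide rather than the kernel's overflow-safe mul/shift:

#include <stdio.h>

#define MSEC_PER_SEC 1000UL

static unsigned long jiffies_to_msecs(unsigned long j, unsigned long hz)
{
	if (hz <= MSEC_PER_SEC && !(MSEC_PER_SEC % hz))
		return (MSEC_PER_SEC / hz) * j;		/* HZ = 100, 250, 1000 */
	if (hz > MSEC_PER_SEC && !(hz % MSEC_PER_SEC))
		return (j + hz / MSEC_PER_SEC - 1) / (hz / MSEC_PER_SEC);
	return j * MSEC_PER_SEC / hz;			/* e.g. HZ = 300; may overflow */
}

int main(void)
{
	printf("%lu\n", jiffies_to_msecs(250, 250));	/* 1000 ms */
	printf("%lu\n", jiffies_to_msecs(3, 2000));	/* 2 ms, rounded up */
	printf("%lu\n", jiffies_to_msecs(3, 300));	/* 10 ms */
	return 0;
}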
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 91fa200..c07e562 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c
@@ -2653,7 +2653,8 @@ static int timekeeping_validate_timex(const struct __kernel_timex *txc, bool aux if (aux_clock) { /* Auxiliary clocks are similar to TAI and do not have leap seconds */ - if (txc->status & (STA_INS | STA_DEL)) + if (txc->modes & ADJ_STATUS && + txc->status & (STA_INS | STA_DEL)) return -EINVAL; /* No TAI offset setting */ @@ -2661,7 +2662,8 @@ static int timekeeping_validate_timex(const struct __kernel_timex *txc, bool aux return -EINVAL; /* No PPS support either */ - if (txc->status & (STA_PPSFREQ | STA_PPSTIME)) + if (txc->modes & ADJ_STATUS && + txc->status & (STA_PPSFREQ | STA_PPSTIME)) return -EINVAL; }
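The fix above gates the STA_* checks on ADJ_STATUS: a caller that is not updating the status word may legitimately pass a stale or garbage status field, and rejecting it would break such callers. A tiny sketch, with the constants copied from the uapi headers for illustration and an invented struct name:

#include <stdio.h>

#define ADJ_STATUS	0x0010	/* from uapi/linux/timex.h */
#define STA_INS		0x0010
#define STA_DEL		0x0020

struct timex_sketch { unsigned int modes; int status; };

/*
 * Only reject leap-second bits when the caller actually asked to update
 * the status word; otherwise the field is ignored and may hold garbage.
 */
static int validate_aux(const struct timex_sketch *txc)
{
	if (txc->modes & ADJ_STATUS && txc->status & (STA_INS | STA_DEL))
		return -22;	/* -EINVAL */
	return 0;
}

int main(void)
{
	struct timex_sketch stale = { .modes = 0,          .status = STA_INS };
	struct timex_sketch bad   = { .modes = ADJ_STATUS, .status = STA_INS };

	printf("%d %d\n", validate_aux(&stale), validate_aux(&bad));	/* 0 -22 */
	return 0;
}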
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c index c1ed0d5..155eeae 100644 --- a/kernel/time/timer_migration.c +++ b/kernel/time/timer_migration.c
@@ -1559,8 +1559,6 @@ int tmigr_isolated_exclude_cpumask(struct cpumask *exclude_cpumask) cpumask_var_t cpumask __free(free_cpumask_var) = CPUMASK_VAR_NULL; int cpu; - lockdep_assert_cpus_held(); - if (!works) return -ENOMEM; if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) @@ -1570,6 +1568,7 @@ int tmigr_isolated_exclude_cpumask(struct cpumask *exclude_cpumask) * First set previously isolated CPUs as available (unisolate). * This cpumask contains only CPUs that switched to available now. */ + guard(cpus_read_lock)(); cpumask_andnot(cpumask, cpu_online_mask, exclude_cpumask); cpumask_andnot(cpumask, cpumask, tmigr_available_cpumask); @@ -1626,7 +1625,6 @@ static int __init tmigr_init_isolation(void) cpumask_andnot(cpumask, cpu_possible_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)); /* Protect against RCU torture hotplug testing */ - guard(cpus_read_lock)(); return tmigr_isolated_exclude_cpumask(cpumask); } late_initcall(tmigr_init_isolation);
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 30259dca..8cd2520 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c
@@ -383,8 +383,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, cpu = raw_smp_processor_id(); if (blk_tracer) { - tracing_record_cmdline(current); - buffer = blk_tr->array_buffer.buffer; trace_ctx = tracing_gen_ctx_flags(0); switch (bt->version) { @@ -419,6 +417,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, if (!event) return; + tracing_record_cmdline(current); switch (bt->version) { case 1: record_blktrace_event(ring_buffer_event_data(event),
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 9bc0dfd..af7079a 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c
@@ -2454,8 +2454,10 @@ static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link, struct seq_file *seq) { struct bpf_kprobe_multi_link *kmulti_link; + bool has_cookies; kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); + has_cookies = !!kmulti_link->cookies; seq_printf(seq, "kprobe_cnt:\t%u\n" @@ -2467,7 +2469,7 @@ static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link, for (int i = 0; i < kmulti_link->cnt; i++) { seq_printf(seq, "%llu\t %pS\n", - kmulti_link->cookies[i], + has_cookies ? kmulti_link->cookies[i] : 0, (void *)kmulti_link->addrs[i]); } } @@ -2750,6 +2752,10 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (!is_kprobe_multi(prog)) return -EINVAL; + /* kprobe_multi is not allowed to be sleepable. */ + if (prog->sleepable) + return -EINVAL; + /* Writing to context is not allowed for kprobes. */ if (prog->aux->kprobe_write_ctx) return -EINVAL;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 827fb9a..4133109 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c
@@ -6404,6 +6404,7 @@ int update_ftrace_direct_add(struct ftrace_ops *ops, struct ftrace_hash *hash) new_filter_hash = old_filter_hash; } } else { + guard(mutex)(&ftrace_lock); err = ftrace_update_ops(ops, new_filter_hash, EMPTY_HASH); /* * new_filter_hash is dup-ed, so we need to release it anyway, @@ -6530,6 +6531,7 @@ int update_ftrace_direct_del(struct ftrace_ops *ops, struct ftrace_hash *hash) ops->func_hash->filter_hash = NULL; } } else { + guard(mutex)(&ftrace_lock); err = ftrace_update_ops(ops, new_filter_hash, EMPTY_HASH); /* * new_filter_hash is dup-ed, so we need to release it anyway, @@ -6604,9 +6606,9 @@ int update_ftrace_direct_mod(struct ftrace_ops *ops, struct ftrace_hash *hash, b if (!orig_hash) goto unlock; - /* Enable the tmp_ops to have the same functions as the direct ops */ + /* Enable the tmp_ops to have the same functions as the hash object. */ ftrace_ops_init(&tmp_ops); - tmp_ops.func_hash = ops->func_hash; + tmp_ops.func_hash->filter_hash = hash; err = register_ftrace_function_nolock(&tmp_ops); if (err) @@ -8611,6 +8613,7 @@ ftrace_pid_follow_sched_process_fork(void *data, struct trace_pid_list *pid_list; struct trace_array *tr = data; + guard(preempt)(); pid_list = rcu_dereference_sched(tr->function_pids); trace_filter_add_remove_task(pid_list, self, task); @@ -8624,6 +8627,7 @@ ftrace_pid_follow_sched_process_exit(void *data, struct task_struct *task) struct trace_pid_list *pid_list; struct trace_array *tr = data; + guard(preempt)(); pid_list = rcu_dereference_sched(tr->function_pids); trace_filter_add_remove_task(pid_list, NULL, task);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f16f053e..170170b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c
@@ -2053,7 +2053,7 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer) entries += ret; entry_bytes += local_read(&head_page->page->commit); - local_set(&cpu_buffer->head_page->entries, ret); + local_set(&head_page->entries, ret); if (head_page == cpu_buffer->commit_page) break; @@ -7310,6 +7310,27 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu, return err; } +/* + * This is called when a VMA is duplicated (e.g., on fork()) to increment + * the user_mapped counter without remapping pages. + */ +void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu) +{ + struct ring_buffer_per_cpu *cpu_buffer; + + if (WARN_ON(!cpumask_test_cpu(cpu, buffer->cpumask))) + return; + + cpu_buffer = buffer->buffers[cpu]; + + guard(mutex)(&cpu_buffer->mapping_lock); + + if (cpu_buffer->user_mapped) + __rb_inc_dec_mapped(cpu_buffer, true); + else + WARN(1, "Unexpected buffer state, it should be mapped"); +} + int ring_buffer_unmap(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 23de371..a626211 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c
@@ -555,7 +555,7 @@ static bool update_marker_trace(struct trace_array *tr, int enabled) lockdep_assert_held(&event_mutex); if (enabled) { - if (!list_empty(&tr->marker_list)) + if (tr->trace_flags & TRACE_ITER(COPY_MARKER)) return false; list_add_rcu(&tr->marker_list, &marker_copies); @@ -563,10 +563,10 @@ static bool update_marker_trace(struct trace_array *tr, int enabled) return true; } - if (list_empty(&tr->marker_list)) + if (!(tr->trace_flags & TRACE_ITER(COPY_MARKER))) return false; - list_del_init(&tr->marker_list); + list_del_rcu(&tr->marker_list); tr->trace_flags &= ~TRACE_ITER(COPY_MARKER); return true; } @@ -6784,6 +6784,23 @@ char *trace_user_fault_read(struct trace_user_buf_info *tinfo, do { /* + * It is possible that something is trying to migrate this + * task. What happens then is that when preemption is enabled, + * the migration thread will preempt this task, try to + * migrate it, fail, then let it run again. That will + * cause this to loop again and never succeed. + * On failures, enable and then disable preemption with + * migration enabled, to allow the migration thread to + * migrate this task. + */ + if (trys) { + preempt_enable_notrace(); + preempt_disable_notrace(); + cpu = smp_processor_id(); + buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf; + } + + /* * If for some reason, copy_from_user() always causes a context * switch, this would then cause an infinite loop. * If this task is preempted by another user space task, it @@ -8213,6 +8230,18 @@ static inline int get_snapshot_map(struct trace_array *tr) { return 0; } static inline void put_snapshot_map(struct trace_array *tr) { } #endif +/* + * This is called when a VMA is duplicated (e.g., on fork()) to increment + * the user_mapped counter without remapping pages. + */ +static void tracing_buffers_mmap_open(struct vm_area_struct *vma) +{ + struct ftrace_buffer_info *info = vma->vm_file->private_data; + struct trace_iterator *iter = &info->iter; + + ring_buffer_map_dup(iter->array_buffer->buffer, iter->cpu_file); +} + static void tracing_buffers_mmap_close(struct vm_area_struct *vma) { struct ftrace_buffer_info *info = vma->vm_file->private_data; @@ -8232,6 +8261,7 @@ static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long a } static const struct vm_operations_struct tracing_buffers_vmops = { + .open = tracing_buffers_mmap_open, .close = tracing_buffers_mmap_close, .may_split = tracing_buffers_may_split, }; @@ -9337,7 +9367,7 @@ static void setup_trace_scratch(struct trace_array *tr, } static int -allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) +allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, unsigned long size) { enum ring_buffer_flags rb_flags; struct trace_scratch *tscratch; @@ -9392,7 +9422,7 @@ static void free_trace_buffer(struct array_buffer *buf) } } -static int allocate_trace_buffers(struct trace_array *tr, int size) +static int allocate_trace_buffers(struct trace_array *tr, unsigned long size) { int ret; @@ -9731,18 +9761,19 @@ static int __remove_instance(struct trace_array *tr) list_del(&tr->list); + if (printk_trace == tr) + update_printk_trace(&global_trace); + + /* Must be done before disabling all the flags */ + if (update_marker_trace(tr, 0)) + synchronize_rcu(); + /* Disable all the flags that were enabled coming in */ for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) { if ((1ULL << i) & ZEROED_TRACE_FLAGS) set_tracer_flag(tr, 1ULL << i, 0); } - if (printk_trace == tr) - update_printk_trace(&global_trace); - - if
(update_marker_trace(tr, 0)) - synchronize_rcu(); - tracing_set_nop(tr); clear_ftrace_function_probes(tr); event_trace_del_tracer(tr); @@ -10756,7 +10787,7 @@ __init static void enable_instances(void) __init static int tracer_alloc_buffers(void) { - int ring_buf_size; + unsigned long ring_buf_size; int ret = -ENOMEM;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 9928da6..249d1cb 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c
@@ -1039,6 +1039,7 @@ event_filter_pid_sched_process_exit(void *data, struct task_struct *task) struct trace_pid_list *pid_list; struct trace_array *tr = data; + guard(preempt)(); pid_list = rcu_dereference_raw(tr->filtered_pids); trace_filter_add_remove_task(pid_list, NULL, task); @@ -1054,6 +1055,7 @@ event_filter_pid_sched_process_fork(void *data, struct trace_pid_list *pid_list; struct trace_array *tr = data; + guard(preempt)(); pid_list = rcu_dereference_sched(tr->filtered_pids); trace_filter_add_remove_task(pid_list, self, task); @@ -4491,7 +4493,11 @@ static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; static __init int setup_trace_event(char *str) { - strscpy(bootup_event_buf, str, COMMAND_LINE_SIZE); + if (bootup_event_buf[0] != '\0') + strlcat(bootup_event_buf, ",", COMMAND_LINE_SIZE); + + strlcat(bootup_event_buf, str, COMMAND_LINE_SIZE); + trace_set_ring_buffer_expanded(NULL); disable_tracing_selftest("running event tracing"); @@ -4668,26 +4674,22 @@ static __init int event_trace_memsetup(void) return 0; } -__init void -early_enable_events(struct trace_array *tr, char *buf, bool disable_first) +/* + * Helper function to enable or disable a comma-separated list of events + * from the bootup buffer. + */ +static __init void __early_set_events(struct trace_array *tr, char *buf, bool enable) { char *token; - int ret; - while (true) { - token = strsep(&buf, ","); - - if (!token) - break; - + while ((token = strsep(&buf, ","))) { if (*token) { - /* Restarting syscalls requires that we stop them first */ - if (disable_first) + if (enable) { + if (ftrace_set_clr_event(tr, token, 1)) + pr_warn("Failed to enable trace event: %s\n", token); + } else { ftrace_set_clr_event(tr, token, 0); - - ret = ftrace_set_clr_event(tr, token, 1); - if (ret) - pr_warn("Failed to enable trace event: %s\n", token); + } } /* Put back the comma to allow this to be called again */ @@ -4696,6 +4698,32 @@ early_enable_events(struct trace_array *tr, char *buf, bool disable_first) } } +/** + * early_enable_events - enable events from the bootup buffer + * @tr: The trace array to enable the events in + * @buf: The buffer containing the comma separated list of events + * @disable_first: If true, disable all events in @buf before enabling them + * + * This function enables events from the bootup buffer. If @disable_first + * is true, it will first disable all events in the buffer before enabling + * them. + * + * For syscall events, which rely on a global refcount to register the + * SYSCALL_WORK_SYSCALL_TRACEPOINT flag (especially for pid 1), we must + * ensure the refcount hits zero before re-enabling them. A simple + * "disable then enable" per-event is not enough if multiple syscalls are + * used, as the refcount will stay above zero. Thus, we need a two-phase + * approach: disable all, then enable all. + */ +__init void +early_enable_events(struct trace_array *tr, char *buf, bool disable_first) +{ + if (disable_first) + __early_set_events(tr, buf, false); + + __early_set_events(tr, buf, true); +} + static __init int event_trace_enable(void) { struct trace_array *tr = top_trace_array();
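The two-phase requirement in the early_enable_events() kernel-doc above is easy to see with a toy refcount. With per-event disable-then-enable the count oscillates between 2 and 1 and never hits zero, so the SYSCALL_WORK_SYSCALL_TRACEPOINT flag is never cleared and re-armed; disabling everything first does reach zero. A sketch (names invented for illustration):

#include <stdio.h>

static int sys_refcount;	/* models the global syscall tracepoint refcount */

static void event_enable(void)
{
	if (sys_refcount++ == 0)
		printf("tracepoint flag set\n");
}

static void event_disable(void)
{
	if (sys_refcount && --sys_refcount == 0)
		printf("tracepoint flag cleared\n");
}

int main(void)
{
	sys_refcount = 2;	/* two syscall events already enabled */

	/* per-event disable-then-enable: 2 -> 1 -> 2, flag never re-armed */
	event_disable(); event_enable();
	event_disable(); event_enable();

	/* two-phase: disable all, then enable all */
	event_disable();
	event_disable();	/* hits zero: "tracepoint flag cleared" */
	event_enable();		/* re-arms:   "tracepoint flag set" */
	event_enable();
	return 0;
}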
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index fecbd67..655db2e 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c
@@ -22,6 +22,39 @@ static struct task_struct *trigger_kthread; static struct llist_head trigger_data_free_list; static DEFINE_MUTEX(trigger_data_kthread_mutex); +static int trigger_kthread_fn(void *ignore); + +static void trigger_create_kthread_locked(void) +{ + lockdep_assert_held(&trigger_data_kthread_mutex); + + if (!trigger_kthread) { + struct task_struct *kthread; + + kthread = kthread_create(trigger_kthread_fn, NULL, + "trigger_data_free"); + if (!IS_ERR(kthread)) + WRITE_ONCE(trigger_kthread, kthread); + } +} + +static void trigger_data_free_queued_locked(void) +{ + struct event_trigger_data *data, *tmp; + struct llist_node *llnodes; + + lockdep_assert_held(&trigger_data_kthread_mutex); + + llnodes = llist_del_all(&trigger_data_free_list); + if (!llnodes) + return; + + tracepoint_synchronize_unregister(); + + llist_for_each_entry_safe(data, tmp, llnodes, llist) + kfree(data); +} + /* Bulk garbage collection of event_trigger_data elements */ static int trigger_kthread_fn(void *ignore) { @@ -50,33 +83,56 @@ static int trigger_kthread_fn(void *ignore) void trigger_data_free(struct event_trigger_data *data) { + if (!data) + return; + if (data->cmd_ops->set_filter) data->cmd_ops->set_filter(NULL, data, NULL); - if (unlikely(!trigger_kthread)) { - guard(mutex)(&trigger_data_kthread_mutex); - /* Check again after taking mutex */ - if (!trigger_kthread) { - struct task_struct *kthread; - - kthread = kthread_create(trigger_kthread_fn, NULL, - "trigger_data_free"); - if (!IS_ERR(kthread)) - WRITE_ONCE(trigger_kthread, kthread); - } + /* + * Boot-time trigger registration can happen before kthread creation + * is possible. Keep the deferred-free semantics during boot and let + * late init start the kthread to drain the list. + */ + if (system_state == SYSTEM_BOOTING && !trigger_kthread) { + llist_add(&data->llist, &trigger_data_free_list); + return; } - if (!trigger_kthread) { - /* Do it the slow way */ - tracepoint_synchronize_unregister(); - kfree(data); - return; + if (unlikely(!trigger_kthread)) { + guard(mutex)(&trigger_data_kthread_mutex); + + trigger_create_kthread_locked(); + /* Check again after taking mutex */ + if (!trigger_kthread) { + llist_add(&data->llist, &trigger_data_free_list); + /* Drain the queued frees synchronously if creation failed. */ + trigger_data_free_queued_locked(); + return; + } } llist_add(&data->llist, &trigger_data_free_list); wake_up_process(trigger_kthread); } +static int __init trigger_data_free_init(void) +{ + guard(mutex)(&trigger_data_kthread_mutex); + + if (llist_empty(&trigger_data_free_list)) + return 0; + + trigger_create_kthread_locked(); + if (trigger_kthread) + wake_up_process(trigger_kthread); + else + trigger_data_free_queued_locked(); + + return 0; +} +late_initcall(trigger_data_free_init); + static inline void data_ops_trigger(struct event_trigger_data *data, struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event)
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 3d8239fe..0d2d3a2 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c
@@ -400,14 +400,19 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops, struct ftrace_regs *fregs) { + unsigned long *task_var = fgraph_get_task_var(gops); struct fgraph_times *ftimes; struct trace_array *tr; + unsigned int trace_ctx; + u64 calltime, rettime; int size; + rettime = trace_clock_local(); + ftrace_graph_addr_finish(gops, trace); - if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) { - trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT); + if (*task_var & TRACE_GRAPH_NOTRACE) { + *task_var &= ~TRACE_GRAPH_NOTRACE; return; } @@ -418,11 +423,13 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, tr = gops->private; handle_nosleeptime(tr, trace, ftimes, size); - if (tracing_thresh && - (trace_clock_local() - ftimes->calltime < tracing_thresh)) + calltime = ftimes->calltime; + + if (tracing_thresh && (rettime - calltime < tracing_thresh)) return; - else - trace_graph_return(trace, gops, fregs); + + trace_ctx = tracing_gen_ctx(); + __trace_graph_return(tr, trace, trace_ctx, calltime, rettime); } static struct fgraph_ops funcgraph_ops = {
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index dee610e..be6cf0b 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c
@@ -2073,8 +2073,8 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy) if (!osnoise_has_registered_instances()) return; - guard(mutex)(&interface_lock); guard(cpus_read_lock)(); + guard(mutex)(&interface_lock); if (!cpu_online(cpu)) return; @@ -2237,11 +2237,11 @@ static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf, if (running) stop_per_cpu_kthreads(); - mutex_lock(&interface_lock); /* * avoid CPU hotplug operations that might read options. */ cpus_read_lock(); + mutex_lock(&interface_lock); retval = cnt; @@ -2257,8 +2257,8 @@ static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf, clear_bit(option, &osnoise_options); } - cpus_read_unlock(); mutex_unlock(&interface_lock); + cpus_read_unlock(); if (running) start_per_cpu_kthreads(); @@ -2345,16 +2345,16 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, if (running) stop_per_cpu_kthreads(); - mutex_lock(&interface_lock); /* * osnoise_cpumask is read by CPU hotplug operations. */ cpus_read_lock(); + mutex_lock(&interface_lock); cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new); - cpus_read_unlock(); mutex_unlock(&interface_lock); + cpus_read_unlock(); if (running) start_per_cpu_kthreads();
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index aeaec79..eda7565 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c
@@ -190,7 +190,7 @@ struct worker_pool { int id; /* I: pool ID */ unsigned int flags; /* L: flags */ - unsigned long watchdog_ts; /* L: watchdog timestamp */ + unsigned long last_progress_ts; /* L: last forward progress timestamp */ bool cpu_stall; /* WD: stalled cpu bound pool */ /* @@ -1697,7 +1697,7 @@ static void __pwq_activate_work(struct pool_workqueue *pwq, WARN_ON_ONCE(!(*wdb & WORK_STRUCT_INACTIVE)); trace_workqueue_activate_work(work); if (list_empty(&pwq->pool->worklist)) - pwq->pool->watchdog_ts = jiffies; + pwq->pool->last_progress_ts = jiffies; move_linked_works(work, &pwq->pool->worklist, NULL); __clear_bit(WORK_STRUCT_INACTIVE_BIT, wdb); } @@ -2348,7 +2348,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, */ if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq, false)) { if (list_empty(&pool->worklist)) - pool->watchdog_ts = jiffies; + pool->last_progress_ts = jiffies; trace_workqueue_activate_work(work); insert_work(pwq, work, &pool->worklist, work_flags); @@ -3204,6 +3204,7 @@ __acquires(&pool->lock) worker->current_pwq = pwq; if (worker->task) worker->current_at = worker->task->se.sum_exec_runtime; + worker->current_start = jiffies; work_data = *work_data_bits(work); worker->current_color = get_work_color(work_data); @@ -3352,7 +3353,7 @@ static void process_scheduled_works(struct worker *worker) while ((work = list_first_entry_or_null(&worker->scheduled, struct work_struct, entry))) { if (first) { - worker->pool->watchdog_ts = jiffies; + worker->pool->last_progress_ts = jiffies; first = false; } process_one_work(worker, work); @@ -4850,7 +4851,7 @@ static int init_worker_pool(struct worker_pool *pool) pool->cpu = -1; pool->node = NUMA_NO_NODE; pool->flags |= POOL_DISASSOCIATED; - pool->watchdog_ts = jiffies; + pool->last_progress_ts = jiffies; INIT_LIST_HEAD(&pool->worklist); INIT_LIST_HEAD(&pool->idle_list); hash_init(pool->busy_hash); @@ -6274,7 +6275,7 @@ static void pr_cont_worker_id(struct worker *worker) { struct worker_pool *pool = worker->pool; - if (pool->flags & WQ_BH) + if (pool->flags & POOL_BH) pr_cont("bh%s", pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : ""); else @@ -6359,6 +6360,8 @@ static void show_pwq(struct pool_workqueue *pwq) pr_cont(" %s", comma ? "," : ""); pr_cont_worker_id(worker); pr_cont(":%ps", worker->current_func); + pr_cont(" for %us", + jiffies_to_msecs(jiffies - worker->current_start) / 1000); list_for_each_entry(work, &worker->scheduled, entry) pr_cont_work(false, work, &pcws); pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); @@ -6462,7 +6465,7 @@ static void show_one_worker_pool(struct worker_pool *pool) /* How long the first pending work is waiting for a worker. */ if (!list_empty(&pool->worklist)) - hung = jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000; + hung = jiffies_to_msecs(jiffies - pool->last_progress_ts) / 1000; /* * Defer printing to avoid deadlocks in console drivers that @@ -7580,11 +7583,11 @@ MODULE_PARM_DESC(panic_on_stall_time, "Panic if stall exceeds this many seconds /* * Show workers that might prevent the processing of pending work items. - * The only candidates are CPU-bound workers in the running state. - * Pending work items should be handled by another idle worker - * in all other situations. + * A busy worker that is not running on the CPU (e.g. sleeping in + * wait_event_idle() with PF_WQ_WORKER cleared) can stall the pool just as + * effectively as a CPU-bound one, so dump every in-flight worker. 
*/ -static void show_cpu_pool_hog(struct worker_pool *pool) +static void show_cpu_pool_busy_workers(struct worker_pool *pool) { struct worker *worker; unsigned long irq_flags; @@ -7593,36 +7596,34 @@ raw_spin_lock_irqsave(&pool->lock, irq_flags); hash_for_each(pool->busy_hash, bkt, worker, hentry) { - if (task_is_running(worker->task)) { - /* - * Defer printing to avoid deadlocks in console - * drivers that queue work while holding locks - * also taken in their write paths. - */ - printk_deferred_enter(); + /* + * Defer printing to avoid deadlocks in console + * drivers that queue work while holding locks + * also taken in their write paths. + */ + printk_deferred_enter(); - pr_info("pool %d:\n", pool->id); - sched_show_task(worker->task); + pr_info("pool %d:\n", pool->id); + sched_show_task(worker->task); - printk_deferred_exit(); - } + printk_deferred_exit(); } raw_spin_unlock_irqrestore(&pool->lock, irq_flags); } -static void show_cpu_pools_hogs(void) +static void show_cpu_pools_busy_workers(void) { struct worker_pool *pool; int pi; - pr_info("Showing backtraces of running workers in stalled CPU-bound worker pools:\n"); + pr_info("Showing backtraces of busy workers in stalled worker pools:\n"); rcu_read_lock(); for_each_pool(pool, pi) { if (pool->cpu_stall) - show_cpu_pool_hog(pool); + show_cpu_pool_busy_workers(pool); } @@ -7691,15 +7692,36 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu)); else touched = READ_ONCE(wq_watchdog_touched); - pool_ts = READ_ONCE(pool->watchdog_ts); + pool_ts = READ_ONCE(pool->last_progress_ts); if (time_after(pool_ts, touched)) ts = pool_ts; else ts = touched; - /* did we stall? */ + /* + * Did we stall? + * + * Do a lockless check first so as not to disturb the system. + * + * Prevent false positives by double-checking the timestamp + * under pool->lock. The lock makes sure that the check reads + * an updated pool->last_progress_ts when this CPU saw + * an already updated pool->worklist above. It seems better + * than adding another barrier into __queue_work(), which + * is a hotter path. + */ if (time_after(now, ts + thresh)) { + scoped_guard(raw_spinlock_irqsave, &pool->lock) { + pool_ts = pool->last_progress_ts; + if (time_after(pool_ts, touched)) + ts = pool_ts; + else + ts = touched; + } + if (!time_after(now, ts + thresh)) + continue; + lockup_detected = true; stall_time = jiffies_to_msecs(now - pool_ts) / 1000; max_stall_time = max(max_stall_time, stall_time); @@ -7711,15 +7733,13 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) pr_cont_pool_info(pool); pr_cont(" stuck for %us!\n", stall_time); } - - } if (lockup_detected) show_all_workqueues(); if (cpu_pool_stall) - show_cpu_pools_hogs(); + show_cpu_pools_busy_workers(); if (lockup_detected) panic_on_wq_watchdog(max_stall_time);
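The watchdog change above is a classic double-checked test: a racy read filters the common no-stall case cheaply, and only suspected stalls pay for the lock to re-read a coherent timestamp. A userspace sketch using plain comparisons in place of the kernel's wraparound-safe time_after(), and a pthread mutex in place of pool->lock:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long last_progress_ts;	/* written under pool_lock */

/*
 * Returns 1 only if the stall survives a recheck under the lock; the
 * lockless first pass keeps the common no-stall case cheap.
 */
static int pool_stalled(unsigned long now, unsigned long thresh)
{
	unsigned long ts = last_progress_ts;	/* racy read, on purpose */

	if (now <= ts + thresh)
		return 0;			/* fast path: no stall */

	pthread_mutex_lock(&pool_lock);
	ts = last_progress_ts;			/* coherent re-read */
	pthread_mutex_unlock(&pool_lock);

	return now > ts + thresh;		/* false positive filtered out */
}

int main(void)
{
	last_progress_ts = 1000;
	printf("%d\n", pool_stalled(1100, 500));	/* 0 */
	printf("%d\n", pool_stalled(2000, 500));	/* 1 */
	return 0;
}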
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index f627594..8def1dd 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h
@@ -32,6 +32,7 @@ struct worker { work_func_t current_func; /* K: function */ struct pool_workqueue *current_pwq; /* K: pwq */ u64 current_at; /* K: runtime at start or last wakeup */ + unsigned long current_start; /* K: start time of current work item */ unsigned int current_color; /* K: color */ int sleeping; /* S: is worker sleeping? */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4e2dfbb..93f356d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug
@@ -630,7 +630,7 @@ config WARN_CONTEXT_ANALYSIS bool "Compiler context-analysis warnings" - depends on CC_IS_CLANG && CLANG_VERSION >= 220000 + depends on CC_IS_CLANG && CLANG_VERSION >= 220100 # Branch profiling re-defines "if", which messes with the compiler's # ability to analyze __cond_acquires(..), resulting in false positives. depends on !TRACE_BRANCH_PROFILING @@ -641,7 +641,7 @@ and releasing user-definable "context locks". Clang's name of the feature is "Thread Safety Analysis". Requires - Clang 22 or later. + Clang 22.1.0 or later. Produces warnings by default. Select CONFIG_WERROR if you wish to turn these warnings into errors. @@ -760,6 +760,7 @@ config DEBUG_OBJECTS bool "Debug object operations" + depends on PREEMPT_COUNT || !DEFERRED_STRUCT_PAGE_INIT depends on DEBUG_KERNEL help If you say Y here, additional code will be inserted into the @@ -1766,33 +1767,6 @@ It is also used by various kernel debugging features that require stack trace generation. -config WARN_ALL_UNSEEDED_RANDOM - bool "Warn for all uses of unseeded randomness" - default n - help - Some parts of the kernel contain bugs relating to their use of - cryptographically secure random numbers before it's actually possible - to generate those numbers securely. This setting ensures that these - flaws don't go unnoticed, by enabling a message, should this ever - occur. This will allow people with obscure setups to know when things - are going wrong, so that they might contact developers about fixing - it. - - Unfortunately, on some models of some architectures getting - a fully seeded CRNG is extremely difficult, and so this can - result in dmesg getting spammed for a surprisingly long - time. This is really bad from a security perspective, and - so architecture maintainers really need to do what they can - to get the CRNG seeded sooner after the system is booted. - However, since users cannot do anything actionable to - address this, by default this option is disabled. - - Say Y here if you want to receive warnings for all uses of - unseeded randomness. This will be of use primarily for - those developers interested in improving the security of - Linux kernels running on their architecture (or - subarchitecture). - config DEBUG_KOBJECT bool "kobject debugging" depends on DEBUG_KERNEL
diff --git a/lib/bootconfig.c b/lib/bootconfig.c index 449369a..e88d022 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c
@@ -316,7 +316,7 @@ int __init xbc_node_compose_key_after(struct xbc_node *root, depth ? "." : ""); if (ret < 0) return ret; - if (ret > size) { + if (ret >= size) { size = 0; } else { size -= ret; @@ -532,9 +532,9 @@ static char *skip_spaces_until_newline(char *p) static int __init __xbc_open_brace(char *p) { /* Push the last key as open brace */ - open_brace[brace_index++] = xbc_node_index(last_parent); if (brace_index >= XBC_DEPTH_MAX) return xbc_parse_error("Exceed max depth of braces", p); + open_brace[brace_index++] = xbc_node_index(last_parent); return 0; } @@ -723,7 +723,8 @@ static int __init xbc_parse_kv(char **k, char *v, int op) if (op == ':') { unsigned short nidx = child->next; - xbc_init_node(child, v, XBC_VALUE); + if (xbc_init_node(child, v, XBC_VALUE) < 0) + return xbc_parse_error("Failed to override value", v); child->next = nidx; /* keep subkeys */ goto array; } @@ -802,7 +803,7 @@ static int __init xbc_verify_tree(void) /* Brace closing */ if (brace_index) { - n = &xbc_nodes[open_brace[brace_index]]; + n = &xbc_nodes[open_brace[brace_index - 1]]; return xbc_parse_error("Brace is not closed", xbc_node_get_data(n)); }
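Two of the bootconfig fixes above are off-by-one corrections on the same index: __xbc_open_brace() previously stored through open_brace[brace_index++] before checking the bound (writing one slot past the array at maximum depth), and the error path read open_brace[brace_index] where brace_index is a count, not the top-of-stack index. A sketch of the corrected push, with the depth constant made up:

#include <stdio.h>

#define XBC_DEPTH_MAX 16

static unsigned short open_brace[XBC_DEPTH_MAX];
static int brace_index;

/*
 * The bounds check must precede the store: the old order wrote one slot
 * past the array before reporting the error.
 */
static int push_brace(unsigned short node)
{
	if (brace_index >= XBC_DEPTH_MAX)
		return -1;	/* exceeded max depth */
	open_brace[brace_index++] = node;
	return 0;
}

int main(void)
{
	int i;

	for (i = 0; i < XBC_DEPTH_MAX + 1; i++) {
		if (push_brace(i) < 0)
			printf("depth limit hit at %d\n", i);	/* i == 16 */
	}
	/* brace_index is a count, so the top of stack is index - 1 */
	printf("top = %u\n", open_brace[brace_index - 1]);	/* 15 */
	return 0;
}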
diff --git a/lib/bug.c b/lib/bug.c index 623c467..aab9e6a 100644 --- a/lib/bug.c +++ b/lib/bug.c
@@ -173,10 +173,8 @@ struct bug_entry *find_bug(unsigned long bugaddr) return module_find_bug(bugaddr); } -__diag_push(); -__diag_ignore(GCC, all, "-Wsuggest-attribute=format", - "Not a valid __printf() conversion candidate."); -static void __warn_printf(const char *fmt, struct pt_regs *regs) +static __printf(1, 0) +void __warn_printf(const char *fmt, struct pt_regs *regs) { if (!fmt) return; @@ -195,7 +193,6 @@ static void __warn_printf(const char *fmt, struct pt_regs *regs) printk("%s", fmt); } -__diag_pop(); static enum bug_trap_type __report_bug(struct bug_entry *bug, unsigned long bugaddr, struct pt_regs *regs) {
diff --git a/lib/crypto/.kunitconfig b/lib/crypto/.kunitconfig new file mode 100644 index 0000000..6b2ce28 --- /dev/null +++ b/lib/crypto/.kunitconfig
@@ -0,0 +1,34 @@ +CONFIG_KUNIT=y + +# These kconfig options select all the CONFIG_CRYPTO_LIB_* symbols that have a +# corresponding KUnit test. Those symbols cannot be directly enabled here, +# since they are hidden symbols. +CONFIG_CRYPTO=y +CONFIG_CRYPTO_ADIANTUM=y +CONFIG_CRYPTO_BLAKE2B=y +CONFIG_CRYPTO_CHACHA20POLY1305=y +CONFIG_CRYPTO_HCTR2=y +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_MLDSA=y +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SHA256=y +CONFIG_CRYPTO_SHA512=y +CONFIG_CRYPTO_SHA3=y +CONFIG_INET=y +CONFIG_IPV6=y +CONFIG_NET=y +CONFIG_NETDEVICES=y +CONFIG_WIREGUARD=y + +CONFIG_CRYPTO_LIB_BLAKE2B_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_BLAKE2S_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_CURVE25519_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_MD5_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_MLDSA_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_NH_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_POLY1305_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_POLYVAL_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_SHA1_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_SHA256_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_SHA512_KUNIT_TEST=y +CONFIG_CRYPTO_LIB_SHA3_KUNIT_TEST=y
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 725eef0..dc7a56f7 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile
@@ -55,6 +55,9 @@ libaes-$(CONFIG_X86) += x86/aes-aesni.o endif # CONFIG_CRYPTO_LIB_AES_ARCH +# clean-files must be defined unconditionally +clean-files += powerpc/aesp8-ppc.S + ################################################################################ obj-$(CONFIG_CRYPTO_LIB_AESCFB) += libaescfb.o
diff --git a/lib/crypto/chacha-block-generic.c b/lib/crypto/chacha-block-generic.c index 77f68de..4a6d627 100644 --- a/lib/crypto/chacha-block-generic.c +++ b/lib/crypto/chacha-block-generic.c
@@ -87,6 +87,8 @@ void chacha_block_generic(struct chacha_state *state, &out[i * sizeof(u32)]); state->x[12]++; + + chacha_zeroize_state(&permuted_state); } EXPORT_SYMBOL(chacha_block_generic); @@ -110,5 +112,7 @@ void hchacha_block_generic(const struct chacha_state *state, memcpy(&out[0], &permuted_state.x[0], 16); memcpy(&out[4], &permuted_state.x[12], 16); + + chacha_zeroize_state(&permuted_state); } EXPORT_SYMBOL(hchacha_block_generic);
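chacha_zeroize_state() exists because the expanded state on the stack is key material, and a plain memset() of a local that is about to go out of scope may be elided as a dead store. A sketch of the usual defense; the kernel uses memzero_explicit(), and the volatile walk below is an equivalent userspace stand-in:

#include <stdio.h>

/*
 * Writing through a volatile-qualified pointer forces the wipe; the
 * compiler cannot prove the stores are dead and must emit them.
 */
static void zeroize(void *p, unsigned long n)
{
	volatile unsigned char *vp = p;

	while (n--)
		*vp++ = 0;
}

static void do_block(void)
{
	unsigned int permuted_state[16] = { 1, 2, 3 };	/* key-derived state */

	/* ... produce the keystream block from permuted_state ... */

	zeroize(permuted_state, sizeof(permuted_state));	/* no key material left on the stack */
}

int main(void)
{
	do_block();
	return 0;
}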
diff --git a/lib/crypto/tests/Kconfig b/lib/crypto/tests/Kconfig index 4970463..0de289b 100644 --- a/lib/crypto/tests/Kconfig +++ b/lib/crypto/tests/Kconfig
@@ -2,10 +2,9 @@ config CRYPTO_LIB_BLAKE2B_KUNIT_TEST tristate "KUnit tests for BLAKE2b" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_BLAKE2B default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_BLAKE2B help KUnit tests for the BLAKE2b cryptographic hash function. @@ -14,71 +13,64 @@ depends on KUNIT default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - # No need to select CRYPTO_LIB_BLAKE2S here, as that option doesn't + # No need to depend on CRYPTO_LIB_BLAKE2S here, as that option doesn't # exist; the BLAKE2s code is always built-in for the /dev/random driver. help KUnit tests for the BLAKE2s cryptographic hash function. config CRYPTO_LIB_CURVE25519_KUNIT_TEST tristate "KUnit tests for Curve25519" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_CURVE25519 default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_CURVE25519 help KUnit tests for the Curve25519 Diffie-Hellman function. config CRYPTO_LIB_MD5_KUNIT_TEST tristate "KUnit tests for MD5" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_MD5 default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_MD5 help KUnit tests for the MD5 cryptographic hash function and its corresponding HMAC. config CRYPTO_LIB_MLDSA_KUNIT_TEST tristate "KUnit tests for ML-DSA" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_MLDSA default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_MLDSA help KUnit tests for the ML-DSA digital signature algorithm. config CRYPTO_LIB_NH_KUNIT_TEST tristate "KUnit tests for NH" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_NH default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS - select CRYPTO_LIB_NH help KUnit tests for the NH almost-universal hash function. config CRYPTO_LIB_POLY1305_KUNIT_TEST tristate "KUnit tests for Poly1305" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_POLY1305 default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_POLY1305 help KUnit tests for the Poly1305 library functions. config CRYPTO_LIB_POLYVAL_KUNIT_TEST tristate "KUnit tests for POLYVAL" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_POLYVAL default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_POLYVAL help KUnit tests for the POLYVAL library functions. config CRYPTO_LIB_SHA1_KUNIT_TEST tristate "KUnit tests for SHA-1" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_SHA1 default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_SHA1 help KUnit tests for the SHA-1 cryptographic hash function and its corresponding HMAC. @@ -87,10 +79,9 @@ # included, for consistency with the naming used elsewhere (e.g. CRYPTO_SHA256). config CRYPTO_LIB_SHA256_KUNIT_TEST tristate "KUnit tests for SHA-224 and SHA-256" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_SHA256 default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_SHA256 help KUnit tests for the SHA-224 and SHA-256 cryptographic hash functions and their corresponding HMACs. @@ -99,20 +90,18 @@ # included, for consistency with the naming used elsewhere (e.g. CRYPTO_SHA512). 
config CRYPTO_LIB_SHA512_KUNIT_TEST tristate "KUnit tests for SHA-384 and SHA-512" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_SHA512 default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_SHA512 help KUnit tests for the SHA-384 and SHA-512 cryptographic hash functions and their corresponding HMACs. config CRYPTO_LIB_SHA3_KUNIT_TEST tristate "KUnit tests for SHA-3" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && CRYPTO_LIB_SHA3 default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE - select CRYPTO_LIB_SHA3 help KUnit tests for the SHA3 cryptographic hash and XOF functions, including SHA3-224, SHA3-256, SHA3-384, SHA3-512, SHAKE128 and
diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 89a1d67..12f50de 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c
@@ -398,9 +398,26 @@ static void fill_pool(void) atomic_inc(&cpus_allocating); while (pool_should_refill(&pool_global)) { + gfp_t gfp = __GFP_HIGH | __GFP_NOWARN; HLIST_HEAD(head); - if (!kmem_alloc_batch(&head, obj_cache, __GFP_HIGH | __GFP_NOWARN)) + /* + * Allow reclaim only in preemptible context and during + * early boot. If not preemptible, the caller might hold + * locks causing a deadlock in the allocator. + * + * If the reclaim flag is not set during early boot then + * allocations, which happen before deferred page + * initialization has completed, will fail. + * + * In preemptible context the flag is harmless and not a + * performance issue as that's usually invoked from slow + * path initialization context. + */ + if (preemptible() || system_state < SYSTEM_SCHEDULING) + gfp |= __GFP_KSWAPD_RECLAIM; + + if (!kmem_alloc_batch(&head, obj_cache, gfp)) break; guard(raw_spinlock_irqsave)(&pool_lock);
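A note on the gfp logic in this hunk: with no reclaim bits set, __GFP_HIGH | __GFP_NOWARN is a fully atomic request that neither wakes kswapd nor enters direct reclaim. Below is a minimal sketch of the selection pattern, factored into a helper purely for illustration; fill_pool() open-codes this logic inline.

static gfp_t fill_pool_gfp(void)
{
        /* No reclaim bits set: atomic request, no kswapd wakeup, no splat. */
        gfp_t gfp = __GFP_HIGH | __GFP_NOWARN;

        /*
         * preemptible(): the caller cannot be holding locks the allocator
         * might also take, so waking kswapd is safe. Before
         * SYSTEM_SCHEDULING the flag is needed so that allocations made
         * before deferred page initialization completes do not fail.
         */
        if (preemptible() || system_state < SYSTEM_SCHEDULING)
                gfp |= __GFP_KSWAPD_RECLAIM;

        return gfp;
}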
diff --git a/lib/kunit/test.c b/lib/kunit/test.c index 62eb529..41e1c89 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c
@@ -94,7 +94,7 @@ struct kunit_result_stats { unsigned long total; }; -static bool kunit_should_print_stats(struct kunit_result_stats stats) +static bool kunit_should_print_stats(struct kunit_result_stats *stats) { if (kunit_stats_enabled == 0) return false; @@ -102,11 +102,11 @@ static bool kunit_should_print_stats(struct kunit_result_stats stats) if (kunit_stats_enabled == 2) return true; - return (stats.total > 1); + return (stats->total > 1); } static void kunit_print_test_stats(struct kunit *test, - struct kunit_result_stats stats) + struct kunit_result_stats *stats) { if (!kunit_should_print_stats(stats)) return; @@ -115,10 +115,10 @@ static void kunit_print_test_stats(struct kunit *test, KUNIT_SUBTEST_INDENT "# %s: pass:%lu fail:%lu skip:%lu total:%lu", test->name, - stats.passed, - stats.failed, - stats.skipped, - stats.total); + stats->passed, + stats->failed, + stats->skipped, + stats->total); } /* Append formatted message to log. */ @@ -600,26 +600,26 @@ static void kunit_run_case_catch_errors(struct kunit_suite *suite, } static void kunit_print_suite_stats(struct kunit_suite *suite, - struct kunit_result_stats suite_stats, - struct kunit_result_stats param_stats) + struct kunit_result_stats *suite_stats, + struct kunit_result_stats *param_stats) { if (kunit_should_print_stats(suite_stats)) { kunit_log(KERN_INFO, suite, "# %s: pass:%lu fail:%lu skip:%lu total:%lu", suite->name, - suite_stats.passed, - suite_stats.failed, - suite_stats.skipped, - suite_stats.total); + suite_stats->passed, + suite_stats->failed, + suite_stats->skipped, + suite_stats->total); } if (kunit_should_print_stats(param_stats)) { kunit_log(KERN_INFO, suite, "# Totals: pass:%lu fail:%lu skip:%lu total:%lu", - param_stats.passed, - param_stats.failed, - param_stats.skipped, - param_stats.total); + param_stats->passed, + param_stats->failed, + param_stats->skipped, + param_stats->total); } } @@ -681,13 +681,116 @@ static void kunit_init_parent_param_test(struct kunit_case *test_case, struct ku } } -int kunit_run_tests(struct kunit_suite *suite) +static noinline_for_stack void +kunit_run_param_test(struct kunit_suite *suite, struct kunit_case *test_case, + struct kunit *test, + struct kunit_result_stats *suite_stats, + struct kunit_result_stats *total_stats, + struct kunit_result_stats *param_stats) { char param_desc[KUNIT_PARAM_DESC_SIZE]; + const void *curr_param; + + kunit_init_parent_param_test(test_case, test); + if (test_case->status == KUNIT_FAILURE) { + kunit_update_stats(param_stats, test->status); + return; + } + /* Get initial param. 
*/ + param_desc[0] = '\0'; + /* TODO: Make generate_params try-catch */ + curr_param = test_case->generate_params(test, NULL, param_desc); + test_case->status = KUNIT_SKIPPED; + kunit_log(KERN_INFO, test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT + "KTAP version 1\n"); + kunit_log(KERN_INFO, test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT + "# Subtest: %s", test_case->name); + if (test->params_array.params && + test_case->generate_params == kunit_array_gen_params) { + kunit_log(KERN_INFO, test, KUNIT_SUBTEST_INDENT + KUNIT_SUBTEST_INDENT "1..%zd\n", + test->params_array.num_params); + } + + while (curr_param) { + struct kunit param_test = { + .param_value = curr_param, + .param_index = ++test->param_index, + .parent = test, + }; + kunit_init_test(¶m_test, test_case->name, NULL); + param_test.log = test_case->log; + kunit_run_case_catch_errors(suite, test_case, ¶m_test); + + if (param_desc[0] == '\0') { + snprintf(param_desc, sizeof(param_desc), + "param-%d", param_test.param_index); + } + + kunit_print_ok_not_ok(¶m_test, KUNIT_LEVEL_CASE_PARAM, + param_test.status, + param_test.param_index, + param_desc, + param_test.status_comment); + + kunit_update_stats(param_stats, param_test.status); + + /* Get next param. */ + param_desc[0] = '\0'; + curr_param = test_case->generate_params(test, curr_param, + param_desc); + } + /* + * TODO: Put into a try catch. Since we don't need suite->exit + * for it we can't reuse kunit_try_run_cleanup for this yet. + */ + if (test_case->param_exit) + test_case->param_exit(test); + /* TODO: Put this kunit_cleanup into a try-catch. */ + kunit_cleanup(test); +} + +static noinline_for_stack void +kunit_run_one_test(struct kunit_suite *suite, struct kunit_case *test_case, + struct kunit_result_stats *suite_stats, + struct kunit_result_stats *total_stats) +{ + struct kunit test = { .param_value = NULL, .param_index = 0 }; + struct kunit_result_stats param_stats = { 0 }; + + kunit_init_test(&test, test_case->name, test_case->log); + if (test_case->status == KUNIT_SKIPPED) { + /* Test marked as skip */ + test.status = KUNIT_SKIPPED; + kunit_update_stats(¶m_stats, test.status); + } else if (!test_case->generate_params) { + /* Non-parameterised test. */ + test_case->status = KUNIT_SKIPPED; + kunit_run_case_catch_errors(suite, test_case, &test); + kunit_update_stats(¶m_stats, test.status); + } else { + kunit_run_param_test(suite, test_case, &test, suite_stats, + total_stats, ¶m_stats); + } + kunit_print_attr((void *)test_case, true, KUNIT_LEVEL_CASE); + + kunit_print_test_stats(&test, ¶m_stats); + + kunit_print_ok_not_ok(&test, KUNIT_LEVEL_CASE, test_case->status, + kunit_test_case_num(suite, test_case), + test_case->name, + test.status_comment); + + kunit_update_stats(suite_stats, test_case->status); + kunit_accumulate_stats(total_stats, param_stats); +} + + +int kunit_run_tests(struct kunit_suite *suite) +{ struct kunit_case *test_case; struct kunit_result_stats suite_stats = { 0 }; struct kunit_result_stats total_stats = { 0 }; - const void *curr_param; /* Taint the kernel so we know we've run tests. 
*/ add_taint(TAINT_TEST, LOCKDEP_STILL_OK); @@ -703,97 +806,13 @@ int kunit_run_tests(struct kunit_suite *suite) kunit_print_suite_start(suite); - kunit_suite_for_each_test_case(suite, test_case) { - struct kunit test = { .param_value = NULL, .param_index = 0 }; - struct kunit_result_stats param_stats = { 0 }; - - kunit_init_test(&test, test_case->name, test_case->log); - if (test_case->status == KUNIT_SKIPPED) { - /* Test marked as skip */ - test.status = KUNIT_SKIPPED; - kunit_update_stats(¶m_stats, test.status); - } else if (!test_case->generate_params) { - /* Non-parameterised test. */ - test_case->status = KUNIT_SKIPPED; - kunit_run_case_catch_errors(suite, test_case, &test); - kunit_update_stats(¶m_stats, test.status); - } else { - kunit_init_parent_param_test(test_case, &test); - if (test_case->status == KUNIT_FAILURE) { - kunit_update_stats(¶m_stats, test.status); - goto test_case_end; - } - /* Get initial param. */ - param_desc[0] = '\0'; - /* TODO: Make generate_params try-catch */ - curr_param = test_case->generate_params(&test, NULL, param_desc); - test_case->status = KUNIT_SKIPPED; - kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT - "KTAP version 1\n"); - kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT - "# Subtest: %s", test_case->name); - if (test.params_array.params && - test_case->generate_params == kunit_array_gen_params) { - kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT - KUNIT_SUBTEST_INDENT "1..%zd\n", - test.params_array.num_params); - } - - while (curr_param) { - struct kunit param_test = { - .param_value = curr_param, - .param_index = ++test.param_index, - .parent = &test, - }; - kunit_init_test(¶m_test, test_case->name, NULL); - param_test.log = test_case->log; - kunit_run_case_catch_errors(suite, test_case, ¶m_test); - - if (param_desc[0] == '\0') { - snprintf(param_desc, sizeof(param_desc), - "param-%d", param_test.param_index); - } - - kunit_print_ok_not_ok(¶m_test, KUNIT_LEVEL_CASE_PARAM, - param_test.status, - param_test.param_index, - param_desc, - param_test.status_comment); - - kunit_update_stats(¶m_stats, param_test.status); - - /* Get next param. */ - param_desc[0] = '\0'; - curr_param = test_case->generate_params(&test, curr_param, - param_desc); - } - /* - * TODO: Put into a try catch. Since we don't need suite->exit - * for it we can't reuse kunit_try_run_cleanup for this yet. - */ - if (test_case->param_exit) - test_case->param_exit(&test); - /* TODO: Put this kunit_cleanup into a try-catch. */ - kunit_cleanup(&test); - } -test_case_end: - kunit_print_attr((void *)test_case, true, KUNIT_LEVEL_CASE); - - kunit_print_test_stats(&test, param_stats); - - kunit_print_ok_not_ok(&test, KUNIT_LEVEL_CASE, test_case->status, - kunit_test_case_num(suite, test_case), - test_case->name, - test.status_comment); - - kunit_update_stats(&suite_stats, test_case->status); - kunit_accumulate_stats(&total_stats, param_stats); - } + kunit_suite_for_each_test_case(suite, test_case) + kunit_run_one_test(suite, test_case, &suite_stats, &total_stats); if (suite->suite_exit) suite->suite_exit(suite); - kunit_print_suite_stats(suite, suite_stats, total_stats); + kunit_print_suite_stats(suite, &suite_stats, &total_stats); suite_end: kunit_print_suite_end(suite);
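The kunit refactor above does two things: the stats structs are now passed by pointer instead of by value, and the body of the per-case loop moves into noinline_for_stack helpers so their frames never combine on the stack. A runnable user-space analogue of the pointer conversion (names are illustrative, not KUnit's API):

#include <stdio.h>

struct result_stats {
        unsigned long passed, failed, skipped, total;
};

/*
 * Passing by pointer: no per-call copy of the struct on the callee's
 * stack, and updates are visible to the caller without returning it.
 */
static void update_stats(struct result_stats *stats, int passed)
{
        stats->total++;
        if (passed)
                stats->passed++;
        else
                stats->failed++;
}

int main(void)
{
        struct result_stats stats = { 0 };

        update_stats(&stats, 1);
        update_stats(&stats, 0);
        printf("pass:%lu fail:%lu total:%lu\n",
               stats.passed, stats.failed, stats.total);
        return 0;
}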
diff --git a/mm/cma.c b/mm/cma.c index 94b5da4..15cc0ae 100644 --- a/mm/cma.c +++ b/mm/cma.c
@@ -1013,6 +1013,7 @@ bool cma_release(struct cma *cma, const struct page *pages, unsigned long count) { struct cma_memrange *cmr; + unsigned long ret = 0; unsigned long i, pfn; cmr = find_cma_memrange(cma, pages, count); @@ -1021,7 +1022,9 @@ bool cma_release(struct cma *cma, const struct page *pages, pfn = page_to_pfn(pages); for (i = 0; i < count; i++, pfn++) - VM_WARN_ON(!put_page_testzero(pfn_to_page(pfn))); + ret += !put_page_testzero(pfn_to_page(pfn)); + + WARN(ret, "%lu pages are still in use!\n", ret); __cma_release_frozen(cma, cmr, pages, count);
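The cma_release() change replaces one VM_WARN_ON() per page with a single summary WARN() after the loop. The same accumulate-then-report shape as a runnable user-space sketch, with the hypothetical release_page() standing in for put_page_testzero():

#include <stdio.h>

/* Hypothetical per-page release: returns 0 if the page is still in use. */
static int release_page(unsigned long pfn)
{
        return pfn != 3;        /* pretend pfn 3 is still pinned */
}

int main(void)
{
        unsigned long busy = 0, pfn;

        for (pfn = 0; pfn < 8; pfn++)
                busy += !release_page(pfn);

        /* One summary line instead of one warning per failing page. */
        if (busy)
                fprintf(stderr, "%lu pages are still in use!\n", busy);
        return 0;
}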
diff --git a/mm/damon/core.c b/mm/damon/core.c index 01eba1a..3e1890d 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c
@@ -1252,6 +1252,10 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src) { int err; + dst->maybe_corrupted = true; + if (!is_power_of_2(src->min_region_sz)) + return -EINVAL; + err = damon_commit_schemes(dst, src); if (err) return err; @@ -1274,6 +1278,7 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src) dst->addr_unit = src->addr_unit; dst->min_region_sz = src->min_region_sz; + dst->maybe_corrupted = false; return 0; } @@ -1559,8 +1564,13 @@ int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control) } ctx->walk_control = control; mutex_unlock(&ctx->walk_control_lock); - if (!damon_is_running(ctx)) + if (!damon_is_running(ctx)) { + mutex_lock(&ctx->walk_control_lock); + if (ctx->walk_control == control) + ctx->walk_control = NULL; + mutex_unlock(&ctx->walk_control_lock); return -EINVAL; + } wait_for_completion(&control->completion); if (control->canceled) return -ECANCELED; @@ -2670,6 +2680,8 @@ static void kdamond_call(struct damon_ctx *ctx, bool cancel) complete(&control->completion); else if (control->canceled && control->dealloc_on_cancel) kfree(control); + if (!cancel && ctx->maybe_corrupted) + break; } mutex_lock(&ctx->call_controls_lock); @@ -2699,6 +2711,8 @@ static int kdamond_wait_activation(struct damon_ctx *ctx) kdamond_usleep(min_wait_time); kdamond_call(ctx, false); + if (ctx->maybe_corrupted) + return -EINVAL; damos_walk_cancel(ctx); } return -EBUSY; @@ -2782,6 +2796,8 @@ static int kdamond_fn(void *data) * kdamond_merge_regions() if possible, to reduce overhead */ kdamond_call(ctx, false); + if (ctx->maybe_corrupted) + break; if (!list_empty(&ctx->schemes)) kdamond_apply_schemes(ctx); else
diff --git a/mm/damon/stat.c b/mm/damon/stat.c index 25fb44c..60351a7 100644 --- a/mm/damon/stat.c +++ b/mm/damon/stat.c
@@ -145,12 +145,59 @@ static int damon_stat_damon_call_fn(void *data) return 0; } +struct damon_stat_system_ram_range_walk_arg { + bool walked; + struct resource res; +}; + +static int damon_stat_system_ram_walk_fn(struct resource *res, void *arg) +{ + struct damon_stat_system_ram_range_walk_arg *a = arg; + + if (!a->walked) { + a->walked = true; + a->res.start = res->start; + } + a->res.end = res->end; + return 0; +} + +static unsigned long damon_stat_res_to_core_addr(resource_size_t ra, + unsigned long addr_unit) +{ + /* + * Use div_u64() to avoid linking errors related to __udivdi3, + * __aeabi_uldivmod, and the like. This should also improve performance + * (see the div_u64() comment for details). + */ + if (sizeof(ra) == 8 && sizeof(addr_unit) == 4) + return div_u64(ra, addr_unit); + return ra / addr_unit; +} + +static int damon_stat_set_monitoring_region(struct damon_target *t, + unsigned long addr_unit, unsigned long min_region_sz) +{ + struct damon_addr_range addr_range; + struct damon_stat_system_ram_range_walk_arg arg = {}; + + walk_system_ram_res(0, -1, &arg, damon_stat_system_ram_walk_fn); + if (!arg.walked) + return -EINVAL; + addr_range.start = damon_stat_res_to_core_addr( + arg.res.start, addr_unit); + addr_range.end = damon_stat_res_to_core_addr( + arg.res.end + 1, addr_unit); + if (addr_range.end <= addr_range.start) + return -EINVAL; + return damon_set_regions(t, &addr_range, 1, min_region_sz); +} + static struct damon_ctx *damon_stat_build_ctx(void) { struct damon_ctx *ctx; struct damon_attrs attrs; struct damon_target *target; - unsigned long start = 0, end = 0; ctx = damon_new_ctx(); if (!ctx) @@ -180,8 +227,8 @@ static struct damon_ctx *damon_stat_build_ctx(void) if (!target) goto free_out; damon_add_target(ctx, target); - if (damon_set_region_biggest_system_ram_default(target, &start, &end, - ctx->min_region_sz)) + if (damon_stat_set_monitoring_region(target, ctx->addr_unit, + ctx->min_region_sz)) goto free_out; return ctx; free_out: @@ -198,6 +245,12 @@ static int damon_stat_start(void) { int err; + if (damon_stat_context) { + if (damon_is_running(damon_stat_context)) + return -EAGAIN; + damon_destroy_ctx(damon_stat_context); + } + damon_stat_context = damon_stat_build_ctx(); if (!damon_stat_context) return -ENOMEM; @@ -214,6 +267,7 @@ static void damon_stat_stop(void) { damon_stop(&damon_stat_context, 1); damon_destroy_ctx(damon_stat_context); + damon_stat_context = NULL; } static int damon_stat_enabled_store(
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 576d1dd..eefa959 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c
@@ -1524,8 +1524,10 @@ static int damon_sysfs_commit_input(void *data) if (IS_ERR(param_ctx)) return PTR_ERR(param_ctx); test_ctx = damon_sysfs_new_test_ctx(kdamond->damon_ctx); - if (!test_ctx) + if (!test_ctx) { + damon_destroy_ctx(param_ctx); return -ENOMEM; + } err = damon_commit_ctx(test_ctx, param_ctx); if (err) goto out; @@ -1618,9 +1620,12 @@ static int damon_sysfs_repeat_call_fn(void *data) if (!mutex_trylock(&damon_sysfs_lock)) return 0; + if (sysfs_kdamond->contexts->nr != 1) + goto out; damon_sysfs_upd_tuned_intervals(sysfs_kdamond); damon_sysfs_upd_schemes_stats(sysfs_kdamond); damon_sysfs_upd_schemes_effective_quotas(sysfs_kdamond); +out: mutex_unlock(&damon_sysfs_lock); return 0; } @@ -1665,7 +1670,8 @@ static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond) repeat_call_control->data = kdamond; repeat_call_control->repeat = true; repeat_call_control->dealloc_on_cancel = true; - damon_call(ctx, repeat_call_control); + if (damon_call(ctx, repeat_call_control)) + kfree(repeat_call_control); return err; } @@ -1747,6 +1753,9 @@ static int damon_sysfs_update_schemes_tried_regions( static int damon_sysfs_handle_cmd(enum damon_sysfs_cmd cmd, struct damon_sysfs_kdamond *kdamond) { + if (cmd != DAMON_SYSFS_CMD_OFF && kdamond->contexts->nr != 1) + return -EINVAL; + switch (cmd) { case DAMON_SYSFS_CMD_ON: return damon_sysfs_turn_damon_on(kdamond);
diff --git a/mm/filemap.c b/mm/filemap.c index 6cd7974..3c1e785 100644 --- a/mm/filemap.c +++ b/mm/filemap.c
@@ -1379,14 +1379,16 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, #ifdef CONFIG_MIGRATION /** - * migration_entry_wait_on_locked - Wait for a migration entry to be removed - * @entry: migration swap entry. + * softleaf_entry_wait_on_locked - Wait for a migration entry or + * device_private entry to be removed. + * @entry: migration or device_private swap entry. * @ptl: already locked ptl. This function will drop the lock. * - * Wait for a migration entry referencing the given page to be removed. This is + * Wait for a migration entry referencing the given page, or a device_private + * entry referencing a device_private page, to be removed. This is * equivalent to folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE) except * this can be called without taking a reference on the page. Instead this - * should be called while holding the ptl for the migration entry referencing + * should be called while holding the ptl for @entry referencing * the page. * * Returns after unlocking the ptl. @@ -1394,7 +1396,7 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, * This follows the same logic as folio_wait_bit_common() so see the comments * there. */ -void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) +void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) __releases(ptl) { struct wait_page_queue wait_page; @@ -1428,6 +1430,9 @@ void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) * If a migration entry exists for the page the migration path must hold * a valid reference to the page, and it must take the ptl to remove the * migration entry. So the page is valid until the ptl is dropped. + * Similarly any path attempting to drop the last reference to a + * device-private page needs to grab the ptl to remove the device-private + * entry. */ spin_unlock(ptl); @@ -3878,14 +3883,19 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, unsigned int nr_pages = 0, folio_type; unsigned short mmap_miss = 0, mmap_miss_saved; + /* + * Recalculate end_pgoff based on file_end before calling + * next_uptodate_folio() to avoid races with concurrent + * truncation. + */ + file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1; + end_pgoff = min(end_pgoff, file_end); + rcu_read_lock(); folio = next_uptodate_folio(&xas, mapping, end_pgoff); if (!folio) goto out; - file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1; - end_pgoff = min(end_pgoff, file_end); - /* * Do not allow to map with PMD across i_size to preserve * SIGBUS semantics.
diff --git a/mm/hmm.c b/mm/hmm.c index f6c4ddf..5955f2f 100644 --- a/mm/hmm.c +++ b/mm/hmm.c
@@ -778,7 +778,7 @@ dma_addr_t hmm_dma_map_pfn(struct device *dev, struct hmm_dma_map *map, struct page *page = hmm_pfn_to_page(pfns[idx]); phys_addr_t paddr = hmm_pfn_to_phys(pfns[idx]); size_t offset = idx * map->dma_entry_size; - unsigned long attrs = 0; + unsigned long attrs = DMA_ATTR_REQUIRE_COHERENT; dma_addr_t dma_addr; int ret; @@ -871,7 +871,7 @@ bool hmm_dma_unmap_pfn(struct device *dev, struct hmm_dma_map *map, size_t idx) struct dma_iova_state *state = &map->state; dma_addr_t *dma_addrs = map->dma_list; unsigned long *pfns = map->pfn_list; - unsigned long attrs = 0; + unsigned long attrs = DMA_ATTR_REQUIRE_COHERENT; if ((pfns[idx] & valid_dma) != valid_dma) return false;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d4ca8cfd..b298cba 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c
@@ -94,6 +94,9 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma) inode = file_inode(vma->vm_file); + if (IS_ANON_FILE(inode)) + return false; + return !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode); } @@ -2794,7 +2797,8 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm _dst_pmd = pmd_mkwrite(pmd_mkdirty(_dst_pmd), dst_vma); } else { src_pmdval = pmdp_huge_clear_flush(src_vma, src_addr, src_pmd); - _dst_pmd = folio_mk_pmd(src_folio, dst_vma->vm_page_prot); + _dst_pmd = move_soft_dirty_pmd(src_pmdval); + _dst_pmd = clear_uffd_wp_pmd(_dst_pmd); } set_pmd_at(mm, dst_addr, dst_pmd, _dst_pmd); @@ -3628,6 +3632,7 @@ static int __split_unmapped_folio(struct folio *folio, int new_order, const bool is_anon = folio_test_anon(folio); int old_order = folio_order(folio); int start_order = split_type == SPLIT_TYPE_UNIFORM ? new_order : old_order - 1; + struct folio *old_folio = folio; int split_order; /* @@ -3648,12 +3653,16 @@ static int __split_unmapped_folio(struct folio *folio, int new_order, * uniform split has xas_split_alloc() called before * irq is disabled to allocate enough memory, whereas * non-uniform split can handle ENOMEM. + * Use the to-be-split folio, so that a parallel + * folio_try_get() waits on it until xarray is updated + * with after-split folios and the original one is + * unfrozen. */ - if (split_type == SPLIT_TYPE_UNIFORM) - xas_split(xas, folio, old_order); - else { + if (split_type == SPLIT_TYPE_UNIFORM) { + xas_split(xas, old_folio, old_order); + } else { xas_set_order(xas, folio->index, split_order); - xas_try_split(xas, folio, old_order); + xas_try_split(xas, old_folio, old_order); if (xas_error(xas)) return xas_error(xas); }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0beb6e2..327eaa4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c
@@ -3101,7 +3101,7 @@ static __init void *alloc_bootmem(struct hstate *h, int nid, bool node_exact) * extract the actual node first. */ if (m) - listnode = early_pfn_to_nid(PHYS_PFN(virt_to_phys(m))); + listnode = early_pfn_to_nid(PHYS_PFN(__pa(m))); } if (m) { @@ -3160,7 +3160,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid) * The head struct page is used to get folio information by the HugeTLB * subsystem like zone id and node id. */ - memblock_reserved_mark_noinit(virt_to_phys((void *)m + PAGE_SIZE), + memblock_reserved_mark_noinit(__pa((void *)m + PAGE_SIZE), huge_page_size(h) - PAGE_SIZE); return 1;
diff --git a/mm/kfence/core.c b/mm/kfence/core.c index b4ea326..7393957 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c
@@ -13,6 +13,7 @@ #include <linux/hash.h> #include <linux/irq_work.h> #include <linux/jhash.h> +#include <linux/kasan-enabled.h> #include <linux/kcsan-checks.h> #include <linux/kfence.h> #include <linux/kmemleak.h> @@ -917,6 +918,20 @@ void __init kfence_alloc_pool_and_metadata(void) return; /* + * If KASAN hardware tags are enabled, disable KFENCE, because it + * does not support MTE yet. + */ + if (kasan_hw_tags_enabled()) { + pr_info("disabled as KASAN HW tags are enabled\n"); + if (__kfence_pool) { + memblock_free(__kfence_pool, KFENCE_POOL_SIZE); + __kfence_pool = NULL; + } + kfence_sample_interval = 0; + return; + } + + /* * If the pool has already been initialized by arch, there is no need to * re-allocate the memory pool. */ @@ -989,14 +1004,14 @@ static int kfence_init_late(void) #ifdef CONFIG_CONTIG_ALLOC struct page *pages; - pages = alloc_contig_pages(nr_pages_pool, GFP_KERNEL, first_online_node, - NULL); + pages = alloc_contig_pages(nr_pages_pool, GFP_KERNEL | __GFP_SKIP_KASAN, + first_online_node, NULL); if (!pages) return -ENOMEM; __kfence_pool = page_to_virt(pages); - pages = alloc_contig_pages(nr_pages_meta, GFP_KERNEL, first_online_node, - NULL); + pages = alloc_contig_pages(nr_pages_meta, GFP_KERNEL | __GFP_SKIP_KASAN, + first_online_node, NULL); if (pages) kfence_metadata_init = page_to_virt(pages); #else @@ -1006,11 +1021,13 @@ static int kfence_init_late(void) return -EINVAL; } - __kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE, GFP_KERNEL); + __kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE, + GFP_KERNEL | __GFP_SKIP_KASAN); if (!__kfence_pool) return -ENOMEM; - kfence_metadata_init = alloc_pages_exact(KFENCE_METADATA_SIZE, GFP_KERNEL); + kfence_metadata_init = alloc_pages_exact(KFENCE_METADATA_SIZE, + GFP_KERNEL | __GFP_SKIP_KASAN); #endif if (!kfence_metadata_init)
diff --git a/mm/madvise.c b/mm/madvise.c index c0370d9..dbb6940 100644 --- a/mm/madvise.c +++ b/mm/madvise.c
@@ -1389,7 +1389,7 @@ static int madvise_vma_behavior(struct madvise_behavior *madv_behavior) new_flags |= VM_DONTCOPY; break; case MADV_DOFORK: - if (new_flags & VM_IO) + if (new_flags & VM_SPECIAL) return -EINVAL; new_flags &= ~VM_DONTCOPY; break;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a52da3a..772bac2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c
@@ -3086,7 +3086,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, if (!local_trylock(&obj_stock.lock)) { if (pgdat) - mod_objcg_mlstate(objcg, pgdat, idx, nr_bytes); + mod_objcg_mlstate(objcg, pgdat, idx, nr_acct); nr_pages = nr_bytes >> PAGE_SHIFT; nr_bytes = nr_bytes & (PAGE_SIZE - 1); atomic_add(nr_bytes, &objcg->nr_charged_bytes);
diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c index 5c17da3..b8edb9f 100644 --- a/mm/memfd_luo.c +++ b/mm/memfd_luo.c
@@ -146,19 +146,56 @@ static int memfd_luo_preserve_folios(struct file *file, for (i = 0; i < nr_folios; i++) { struct memfd_luo_folio_ser *pfolio = &folios_ser[i]; struct folio *folio = folios[i]; - unsigned int flags = 0; err = kho_preserve_folio(folio); if (err) goto err_unpreserve; - if (folio_test_dirty(folio)) - flags |= MEMFD_LUO_FOLIO_DIRTY; - if (folio_test_uptodate(folio)) - flags |= MEMFD_LUO_FOLIO_UPTODATE; + folio_lock(folio); + + /* + * A dirty folio is one which has been written to. A clean folio + * is its opposite. Since a clean folio does not carry user + * data, it can be freed by page reclaim under memory pressure. + * + * Saving the dirty flag at prepare() time doesn't work since it + * can change later. Saving it at freeze() also won't work + * because the dirty bit is normally synced at unmap and there + * might still be a mapping of the file at freeze(). + * + * To see why this is a problem, say a folio is clean at + * preserve, but gets dirtied later. The pfolio flags will mark + * it as clean. After retrieve, the next kernel might try to + * reclaim this folio under memory pressure, losing user data. + * + * Unconditionally mark it dirty to avoid this problem. This + * comes at the cost of making clean folios un-reclaimable after + * live update. + */ + folio_mark_dirty(folio); + + /* + * If the folio is not uptodate, it was fallocated but never + * used. Saving this flag at prepare() doesn't work since it + * might change later when someone uses the folio. + * + * Since we have taken the performance penalty of allocating, + * zeroing, and pinning all the folios in the holes, take a bit + * more and zero all non-uptodate folios too. + * + * NOTE: For someone looking to improve preserve performance, + * this is a good place to look. + */ + if (!folio_test_uptodate(folio)) { + folio_zero_range(folio, 0, folio_size(folio)); + flush_dcache_folio(folio); + folio_mark_uptodate(folio); + } + + folio_unlock(folio); pfolio->pfn = folio_pfn(folio); - pfolio->flags = flags; + pfolio->flags = MEMFD_LUO_FOLIO_DIRTY | MEMFD_LUO_FOLIO_UPTODATE; pfolio->index = folio->index; } @@ -326,7 +363,12 @@ static void memfd_luo_finish(struct liveupdate_file_op_args *args) struct memfd_luo_folio_ser *folios_ser; struct memfd_luo_ser *ser; - if (args->retrieved) + /* + * If retrieve was successful, nothing to do. If it failed, retrieve() + * already cleaned up everything it could. So nothing to do there + * either. Only need to clean up when retrieve was not called. + */ + if (args->retrieve_status) return; ser = phys_to_virt(args->serialized_data);
diff --git a/mm/memory.c b/mm/memory.c index 07778814..c65e82c 100644 --- a/mm/memory.c +++ b/mm/memory.c
@@ -4763,7 +4763,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock_page(vmf->page); put_page(vmf->page); } else { - pte_unmap_unlock(vmf->pte, vmf->ptl); + pte_unmap(vmf->pte); + softleaf_entry_wait_on_locked(entry, vmf->ptl); } } else if (softleaf_is_hwpoison(entry)) { ret = VM_FAULT_HWPOISON; @@ -6814,11 +6815,16 @@ int follow_pfnmap_start(struct follow_pfnmap_args *args) pudp = pud_offset(p4dp, address); pud = pudp_get(pudp); - if (pud_none(pud)) + if (!pud_present(pud)) goto out; if (pud_leaf(pud)) { lock = pud_lock(mm, pudp); - if (!unlikely(pud_leaf(pud))) { + pud = pudp_get(pudp); + + if (unlikely(!pud_present(pud))) { + spin_unlock(lock); + goto out; + } else if (unlikely(!pud_leaf(pud))) { spin_unlock(lock); goto retry; } @@ -6830,9 +6836,16 @@ int follow_pfnmap_start(struct follow_pfnmap_args *args) pmdp = pmd_offset(pudp, address); pmd = pmdp_get_lockless(pmdp); + if (!pmd_present(pmd)) + goto out; if (pmd_leaf(pmd)) { lock = pmd_lock(mm, pmdp); - if (!unlikely(pmd_leaf(pmd))) { + pmd = pmdp_get(pmdp); + + if (unlikely(!pmd_present(pmd))) { + spin_unlock(lock); + goto out; + } else if (unlikely(!pmd_leaf(pmd))) { spin_unlock(lock); goto retry; }
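Both follow_pfnmap_start() hunks enforce the same discipline: a lockless snapshot (pudp_get()/pmdp_get_lockless()) must be re-read after the page-table lock is taken, because the entry may have changed in between. A hedged pthread analogue of the snapshot/lock/re-check pattern follows; all names are illustrative, and the kernel uses READ_ONCE-style helpers where this sketch does a plain read.

#include <pthread.h>
#include <stdbool.h>

struct mapping {
        pthread_mutex_t lock;
        int state;              /* 0 = none, 1 = leaf, 2 = table */
};

static bool lookup_leaf(struct mapping *m)
{
        int state = m->state;           /* lockless snapshot */

        if (state != 1)
                return false;           /* nothing to pin */

        pthread_mutex_lock(&m->lock);
        state = m->state;               /* re-read under the lock */
        if (state != 1) {               /* raced with a concurrent change */
                pthread_mutex_unlock(&m->lock);
                return false;           /* the kernel code retries or bails */
        }
        /* ... the leaf is stable for as long as the lock is held ... */
        pthread_mutex_unlock(&m->lock);
        return true;
}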
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index bc80502..05a4795 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c
@@ -1209,6 +1209,13 @@ int online_pages(unsigned long pfn, unsigned long nr_pages, if (node_arg.nid >= 0) node_set_state(nid, N_MEMORY); + /* + * Check whether we are adding normal memory to the node for the first + * time. + */ + if (!node_state(nid, N_NORMAL_MEMORY) && zone_idx(zone) <= ZONE_NORMAL) + node_set_state(nid, N_NORMAL_MEMORY); + if (need_zonelists_rebuild) build_all_zonelists(NULL); @@ -1908,6 +1915,8 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages, unsigned long flags; char *reason; int ret; + unsigned long normal_pages = 0; + enum zone_type zt; /* * {on,off}lining is constrained to full memory sections (or more @@ -2056,6 +2065,17 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages, init_per_zone_wmark_min(); /* + * Check whether this operation removes the last normal memory from + * the node. We do this before clearing N_MEMORY to avoid the possible + * transient "!N_MEMORY && N_NORMAL_MEMORY" state. + */ + if (zone_idx(zone) <= ZONE_NORMAL) { + for (zt = 0; zt <= ZONE_NORMAL; zt++) + normal_pages += pgdat->node_zones[zt].present_pages; + if (!normal_pages) + node_clear_state(node, N_NORMAL_MEMORY); + } + /* * Make sure to mark the node as memory-less before rebuilding the zone * list. Otherwise this node would still appear in the fallback lists. */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0e5175f..cf92bd6 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c
@@ -487,7 +487,13 @@ void __mpol_put(struct mempolicy *pol) { if (!atomic_dec_and_test(&pol->refcnt)) return; - kmem_cache_free(policy_cache, pol); + /* + * Required to allow mmap_lock_speculative*() access, see for example + * futex_key_to_node_opt(). All accesses are serialized by mmap_lock, + * however the speculative lock section unbound by the normal lock + * boundaries, requiring RCU freeing. + */ + kfree_rcu(pol, rcu); } EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm"); @@ -1020,7 +1026,7 @@ static int vma_replace_policy(struct vm_area_struct *vma, } old = vma->vm_policy; - vma->vm_policy = new; /* protected by mmap_lock */ + WRITE_ONCE(vma->vm_policy, new); /* protected by mmap_lock */ mpol_put(old); return 0;
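For the kfree_rcu() conversion above to work, the object must carry a struct rcu_head that the RCU machinery can link onto its deferred-free list; the patch implies struct mempolicy gained one. A condensed in-tree style sketch, where the struct layout is illustrative rather than the real mempolicy:

#include <linux/atomic.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct mempolicy_like {
        atomic_t refcnt;
        struct rcu_head rcu;    /* required by kfree_rcu(ptr, field) */
};

static void put_policy(struct mempolicy_like *pol)
{
        if (!atomic_dec_and_test(&pol->refcnt))
                return;
        /*
         * Defer the free until every pre-existing RCU reader, here the
         * speculative mmap_lock sections, has finished with the object.
         */
        kfree_rcu(pol, rcu);
}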
diff --git a/mm/migrate.c b/mm/migrate.c index 1bf2cf8..2c3d489 100644 --- a/mm/migrate.c +++ b/mm/migrate.c
@@ -500,7 +500,7 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, if (!softleaf_is_migration(entry)) goto out; - migration_entry_wait_on_locked(entry, ptl); + softleaf_entry_wait_on_locked(entry, ptl); return; out: spin_unlock(ptl); @@ -532,10 +532,10 @@ void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, p * If migration entry existed, safe to release vma lock * here because the pgtable page won't be freed without the * pgtable lock released. See comment right above pgtable - * lock release in migration_entry_wait_on_locked(). + * lock release in softleaf_entry_wait_on_locked(). */ hugetlb_vma_unlock_read(vma); - migration_entry_wait_on_locked(entry, ptl); + softleaf_entry_wait_on_locked(entry, ptl); return; } @@ -553,7 +553,7 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) ptl = pmd_lock(mm, pmd); if (!pmd_is_migration_entry(*pmd)) goto unlock; - migration_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl); + softleaf_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl); return; unlock: spin_unlock(ptl);
diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 0a8b319..8079676 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c
@@ -176,7 +176,7 @@ static int migrate_vma_collect_huge_pmd(pmd_t *pmdp, unsigned long start, } if (softleaf_is_migration(entry)) { - migration_entry_wait_on_locked(entry, ptl); + softleaf_entry_wait_on_locked(entry, ptl); spin_unlock(ptl); return -EAGAIN; }
diff --git a/mm/mm_init.c b/mm/mm_init.c index 61d983d..df34797 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c
@@ -1896,7 +1896,11 @@ static void __init free_area_init(void) for_each_node(nid) { pg_data_t *pgdat; - if (!node_online(nid)) + /* + * If an architecture has not allocated node data for + * this node, presume the node is memoryless or offline. + */ + if (!NODE_DATA(nid)) alloc_offline_node_data(nid); pgdat = NODE_DATA(nid);
diff --git a/mm/mseal.c b/mm/mseal.c index 316b5e1..ac58643 100644 --- a/mm/mseal.c +++ b/mm/mseal.c
@@ -56,7 +56,6 @@ static int mseal_apply(struct mm_struct *mm, unsigned long start, unsigned long end) { struct vm_area_struct *vma, *prev; - unsigned long curr_start = start; VMA_ITERATOR(vmi, mm, start); /* We know there are no gaps so this will be non-NULL. */ @@ -66,6 +65,7 @@ static int mseal_apply(struct mm_struct *mm, prev = vma; for_each_vma_range(vmi, vma, end) { + const unsigned long curr_start = MAX(vma->vm_start, start); const unsigned long curr_end = MIN(vma->vm_end, end); if (!(vma->vm_flags & VM_SEALED)) { @@ -79,7 +79,6 @@ static int mseal_apply(struct mm_struct *mm, } prev = vma; - curr_start = curr_end; } return 0;
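The mseal fix computes each iteration's range as the intersection of the request with the VMA's own bounds, instead of carrying the previous iteration's end forward. A runnable sketch of the clamping (the VMA layout is illustrative):

#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

struct vma { unsigned long vm_start, vm_end; };

int main(void)
{
        const struct vma vmas[] = { { 0x1000, 0x3000 }, { 0x3000, 0x8000 } };
        const unsigned long start = 0x2000, end = 0x5000;

        for (unsigned int i = 0; i < 2; i++) {
                /* Clamp the request to this VMA: yields [0x2000,0x3000)
                 * then [0x3000,0x5000), correct even though the first VMA
                 * starts below `start` and the last ends beyond `end`. */
                unsigned long curr_start = MAX(vmas[i].vm_start, start);
                unsigned long curr_end = MIN(vmas[i].vm_end, end);

                printf("apply to [%#lx, %#lx)\n", curr_start, curr_end);
        }
        return 0;
}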
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 601a5e0..c1a4b32 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c
@@ -1858,6 +1858,27 @@ static int balance_dirty_pages(struct bdi_writeback *wb, break; } + /* + * Unconditionally start background writeback if it's not + * already in progress. We need to do this because the global + * dirty threshold check above (nr_dirty > gdtc->bg_thresh) + * doesn't account for these cases: + * + * a) strictlimit BDIs: throttling is calculated using per-wb + * thresholds. The per-wb threshold can be exceeded even when + * nr_dirty < gdtc->bg_thresh + * + * b) memcg-based throttling: memcg uses its own dirty count and + * thresholds and can trigger throttling even when global + * nr_dirty < gdtc->bg_thresh + * + * Writeback needs to be started else the writer stalls in the + * throttle loop waiting for dirty pages to be written back + * while no writeback is running. + */ + if (unlikely(!writeback_in_progress(wb))) + wb_start_background_writeback(wb); + mem_cgroup_flush_foreign(wb); /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fcc3273..2d4b6f1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c
@@ -6928,7 +6928,8 @@ static int __alloc_contig_verify_gfp_mask(gfp_t gfp_mask, gfp_t *gfp_cc_mask) { const gfp_t reclaim_mask = __GFP_IO | __GFP_FS | __GFP_RECLAIM; const gfp_t action_mask = __GFP_COMP | __GFP_RETRY_MAYFAIL | __GFP_NOWARN | - __GFP_ZERO | __GFP_ZEROTAGS | __GFP_SKIP_ZERO; + __GFP_ZERO | __GFP_ZEROTAGS | __GFP_SKIP_ZERO | + __GFP_SKIP_KASAN; const gfp_t cc_action_mask = __GFP_RETRY_MAYFAIL | __GFP_NOWARN; /*
diff --git a/mm/pagewalk.c b/mm/pagewalk.c index a94c401..4e7bcd9 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c
@@ -97,6 +97,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, struct mm_walk *walk) { + pud_t pudval = pudp_get(pud); pmd_t *pmd; unsigned long next; const struct mm_walk_ops *ops = walk->ops; @@ -105,6 +106,24 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, int err = 0; int depth = real_depth(3); + /* + * For PTE handling, pte_offset_map_lock() takes care of checking + * whether there actually is a page table. But it also has to be + * very careful about concurrent page table reclaim. + * + * Similarly, we have to be careful here - a PUD entry that points + * to a PMD table cannot go away, so we can just walk it. But if + * it's something else, we need to ensure we didn't race something, + * so need to retry. + * + * A pertinent example of this is a PUD refault after PUD split - + * we will need to split again or risk accessing invalid memory. + */ + if (!pud_present(pudval) || pud_leaf(pudval)) { + walk->action = ACTION_AGAIN; + return 0; + } + pmd = pmd_offset(pud, addr); do { again: @@ -218,12 +237,12 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, else if (pud_leaf(*pud) || !pud_present(*pud)) continue; /* Nothing to do. */ - if (pud_none(*pud)) - goto again; - err = walk_pmd_range(pud, addr, next, walk); if (err) break; + + if (walk->action == ACTION_AGAIN) + goto again; } while (pud++, addr = next, addr != end); return err;
diff --git a/mm/rmap.c b/mm/rmap.c index 0f00570d..8f08090 100644 --- a/mm/rmap.c +++ b/mm/rmap.c
@@ -457,6 +457,13 @@ static void cleanup_partial_anon_vmas(struct vm_area_struct *vma) list_del(&avc->same_vma); anon_vma_chain_free(avc); } + + /* + * The anon_vma assigned to this VMA is no longer valid, as we were not + * able to correctly clone AVC state. Avoid inconsistent anon_vma tree + * state by resetting. + */ + vma->anon_vma = NULL; } /** @@ -1955,7 +1962,14 @@ static inline unsigned int folio_unmap_pte_batch(struct folio *folio, if (userfaultfd_wp(vma)) return 1; - return folio_pte_batch(folio, pvmw->pte, pte, max_nr); + /* + * If unmap fails, we need to restore the ptes. To avoid accidentally + * upgrading write permissions for ptes that were not originally + * writable, and to avoid losing the soft-dirty bit, use the + * appropriate FPB flags. + */ + return folio_pte_batch_flags(folio, vma, pvmw->pte, &pte, max_nr, + FPB_RESPECT_WRITE | FPB_RESPECT_SOFT_DIRTY); } /* @@ -2443,11 +2457,17 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, __maybe_unused pmd_t pmdval; if (flags & TTU_SPLIT_HUGE_PMD) { + /* + * split_huge_pmd_locked() might leave the + * folio mapped through PTEs. Retry the walk + * so we can detect this scenario and properly + * abort the walk. + */ split_huge_pmd_locked(vma, pvmw.address, pvmw.pmd, true); - ret = false; - page_vma_mapped_walk_done(&pvmw); - break; + flags &= ~TTU_SPLIT_HUGE_PMD; + page_vma_mapped_walk_restart(&pvmw); + continue; } #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION pmdval = pmdp_get(pvmw.pmd);
diff --git a/mm/slab.h b/mm/slab.h index 71c7261..e9ab292 100644 --- a/mm/slab.h +++ b/mm/slab.h
@@ -59,7 +59,7 @@ struct freelist_counters { * to save memory. In case ->stride field is not available, * such optimizations are disabled. */ - unsigned short stride; + unsigned int stride; #endif }; }; @@ -290,14 +290,14 @@ static inline void *nearest_obj(struct kmem_cache *cache, /* Determine object index from a given position */ static inline unsigned int __obj_to_index(const struct kmem_cache *cache, - void *addr, void *obj) + void *addr, const void *obj) { return reciprocal_divide(kasan_reset_tag(obj) - addr, cache->reciprocal_size); } static inline unsigned int obj_to_index(const struct kmem_cache *cache, - const struct slab *slab, void *obj) + const struct slab *slab, const void *obj) { if (is_kfence_address(obj)) return 0; @@ -559,20 +559,20 @@ static inline void put_slab_obj_exts(unsigned long obj_exts) } #ifdef CONFIG_64BIT -static inline void slab_set_stride(struct slab *slab, unsigned short stride) +static inline void slab_set_stride(struct slab *slab, unsigned int stride) { slab->stride = stride; } -static inline unsigned short slab_get_stride(struct slab *slab) +static inline unsigned int slab_get_stride(struct slab *slab) { return slab->stride; } #else -static inline void slab_set_stride(struct slab *slab, unsigned short stride) +static inline void slab_set_stride(struct slab *slab, unsigned int stride) { VM_WARN_ON_ONCE(stride != sizeof(struct slabobj_ext)); } -static inline unsigned short slab_get_stride(struct slab *slab) +static inline unsigned int slab_get_stride(struct slab *slab) { return sizeof(struct slabobj_ext); }
diff --git a/mm/slub.c b/mm/slub.c index 862642c..2b2d33c 100644 --- a/mm/slub.c +++ b/mm/slub.c
@@ -2041,18 +2041,18 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG -static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) +static inline void mark_obj_codetag_empty(const void *obj) { - struct slab *obj_exts_slab; + struct slab *obj_slab; unsigned long slab_exts; - obj_exts_slab = virt_to_slab(obj_exts); - slab_exts = slab_obj_exts(obj_exts_slab); + obj_slab = virt_to_slab(obj); + slab_exts = slab_obj_exts(obj_slab); if (slab_exts) { get_slab_obj_exts(slab_exts); - unsigned int offs = obj_to_index(obj_exts_slab->slab_cache, - obj_exts_slab, obj_exts); - struct slabobj_ext *ext = slab_obj_ext(obj_exts_slab, + unsigned int offs = obj_to_index(obj_slab->slab_cache, + obj_slab, obj); + struct slabobj_ext *ext = slab_obj_ext(obj_slab, slab_exts, offs); if (unlikely(is_codetag_empty(&ext->ref))) { @@ -2090,7 +2090,7 @@ static inline void handle_failed_objexts_alloc(unsigned long obj_exts, #else /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */ -static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) {} +static inline void mark_obj_codetag_empty(const void *obj) {} static inline bool mark_failed_objexts_alloc(struct slab *slab) { return false; } static inline void handle_failed_objexts_alloc(unsigned long obj_exts, struct slabobj_ext *vec, unsigned int objects) {} @@ -2119,13 +2119,6 @@ static inline size_t obj_exts_alloc_size(struct kmem_cache *s, size_t sz = sizeof(struct slabobj_ext) * slab->objects; struct kmem_cache *obj_exts_cache; - /* - * slabobj_ext array for KMALLOC_CGROUP allocations - * are served from KMALLOC_NORMAL caches. - */ - if (!mem_alloc_profiling_enabled()) - return sz; - if (sz > KMALLOC_MAX_CACHE_SIZE) return sz; @@ -2196,7 +2189,6 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, retry: old_exts = READ_ONCE(slab->obj_exts); handle_failed_objexts_alloc(old_exts, vec, objects); - slab_set_stride(slab, sizeof(struct slabobj_ext)); if (new_slab) { /* @@ -2211,7 +2203,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, * assign slabobj_exts in parallel. In this case the existing * objcg vector should be reused. */ - mark_objexts_empty(vec); + mark_obj_codetag_empty(vec); if (unlikely(!allow_spin)) kfree_nolock(vec); else @@ -2254,7 +2246,7 @@ static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin) * NULL, therefore replace NULL with CODETAG_EMPTY to indicate that * the extension for obj_exts is expected to be NULL. 
*/ - mark_objexts_empty(obj_exts); + mark_obj_codetag_empty(obj_exts); if (allow_spin) kfree(obj_exts); else @@ -2272,6 +2264,9 @@ static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab) void *addr; unsigned long obj_exts; + /* Initialize stride early to avoid memory ordering issues */ + slab_set_stride(slab, sizeof(struct slabobj_ext)); + if (!need_slab_obj_exts(s)) return; @@ -2288,7 +2283,6 @@ static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab) obj_exts |= MEMCG_DATA_OBJEXTS; #endif slab->obj_exts = obj_exts; - slab_set_stride(slab, sizeof(struct slabobj_ext)); } else if (s->flags & SLAB_OBJ_EXT_IN_OBJ) { unsigned int offset = obj_exts_offset_in_object(s); @@ -2312,6 +2306,10 @@ static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab) #else /* CONFIG_SLAB_OBJ_EXT */ +static inline void mark_obj_codetag_empty(const void *obj) +{ +} + static inline void init_slab_obj_exts(struct slab *slab) { } @@ -2783,6 +2781,16 @@ static inline struct slab_sheaf *alloc_empty_sheaf(struct kmem_cache *s, static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf) { + /* + * If the sheaf was created with __GFP_NO_OBJ_EXT flag then its + * corresponding extension is NULL and alloc_tag_sub() will throw a + * warning, therefore replace NULL with CODETAG_EMPTY to indicate + * that the extension for this sheaf is expected to be NULL. + */ + if (s->flags & SLAB_KMALLOC) + mark_obj_codetag_empty(sheaf); + + VM_WARN_ON_ONCE(sheaf->size > 0); kfree(sheaf); stat(s, SHEAF_FREE); @@ -2814,6 +2822,7 @@ static int refill_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf, return 0; } +static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf); static struct slab_sheaf *alloc_full_sheaf(struct kmem_cache *s, gfp_t gfp) { @@ -2822,7 +2831,8 @@ static struct slab_sheaf *alloc_full_sheaf(struct kmem_cache *s, gfp_t gfp) if (!sheaf) return NULL; - if (refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC)) { + if (refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC | __GFP_NOWARN)) { + sheaf_flush_unused(s, sheaf); free_empty_sheaf(s, sheaf); return NULL; } @@ -2844,19 +2854,19 @@ static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p); * object pointers are moved to a on-stack array under the lock. To bound the * stack usage, limit each batch to PCS_BATCH_MAX. * - * returns true if at least partially flushed + * Must be called with s->cpu_sheaves->lock locked, returns with the lock + * unlocked. + * + * Returns how many objects are remaining to be flushed */ -static bool sheaf_flush_main(struct kmem_cache *s) +static unsigned int __sheaf_flush_main_batch(struct kmem_cache *s) { struct slub_percpu_sheaves *pcs; unsigned int batch, remaining; void *objects[PCS_BATCH_MAX]; struct slab_sheaf *sheaf; - bool ret = false; -next_batch: - if (!local_trylock(&s->cpu_sheaves->lock)) - return ret; + lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); pcs = this_cpu_ptr(s->cpu_sheaves); sheaf = pcs->main; @@ -2874,10 +2884,37 @@ static bool sheaf_flush_main(struct kmem_cache *s) stat_add(s, SHEAF_FLUSH, batch); - ret = true; + return remaining; +} - if (remaining) - goto next_batch; +static void sheaf_flush_main(struct kmem_cache *s) +{ + unsigned int remaining; + + do { + local_lock(&s->cpu_sheaves->lock); + + remaining = __sheaf_flush_main_batch(s); + + } while (remaining); +} + +/* + * Returns true if the main sheaf was at least partially flushed. 
+ */ +static bool sheaf_try_flush_main(struct kmem_cache *s) +{ + unsigned int remaining; + bool ret = false; + + do { + if (!local_trylock(&s->cpu_sheaves->lock)) + return ret; + + ret = true; + remaining = __sheaf_flush_main_batch(s); + + } while (remaining); return ret; } @@ -4526,7 +4563,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty = NULL; struct slab_sheaf *full; struct node_barn *barn; - bool can_alloc; + bool allow_spin; lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); @@ -4547,8 +4584,9 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return NULL; } - full = barn_replace_empty_sheaf(barn, pcs->main, - gfpflags_allow_spinning(gfp)); + allow_spin = gfpflags_allow_spinning(gfp); + + full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin); if (full) { stat(s, BARN_GET); @@ -4558,9 +4596,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, stat(s, BARN_GET_FAIL); - can_alloc = gfpflags_allow_blocking(gfp); - - if (can_alloc) { + if (allow_spin) { if (pcs->spare) { empty = pcs->spare; pcs->spare = NULL; @@ -4570,18 +4606,20 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, } local_unlock(&s->cpu_sheaves->lock); + pcs = NULL; - if (!can_alloc) + if (!allow_spin) return NULL; if (empty) { - if (!refill_sheaf(s, empty, gfp | __GFP_NOMEMALLOC)) { + if (!refill_sheaf(s, empty, gfp | __GFP_NOMEMALLOC | __GFP_NOWARN)) { full = empty; } else { /* * we must be very low on memory so don't bother * with the barn */ + sheaf_flush_unused(s, empty); free_empty_sheaf(s, empty); } } else { @@ -4591,11 +4629,8 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, if (!full) return NULL; - /* - * we can reach here only when gfpflags_allow_blocking - * so this must not be an irq - */ - local_lock(&s->cpu_sheaves->lock); + if (!local_trylock(&s->cpu_sheaves->lock)) + goto barn_put; pcs = this_cpu_ptr(s->cpu_sheaves); /* @@ -4626,6 +4661,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, return pcs; } +barn_put: barn_put_full_sheaf(barn, full); stat(s, BARN_PUT); @@ -4890,9 +4926,14 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_noprof); static int __prefill_sheaf_pfmemalloc(struct kmem_cache *s, struct slab_sheaf *sheaf, gfp_t gfp) { - int ret = 0; + gfp_t gfp_nomemalloc; + int ret; - ret = refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC); + gfp_nomemalloc = gfp | __GFP_NOMEMALLOC; + if (gfp_pfmemalloc_allowed(gfp)) + gfp_nomemalloc |= __GFP_NOWARN; + + ret = refill_sheaf(s, sheaf, gfp_nomemalloc); if (likely(!ret || !gfp_pfmemalloc_allowed(gfp))) return ret; @@ -5685,7 +5726,7 @@ __pcs_replace_full_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, if (put_fail) stat(s, BARN_PUT_FAIL); - if (!sheaf_flush_main(s)) + if (!sheaf_try_flush_main(s)) return NULL; if (!local_trylock(&s->cpu_sheaves->lock))
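The sheaf rework splits one batch step out of the old sheaf_flush_main() so that a blocking caller can loop to completion while an opportunistic caller backs off on contention. A runnable pthread analogue of that split (names and batch sizes are illustrative):

#include <pthread.h>
#include <stdbool.h>
#include <assert.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int pending = 10;       /* objects left to flush */

/* Called with `lock` held; flushes one bounded batch, returns unlocked. */
static unsigned int flush_one_batch_locked(void)
{
        unsigned int batch = pending > 4 ? 4 : pending;
        unsigned int remaining;

        pending -= batch;
        remaining = pending;
        pthread_mutex_unlock(&lock);
        return remaining;
}

/* Blocking variant: always runs to completion. */
static void flush_all(void)
{
        unsigned int remaining;

        do {
                pthread_mutex_lock(&lock);
                remaining = flush_one_batch_locked();
        } while (remaining);
}

/* Opportunistic variant: true if at least one batch was flushed. */
static bool try_flush_all(void)
{
        unsigned int remaining;
        bool ret = false;

        do {
                if (pthread_mutex_trylock(&lock))
                        return ret;     /* contended: stop early */
                ret = true;
                remaining = flush_one_batch_locked();
        } while (remaining);
        return ret;
}

int main(void)
{
        flush_all();
        assert(pending == 0);
        return try_flush_all() ? 0 : 1;
}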
diff --git a/mm/swap_state.c b/mm/swap_state.c index 6d0eef7..48aff2c 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c
@@ -494,6 +494,10 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry, __folio_set_locked(folio); __folio_set_swapbacked(folio); + + if (!charged && mem_cgroup_swapin_charge_folio(folio, NULL, gfp, entry)) + goto failed; + for (;;) { ret = swap_cache_add_folio(folio, entry, &shadow); if (!ret) @@ -514,11 +518,6 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry, goto failed; } - if (!charged && mem_cgroup_swapin_charge_folio(folio, NULL, gfp, entry)) { - swap_cache_del_folio(folio); - goto failed; - } - memcg1_swapin(entry, folio_nr_pages(folio)); if (shadow) workingset_refault(folio, shadow);
diff --git a/mm/vma.c b/mm/vma.c index be64f78..c8df5f5 100644 --- a/mm/vma.c +++ b/mm/vma.c
@@ -2781,6 +2781,13 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr, if (map.charged) vm_unacct_memory(map.charged); abort_munmap: + /* + * This indicates that .mmap_prepare has set a new file, differing from + * desc->vm_file. But since we're aborting the operation, only the + * original file will be cleaned up. Ensure we clean up both. + */ + if (map.file_doesnt_need_get) + fput(map.file); vms_abort_munmap_vmas(&map.vms, &map.mas_detach); return error; }
diff --git a/mm/zswap.c b/mm/zswap.c index e6ec329..16b2ef7 100644 --- a/mm/zswap.c +++ b/mm/zswap.c
@@ -942,9 +942,15 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio) /* zswap entries of length PAGE_SIZE are not compressed. */ if (entry->length == PAGE_SIZE) { + void *dst; + WARN_ON_ONCE(input->length != PAGE_SIZE); - memcpy_from_sglist(kmap_local_folio(folio, 0), input, 0, PAGE_SIZE); + + dst = kmap_local_folio(folio, 0); + memcpy_from_sglist(dst, input, 0, PAGE_SIZE); dlen = PAGE_SIZE; + kunmap_local(dst); + flush_dcache_folio(folio); } else { sg_init_table(&output, 1); sg_set_folio(&output, folio, PAGE_SIZE, 0);
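The zswap fix restores the basic highmem contract, condensed below as an in-tree style sketch grounded directly in the hunk (not runnable stand-alone): every kmap_local_folio() needs a matching kunmap_local() on the same pointer, and data written through the temporary kernel mapping must be dcache-flushed before user mappings may observe it.

void *dst = kmap_local_folio(folio, 0);         /* map page 0 of the folio */

memcpy_from_sglist(dst, input, 0, PAGE_SIZE);   /* write via kernel alias */
kunmap_local(dst);                              /* drop the mapping */
flush_dcache_folio(folio);                      /* keep d-cache coherent */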
diff --git a/net/atm/lec.c b/net/atm/lec.c index a107375c..10e260a 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c
@@ -154,10 +154,19 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) /* 0x01 is topology change */ priv = netdev_priv(dev); - atm_force_charge(priv->lecd, skb2->truesize); - sk = sk_atm(priv->lecd); - skb_queue_tail(&sk->sk_receive_queue, skb2); - sk->sk_data_ready(sk); + struct atm_vcc *vcc; + + rcu_read_lock(); + vcc = rcu_dereference(priv->lecd); + if (vcc) { + atm_force_charge(vcc, skb2->truesize); + sk = sk_atm(vcc); + skb_queue_tail(&sk->sk_receive_queue, skb2); + sk->sk_data_ready(sk); + } else { + dev_kfree_skb(skb2); + } + rcu_read_unlock(); } } #endif /* IS_ENABLED(CONFIG_BRIDGE) */ @@ -216,7 +225,7 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb, int is_rdesc; pr_debug("called\n"); - if (!priv->lecd) { + if (!rcu_access_pointer(priv->lecd)) { pr_info("%s:No lecd attached\n", dev->name); dev->stats.tx_errors++; netif_stop_queue(dev); @@ -449,10 +458,19 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) break; skb2->len = sizeof(struct atmlec_msg); skb_copy_to_linear_data(skb2, mesg, sizeof(*mesg)); - atm_force_charge(priv->lecd, skb2->truesize); - sk = sk_atm(priv->lecd); - skb_queue_tail(&sk->sk_receive_queue, skb2); - sk->sk_data_ready(sk); + struct atm_vcc *vcc; + + rcu_read_lock(); + vcc = rcu_dereference(priv->lecd); + if (vcc) { + atm_force_charge(vcc, skb2->truesize); + sk = sk_atm(vcc); + skb_queue_tail(&sk->sk_receive_queue, skb2); + sk->sk_data_ready(sk); + } else { + dev_kfree_skb(skb2); + } + rcu_read_unlock(); } } #endif /* IS_ENABLED(CONFIG_BRIDGE) */ @@ -468,23 +486,16 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) static void lec_atm_close(struct atm_vcc *vcc) { - struct sk_buff *skb; struct net_device *dev = (struct net_device *)vcc->proto_data; struct lec_priv *priv = netdev_priv(dev); - priv->lecd = NULL; + rcu_assign_pointer(priv->lecd, NULL); + synchronize_rcu(); /* Do something needful? 
*/ netif_stop_queue(dev); lec_arp_destroy(priv); - if (skb_peek(&sk_atm(vcc)->sk_receive_queue)) - pr_info("%s closing with messages pending\n", dev->name); - while ((skb = skb_dequeue(&sk_atm(vcc)->sk_receive_queue))) { - atm_return(vcc, skb->truesize); - dev_kfree_skb(skb); - } - pr_info("%s: Shut down!\n", dev->name); module_put(THIS_MODULE); } @@ -510,12 +521,14 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, const unsigned char *mac_addr, const unsigned char *atm_addr, struct sk_buff *data) { + struct atm_vcc *vcc; struct sock *sk; struct sk_buff *skb; struct atmlec_msg *mesg; - if (!priv || !priv->lecd) + if (!priv || !rcu_access_pointer(priv->lecd)) return -1; + skb = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC); if (!skb) return -1; @@ -532,18 +545,27 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, if (atm_addr) memcpy(&mesg->content.normal.atm_addr, atm_addr, ATM_ESA_LEN); - atm_force_charge(priv->lecd, skb->truesize); - sk = sk_atm(priv->lecd); + rcu_read_lock(); + vcc = rcu_dereference(priv->lecd); + if (!vcc) { + rcu_read_unlock(); + kfree_skb(skb); + return -1; + } + + atm_force_charge(vcc, skb->truesize); + sk = sk_atm(vcc); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); if (data != NULL) { pr_debug("about to send %d bytes of data\n", data->len); - atm_force_charge(priv->lecd, data->truesize); + atm_force_charge(vcc, data->truesize); skb_queue_tail(&sk->sk_receive_queue, data); sk->sk_data_ready(sk); } + rcu_read_unlock(); return 0; } @@ -618,7 +640,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) atm_return(vcc, skb->truesize); if (*(__be16 *) skb->data == htons(priv->lecid) || - !priv->lecd || !(dev->flags & IFF_UP)) { + !rcu_access_pointer(priv->lecd) || !(dev->flags & IFF_UP)) { /* * Probably looping back, or if lecd is missing, * lecd has gone down @@ -753,12 +775,12 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) priv = netdev_priv(dev_lec[i]); } else { priv = netdev_priv(dev_lec[i]); - if (priv->lecd) + if (rcu_access_pointer(priv->lecd)) return -EADDRINUSE; } lec_arp_init(priv); priv->itfnum = i; /* LANE2 addition */ - priv->lecd = vcc; + rcu_assign_pointer(priv->lecd, vcc); vcc->dev = &lecatm_dev; vcc_insert_socket(sk_atm(vcc)); @@ -1260,24 +1282,28 @@ static void lec_arp_clear_vccs(struct lec_arp_table *entry) struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc); struct net_device *dev = (struct net_device *)vcc->proto_data; - vcc->pop = vpriv->old_pop; - if (vpriv->xoff) - netif_wake_queue(dev); - kfree(vpriv); - vcc->user_back = NULL; - vcc->push = entry->old_push; - vcc_release_async(vcc, -EPIPE); + if (vpriv) { + vcc->pop = vpriv->old_pop; + if (vpriv->xoff) + netif_wake_queue(dev); + kfree(vpriv); + vcc->user_back = NULL; + vcc->push = entry->old_push; + vcc_release_async(vcc, -EPIPE); + } entry->vcc = NULL; } if (entry->recv_vcc) { struct atm_vcc *vcc = entry->recv_vcc; struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc); - kfree(vpriv); - vcc->user_back = NULL; + if (vpriv) { + kfree(vpriv); + vcc->user_back = NULL; - entry->recv_vcc->push = entry->old_recv_push; - vcc_release_async(entry->recv_vcc, -EPIPE); + entry->recv_vcc->push = entry->old_recv_push; + vcc_release_async(entry->recv_vcc, -EPIPE); + } entry->recv_vcc = NULL; } }
diff --git a/net/atm/lec.h b/net/atm/lec.h index be0e266..ec85709 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h
@@ -91,7 +91,7 @@ struct lec_priv { */ spinlock_t lec_arp_lock; struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */ - struct atm_vcc *lecd; + struct atm_vcc __rcu *lecd; struct delayed_work lec_arp_work; /* C10 */ unsigned int maximum_unknown_frame_count; /*
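The lec conversion above is the standard RCU publish/read pattern: the writer installs or clears the pointer with rcu_assign_pointer() and, on teardown, waits out in-flight readers with synchronize_rcu(); readers sample the pointer under rcu_read_lock() via rcu_dereference() and must tolerate NULL. A minimal kernel-style sketch of that pattern (my_priv/my_cfg are hypothetical names; this is not buildable outside a kernel tree):

	struct my_cfg { int val; };

	struct my_priv {
		struct my_cfg __rcu *cfg;	/* published pointer */
	};

	/* reader: safe against a concurrent my_teardown() */
	static int my_read(struct my_priv *p)
	{
		struct my_cfg *cfg;
		int val = -1;

		rcu_read_lock();
		cfg = rcu_dereference(p->cfg);	/* may be NULL after teardown */
		if (cfg)
			val = cfg->val;
		rcu_read_unlock();
		return val;
	}

	/* writer: unpublish first, then wait for every reader to drain */
	static void my_teardown(struct my_priv *p)
	{
		struct my_cfg *old = rcu_dereference_protected(p->cfg, 1);

		rcu_assign_pointer(p->cfg, NULL);
		synchronize_rcu();	/* no reader can still hold 'old' */
		kfree(old);
	}

lec_atm_close() does not free the VCC itself (its lifetime is managed by the ATM socket layer); the unpublish-then-synchronize ordering is the part the fix relies on.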
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index b75c222..f28e9cb 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c
@@ -473,6 +473,9 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, if (aggregated_bytes > max_bytes) return false; + if (skb_tailroom(forw_packet->skb) < packet_len) + return false; + if (packet_num >= BATADV_MAX_AGGREGATION_PACKETS) return false;
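The aggregation fix above simply refuses to aggregate when the pre-allocated skb cannot hold the next OGM. As a general rule, any skb_put()-style append should be preceded by a skb_tailroom() check; a hedged kernel-style sketch (my_append is hypothetical):

	/* append len bytes only if the skb still has room for them */
	static int my_append(struct sk_buff *skb, const void *data,
			     unsigned int len)
	{
		if (skb_tailroom(skb) < len)
			return -ENOSPC;	/* caller starts a fresh aggregate */

		skb_put_data(skb, data, len);	/* safe: room checked above */
		return 0;
	}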
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 2ce4e5b..fdc2abe 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c
@@ -111,7 +111,15 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh, /* unsupported WiFi driver version */ goto default_throughput; - real_netdev = batadv_get_real_netdev(hard_iface->net_dev); + /* only use rtnl_trylock because the elp worker will be cancelled while + * the rntl_lock is held. the cancel_delayed_work_sync() would otherwise + * wait forever when the elp work_item was started and it is then also + * trying to rtnl_lock + */ + if (!rtnl_trylock()) + return false; + real_netdev = __batadv_get_real_netdev(hard_iface->net_dev); + rtnl_unlock(); if (!real_netdev) goto default_throughput;
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 7b7640f..d6732c3 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c
@@ -204,7 +204,7 @@ static bool batadv_is_valid_iface(const struct net_device *net_dev) } /** - * batadv_get_real_netdevice() - check if the given netdev struct is a virtual + * __batadv_get_real_netdev() - check if the given netdev struct is a virtual * interface on top of another 'real' interface * @netdev: the device to check * @@ -214,7 +214,7 @@ static bool batadv_is_valid_iface(const struct net_device *net_dev) * Return: the 'real' net device or the original net device and NULL in case * of an error. */ -static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) +struct net_device *__batadv_get_real_netdev(struct net_device *netdev) { struct batadv_hard_iface *hard_iface = NULL; struct net_device *real_netdev = NULL; @@ -267,7 +267,7 @@ struct net_device *batadv_get_real_netdev(struct net_device *net_device) struct net_device *real_netdev; rtnl_lock(); - real_netdev = batadv_get_real_netdevice(net_device); + real_netdev = __batadv_get_real_netdev(net_device); rtnl_unlock(); return real_netdev; @@ -336,7 +336,7 @@ static u32 batadv_wifi_flags_evaluate(struct net_device *net_device) if (batadv_is_cfg80211_netdev(net_device)) wifi_flags |= BATADV_HARDIF_WIFI_CFG80211_DIRECT; - real_netdev = batadv_get_real_netdevice(net_device); + real_netdev = __batadv_get_real_netdev(net_device); if (!real_netdev) return wifi_flags;
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 9db8a31..9ba8fb2 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h
@@ -67,6 +67,7 @@ enum batadv_hard_if_bcast { extern struct notifier_block batadv_hard_if_notifier; +struct net_device *__batadv_get_real_netdev(struct net_device *net_device); struct net_device *batadv_get_real_netdev(struct net_device *net_device); bool batadv_is_cfg80211_hardif(struct batadv_hard_iface *hard_iface); bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface);
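The rename also follows the usual kernel naming convention for lock variants: the double-underscore helper assumes its caller already holds the lock (RTNL here), while the plain-named wrapper takes and releases it. A sketch of the pairing (my_lookup is hypothetical):

	/* __my_lookup: caller must hold RTNL */
	static struct net_device *__my_lookup(struct net_device *dev)
	{
		ASSERT_RTNL();		/* catches callers that forgot */
		/* ... walk lower devices under RTNL ... */
		return dev;
	}

	struct net_device *my_lookup(struct net_device *dev)
	{
		struct net_device *real;

		rtnl_lock();
		real = __my_lookup(dev);
		rtnl_unlock();
		return real;
	}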
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 4719dac0..11d3ad8 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c
@@ -1843,9 +1843,13 @@ static int set_cig_params_sync(struct hci_dev *hdev, void *data) u8 aux_num_cis = 0; u8 cis_id; + hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_cig(hdev, cig_id); - if (!conn) + if (!conn) { + hci_dev_unlock(hdev); return 0; + } qos = &conn->iso_qos; pdu->cig_id = cig_id; @@ -1884,6 +1888,8 @@ static int set_cig_params_sync(struct hci_dev *hdev, void *data) } pdu->num_cis = aux_num_cis; + hci_dev_unlock(hdev); + if (!pdu->num_cis) return 0; @@ -1944,6 +1950,8 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos) return false; done: + conn->iso_qos = *qos; + if (hci_cmd_sync_queue(hdev, set_cig_params_sync, UINT_PTR(qos->ucast.cig), NULL) < 0) return false; @@ -2013,8 +2021,6 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, } hci_conn_hold(cis); - - cis->iso_qos = *qos; cis->state = BT_BOUND; return cis; @@ -3095,7 +3101,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) * hci_connect_le serializes the connection attempts so only one * connection can be in BT_CONNECT at time. */ - if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) { + if (conn->state == BT_CONNECT && READ_ONCE(hdev->req_status) == HCI_REQ_PEND) { switch (hci_skb_event(hdev->sent_cmd)) { case HCI_EV_CONN_COMPLETE: case HCI_EV_LE_CONN_COMPLETE:
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 31308c1..01f8cee 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c
@@ -4126,7 +4126,7 @@ static int hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb); } - if (hdev->req_status == HCI_REQ_PEND && + if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND && !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { kfree_skb(hdev->req_skb); hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL);
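req_status is written by the completion path while waiters poll it, so the accesses are wrapped in READ_ONCE()/WRITE_ONCE() to stop the compiler from tearing, fusing, or caching the loads and stores; ordering is still supplied by the wait_event()/wake_up() pair. A runnable userspace analogue with C11 relaxed atomics (names are hypothetical):

	#include <stdatomic.h>

	enum { REQ_IDLE, REQ_PEND, REQ_DONE };

	static _Atomic int req_status = REQ_IDLE;

	/* completer thread: like WRITE_ONCE(), one untearable store */
	static void complete_req(void)
	{
		atomic_store_explicit(&req_status, REQ_DONE,
				      memory_order_relaxed);
	}

	/* waiter: like READ_ONCE() inside the wait condition */
	static int req_done(void)
	{
		return atomic_load_explicit(&req_status,
					    memory_order_relaxed) == REQ_DONE;
	}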
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 286529d..3ebc5e6 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c
@@ -80,6 +80,10 @@ static void *hci_le_ev_skb_pull(struct hci_dev *hdev, struct sk_buff *skb, return data; } +static void hci_store_wake_reason(struct hci_dev *hdev, + const bdaddr_t *bdaddr, u8 addr_type) + __must_hold(&hdev->lock); + static u8 hci_cc_inquiry_cancel(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -3111,6 +3115,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", status); hci_dev_lock(hdev); + hci_store_wake_reason(hdev, &ev->bdaddr, BDADDR_BREDR); /* Check for existing connection: * @@ -3274,6 +3279,10 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "bdaddr %pMR type 0x%x", &ev->bdaddr, ev->link_type); + hci_dev_lock(hdev); + hci_store_wake_reason(hdev, &ev->bdaddr, BDADDR_BREDR); + hci_dev_unlock(hdev); + /* Reject incoming connection from device with same BD ADDR against * CVE-2020-26555 */ @@ -5021,6 +5030,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", status); hci_dev_lock(hdev); + hci_store_wake_reason(hdev, &ev->bdaddr, BDADDR_BREDR); conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); if (!conn) { @@ -5713,6 +5723,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, int err; hci_dev_lock(hdev); + hci_store_wake_reason(hdev, bdaddr, bdaddr_type); /* All controllers implicitly stop advertising in the event of a * connection, so ensure that the state bit is cleared. @@ -6005,6 +6016,7 @@ static void hci_le_past_received_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); hci_dev_lock(hdev); + hci_store_wake_reason(hdev, &ev->bdaddr, ev->bdaddr_type); hci_dev_clear_flag(hdev, HCI_PA_SYNC); @@ -6403,6 +6415,8 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data, info->length + 1)) break; + hci_store_wake_reason(hdev, &info->bdaddr, info->bdaddr_type); + if (info->length <= max_adv_len(hdev)) { rssi = info->data[info->length]; process_adv_report(hdev, info->type, &info->bdaddr, @@ -6491,6 +6505,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, info->length)) break; + hci_store_wake_reason(hdev, &info->bdaddr, info->bdaddr_type); + evt_type = __le16_to_cpu(info->type) & LE_EXT_ADV_EVT_TYPE_MASK; legacy_evt_type = ext_evt_type_to_legacy(hdev, evt_type); @@ -6536,6 +6552,7 @@ static void hci_le_pa_sync_established_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); hci_dev_lock(hdev); + hci_store_wake_reason(hdev, &ev->bdaddr, ev->bdaddr_type); hci_dev_clear_flag(hdev, HCI_PA_SYNC); @@ -6767,25 +6784,31 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, latency = le16_to_cpu(ev->latency); timeout = le16_to_cpu(ev->timeout); + hci_dev_lock(hdev); + hcon = hci_conn_hash_lookup_handle(hdev, handle); - if (!hcon || hcon->state != BT_CONNECTED) - return send_conn_param_neg_reply(hdev, handle, - HCI_ERROR_UNKNOWN_CONN_ID); + if (!hcon || hcon->state != BT_CONNECTED) { + send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_UNKNOWN_CONN_ID); + goto unlock; + } - if (max > hcon->le_conn_max_interval) - return send_conn_param_neg_reply(hdev, handle, - HCI_ERROR_INVALID_LL_PARAMS); + if (max > hcon->le_conn_max_interval) { + send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_INVALID_LL_PARAMS); + goto unlock; + } - if (hci_check_conn_params(min, max, latency, timeout)) - return send_conn_param_neg_reply(hdev, handle, - 
HCI_ERROR_INVALID_LL_PARAMS); + if (hci_check_conn_params(min, max, latency, timeout)) { + send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_INVALID_LL_PARAMS); + goto unlock; + } if (hcon->role == HCI_ROLE_MASTER) { struct hci_conn_params *params; u8 store_hint; - hci_dev_lock(hdev); - params = hci_conn_params_lookup(hdev, &hcon->dst, hcon->dst_type); if (params) { @@ -6798,8 +6821,6 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, store_hint = 0x00; } - hci_dev_unlock(hdev); - mgmt_new_conn_param(hdev, &hcon->dst, hcon->dst_type, store_hint, min, max, latency, timeout); } @@ -6813,6 +6834,9 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, cp.max_ce_len = 0; hci_send_cmd(hdev, HCI_OP_LE_CONN_PARAM_REQ_REPLY, sizeof(cp), &cp); + +unlock: + hci_dev_unlock(hdev); } static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data, @@ -6834,6 +6858,8 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data, for (i = 0; i < ev->num; i++) { struct hci_ev_le_direct_adv_info *info = &ev->info[i]; + hci_store_wake_reason(hdev, &info->bdaddr, info->bdaddr_type); + process_adv_report(hdev, info->type, &info->bdaddr, info->bdaddr_type, &info->direct_addr, info->direct_addr_type, HCI_ADV_PHY_1M, 0, @@ -7517,73 +7543,29 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, return true; } -static void hci_store_wake_reason(struct hci_dev *hdev, u8 event, - struct sk_buff *skb) +static void hci_store_wake_reason(struct hci_dev *hdev, + const bdaddr_t *bdaddr, u8 addr_type) + __must_hold(&hdev->lock) { - struct hci_ev_le_advertising_info *adv; - struct hci_ev_le_direct_adv_info *direct_adv; - struct hci_ev_le_ext_adv_info *ext_adv; - const struct hci_ev_conn_complete *conn_complete = (void *)skb->data; - const struct hci_ev_conn_request *conn_request = (void *)skb->data; - - hci_dev_lock(hdev); + lockdep_assert_held(&hdev->lock); /* If we are currently suspended and this is the first BT event seen, * save the wake reason associated with the event. */ if (!hdev->suspended || hdev->wake_reason) - goto unlock; + return; + + if (!bdaddr) { + hdev->wake_reason = MGMT_WAKE_REASON_UNEXPECTED; + return; + } /* Default to remote wake. Values for wake_reason are documented in the * Bluez mgmt api docs. */ hdev->wake_reason = MGMT_WAKE_REASON_REMOTE_WAKE; - - /* Once configured for remote wakeup, we should only wake up for - * reconnections. It's useful to see which device is waking us up so - * keep track of the bdaddr of the connection event that woke us up. 
- */ - if (event == HCI_EV_CONN_REQUEST) { - bacpy(&hdev->wake_addr, &conn_request->bdaddr); - hdev->wake_addr_type = BDADDR_BREDR; - } else if (event == HCI_EV_CONN_COMPLETE) { - bacpy(&hdev->wake_addr, &conn_complete->bdaddr); - hdev->wake_addr_type = BDADDR_BREDR; - } else if (event == HCI_EV_LE_META) { - struct hci_ev_le_meta *le_ev = (void *)skb->data; - u8 subevent = le_ev->subevent; - u8 *ptr = &skb->data[sizeof(*le_ev)]; - u8 num_reports = *ptr; - - if ((subevent == HCI_EV_LE_ADVERTISING_REPORT || - subevent == HCI_EV_LE_DIRECT_ADV_REPORT || - subevent == HCI_EV_LE_EXT_ADV_REPORT) && - num_reports) { - adv = (void *)(ptr + 1); - direct_adv = (void *)(ptr + 1); - ext_adv = (void *)(ptr + 1); - - switch (subevent) { - case HCI_EV_LE_ADVERTISING_REPORT: - bacpy(&hdev->wake_addr, &adv->bdaddr); - hdev->wake_addr_type = adv->bdaddr_type; - break; - case HCI_EV_LE_DIRECT_ADV_REPORT: - bacpy(&hdev->wake_addr, &direct_adv->bdaddr); - hdev->wake_addr_type = direct_adv->bdaddr_type; - break; - case HCI_EV_LE_EXT_ADV_REPORT: - bacpy(&hdev->wake_addr, &ext_adv->bdaddr); - hdev->wake_addr_type = ext_adv->bdaddr_type; - break; - } - } - } else { - hdev->wake_reason = MGMT_WAKE_REASON_UNEXPECTED; - } - -unlock: - hci_dev_unlock(hdev); + bacpy(&hdev->wake_addr, bdaddr); + hdev->wake_addr_type = addr_type; } #define HCI_EV_VL(_op, _func, _min_len, _max_len) \ @@ -7830,14 +7812,15 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) skb_pull(skb, HCI_EVENT_HDR_SIZE); - /* Store wake reason if we're suspended */ - hci_store_wake_reason(hdev, event, skb); - bt_dev_dbg(hdev, "event 0x%2.2x", event); hci_event_func(hdev, event, skb, &opcode, &status, &req_complete, &req_complete_skb); + hci_dev_lock(hdev); + hci_store_wake_reason(hdev, NULL, 0); + hci_dev_unlock(hdev); + if (req_complete) { req_complete(hdev, status, opcode); } else if (req_complete_skb) {
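hci_store_wake_reason() now carries both a static and a dynamic locking contract: the __must_hold(&hdev->lock) annotation is checked by sparse, and lockdep_assert_held() splats at runtime under lockdep if a caller forgets the lock. The idiom in miniature (my_dev is hypothetical):

	static void my_update(struct my_dev *d)
		__must_hold(&d->lock)
	{
		lockdep_assert_held(&d->lock);	/* runtime check */
		d->counter++;			/* safe: lock is held */
	}

	/* callers:
	 *	mutex_lock(&d->lock);
	 *	my_update(d);
	 *	mutex_unlock(&d->lock);
	 */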
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 4e7bf63..0290dea 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c
@@ -2166,6 +2166,7 @@ static void hci_sock_destruct(struct sock *sk) mgmt_cleanup(sk); skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); + skb_queue_purge(&sk->sk_error_queue); } static const struct proto_ops hci_sock_ops = {
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 44c205f..919ec27 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c
@@ -25,11 +25,11 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, { bt_dev_dbg(hdev, "result 0x%2.2x", result); - if (hdev->req_status != HCI_REQ_PEND) + if (READ_ONCE(hdev->req_status) != HCI_REQ_PEND) return; hdev->req_result = result; - hdev->req_status = HCI_REQ_DONE; + WRITE_ONCE(hdev->req_status, HCI_REQ_DONE); /* Free the request command so it is not used as response */ kfree_skb(hdev->req_skb); @@ -167,20 +167,20 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, hci_cmd_sync_add(&req, opcode, plen, param, event, sk); - hdev->req_status = HCI_REQ_PEND; + WRITE_ONCE(hdev->req_status, HCI_REQ_PEND); err = hci_req_sync_run(&req); if (err < 0) return ERR_PTR(err); err = wait_event_interruptible_timeout(hdev->req_wait_q, - hdev->req_status != HCI_REQ_PEND, + READ_ONCE(hdev->req_status) != HCI_REQ_PEND, timeout); if (err == -ERESTARTSYS) return ERR_PTR(-EINTR); - switch (hdev->req_status) { + switch (READ_ONCE(hdev->req_status)) { case HCI_REQ_DONE: err = -bt_to_errno(hdev->req_result); break; @@ -194,7 +194,7 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, break; } - hdev->req_status = 0; + WRITE_ONCE(hdev->req_status, 0); hdev->req_result = 0; skb = hdev->req_rsp; hdev->req_rsp = NULL; @@ -665,9 +665,9 @@ void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); - if (hdev->req_status == HCI_REQ_PEND) { + if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND) { hdev->req_result = err; - hdev->req_status = HCI_REQ_CANCELED; + WRITE_ONCE(hdev->req_status, HCI_REQ_CANCELED); queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work); } @@ -683,12 +683,12 @@ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); - if (hdev->req_status == HCI_REQ_PEND) { + if (READ_ONCE(hdev->req_status) == HCI_REQ_PEND) { /* req_result is __u32 so error must be positive to be properly * propagated. */ hdev->req_result = err < 0 ? -err : err; - hdev->req_status = HCI_REQ_CANCELED; + WRITE_ONCE(hdev->req_status, HCI_REQ_CANCELED); wake_up_interruptible(&hdev->req_wait_q); } @@ -780,7 +780,7 @@ int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { if (hci_cmd_sync_lookup_entry(hdev, func, data, destroy)) - return 0; + return -EEXIST; return hci_cmd_sync_queue(hdev, func, data, destroy); } @@ -801,8 +801,15 @@ int hci_cmd_sync_run(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, return -ENETDOWN; /* If on cmd_sync_work then run immediately otherwise queue */ - if (current_work() == &hdev->cmd_sync_work) - return func(hdev, data); + if (current_work() == &hdev->cmd_sync_work) { + int err; + + err = func(hdev, data); + if (destroy) + destroy(hdev, data, err); + + return 0; + } return hci_cmd_sync_submit(hdev, func, data, destroy); } @@ -3255,6 +3262,8 @@ static int update_passive_scan_sync(struct hci_dev *hdev, void *data) int hci_update_passive_scan(struct hci_dev *hdev) { + int err; + /* Only queue if it would have any effect */ if (!test_bit(HCI_UP, &hdev->flags) || test_bit(HCI_INIT, &hdev->flags) || @@ -3264,8 +3273,9 @@ int hci_update_passive_scan(struct hci_dev *hdev) hci_dev_test_flag(hdev, HCI_UNREGISTER)) return 0; - return hci_cmd_sync_queue_once(hdev, update_passive_scan_sync, NULL, - NULL); + err = hci_cmd_sync_queue_once(hdev, update_passive_scan_sync, NULL, + NULL); + return (err == -EEXIST) ? 
0 : err; } int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val) @@ -4592,7 +4602,7 @@ static int hci_le_set_host_features_sync(struct hci_dev *hdev) { int err; - if (iso_capable(hdev)) { + if (cis_capable(hdev)) { /* Connected Isochronous Channels (Host Support) */ err = hci_le_set_host_feature_sync(hdev, 32, (iso_enabled(hdev) ? 0x01 : @@ -6627,8 +6637,8 @@ static int hci_le_create_conn_sync(struct hci_dev *hdev, void *data) * state. */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { - hci_scan_disable_sync(hdev); hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED); + hci_scan_disable_sync(hdev); } /* Update random address, but set require_privacy to false so @@ -6958,8 +6968,11 @@ static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data) int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn) { - return hci_cmd_sync_queue_once(hdev, hci_acl_create_conn_sync, conn, - NULL); + int err; + + err = hci_cmd_sync_queue_once(hdev, hci_acl_create_conn_sync, conn, + NULL); + return (err == -EEXIST) ? 0 : err; } static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) @@ -6995,8 +7008,11 @@ static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn) { - return hci_cmd_sync_queue_once(hdev, hci_le_create_conn_sync, conn, - create_le_conn_complete); + int err; + + err = hci_cmd_sync_queue_once(hdev, hci_le_create_conn_sync, conn, + create_le_conn_complete); + return (err == -EEXIST) ? 0 : err; } int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn) @@ -7203,8 +7219,11 @@ static int hci_le_pa_create_sync(struct hci_dev *hdev, void *data) int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn) { - return hci_cmd_sync_queue_once(hdev, hci_le_pa_create_sync, conn, - create_pa_complete); + int err; + + err = hci_cmd_sync_queue_once(hdev, hci_le_pa_create_sync, conn, + create_pa_complete); + return (err == -EEXIST) ? 0 : err; } static void create_big_complete(struct hci_dev *hdev, void *data, int err) @@ -7222,7 +7241,8 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err) static int hci_le_big_create_sync(struct hci_dev *hdev, void *data) { - DEFINE_FLEX(struct hci_cp_le_big_create_sync, cp, bis, num_bis, 0x11); + DEFINE_FLEX(struct hci_cp_le_big_create_sync, cp, bis, num_bis, + HCI_MAX_ISO_BIS); struct hci_conn *conn = data; struct bt_iso_qos *qos = &conn->iso_qos; int err; @@ -7266,8 +7286,11 @@ static int hci_le_big_create_sync(struct hci_dev *hdev, void *data) int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn) { - return hci_cmd_sync_queue_once(hdev, hci_le_big_create_sync, conn, - create_big_complete); + int err; + + err = hci_cmd_sync_queue_once(hdev, hci_le_big_create_sync, conn, + create_big_complete); + return (err == -EEXIST) ? 0 : err; } struct past_data { @@ -7359,7 +7382,7 @@ int hci_past_sync(struct hci_conn *conn, struct hci_conn *le) if (err) kfree(data); - return err; + return (err == -EEXIST) ? 0 : err; } static void le_read_features_complete(struct hci_dev *hdev, void *data, int err) @@ -7368,10 +7391,8 @@ static void le_read_features_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); - if (err == -ECANCELED) - return; - hci_conn_drop(conn); + hci_conn_put(conn); } static int hci_le_read_all_remote_features_sync(struct hci_dev *hdev, @@ -7438,15 +7459,20 @@ int hci_le_read_remote_features(struct hci_conn *conn) * role is possible. 
Otherwise just transition into the * connected state without requesting the remote features. */ - if (conn->out || (hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES)) + if (conn->out || (hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES)) { err = hci_cmd_sync_queue_once(hdev, hci_le_read_remote_features_sync, - hci_conn_hold(conn), + hci_conn_hold(hci_conn_get(conn)), le_read_features_complete); - else + if (err) { + hci_conn_drop(conn); + hci_conn_put(conn); + } + } else { err = -EOPNOTSUPP; + } - return err; + return (err == -EEXIST) ? 0 : err; } static void pkt_type_changed(struct hci_dev *hdev, void *data, int err) @@ -7472,6 +7498,7 @@ int hci_acl_change_pkt_type(struct hci_conn *conn, u16 pkt_type) { struct hci_dev *hdev = conn->hdev; struct hci_cp_change_conn_ptype *cp; + int err; cp = kmalloc_obj(*cp); if (!cp) @@ -7480,8 +7507,12 @@ int hci_acl_change_pkt_type(struct hci_conn *conn, u16 pkt_type) cp->handle = cpu_to_le16(conn->handle); cp->pkt_type = cpu_to_le16(pkt_type); - return hci_cmd_sync_queue_once(hdev, hci_change_conn_ptype_sync, cp, - pkt_type_changed); + err = hci_cmd_sync_queue_once(hdev, hci_change_conn_ptype_sync, cp, + pkt_type_changed); + if (err) + kfree(cp); + + return (err == -EEXIST) ? 0 : err; } static void le_phy_update_complete(struct hci_dev *hdev, void *data, int err) @@ -7507,6 +7538,7 @@ int hci_le_set_phy(struct hci_conn *conn, u8 tx_phys, u8 rx_phys) { struct hci_dev *hdev = conn->hdev; struct hci_cp_le_set_phy *cp; + int err; cp = kmalloc_obj(*cp); if (!cp) @@ -7517,6 +7549,10 @@ int hci_le_set_phy(struct hci_conn *conn, u8 tx_phys, u8 rx_phys) cp->tx_phys = tx_phys; cp->rx_phys = rx_phys; - return hci_cmd_sync_queue_once(hdev, hci_le_set_phy_sync, cp, - le_phy_update_complete); + err = hci_cmd_sync_queue_once(hdev, hci_le_set_phy_sync, cp, + le_phy_update_complete); + if (err) + kfree(cp); + + return (err == -EEXIST) ? 0 : err; }
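hci_cmd_sync_queue_once() now reports -EEXIST instead of silently returning 0, and each caller decides what a duplicate means; for the connect paths a duplicate simply means the work is already on its way, so they translate it back to success. The calling convention, sketched with a hypothetical queue_once():

	/* 0 = queued, -EEXIST = an identical entry is already pending */
	int queue_once(struct my_ctx *c, my_work_fn_t fn, void *data);

	static int start_connect(struct my_ctx *c, void *conn)
	{
		int err = queue_once(c, connect_work, conn);

		/* already queued: the connect will happen anyway */
		return (err == -EEXIST) ? 0 : err;
	}

Callers that allocate a payload (hci_acl_change_pkt_type(), hci_le_set_phy()) additionally free it on any nonzero return, since a rejected entry never reaches the destroy callback.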
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 6fe8152..7bcf8c5 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c
@@ -986,7 +986,8 @@ static void session_free(struct kref *ref) skb_queue_purge(&session->intr_transmit); fput(session->intr_sock->file); fput(session->ctrl_sock->file); - l2cap_conn_put(session->conn); + if (session->conn) + l2cap_conn_put(session->conn); kfree(session); } @@ -1164,6 +1165,15 @@ static void hidp_session_remove(struct l2cap_conn *conn, down_write(&hidp_session_sem); + /* Drop L2CAP reference immediately to indicate that + * l2cap_unregister_user() shall not be called as it is already + * considered removed. + */ + if (session->conn) { + l2cap_conn_put(session->conn); + session->conn = NULL; + } + hidp_session_terminate(session); cancel_work_sync(&session->dev_init); @@ -1301,7 +1311,9 @@ static int hidp_session_thread(void *arg) * Instead, this call has the same semantics as if user-space tried to * delete the session. */ - l2cap_unregister_user(session->conn, &session->user); + if (session->conn) + l2cap_unregister_user(session->conn, &session->user); + hidp_session_put(session); module_put_and_kthread_exit(0);
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index ec61db89..be145e2 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c
@@ -746,6 +746,7 @@ static void iso_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); + skb_queue_purge(&sk->sk_error_queue); } static void iso_sock_cleanup_listen(struct sock *parent)
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 2ad1cb4..95c65fe 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c
@@ -926,16 +926,39 @@ int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator) static int l2cap_get_ident(struct l2cap_conn *conn) { + u8 max; + int ident; + /* LE link does not support tools like l2ping so use the full range */ if (conn->hcon->type == LE_LINK) - return ida_alloc_range(&conn->tx_ida, 1, 255, GFP_ATOMIC); - + max = 255; /* Get next available identificator. * 1 - 128 are used by kernel. * 129 - 199 are reserved. * 200 - 254 are used by utilities like l2ping, etc. */ - return ida_alloc_range(&conn->tx_ida, 1, 128, GFP_ATOMIC); + else + max = 128; + + /* Allocate ident using min as last used + 1 (cyclic) */ + ident = ida_alloc_range(&conn->tx_ida, READ_ONCE(conn->tx_ident) + 1, + max, GFP_ATOMIC); + /* Force min 1 to start over */ + if (ident <= 0) { + ident = ida_alloc_range(&conn->tx_ida, 1, max, GFP_ATOMIC); + if (ident <= 0) { + /* If all idents are in use, log an error, this is + * extremely unlikely to happen and would indicate a bug + * in the code that idents are not being freed properly. + */ + BT_ERR("Unable to allocate ident: %d", ident); + return 0; + } + } + + WRITE_ONCE(conn->tx_ident, ident); + + return ident; } static void l2cap_send_acl(struct l2cap_conn *conn, struct sk_buff *skb, @@ -1678,17 +1701,15 @@ static void l2cap_info_timeout(struct work_struct *work) int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user) { - struct hci_dev *hdev = conn->hcon->hdev; int ret; /* We need to check whether l2cap_conn is registered. If it is not, we - * must not register the l2cap_user. l2cap_conn_del() is unregisters - * l2cap_conn objects, but doesn't provide its own locking. Instead, it - * relies on the parent hci_conn object to be locked. This itself relies - * on the hci_dev object to be locked. So we must lock the hci device - * here, too. */ + * must not register the l2cap_user. l2cap_conn_del() unregisters + * l2cap_conn objects under conn->lock, and we use the same lock here + * to protect access to conn->users and conn->hchan. 
+ */ - hci_dev_lock(hdev); + mutex_lock(&conn->lock); if (!list_empty(&user->list)) { ret = -EINVAL; @@ -1709,16 +1730,14 @@ int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user) ret = 0; out_unlock: - hci_dev_unlock(hdev); + mutex_unlock(&conn->lock); return ret; } EXPORT_SYMBOL(l2cap_register_user); void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user) { - struct hci_dev *hdev = conn->hcon->hdev; - - hci_dev_lock(hdev); + mutex_lock(&conn->lock); if (list_empty(&user->list)) goto out_unlock; @@ -1727,7 +1746,7 @@ void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user) user->remove(conn, user); out_unlock: - hci_dev_unlock(hdev); + mutex_unlock(&conn->lock); } EXPORT_SYMBOL(l2cap_unregister_user); @@ -1752,6 +1771,9 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); + disable_delayed_work_sync(&conn->info_timer); + disable_delayed_work_sync(&conn->id_addr_timer); + mutex_lock(&conn->lock); kfree_skb(conn->rx_skb); @@ -1767,8 +1789,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) ida_destroy(&conn->tx_ida); - cancel_delayed_work_sync(&conn->id_addr_timer); - l2cap_unregister_all_users(conn); /* Force the connection to be immediately dropped */ @@ -1787,9 +1807,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) l2cap_chan_put(chan); } - if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) - cancel_delayed_work_sync(&conn->info_timer); - hci_chan_del(conn->hchan); conn->hchan = NULL; @@ -2381,6 +2398,9 @@ static int l2cap_segment_sdu(struct l2cap_chan *chan, /* Remote device may have requested smaller PDUs */ pdu_len = min_t(size_t, pdu_len, chan->remote_mps); + if (!pdu_len) + return -EINVAL; + if (len <= pdu_len) { sar = L2CAP_SAR_UNSEGMENTED; sdu_len = 0; @@ -4316,14 +4336,16 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, if (test_bit(CONF_INPUT_DONE, &chan->conf_state)) { set_default_fcs(chan); - if (chan->mode == L2CAP_MODE_ERTM || - chan->mode == L2CAP_MODE_STREAMING) - err = l2cap_ertm_init(chan); + if (chan->state != BT_CONNECTED) { + if (chan->mode == L2CAP_MODE_ERTM || + chan->mode == L2CAP_MODE_STREAMING) + err = l2cap_ertm_init(chan); - if (err < 0) - l2cap_send_disconn_req(chan, -err); - else - l2cap_chan_ready(chan); + if (err < 0) + l2cap_send_disconn_req(chan, -err); + else + l2cap_chan_ready(chan); + } goto unlock; } @@ -4616,7 +4638,8 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, switch (type) { case L2CAP_IT_FEAT_MASK: - conn->feat_mask = get_unaligned_le32(rsp->data); + if (cmd_len >= sizeof(*rsp) + sizeof(u32)) + conn->feat_mask = get_unaligned_le32(rsp->data); if (conn->feat_mask & L2CAP_FEAT_FIXED_CHAN) { struct l2cap_info_req req; @@ -4635,7 +4658,8 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, break; case L2CAP_IT_FIXED_CHAN: - conn->remote_fixed_chan = rsp->data[0]; + if (cmd_len >= sizeof(*rsp) + sizeof(rsp->data[0])) + conn->remote_fixed_chan = rsp->data[0]; conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; @@ -4916,6 +4940,13 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, goto response_unlock; } + /* Check if Key Size is sufficient for the security level */ + if (!l2cap_check_enc_key_size(conn->hcon, pchan)) { + result = L2CAP_CR_LE_BAD_KEY_SIZE; + chan = NULL; + goto response_unlock; + } + /* Check for valid dynamic CID range */ if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) { result = 
L2CAP_CR_LE_INVALID_SCID; @@ -5051,18 +5082,28 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, struct l2cap_chan *chan, *pchan; u16 mtu, mps; __le16 psm; - u8 result, len = 0; - int i, num_scid; + u8 result, rsp_len = 0; + int i, num_scid = 0; bool defer = false; if (!enable_ecred) return -EINVAL; + memset(pdu, 0, sizeof(*pdu)); + if (cmd_len < sizeof(*req) || (cmd_len - sizeof(*req)) % sizeof(u16)) { result = L2CAP_CR_LE_INVALID_PARAMS; goto response; } + /* Check if there are no pending channels with the same ident */ + __l2cap_chan_list_id(conn, cmd->ident, l2cap_ecred_list_defer, + &num_scid); + if (num_scid) { + result = L2CAP_CR_LE_INVALID_PARAMS; + goto response; + } + cmd_len -= sizeof(*req); num_scid = cmd_len / sizeof(u16); @@ -5071,11 +5112,14 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, goto response; } + /* Always respond with the same number of scids as in the request */ + rsp_len = cmd_len; + mtu = __le16_to_cpu(req->mtu); mps = __le16_to_cpu(req->mps); if (mtu < L2CAP_ECRED_MIN_MTU || mps < L2CAP_ECRED_MIN_MPS) { - result = L2CAP_CR_LE_UNACCEPT_PARAMS; + result = L2CAP_CR_LE_INVALID_PARAMS; goto response; } @@ -5095,8 +5139,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps); - memset(pdu, 0, sizeof(*pdu)); - /* Check if we have socket listening on psm */ pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, &conn->hcon->dst, LE_LINK); @@ -5109,7 +5151,16 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, if (!smp_sufficient_security(conn->hcon, pchan->sec_level, SMP_ALLOW_STK)) { - result = L2CAP_CR_LE_AUTHENTICATION; + result = pchan->sec_level == BT_SECURITY_MEDIUM ? + L2CAP_CR_LE_ENCRYPTION : L2CAP_CR_LE_AUTHENTICATION; + goto unlock; + } + + /* Check if the listening channel has set an output MTU then the + * requested MTU shall be less than or equal to that value. 
+ */ + if (pchan->omtu && mtu < pchan->omtu) { + result = L2CAP_CR_LE_UNACCEPT_PARAMS; goto unlock; } @@ -5121,7 +5172,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, BT_DBG("scid[%d] 0x%4.4x", i, scid); pdu->dcid[i] = 0x0000; - len += sizeof(*pdu->dcid); /* Check for valid dynamic CID range */ if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) { @@ -5188,7 +5238,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, return 0; l2cap_send_cmd(conn, cmd->ident, L2CAP_ECRED_CONN_RSP, - sizeof(*pdu) + len, pdu); + sizeof(*pdu) + rsp_len, pdu); return 0; } @@ -5310,14 +5360,14 @@ static inline int l2cap_ecred_reconf_req(struct l2cap_conn *conn, struct l2cap_ecred_reconf_req *req = (void *) data; struct l2cap_ecred_reconf_rsp rsp; u16 mtu, mps, result; - struct l2cap_chan *chan; + struct l2cap_chan *chan[L2CAP_ECRED_MAX_CID] = {}; int i, num_scid; if (!enable_ecred) return -EINVAL; - if (cmd_len < sizeof(*req) || cmd_len - sizeof(*req) % sizeof(u16)) { - result = L2CAP_CR_LE_INVALID_PARAMS; + if (cmd_len < sizeof(*req) || (cmd_len - sizeof(*req)) % sizeof(u16)) { + result = L2CAP_RECONF_INVALID_CID; goto respond; } @@ -5327,42 +5377,69 @@ static inline int l2cap_ecred_reconf_req(struct l2cap_conn *conn, BT_DBG("mtu %u mps %u", mtu, mps); if (mtu < L2CAP_ECRED_MIN_MTU) { - result = L2CAP_RECONF_INVALID_MTU; + result = L2CAP_RECONF_INVALID_PARAMS; goto respond; } if (mps < L2CAP_ECRED_MIN_MPS) { - result = L2CAP_RECONF_INVALID_MPS; + result = L2CAP_RECONF_INVALID_PARAMS; goto respond; } cmd_len -= sizeof(*req); num_scid = cmd_len / sizeof(u16); + + if (num_scid > L2CAP_ECRED_MAX_CID) { + result = L2CAP_RECONF_INVALID_PARAMS; + goto respond; + } + result = L2CAP_RECONF_SUCCESS; + /* Check if each SCID, MTU and MPS are valid */ for (i = 0; i < num_scid; i++) { u16 scid; scid = __le16_to_cpu(req->scid[i]); - if (!scid) - return -EPROTO; - - chan = __l2cap_get_chan_by_dcid(conn, scid); - if (!chan) - continue; - - /* If the MTU value is decreased for any of the included - * channels, then the receiver shall disconnect all - * included channels. - */ - if (chan->omtu > mtu) { - BT_ERR("chan %p decreased MTU %u -> %u", chan, - chan->omtu, mtu); - result = L2CAP_RECONF_INVALID_MTU; + if (!scid) { + result = L2CAP_RECONF_INVALID_CID; + goto respond; } - chan->omtu = mtu; - chan->remote_mps = mps; + chan[i] = __l2cap_get_chan_by_dcid(conn, scid); + if (!chan[i]) { + result = L2CAP_RECONF_INVALID_CID; + goto respond; + } + + /* The MTU field shall be greater than or equal to the greatest + * current MTU size of these channels. + */ + if (chan[i]->omtu > mtu) { + BT_ERR("chan %p decreased MTU %u -> %u", chan[i], + chan[i]->omtu, mtu); + result = L2CAP_RECONF_INVALID_MTU; + goto respond; + } + + /* If more than one channel is being configured, the MPS field + * shall be greater than or equal to the current MPS size of + * each of these channels. If only one channel is being + * configured, the MPS field may be less than the current MPS + * of that channel. 
+ */ + if (chan[i]->remote_mps >= mps && i) { + BT_ERR("chan %p decreased MPS %u -> %u", chan[i], + chan[i]->remote_mps, mps); + result = L2CAP_RECONF_INVALID_MPS; + goto respond; + } + } + + /* Commit the new MTU and MPS values after checking they are valid */ + for (i = 0; i < num_scid; i++) { + chan[i]->omtu = mtu; + chan[i]->remote_mps = mps; } respond: @@ -5379,7 +5456,7 @@ static inline int l2cap_ecred_reconf_rsp(struct l2cap_conn *conn, u8 *data) { struct l2cap_chan *chan, *tmp; - struct l2cap_ecred_conn_rsp *rsp = (void *) data; + struct l2cap_ecred_reconf_rsp *rsp = (void *)data; u16 result; if (cmd_len < sizeof(*rsp)) @@ -5387,7 +5464,7 @@ static inline int l2cap_ecred_reconf_rsp(struct l2cap_conn *conn, result = __le16_to_cpu(rsp->result); - BT_DBG("result 0x%4.4x", rsp->result); + BT_DBG("result 0x%4.4x", result); if (!result) return 0; @@ -6556,6 +6633,10 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) struct l2cap_le_credits pkt; u16 return_credits = l2cap_le_rx_credits(chan); + if (chan->mode != L2CAP_MODE_LE_FLOWCTL && + chan->mode != L2CAP_MODE_EXT_FLOWCTL) + return; + if (chan->rx_credits >= return_credits) return; @@ -6617,8 +6698,10 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) return -ENOBUFS; } - if (chan->imtu < skb->len) { - BT_ERR("Too big LE L2CAP PDU"); + if (skb->len > chan->imtu) { + BT_ERR("Too big LE L2CAP PDU: len %u > %u", skb->len, + chan->imtu); + l2cap_send_disconn_req(chan, ECONNRESET); return -ENOBUFS; } @@ -6637,6 +6720,11 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) if (!chan->sdu) { u16 sdu_len; + if (!pskb_may_pull(skb, L2CAP_SDULEN_SIZE)) { + err = -EINVAL; + goto failed; + } + sdu_len = get_unaligned_le16(skb->data); skb_pull(skb, L2CAP_SDULEN_SIZE); @@ -6644,7 +6732,9 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) sdu_len, skb->len, chan->imtu); if (sdu_len > chan->imtu) { - BT_ERR("Too big LE L2CAP SDU length received"); + BT_ERR("Too big LE L2CAP SDU length: len %u > %u", + skb->len, sdu_len); + l2cap_send_disconn_req(chan, ECONNRESET); err = -EMSGSIZE; goto failed; } @@ -6680,6 +6770,7 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) if (chan->sdu->len + skb->len > chan->sdu_len) { BT_ERR("Too much LE L2CAP data received"); + l2cap_send_disconn_req(chan, ECONNRESET); err = -EINVAL; goto failed; }
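The new l2cap_get_ident() walks the ident space cyclically: it first tries the range above the last ident handed out and only then wraps back to 1, so a just-freed ident is not immediately reused. The same two-step allocation with the kernel IDA API (my_* names are hypothetical):

	static u8 my_next_ident(struct ida *ida, u8 *last, u8 max)
	{
		int id;

		/* prefer ids above the previous one... */
		id = ida_alloc_range(ida, *last + 1, max, GFP_ATOMIC);
		if (id < 0)
			/* ...then wrap around and start from 1 */
			id = ida_alloc_range(ida, 1, max, GFP_ATOMIC);
		if (id < 0)
			return 0;	/* whole range is in use */

		*last = id;
		return id;
	}

The ecred reconfigure rework in the same file is a second pattern worth noting: it validates every SCID, the MTU and the MPS across all requested channels first, and only commits the new values once the whole request has passed, so a half-applied reconfiguration can no longer occur.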
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 25f097c..71e8c1b 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c
@@ -1029,10 +1029,17 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - /* Setting is not supported as it's the remote side that - * decides this. - */ - err = -EPERM; + /* Only allow setting output MTU when not connected */ + if (sk->sk_state == BT_CONNECTED) { + err = -EISCONN; + break; + } + + err = copy_safe_from_sockptr(&mtu, sizeof(mtu), optval, optlen); + if (err) + break; + + chan->omtu = mtu; break; case BT_RCVMTU: @@ -1691,6 +1698,9 @@ static void l2cap_sock_ready_cb(struct l2cap_chan *chan) struct sock *sk = chan->data; struct sock *parent; + if (!sk) + return; + lock_sock(sk); parent = bt_sk(sk)->parent; @@ -1816,6 +1826,7 @@ static void l2cap_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); + skb_queue_purge(&sk->sk_error_queue); } static void l2cap_skb_msg_name(struct sk_buff *skb, void *msg_name,
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a7238fd..b05bb38 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c
@@ -2195,10 +2195,7 @@ static void set_mesh_complete(struct hci_dev *hdev, void *data, int err) sk = cmd->sk; if (status) { - mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, - status); - mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, true, - cmd_status_rsp, &status); + mgmt_cmd_status(cmd->sk, hdev->id, cmd->opcode, status); goto done; } @@ -2481,6 +2478,7 @@ static int mesh_send(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) struct mgmt_mesh_tx *mesh_tx; struct mgmt_cp_mesh_send *send = data; struct mgmt_rp_mesh_read_features rp; + u16 expected_len; bool sending; int err = 0; @@ -2488,12 +2486,19 @@ static int mesh_send(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, MGMT_STATUS_NOT_SUPPORTED); - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) || - len <= MGMT_MESH_SEND_SIZE || - len > (MGMT_MESH_SEND_SIZE + 31)) + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, MGMT_STATUS_REJECTED); + if (!send->adv_data_len || send->adv_data_len > 31) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, + MGMT_STATUS_REJECTED); + + expected_len = struct_size(send, adv_data, send->adv_data_len); + if (expected_len != len) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); memset(&rp, 0, sizeof(rp)); @@ -5358,7 +5363,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, * hci_adv_monitors_clear is about to be called which will take care of * freeing the adv_monitor instances. */ - if (status == -ECANCELED && !mgmt_pending_valid(hdev, cmd)) + if (status == -ECANCELED || !mgmt_pending_valid(hdev, cmd)) return; monitor = cmd->user_data; @@ -5377,7 +5382,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); - mgmt_pending_remove(cmd); + mgmt_pending_free(cmd); hci_dev_unlock(hdev); bt_dev_dbg(hdev, "add monitor %d complete, status %d", @@ -7251,6 +7256,9 @@ static bool ltk_is_valid(struct mgmt_ltk_info *key) if (key->initiator != 0x00 && key->initiator != 0x01) return false; + if (key->enc_size > sizeof(key->val)) + return false; + switch (key->addr.type) { case BDADDR_LE_PUBLIC: return true;
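mesh_send() now derives the one valid length for the command from its flexible-array member via struct_size(), which checks the size arithmetic for overflow, and requires the received length to match exactly. The validation shape (struct my_msg is hypothetical):

	struct my_msg {
		u8 adv_data_len;
		u8 adv_data[];		/* adv_data_len bytes follow */
	};

	static int my_validate(const struct my_msg *m, size_t len)
	{
		if (!m->adv_data_len || m->adv_data_len > 31)
			return -EINVAL;

		/* header plus n elements, with overflow-checked math */
		if (struct_size(m, adv_data, m->adv_data_len) != len)
			return -EINVAL;

		return 0;
	}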
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 70a378f..b8458781 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c
@@ -298,7 +298,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk, int err = 0; sco_conn_lock(conn); - if (conn->sk) + if (conn->sk || sco_pi(sk)->conn) err = -EBUSY; else __sco_chan_add(conn, sk, parent); @@ -353,9 +353,20 @@ static int sco_connect(struct sock *sk) lock_sock(sk); + /* Recheck state after reacquiring the socket lock, as another + * thread may have changed it (e.g., closed the socket). + */ + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { + release_sock(sk); + hci_conn_drop(hcon); + err = -EBADFD; + goto unlock; + } + err = sco_chan_add(conn, sk, NULL); if (err) { release_sock(sk); + hci_conn_drop(hcon); goto unlock; } @@ -401,7 +412,7 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb) struct sock *sk; sco_conn_lock(conn); - sk = conn->sk; + sk = sco_sock_hold(conn); sco_conn_unlock(conn); if (!sk) @@ -410,11 +421,15 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb) BT_DBG("sk %p len %u", sk, skb->len); if (sk->sk_state != BT_CONNECTED) - goto drop; + goto drop_put; - if (!sock_queue_rcv_skb(sk, skb)) + if (!sock_queue_rcv_skb(sk, skb)) { + sock_put(sk); return; + } +drop_put: + sock_put(sk); drop: kfree_skb(skb); } @@ -470,6 +485,7 @@ static void sco_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); + skb_queue_purge(&sk->sk_error_queue); } static void sco_sock_cleanup_listen(struct sock *parent) @@ -651,13 +667,18 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr_unsized *addr, addr->sa_family != AF_BLUETOOTH) return -EINVAL; - if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) - return -EBADFD; - - if (sk->sk_type != SOCK_SEQPACKET) - err = -EINVAL; - lock_sock(sk); + + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { + release_sock(sk); + return -EBADFD; + } + + if (sk->sk_type != SOCK_SEQPACKET) { + release_sock(sk); + return -EINVAL; + } + /* Set destination address and psm */ bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr); release_sock(sk);
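sco_connect() drops the socket lock around the blocking HCI connection setup, so once lock_sock() is reacquired the earlier state check is stale and must be repeated: another thread may have closed the socket in the gap. Kernel-style sketch of the recheck, including the rollback of what was acquired while unlocked (my_finish_connect is hypothetical):

	static int my_finish_connect(struct sock *sk, struct hci_conn *hcon)
	{
		lock_sock(sk);
		/* the pre-drop check no longer holds; verify again */
		if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
			release_sock(sk);
			hci_conn_drop(hcon);	/* undo unlocked acquisition */
			return -EBADFD;
		}
		/* ... proceed; the state is now stable under the lock ... */
		release_sock(sk);
		return 0;
	}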
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index e67bf7b..98f1da4f 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c
@@ -1018,10 +1018,7 @@ static u8 smp_random(struct smp_chan *smp) smp_s1(smp->tk, smp->prnd, smp->rrnd, stk); - if (hcon->pending_sec_level == BT_SECURITY_HIGH) - auth = 1; - else - auth = 0; + auth = test_bit(SMP_FLAG_MITM_AUTH, &smp->flags) ? 1 : 0; /* Even though there's no _RESPONDER suffix this is the * responder STK we're adding for later lookup (the initiator @@ -1826,7 +1823,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (sec_level > conn->hcon->pending_sec_level) conn->hcon->pending_sec_level = sec_level; - /* If we need MITM check that it can be achieved */ + /* If we need MITM check that it can be achieved. */ if (conn->hcon->pending_sec_level >= BT_SECURITY_HIGH) { u8 method; @@ -1834,6 +1831,10 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) req->io_capability); if (method == JUST_WORKS || method == JUST_CFM) return SMP_AUTH_REQUIREMENTS; + + /* Force MITM bit if it isn't set by the initiator. */ + auth |= SMP_AUTH_MITM; + rsp.auth_req |= SMP_AUTH_MITM; } key_size = min(req->max_key_size, rsp.max_key_size); @@ -2743,7 +2744,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) if (!test_bit(SMP_FLAG_DEBUG_KEY, &smp->flags) && !crypto_memneq(key, smp->local_pk, 64)) { bt_dev_err(hdev, "Remote and local public keys are identical"); - return SMP_UNSPECIFIED; + return SMP_DHKEY_CHECK_FAILED; } memcpy(smp->remote_pk, key, 64);
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index 1e2b517..6b55958 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c
@@ -251,12 +251,12 @@ struct nd_msg *br_is_nd_neigh_msg(const struct sk_buff *skb, struct nd_msg *msg) static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, struct sk_buff *request, struct neighbour *n, - __be16 vlan_proto, u16 vlan_tci, struct nd_msg *ns) + __be16 vlan_proto, u16 vlan_tci) { struct net_device *dev = request->dev; struct net_bridge_vlan_group *vg; + struct nd_msg *na, *ns; struct sk_buff *reply; - struct nd_msg *na; struct ipv6hdr *pip6; int na_olen = 8; /* opt hdr + ETH_ALEN for target */ int ns_olen; @@ -264,7 +264,7 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, u8 *daddr; u16 pvid; - if (!dev) + if (!dev || skb_linearize(request)) return; len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) + @@ -281,17 +281,21 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, skb_set_mac_header(reply, 0); daddr = eth_hdr(request)->h_source; + ns = (struct nd_msg *)(skb_network_header(request) + + sizeof(struct ipv6hdr)); /* Do we need option processing ? */ ns_olen = request->len - (skb_network_offset(request) + sizeof(struct ipv6hdr)) - sizeof(*ns); for (i = 0; i < ns_olen - 1; i += (ns->opt[i + 1] << 3)) { - if (!ns->opt[i + 1]) { + if (!ns->opt[i + 1] || i + (ns->opt[i + 1] << 3) > ns_olen) { kfree_skb(reply); return; } if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) { - daddr = ns->opt + i + sizeof(struct nd_opt_hdr); + if ((ns->opt[i + 1] << 3) >= + sizeof(struct nd_opt_hdr) + ETH_ALEN) + daddr = ns->opt + i + sizeof(struct nd_opt_hdr); break; } } @@ -472,9 +476,9 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, if (vid != 0) br_nd_send(br, p, skb, n, skb->vlan_proto, - skb_vlan_tag_get(skb), msg); + skb_vlan_tag_get(skb)); else - br_nd_send(br, p, skb, n, 0, 0, msg); + br_nd_send(br, p, skb, n, 0, 0); replied = true; }
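The ND-option loop above now rejects a zero length byte and any option whose declared length runs past the message before touching the option body; that is the standard defensive TLV walk. A self-contained C version over a raw byte buffer (ND options encode length in 8-byte units; find_nd_opt is hypothetical):

	#include <stddef.h>
	#include <stdint.h>

	/* opt[0] = type, opt[1] = length in 8-byte units */
	static const uint8_t *find_nd_opt(const uint8_t *opt, size_t olen,
					  uint8_t want)
	{
		size_t i = 0;

		while (i + 2 <= olen) {
			size_t step = (size_t)opt[i + 1] << 3;

			if (!step || i + step > olen)
				return NULL;	/* zero or overlong: bail */
			if (opt[i] == want)
				return &opt[i + 2];	/* body: step-2 bytes */
			i += step;
		}
		return NULL;
	}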
diff --git a/net/bridge/br_cfm.c b/net/bridge/br_cfm.c index 2c70fe47..118c7ea 100644 --- a/net/bridge/br_cfm.c +++ b/net/bridge/br_cfm.c
@@ -576,7 +576,7 @@ static void mep_delete_implementation(struct net_bridge *br, /* Empty and free peer MEP list */ hlist_for_each_entry_safe(peer_mep, n_store, &mep->peer_mep_list, head) { - cancel_delayed_work_sync(&peer_mep->ccm_rx_dwork); + disable_delayed_work_sync(&peer_mep->ccm_rx_dwork); hlist_del_rcu(&peer_mep->head); kfree_rcu(peer_mep, rcu); } @@ -732,7 +732,7 @@ int br_cfm_cc_peer_mep_remove(struct net_bridge *br, const u32 instance, return -ENOENT; } - cc_peer_disable(peer_mep); + disable_delayed_work_sync(&peer_mep->ccm_rx_dwork); hlist_del_rcu(&peer_mep->head); kfree_rcu(peer_mep, rcu);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ee01122..f7502e6 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c
@@ -74,7 +74,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) && br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) { br_do_proxy_suppress_arp(skb, br, vid, NULL); - } else if (IS_ENABLED(CONFIG_IPV6) && + } else if (ipv6_mod_enabled() && skb->protocol == htons(ETH_P_IPV6) && br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) && pskb_may_pull(skb, sizeof(struct ipv6hdr) +
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 1405f10..2cbae0f 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c
@@ -170,7 +170,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb (skb->protocol == htons(ETH_P_ARP) || skb->protocol == htons(ETH_P_RARP))) { br_do_proxy_suppress_arp(skb, br, vid, p); - } else if (IS_ENABLED(CONFIG_IPV6) && + } else if (ipv6_mod_enabled() && skb->protocol == htons(ETH_P_IPV6) && br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) && pskb_may_pull(skb, sizeof(struct ipv6hdr) +
diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c index ce6f63c..86f0e75d 100644 --- a/net/bridge/br_mrp_netlink.c +++ b/net/bridge/br_mrp_netlink.c
@@ -196,7 +196,7 @@ static const struct nla_policy br_mrp_start_test_policy[IFLA_BRIDGE_MRP_START_TEST_MAX + 1] = { [IFLA_BRIDGE_MRP_START_TEST_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_MRP_START_TEST_RING_ID] = { .type = NLA_U32 }, - [IFLA_BRIDGE_MRP_START_TEST_INTERVAL] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_TEST_INTERVAL] = NLA_POLICY_MIN(NLA_U32, 1), [IFLA_BRIDGE_MRP_START_TEST_MAX_MISS] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_TEST_PERIOD] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_TEST_MONITOR] = { .type = NLA_U32 }, @@ -316,7 +316,7 @@ static const struct nla_policy br_mrp_start_in_test_policy[IFLA_BRIDGE_MRP_START_IN_TEST_MAX + 1] = { [IFLA_BRIDGE_MRP_START_IN_TEST_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID] = { .type = NLA_U32 }, - [IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] = NLA_POLICY_MIN(NLA_U32, 1), [IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD] = { .type = NLA_U32 }, };
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index b9b2981..9b55d38 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h
@@ -1345,6 +1345,16 @@ br_multicast_ctx_options_equal(const struct net_bridge_mcast *brmctx1, } static inline bool +br_multicast_port_ctx_options_equal(const struct net_bridge_mcast_port *pmctx1, + const struct net_bridge_mcast_port *pmctx2) +{ + return br_multicast_ngroups_get(pmctx1) == + br_multicast_ngroups_get(pmctx2) && + br_multicast_ngroups_get_max(pmctx1) == + br_multicast_ngroups_get_max(pmctx2); +} + +static inline bool br_multicast_ctx_matches_vlan_snooping(const struct net_bridge_mcast *brmctx) { bool vlan_snooping_enabled;
diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index 8fa89b0..5514e1f 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c
@@ -43,9 +43,29 @@ bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr, u8 range_mc_rtr = br_vlan_multicast_router(range_end); u8 curr_mc_rtr = br_vlan_multicast_router(v_curr); - return v_curr->state == range_end->state && - __vlan_tun_can_enter_range(v_curr, range_end) && - curr_mc_rtr == range_mc_rtr; + if (v_curr->state != range_end->state) + return false; + + if (!__vlan_tun_can_enter_range(v_curr, range_end)) + return false; + + if (curr_mc_rtr != range_mc_rtr) + return false; + + /* Check user-visible priv_flags that affect output */ + if ((v_curr->priv_flags ^ range_end->priv_flags) & + (BR_VLFLAG_NEIGH_SUPPRESS_ENABLED | BR_VLFLAG_MCAST_ENABLED)) + return false; + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (!br_vlan_is_master(v_curr) && + !br_multicast_port_ctx_vlan_disabled(&v_curr->port_mcast_ctx) && + !br_multicast_port_ctx_options_equal(&v_curr->port_mcast_ctx, + &range_end->port_mcast_ctx)) + return false; +#endif + + return true; } bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v,
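The priv_flags comparison uses the usual bit trick: XOR leaves exactly the bits on which the two values differ, and the AND restricts the test to the flags that are user-visible. In isolation:

	/* true iff a and b agree on every bit selected by mask */
	static inline bool flags_equal_masked(u32 a, u32 b, u32 mask)
	{
		return !((a ^ b) & mask);
	}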
diff --git a/net/can/af_can.c b/net/can/af_can.c index f70e2ba..7bc86b1 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c
@@ -469,7 +469,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, rcv->can_id = can_id; rcv->mask = mask; - rcv->matches = 0; + atomic_long_set(&rcv->matches, 0); rcv->func = func; rcv->data = data; rcv->ident = ident; @@ -573,7 +573,7 @@ EXPORT_SYMBOL(can_rx_unregister); static inline void deliver(struct sk_buff *skb, struct receiver *rcv) { rcv->func(skb, rcv->data); - rcv->matches++; + atomic_long_inc(&rcv->matches); } static int can_rcv_filter(struct can_dev_rcv_lists *dev_rcv_lists, struct sk_buff *skb)
diff --git a/net/can/af_can.h b/net/can/af_can.h index 22f3352..8788701 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h
@@ -52,7 +52,7 @@ struct receiver { struct hlist_node list; canid_t can_id; canid_t mask; - unsigned long matches; + atomic_long_t matches; void (*func)(struct sk_buff *skb, void *data); void *data; char *ident;
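The matches counter is bumped from the receive softirq and read from /proc, so it moves from a plain unsigned long to an atomic_long_t: concurrent unprotected increments can be lost and the reader can see torn values. A runnable userspace analogue with C11 atomics:

	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_ulong matches;	/* plays the role of atomic_long_t */

	static void deliver(void)
	{
		/* like atomic_long_inc(): safe under concurrency */
		atomic_fetch_add_explicit(&matches, 1, memory_order_relaxed);
	}

	int main(void)
	{
		deliver();
		deliver();
		/* like atomic_long_read() in the proc show path */
		printf("%lu\n",
		       atomic_load_explicit(&matches, memory_order_relaxed));
		return 0;
	}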
diff --git a/net/can/bcm.c b/net/can/bcm.c index b7324e9..fd9fa07 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c
@@ -1176,6 +1176,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, if (!op) return -ENOMEM; + spin_lock_init(&op->bcm_tx_lock); op->can_id = msg_head->can_id; op->nframes = msg_head->nframes; op->cfsiz = CFSIZ(msg_head->flags);
diff --git a/net/can/gw.c b/net/can/gw.c index 8ee4d67..0ec99f6 100644 --- a/net/can/gw.c +++ b/net/can/gw.c
@@ -375,10 +375,10 @@ static void cgw_csum_crc8_rel(struct canfd_frame *cf, return; if (from <= to) { - for (i = crc8->from_idx; i <= crc8->to_idx; i++) + for (i = from; i <= to; i++) crc = crc8->crctab[crc ^ cf->data[i]]; } else { - for (i = crc8->from_idx; i >= crc8->to_idx; i--) + for (i = from; i >= to; i--) crc = crc8->crctab[crc ^ cf->data[i]]; } @@ -397,7 +397,7 @@ static void cgw_csum_crc8_rel(struct canfd_frame *cf, break; } - cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; + cf->data[res] = crc ^ crc8->final_xor_val; } static void cgw_csum_crc8_pos(struct canfd_frame *cf,
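The cgw fix makes the CRC loops honor the sanitized from/to/res locals instead of re-reading the raw crc8->from_idx/to_idx/result_idx fields. The table-driven CRC-8 itself is one table lookup per byte; a runnable sketch of the ascending case, assuming the caller has validated the bounds:

	#include <stddef.h>
	#include <stdint.h>

	/* CRC-8 over data[from..to] inclusive; bounds validated by caller */
	static uint8_t crc8_range(const uint8_t crctab[256],
				  const uint8_t *data, size_t from, size_t to)
	{
		uint8_t crc = 0;
		size_t i;

		for (i = from; i <= to; i++)
			crc = crctab[crc ^ data[i]];
		return crc;
	}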
diff --git a/net/can/isotp.c b/net/can/isotp.c index da3b72e7..2770f43 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c
@@ -1248,12 +1248,6 @@ static int isotp_release(struct socket *sock) so->ifindex = 0; so->bound = 0; - if (so->rx.buf != so->rx.sbuf) - kfree(so->rx.buf); - - if (so->tx.buf != so->tx.sbuf) - kfree(so->tx.buf); - sock_orphan(sk); sock->sk = NULL; @@ -1622,6 +1616,21 @@ static int isotp_notifier(struct notifier_block *nb, unsigned long msg, return NOTIFY_DONE; } +static void isotp_sock_destruct(struct sock *sk) +{ + struct isotp_sock *so = isotp_sk(sk); + + /* do the standard CAN sock destruct work */ + can_sock_destruct(sk); + + /* free potential extended PDU buffers */ + if (so->rx.buf != so->rx.sbuf) + kfree(so->rx.buf); + + if (so->tx.buf != so->tx.sbuf) + kfree(so->tx.buf); +} + static int isotp_init(struct sock *sk) { struct isotp_sock *so = isotp_sk(sk); @@ -1666,6 +1675,9 @@ static int isotp_init(struct sock *sk) list_add_tail(&so->notifier, &isotp_notifier_list); spin_unlock(&isotp_notifier_lock); + /* re-assign default can_sock_destruct() reference */ + sk->sk_destruct = isotp_sock_destruct; + return 0; }
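Moving the frees from isotp_release() into a sk_destruct callback ties the buffer lifetime to the socket's last reference instead of to the close() call, so a path that still holds the socket cannot race with the frees. The override chains to the default destructor before releasing its own resources; the shape (my_* names hypothetical):

	static void my_sock_destruct(struct sock *sk)
	{
		struct my_sock *mo = my_sk(sk);

		can_sock_destruct(sk);	/* keep the default teardown */

		/* then free what only this protocol allocated */
		if (mo->rx_buf != mo->rx_sbuf)
			kfree(mo->rx_buf);
	}

	/* installed once in init: sk->sk_destruct = my_sock_destruct; */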
diff --git a/net/can/proc.c b/net/can/proc.c index 0938bf7..de4d05a 100644 --- a/net/can/proc.c +++ b/net/can/proc.c
@@ -196,7 +196,8 @@ static void can_print_rcvlist(struct seq_file *m, struct hlist_head *rx_list, " %-5s %03x %08x %pK %pK %8ld %s\n"; seq_printf(m, fmt, DNAME(dev), r->can_id, r->mask, - r->func, r->data, r->matches, r->ident); + r->func, r->data, atomic_long_read(&r->matches), + r->ident); } }
diff --git a/net/ceph/auth.c b/net/ceph/auth.c index 343c841..901b935 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c
@@ -205,9 +205,9 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, s32 result; u64 global_id; void *payload, *payload_end; - int payload_len; + u32 payload_len; char *result_msg; - int result_msg_len; + u32 result_msg_len; int ret = -EINVAL; mutex_lock(&ac->mutex); @@ -217,10 +217,12 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, result = ceph_decode_32(&p); global_id = ceph_decode_64(&p); payload_len = ceph_decode_32(&p); + ceph_decode_need(&p, end, payload_len, bad); payload = p; p += payload_len; ceph_decode_need(&p, end, sizeof(u32), bad); result_msg_len = ceph_decode_32(&p); + ceph_decode_need(&p, end, result_msg_len, bad); result_msg = p; p += result_msg_len; if (p != end)
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index 5ec3272..50f65820 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c
@@ -392,7 +392,7 @@ static int head_onwire_len(int ctrl_len, bool secure) int head_len; int rem_len; - BUG_ON(ctrl_len < 0 || ctrl_len > CEPH_MSG_MAX_CONTROL_LEN); + BUG_ON(ctrl_len < 1 || ctrl_len > CEPH_MSG_MAX_CONTROL_LEN); if (secure) { head_len = CEPH_PREAMBLE_SECURE_LEN; @@ -401,9 +401,7 @@ static int head_onwire_len(int ctrl_len, bool secure) head_len += padded_len(rem_len) + CEPH_GCM_TAG_LEN; } } else { - head_len = CEPH_PREAMBLE_PLAIN_LEN; - if (ctrl_len) - head_len += ctrl_len + CEPH_CRC_LEN; + head_len = CEPH_PREAMBLE_PLAIN_LEN + ctrl_len + CEPH_CRC_LEN; } return head_len; } @@ -528,11 +526,16 @@ static int decode_preamble(void *p, struct ceph_frame_desc *desc) desc->fd_aligns[i] = ceph_decode_16(&p); } - if (desc->fd_lens[0] < 0 || + /* + * This would fire for FRAME_TAG_WAIT (it has one empty + * segment), but we should never get it as client. + */ + if (desc->fd_lens[0] < 1 || desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) { pr_err("bad control segment length %d\n", desc->fd_lens[0]); return -EINVAL; } + if (desc->fd_lens[1] < 0 || desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) { pr_err("bad front segment length %d\n", desc->fd_lens[1]); @@ -549,10 +552,6 @@ static int decode_preamble(void *p, struct ceph_frame_desc *desc) return -EINVAL; } - /* - * This would fire for FRAME_TAG_WAIT (it has one empty - * segment), but we should never get it as client. - */ if (!desc->fd_lens[desc->fd_seg_cnt - 1]) { pr_err("last segment empty, segment count %d\n", desc->fd_seg_cnt); @@ -2833,12 +2832,15 @@ static int process_message_header(struct ceph_connection *con, void *p, void *end) { struct ceph_frame_desc *desc = &con->v2.in_desc; - struct ceph_msg_header2 *hdr2 = p; + struct ceph_msg_header2 *hdr2; struct ceph_msg_header hdr; int skip; int ret; u64 seq; + ceph_decode_need(&p, end, sizeof(*hdr2), bad); + hdr2 = p; + /* verify seq# */ seq = le64_to_cpu(hdr2->seq); if ((s64)seq - (s64)con->in_seq < 1) { @@ -2869,6 +2871,10 @@ static int process_message_header(struct ceph_connection *con, WARN_ON(!con->in_msg); WARN_ON(con->in_msg->con != con); return 1; + +bad: + pr_err("failed to decode message header\n"); + return -EINVAL; } static int process_message(struct ceph_connection *con) @@ -2898,6 +2904,11 @@ static int __handle_control(struct ceph_connection *con, void *p) if (con->v2.in_desc.fd_tag != FRAME_TAG_MESSAGE) return process_control(con, p, end); + if (con->state != CEPH_CON_S_OPEN) { + con->error_msg = "protocol error, unexpected message"; + return -EINVAL; + } + ret = process_message_header(con, p, end); if (ret < 0) return ret;
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 5136b37..d508053 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c
@@ -72,8 +72,8 @@ static struct ceph_monmap *ceph_monmap_decode(void **p, void *end, bool msgr2) struct ceph_monmap *monmap = NULL; struct ceph_fsid fsid; u32 struct_len; - int blob_len; - int num_mon; + u32 blob_len; + u32 num_mon; u8 struct_v; u32 epoch; int ret; @@ -112,7 +112,7 @@ static struct ceph_monmap *ceph_monmap_decode(void **p, void *end, bool msgr2) } ceph_decode_32_safe(p, end, num_mon, e_inval); - dout("%s fsid %pU epoch %u num_mon %d\n", __func__, &fsid, epoch, + dout("%s fsid %pU epoch %u num_mon %u\n", __func__, &fsid, epoch, num_mon); if (num_mon > CEPH_MAX_MON) goto e_inval;
diff --git a/net/core/dev.c b/net/core/dev.c index 6ff4256..831129f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c
@@ -3769,6 +3769,22 @@ static netdev_features_t dflt_features_check(struct sk_buff *skb, return vlan_features_check(skb, features); } +static bool skb_gso_has_extension_hdr(const struct sk_buff *skb) +{ + if (!skb->encapsulation) + return ((skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + vlan_get_protocol(skb) == htons(ETH_P_IPV6))) && + skb_transport_header_was_set(skb) && + skb_network_header_len(skb) != sizeof(struct ipv6hdr)); + else + return (!skb_inner_network_header_was_set(skb) || + ((skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + inner_ip_hdr(skb)->version == 6)) && + skb_inner_network_header_len(skb) != sizeof(struct ipv6hdr))); +} + static netdev_features_t gso_features_check(const struct sk_buff *skb, struct net_device *dev, netdev_features_t features) @@ -3805,10 +3821,15 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, * segmentation-offloads.rst). */ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { - struct iphdr *iph = skb->encapsulation ? - inner_ip_hdr(skb) : ip_hdr(skb); + const struct iphdr *iph; + struct iphdr _iph; + int nhoff = skb->encapsulation ? + skb_inner_network_offset(skb) : + skb_network_offset(skb); - if (!(iph->frag_off & htons(IP_DF))) + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + + if (!iph || !(iph->frag_off & htons(IP_DF))) features &= ~dev->mangleid_features; } @@ -3816,11 +3837,7 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, * so neither does TSO that depends on it. */ if (features & NETIF_F_IPV6_CSUM && - (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || - (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && - vlan_get_protocol(skb) == htons(ETH_P_IPV6))) && - skb_transport_header_was_set(skb) && - skb_network_header_len(skb) != sizeof(struct ipv6hdr)) + skb_gso_has_extension_hdr(skb)) features &= ~(NETIF_F_IPV6_CSUM | NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4); return features; @@ -3987,7 +4004,7 @@ static struct sk_buff *validate_xmit_unreadable_skb(struct sk_buff *skb, if (shinfo->nr_frags > 0) { niov = netmem_to_net_iov(skb_frag_netmem(&shinfo->frags[0])); if (net_is_devmem_iov(niov) && - net_devmem_iov_binding(niov)->dev != dev) + READ_ONCE(net_devmem_iov_binding(niov)->dev) != dev) goto out_free; } @@ -4818,10 +4835,9 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) if (dev->flags & IFF_UP) { int cpu = smp_processor_id(); /* ok because BHs are off */ - /* Other cpus might concurrently change txq->xmit_lock_owner - * to -1 or to their cpu id, but not to our id. - */ - if (READ_ONCE(txq->xmit_lock_owner) != cpu) { + if (!netif_tx_owned(txq, cpu)) { + bool is_list = false; + if (dev_xmit_recursion()) goto recursion_alert; @@ -4832,17 +4848,28 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_stopped(txq)) { + is_list = !!skb->next; + dev_xmit_recursion_inc(); skb = dev_hard_start_xmit(skb, dev, txq, &rc); dev_xmit_recursion_dec(); - if (dev_xmit_complete(rc)) { - HARD_TX_UNLOCK(dev, txq); - goto out; - } + + /* GSO segments a single SKB into + * a list of frames. TCP expects error + * to mean none of the data was sent. 
+ */ + if (is_list) + rc = NETDEV_TX_OK; } HARD_TX_UNLOCK(dev, txq); + if (!skb) /* xmit completed */ + goto out; + net_crit_ratelimited("Virtual device %s asks to queue packet!\n", dev->name); + /* NETDEV_TX_BUSY or queue was stopped */ + if (!is_list) + rc = -ENETDOWN; } else { /* Recursion is detected! It is possible, * unfortunately @@ -4850,10 +4877,10 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) recursion_alert: net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", dev->name); + rc = -ENETDOWN; } } - rc = -ENETDOWN; rcu_read_unlock_bh(); dev_core_stats_tx_dropped_inc(dev); @@ -4992,8 +5019,7 @@ static bool rps_flow_is_active(struct rps_dev_flow *rflow, static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, - struct rps_dev_flow *rflow, u16 next_cpu, u32 hash, - u32 flow_id) + struct rps_dev_flow *rflow, u16 next_cpu, u32 hash) { if (next_cpu < nr_cpu_ids) { u32 head; @@ -5004,6 +5030,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *tmp_rflow; unsigned int tmp_cpu; u16 rxq_index; + u32 flow_id; int rc; /* Should we steer this flow to a different hardware queue? */ @@ -5019,6 +5046,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (!flow_table) goto out; + flow_id = rfs_slot(hash, flow_table); tmp_rflow = &flow_table->flows[flow_id]; tmp_cpu = READ_ONCE(tmp_rflow->cpu); @@ -5066,7 +5094,6 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow_table *flow_table; struct rps_map *map; int cpu = -1; - u32 flow_id; u32 tcpu; u32 hash; @@ -5113,8 +5140,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, /* OK, now we know there is a match, * we can look at the local (per receive queue) flow table */ - flow_id = rfs_slot(hash, flow_table); - rflow = &flow_table->flows[flow_id]; + rflow = &flow_table->flows[rfs_slot(hash, flow_table)]; tcpu = rflow->cpu; /* @@ -5133,8 +5159,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, ((int)(READ_ONCE(per_cpu(softnet_data, tcpu).input_queue_head) - rflow->last_qtail)) >= 0)) { tcpu = next_cpu; - rflow = set_rps_cpu(dev, skb, rflow, next_cpu, hash, - flow_id); + rflow = set_rps_cpu(dev, skb, rflow, next_cpu, hash); } if (tcpu < nr_cpu_ids && cpu_online(tcpu)) { @@ -7783,11 +7808,12 @@ static int napi_thread_wait(struct napi_struct *napi) return -1; } -static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll) +static void napi_threaded_poll_loop(struct napi_struct *napi, + unsigned long *busy_poll_last_qs) { + unsigned long last_qs = busy_poll_last_qs ? *busy_poll_last_qs : jiffies; struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; struct softnet_data *sd; - unsigned long last_qs = jiffies; for (;;) { bool repoll = false; @@ -7816,12 +7842,12 @@ static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll) /* When busy poll is enabled, the old packets are not flushed in * napi_complete_done. So flush them here. */ - if (busy_poll) + if (busy_poll_last_qs) gro_flush_normal(&napi->gro, HZ >= 1000); local_bh_enable(); /* Call cond_resched here to avoid watchdog warnings. 
*/ - if (repoll || busy_poll) { + if (repoll || busy_poll_last_qs) { rcu_softirq_qs_periodic(last_qs); cond_resched(); } @@ -7829,11 +7855,15 @@ static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll) if (!repoll) break; } + + if (busy_poll_last_qs) + *busy_poll_last_qs = last_qs; } static int napi_threaded_poll(void *data) { struct napi_struct *napi = data; + unsigned long last_qs = jiffies; bool want_busy_poll; bool in_busy_poll; unsigned long val; @@ -7851,7 +7881,7 @@ static int napi_threaded_poll(void *data) assign_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state, want_busy_poll); - napi_threaded_poll_loop(napi, want_busy_poll); + napi_threaded_poll_loop(napi, want_busy_poll ? &last_qs : NULL); } return 0; @@ -13164,7 +13194,7 @@ static void run_backlog_napi(unsigned int cpu) { struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu); - napi_threaded_poll_loop(&sd->backlog, false); + napi_threaded_poll_loop(&sd->backlog, NULL); } static void backlog_napi_setup(unsigned int cpu)
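The gso_features_check() part of the net/core/dev.c change stops dereferencing ip_hdr() in place and instead copies the header out through skb_header_pointer(), which also works when the header does not sit in the skb's linear area. A userspace sketch of that bounce-buffer pattern over a fragmented buffer; the fragment layout and names are made up for illustration:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct frag { const uint8_t *data; size_t len; };

/* copy `len` bytes at offset `off` out of a fragmented buffer into `buf`;
 * returns buf on success, NULL if the range runs past the data */
static void *header_pointer(const struct frag *frags, size_t nfrags,
                            size_t off, size_t len, void *buf)
{
    uint8_t *dst = buf;

    for (size_t i = 0; i < nfrags && len; i++) {
        if (off >= frags[i].len) {
            off -= frags[i].len;
            continue;
        }
        size_t n = frags[i].len - off;

        if (n > len)
            n = len;
        memcpy(dst, frags[i].data + off, n);
        dst += n;
        len -= n;
        off = 0;
    }
    return len ? NULL : buf;
}

int main(void)
{
    /* an "IPv4 header" split across two fragments */
    const uint8_t a[] = { 0x45, 0x00 }, b[] = { 0x00, 0x54 };
    const struct frag f[] = { { a, sizeof(a) }, { b, sizeof(b) } };
    uint8_t hdr[4];

    if (header_pointer(f, 2, 0, sizeof(hdr), hdr))
        printf("version %u\n", hdr[0] >> 4);    /* prints: version 4 */
    return 0;
}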
diff --git a/net/core/dev.h b/net/core/dev.h index 98793a7..781619e 100644 --- a/net/core/dev.h +++ b/net/core/dev.h
@@ -366,41 +366,6 @@ static inline void napi_assert_will_not_race(const struct napi_struct *napi) void kick_defer_list_purge(unsigned int cpu); -#define XMIT_RECURSION_LIMIT 8 - -#ifndef CONFIG_PREEMPT_RT -static inline bool dev_xmit_recursion(void) -{ - return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > - XMIT_RECURSION_LIMIT); -} - -static inline void dev_xmit_recursion_inc(void) -{ - __this_cpu_inc(softnet_data.xmit.recursion); -} - -static inline void dev_xmit_recursion_dec(void) -{ - __this_cpu_dec(softnet_data.xmit.recursion); -} -#else -static inline bool dev_xmit_recursion(void) -{ - return unlikely(current->net_xmit.recursion > XMIT_RECURSION_LIMIT); -} - -static inline void dev_xmit_recursion_inc(void) -{ - current->net_xmit.recursion++; -} - -static inline void dev_xmit_recursion_dec(void) -{ - current->net_xmit.recursion--; -} -#endif - int dev_set_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack);
diff --git a/net/core/devmem.c b/net/core/devmem.c index 8c9aad7..69d79ae 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c
@@ -396,7 +396,8 @@ struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk, * net_device. */ dst_dev = dst_dev_rcu(dst); - if (unlikely(!dst_dev) || unlikely(dst_dev != binding->dev)) { + if (unlikely(!dst_dev) || + unlikely(dst_dev != READ_ONCE(binding->dev))) { err = -ENODEV; goto out_unlock; } @@ -513,7 +514,8 @@ static void mp_dmabuf_devmem_uninstall(void *mp_priv, xa_erase(&binding->bound_rxqs, xa_idx); if (xa_empty(&binding->bound_rxqs)) { mutex_lock(&binding->lock); - binding->dev = NULL; + ASSERT_EXCLUSIVE_WRITER(binding->dev); + WRITE_ONCE(binding->dev, NULL); mutex_unlock(&binding->lock); } break;
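The devmem.c hunk annotates every lockless access to binding->dev: the writer NULLs it under the mutex with WRITE_ONCE() and readers outside the mutex use READ_ONCE(). Roughly the same discipline in userspace C11 atomics (the kernel macros compile down to comparable single accesses); bind_dev() and same_device() are illustrative names, not patch code:

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct dev { int id; };

static _Atomic(struct dev *) binding_dev;

static void uninstall(void)
{
    /* was a plain `binding->dev = NULL;` racing with lockless readers */
    atomic_store_explicit(&binding_dev, NULL, memory_order_release);
}

static int same_device(const struct dev *dst_dev)
{
    /* was a plain load; the marked access makes the race well defined */
    return dst_dev == atomic_load_explicit(&binding_dev,
                                           memory_order_acquire);
}

int main(void)
{
    struct dev d = { 1 };

    atomic_store_explicit(&binding_dev, &d, memory_order_release);
    printf("%d\n", same_device(&d));   /* 1 */
    uninstall();
    printf("%d\n", same_device(&d));   /* 0 */
    return 0;
}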
diff --git a/net/core/filter.c b/net/core/filter.c index 0d5d5a1..78b5481 100644 --- a/net/core/filter.c +++ b/net/core/filter.c
@@ -2228,6 +2228,9 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, return -ENOMEM; } + if (unlikely(!ipv6_mod_enabled())) + goto out_drop; + rcu_read_lock(); if (!nh) { dst = skb_dst(skb); @@ -2335,6 +2338,10 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); } else if (nh->nh_family == AF_INET6) { + if (unlikely(!ipv6_mod_enabled())) { + rcu_read_unlock(); + goto out_drop; + } neigh = ip_neigh_gw6(dev, &nh->ipv6_nh); is_v6gw = true; } else if (nh->nh_family == AF_INET) { @@ -4150,12 +4157,14 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset) struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1]; struct xdp_rxq_info *rxq = xdp->rxq; - unsigned int tailroom; + int tailroom; if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz) return -EOPNOTSUPP; - tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag); + tailroom = rxq->frag_size - skb_frag_size(frag) - + skb_frag_off(frag) % rxq->frag_size; + WARN_ON_ONCE(tailroom < 0); if (unlikely(offset > tailroom)) return -EINVAL;
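The bpf_xdp_frags_increase_tail() fix turns `tailroom` into a signed int: with unsigned arithmetic the subtraction silently wraps to a huge positive value when the fragment is already full, so the `offset > tailroom` check can never reject anything. A compact demonstration with made-up sizes:

#include <stdio.h>

int main(void)
{
    /* a fragment that is already (over)full; values are made up */
    unsigned int frag_size = 2048, frag_len = 2000, frag_off = 100;

    unsigned int utail = frag_size - frag_len - frag_off;       /* wraps */
    int stail = (int)frag_size - (int)frag_len - (int)frag_off; /* -52 */

    printf("unsigned tailroom: %u\nsigned tailroom:   %d\n", utail, stail);
    /* only the signed form lets `offset > tailroom` (and the patch's
     * WARN_ON_ONCE(tailroom < 0)) catch the overfull case */
    return 0;
}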
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a95cfe7..c56a4e7 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c
@@ -820,7 +820,8 @@ int pneigh_create(struct neigh_table *tbl, struct net *net, update: WRITE_ONCE(n->flags, flags); n->permanent = permanent; - WRITE_ONCE(n->protocol, protocol); + if (protocol) + WRITE_ONCE(n->protocol, protocol); out: mutex_unlock(&tbl->phash_lock); return err;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a8558a5..cd74bef 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c
@@ -132,7 +132,7 @@ static int netif_local_xmit_active(struct net_device *dev) for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id()) + if (netif_tx_owned(txq, smp_processor_id())) return 1; }
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index c82a95b..ee5060d 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c
@@ -245,7 +245,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, goto err_cancel; if (pool->user.detach_time && nla_put_uint(rsp, NETDEV_A_PAGE_POOL_DETACH_TIME, - pool->user.detach_time)) + ktime_divns(pool->user.detach_time, NSEC_PER_SEC))) goto err_cancel; if (pool->mp_ops && pool->mp_ops->nl_fill(pool->mp_priv, rsp, NULL)) @@ -337,7 +337,7 @@ int page_pool_list(struct page_pool *pool) void page_pool_detached(struct page_pool *pool) { mutex_lock(&page_pools_lock); - pool->user.detach_time = ktime_get_boottime_seconds(); + pool->user.detach_time = ktime_get_boottime(); netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_CHANGE_NTF); mutex_unlock(&page_pools_lock); }
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dad4b10..fae8034e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c
@@ -629,6 +629,9 @@ int rtnl_link_register(struct rtnl_link_ops *ops) unlock: mutex_unlock(&link_ops_mutex); + if (err) + cleanup_srcu_struct(&ops->srcu); + return err; } EXPORT_SYMBOL_GPL(rtnl_link_register); @@ -707,11 +710,14 @@ static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev) goto out; ops = master_dev->rtnl_link_ops; - if (!ops || !ops->get_slave_size) + if (!ops) + goto out; + size += nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_SLAVE_KIND */ + if (!ops->get_slave_size) goto out; /* IFLA_INFO_SLAVE_DATA + nested data */ - size = nla_total_size(sizeof(struct nlattr)) + - ops->get_slave_size(master_dev, dev); + size += nla_total_size(sizeof(struct nlattr)) + + ops->get_slave_size(master_dev, dev); out: rcu_read_unlock(); @@ -1267,6 +1273,21 @@ static size_t rtnl_dpll_pin_size(const struct net_device *dev) return size; } +static size_t rtnl_dev_parent_size(const struct net_device *dev) +{ + size_t size = 0; + + /* IFLA_PARENT_DEV_NAME */ + if (dev->dev.parent) + size += nla_total_size(strlen(dev_name(dev->dev.parent)) + 1); + + /* IFLA_PARENT_DEV_BUS_NAME */ + if (dev->dev.parent && dev->dev.parent->bus) + size += nla_total_size(strlen(dev->dev.parent->bus->name) + 1); + + return size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { @@ -1328,6 +1349,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(8) /* IFLA_MAX_PACING_OFFLOAD_HORIZON */ + nla_total_size(2) /* IFLA_HEADROOM */ + nla_total_size(2) /* IFLA_TAILROOM */ + + rtnl_dev_parent_size(dev) + 0; if (!(ext_filter_mask & RTEXT_FILTER_SKIP_STATS))
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 9a39656..6a6f2cd 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c
@@ -20,7 +20,6 @@ #include <net/tcp.h> static siphash_aligned_key_t net_secret; -static siphash_aligned_key_t ts_secret; #define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ) @@ -28,11 +27,6 @@ static __always_inline void net_secret_init(void) { net_get_random_once(&net_secret, sizeof(net_secret)); } - -static __always_inline void ts_secret_init(void) -{ - net_get_random_once(&ts_secret, sizeof(ts_secret)); -} #endif #ifdef CONFIG_INET @@ -53,28 +47,9 @@ static u32 seq_scale(u32 seq) #endif #if IS_ENABLED(CONFIG_IPV6) -u32 secure_tcpv6_ts_off(const struct net *net, - const __be32 *saddr, const __be32 *daddr) -{ - const struct { - struct in6_addr saddr; - struct in6_addr daddr; - } __aligned(SIPHASH_ALIGNMENT) combined = { - .saddr = *(struct in6_addr *)saddr, - .daddr = *(struct in6_addr *)daddr, - }; - - if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) - return 0; - - ts_secret_init(); - return siphash(&combined, offsetofend(typeof(combined), daddr), - &ts_secret); -} -EXPORT_IPV6_MOD(secure_tcpv6_ts_off); - -u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport) +union tcp_seq_and_ts_off +secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr, + const __be32 *daddr, __be16 sport, __be16 dport) { const struct { struct in6_addr saddr; @@ -87,14 +62,20 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, .sport = sport, .dport = dport }; - u32 hash; + union tcp_seq_and_ts_off st; net_secret_init(); - hash = siphash(&combined, offsetofend(typeof(combined), dport), - &net_secret); - return seq_scale(hash); + + st.hash64 = siphash(&combined, offsetofend(typeof(combined), dport), + &net_secret); + + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) + st.ts_off = 0; + + st.seq = seq_scale(st.seq); + return st; } -EXPORT_SYMBOL(secure_tcpv6_seq); +EXPORT_SYMBOL(secure_tcpv6_seq_and_ts_off); u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) @@ -118,33 +99,30 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET -u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr) -{ - if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) - return 0; - - ts_secret_init(); - return siphash_2u32((__force u32)saddr, (__force u32)daddr, - &ts_secret); -} - /* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d), * but fortunately, `sport' cannot be 0 in any circumstances. If this changes, * it would be easy enough to have the former function use siphash_4u32, passing * the arguments as separate u32. */ -u32 secure_tcp_seq(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport) +union tcp_seq_and_ts_off +secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) { - u32 hash; + u32 ports = (__force u32)sport << 16 | (__force u32)dport; + union tcp_seq_and_ts_off st; net_secret_init(); - hash = siphash_3u32((__force u32)saddr, (__force u32)daddr, - (__force u32)sport << 16 | (__force u32)dport, - &net_secret); - return seq_scale(hash); + + st.hash64 = siphash_3u32((__force u32)saddr, (__force u32)daddr, + ports, &net_secret); + + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) + st.ts_off = 0; + + st.seq = seq_scale(st.seq); + return st; } -EXPORT_SYMBOL_GPL(secure_tcp_seq); +EXPORT_SYMBOL_GPL(secure_tcp_seq_and_ts_off); u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) {
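The secure_seq.c rework derives both the initial sequence number and the timestamp offset from a single siphash call by overlaying a 64-bit hash with two 32-bit fields. A sketch of that union trick; the exact field order (and therefore which half feeds which value) lives in the kernel header and depends on host byte order, so the layout below is illustrative only:

#include <stdint.h>
#include <stdio.h>

union seq_and_ts_off {
    uint64_t hash64;                /* one siphash result... */
    struct {
        uint32_t seq;               /* ...split into an ISN half... */
        uint32_t ts_off;            /* ...and a timestamp-offset half */
    };
};

int main(void)
{
    union seq_and_ts_off st;

    st.hash64 = 0x1122334455667788ULL;   /* stand-in for siphash output */
    /* one 64-bit hash yields both 32-bit values; which half is which
     * depends on the endianness of the host */
    printf("seq=%#x ts_off=%#x\n", (unsigned)st.seq, (unsigned)st.ts_off);
    return 0;
}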
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index dc47d3e..0e21704 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c
@@ -5590,15 +5590,28 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) { - bool ret; + struct socket *sock; + struct file *file; + bool ret = false; if (likely(tsonly || READ_ONCE(sock_net(sk)->core.sysctl_tstamp_allow_data))) return true; - read_lock_bh(&sk->sk_callback_lock); - ret = sk->sk_socket && sk->sk_socket->file && - file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW); - read_unlock_bh(&sk->sk_callback_lock); + /* The sk pointer remains valid as long as the skb is. The sk_socket and + * file pointer may become NULL if the socket is closed. Both structures + * (including file->cred) are RCU freed which means they can be accessed + * within a RCU read section. + */ + rcu_read_lock(); + sock = READ_ONCE(sk->sk_socket); + if (!sock) + goto out; + file = READ_ONCE(sock->file); + if (!file) + goto out; + ret = file_ns_capable(file, &init_user_ns, CAP_NET_RAW); +out: + rcu_read_unlock(); return ret; }
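skb_may_tx_timestamp() now snapshots sk->sk_socket and then sock->file once each into locals and NULL-checks the copies inside an RCU read section, instead of trusting the shared pointers to stay non-NULL between check and use. The load-once half of that pattern in userspace C11 atomics (RCU additionally keeps the pointed-to object alive for the read section, which plain atomics do not provide):

#include <stdatomic.h>
#include <stdio.h>

struct file { int capable; };

static _Atomic(struct file *) sock_file;   /* may be NULLed concurrently */

static int may_timestamp(void)
{
    /* one load into a local; re-reading the shared pointer could see
     * NULL between the check and the use */
    struct file *f = atomic_load_explicit(&sock_file,
                                          memory_order_acquire);

    if (!f)
        return 0;
    return f->capable;
}

int main(void)
{
    static struct file f = { .capable = 1 };

    atomic_store_explicit(&sock_file, &f, memory_order_release);
    printf("%d\n", may_timestamp());    /* 1 */
    atomic_store_explicit(&sock_file, NULL, memory_order_release);
    printf("%d\n", may_timestamp());    /* 0 */
    return 0;
}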
diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 2e26174..6187a83 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c
@@ -1205,8 +1205,8 @@ void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) return; psock->saved_data_ready = sk->sk_data_ready; - sk->sk_data_ready = sk_psock_strp_data_ready; - sk->sk_write_space = sk_psock_write_space; + WRITE_ONCE(sk->sk_data_ready, sk_psock_strp_data_ready); + WRITE_ONCE(sk->sk_write_space, sk_psock_write_space); } void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) @@ -1216,8 +1216,8 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) if (!psock->saved_data_ready) return; - sk->sk_data_ready = psock->saved_data_ready; - psock->saved_data_ready = NULL; + WRITE_ONCE(sk->sk_data_ready, psock->saved_data_ready); + WRITE_ONCE(psock->saved_data_ready, NULL); strp_stop(&psock->strp); } @@ -1267,17 +1267,20 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb) static void sk_psock_verdict_data_ready(struct sock *sk) { - struct socket *sock = sk->sk_socket; - const struct proto_ops *ops; + const struct proto_ops *ops = NULL; + struct socket *sock; int copied; trace_sk_data_ready(sk); - if (unlikely(!sock)) - return; - ops = READ_ONCE(sock->ops); + rcu_read_lock(); + sock = READ_ONCE(sk->sk_socket); + if (likely(sock)) + ops = READ_ONCE(sock->ops); + rcu_read_unlock(); if (!ops || !ops->read_skb) return; + copied = ops->read_skb(sk, sk_psock_verdict_recv); if (copied >= 0) { struct sk_psock *psock; @@ -1296,8 +1299,8 @@ void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock) return; psock->saved_data_ready = sk->sk_data_ready; - sk->sk_data_ready = sk_psock_verdict_data_ready; - sk->sk_write_space = sk_psock_write_space; + WRITE_ONCE(sk->sk_data_ready, sk_psock_verdict_data_ready); + WRITE_ONCE(sk->sk_write_space, sk_psock_write_space); } void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock) @@ -1308,6 +1311,6 @@ void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock) if (!psock->saved_data_ready) return; - sk->sk_data_ready = psock->saved_data_ready; + WRITE_ONCE(sk->sk_data_ready, psock->saved_data_ready); psock->saved_data_ready = NULL; }
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 13a63b4..d9faadb 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c
@@ -193,14 +193,11 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) } EXPORT_SYMBOL(eth_type_trans); -/** - * eth_header_parse - extract hardware address from packet - * @skb: packet to extract header from - * @haddr: destination buffer - */ -int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr) +int eth_header_parse(const struct sk_buff *skb, const struct net_device *dev, + unsigned char *haddr) { const struct ethhdr *eth = eth_hdr(skb); + memcpy(haddr, eth->h_source, ETH_ALEN); return ETH_ALEN; }
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index d1bfc49..fd2fea2 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c
@@ -532,8 +532,8 @@ static void hsr_change_rx_flags(struct net_device *dev, int change) static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { - bool is_slave_a_added = false; - bool is_slave_b_added = false; + struct net_device *slave_a_dev = NULL; + struct net_device *slave_b_dev = NULL; struct hsr_port *port; struct hsr_priv *hsr; int ret = 0; @@ -549,33 +549,35 @@ static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev, switch (port->type) { case HSR_PT_SLAVE_A: if (ret) { - /* clean up Slave-B */ netdev_err(dev, "add vid failed for Slave-A\n"); - if (is_slave_b_added) - vlan_vid_del(port->dev, proto, vid); - return ret; + goto unwind; } - - is_slave_a_added = true; + slave_a_dev = port->dev; break; - case HSR_PT_SLAVE_B: if (ret) { - /* clean up Slave-A */ netdev_err(dev, "add vid failed for Slave-B\n"); - if (is_slave_a_added) - vlan_vid_del(port->dev, proto, vid); - return ret; + goto unwind; } - - is_slave_b_added = true; + slave_b_dev = port->dev; break; default: + if (ret) + goto unwind; break; } } return 0; + +unwind: + if (slave_a_dev) + vlan_vid_del(slave_a_dev, proto, vid); + + if (slave_b_dev) + vlan_vid_del(slave_b_dev, proto, vid); + + return ret; } static int hsr_ndo_vlan_rx_kill_vid(struct net_device *dev,
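The hsr_ndo_vlan_rx_add_vid() rework replaces per-case cleanup with a single unwind label: record which slave devices the VID was actually added to, and on any failure undo both in one place. The generic shape of that goto-unwind idiom, with made-up resource helpers:

#include <stdlib.h>

struct res;

static struct res *acquire_a(void) { return malloc(1); }
static struct res *acquire_b(void) { return malloc(1); }
static void release(struct res *r) { free(r); }

static int setup(void)
{
    struct res *a = NULL, *b = NULL;

    a = acquire_a();
    if (!a)
        goto unwind;
    b = acquire_b();
    if (!b)
        goto unwind;
    return 0;           /* both taken; ownership passes to the caller */

unwind:
    /* undoes exactly what was taken; releasing NULL is a no-op */
    release(b);
    release(a);
    return -1;
}

int main(void) { return setup() ? 1 : 0; }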
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 50996f4..d418635 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c
@@ -123,6 +123,40 @@ static void hsr_free_node_rcu(struct rcu_head *rn) hsr_free_node(node); } +static void hsr_lock_seq_out_pair(struct hsr_node *node_a, + struct hsr_node *node_b) +{ + if (node_a == node_b) { + spin_lock_bh(&node_a->seq_out_lock); + return; + } + + if (node_a < node_b) { + spin_lock_bh(&node_a->seq_out_lock); + spin_lock_nested(&node_b->seq_out_lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock_bh(&node_b->seq_out_lock); + spin_lock_nested(&node_a->seq_out_lock, SINGLE_DEPTH_NESTING); + } +} + +static void hsr_unlock_seq_out_pair(struct hsr_node *node_a, + struct hsr_node *node_b) +{ + if (node_a == node_b) { + spin_unlock_bh(&node_a->seq_out_lock); + return; + } + + if (node_a < node_b) { + spin_unlock(&node_b->seq_out_lock); + spin_unlock_bh(&node_a->seq_out_lock); + } else { + spin_unlock(&node_a->seq_out_lock); + spin_unlock_bh(&node_b->seq_out_lock); + } +} + void hsr_del_nodes(struct list_head *node_db) { struct hsr_node *node; @@ -432,7 +466,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) } ether_addr_copy(node_real->macaddress_B, ethhdr->h_source); - spin_lock_bh(&node_real->seq_out_lock); + hsr_lock_seq_out_pair(node_real, node_curr); for (i = 0; i < HSR_PT_PORTS; i++) { if (!node_curr->time_in_stale[i] && time_after(node_curr->time_in[i], node_real->time_in[i])) { @@ -455,7 +489,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) src_blk->seq_nrs[i], HSR_SEQ_BLOCK_SIZE); } } - spin_unlock_bh(&node_real->seq_out_lock); + hsr_unlock_seq_out_pair(node_real, node_curr); node_real->addr_B_port = port_rcv->type; spin_lock_bh(&hsr->list_lock);
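hsr_lock_seq_out_pair() avoids an ABBA deadlock between two per-node locks by always acquiring the lower-addressed lock first, degrading to a single lock when both arguments name the same node. The same ordering discipline sketched with pthread mutexes (the kernel version additionally disables bottom halves and uses a lockdep nesting annotation, which this sketch drops):

#include <pthread.h>

struct node { pthread_mutex_t lock; };

static void lock_pair(struct node *a, struct node *b)
{
    if (a == b) {                     /* same node: one lock suffices */
        pthread_mutex_lock(&a->lock);
        return;
    }
    if (a > b) {                      /* order by address so every caller
                                       * takes the two locks the same way */
        struct node *t = a; a = b; b = t;
    }
    pthread_mutex_lock(&a->lock);
    pthread_mutex_lock(&b->lock);
}

static void unlock_pair(struct node *a, struct node *b)
{
    pthread_mutex_unlock(&a->lock);
    if (a != b)
        pthread_mutex_unlock(&b->lock);
}

int main(void)
{
    struct node x = { PTHREAD_MUTEX_INITIALIZER };
    struct node y = { PTHREAD_MUTEX_INITIALIZER };

    lock_pair(&x, &y);
    unlock_pair(&x, &y);
    return 0;
}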
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index b71c224..df922f9 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig
@@ -748,6 +748,7 @@ config TCP_AO bool "TCP: Authentication Option (RFC5925)" select CRYPTO + select CRYPTO_LIB_UTILS select TCP_SIGPOOL depends on 64BIT && IPV6 != m # seq-number extension needs WRITE_ONCE(u64) help @@ -761,6 +762,7 @@ config TCP_MD5SIG bool "TCP: MD5 Signature Option support (RFC2385)" select CRYPTO_LIB_MD5 + select CRYPTO_LIB_UTILS help RFC2385 specifies a method of giving MD5 protection to TCP sessions. Its main (only?) use is to protect BGP sessions between core routers
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8036e76..c7731e3 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c
@@ -124,6 +124,12 @@ #include <trace/events/sock.h> +/* Keep the definition of IPv6 disable here for now, to avoid annoying linker + * issues in case IPv6=m + */ +int disable_ipv6_mod; +EXPORT_SYMBOL(disable_ipv6_mod); + /* The inetsw table contains everything that inet_create needs to * build a new socket. */
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 2c922af..6dfc0bc 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c
@@ -235,10 +235,13 @@ static void esp_output_done(void *data, int err) xfrm_dev_resume(skb); } else { if (!err && - x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) - esp_output_tail_tcp(x, skb); - else + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) { + err = esp_output_tail_tcp(x, skb); + if (err != -EINPROGRESS) + kfree_skb(skb); + } else { xfrm_output_resume(skb_to_full_sk(skb), skb, err); + } } }
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a62b4c4..568bd1e 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c
@@ -1079,10 +1079,12 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info) static bool icmp_tag_validation(int proto) { + const struct net_protocol *ipprot; bool ok; rcu_read_lock(); - ok = rcu_dereference(inet_protos[proto])->icmp_strict_tag_validation; + ipprot = rcu_dereference(inet_protos[proto]); + ok = ipprot ? ipprot->icmp_strict_tag_validation : false; rcu_read_unlock(); return ok; }
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5dfac6c..e961936 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c
@@ -154,20 +154,6 @@ bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high) } EXPORT_SYMBOL(inet_sk_get_local_port_range); -static bool inet_use_bhash2_on_bind(const struct sock *sk) -{ -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) - return false; - - if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) - return true; - } -#endif - return sk->sk_rcv_saddr != htonl(INADDR_ANY); -} - static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2, kuid_t uid, bool relax, bool reuseport_cb_ok, bool reuseport_ok) @@ -259,7 +245,7 @@ static int inet_csk_bind_conflict(const struct sock *sk, * checks separately because their spinlocks have to be acquired/released * independently of each other, to prevent possible deadlocks */ - if (inet_use_bhash2_on_bind(sk)) + if (inet_use_hash2_on_bind(sk)) return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok); @@ -376,7 +362,7 @@ inet_csk_find_open_port(const struct sock *sk, struct inet_bind_bucket **tb_ret, head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock_bh(&head->lock); - if (inet_use_bhash2_on_bind(sk)) { + if (inet_use_hash2_on_bind(sk)) { if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false)) goto next_port; } @@ -562,7 +548,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) check_bind_conflict = false; } - if (check_bind_conflict && inet_use_bhash2_on_bind(sk)) { + if (check_bind_conflict && inet_use_hash2_on_bind(sk)) { if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, true, true)) goto fail_unlock; }
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fca9807..9bfccc2 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c
@@ -200,7 +200,7 @@ static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2, void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, struct inet_bind2_bucket *tb2, unsigned short port) { - inet_sk(sk)->inet_num = port; + WRITE_ONCE(inet_sk(sk)->inet_num, port); inet_csk(sk)->icsk_bind_hash = tb; inet_csk(sk)->icsk_bind2_hash = tb2; sk_add_bind_node(sk, &tb2->owners); @@ -224,7 +224,7 @@ static void __inet_put_port(struct sock *sk) spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; inet_csk(sk)->icsk_bind_hash = NULL; - inet_sk(sk)->inet_num = 0; + WRITE_ONCE(inet_sk(sk)->inet_num, 0); sk->sk_userlocks &= ~SOCK_CONNECT_BIND; spin_lock(&head2->lock); @@ -352,7 +352,7 @@ static inline int compute_score(struct sock *sk, const struct net *net, { int score = -1; - if (net_eq(sock_net(sk), net) && sk->sk_num == hnum && + if (net_eq(sock_net(sk), net) && READ_ONCE(sk->sk_num) == hnum && !ipv6_only_sock(sk)) { if (sk->sk_rcv_saddr != daddr) return -1; @@ -1206,7 +1206,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, sk->sk_hash = 0; inet_sk(sk)->inet_sport = 0; - inet_sk(sk)->inet_num = 0; + WRITE_ONCE(inet_sk(sk)->inet_num, 0); if (tw) inet_twsk_bind_unhash(tw, hinfo);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index e132447..35f0baa 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c
@@ -919,7 +919,8 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, return -(t->hlen + sizeof(*iph)); } -static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) +static int ipgre_header_parse(const struct sk_buff *skb, const struct net_device *dev, + unsigned char *haddr) { const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb); memcpy(haddr, &iph->saddr, 4);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 2e61ac1..5683c328 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c
@@ -58,6 +58,19 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, struct iphdr *iph; int err; + if (unlikely(dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT)) { + if (dev) { + net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", + dev->name); + DEV_STATS_INC(dev, tx_errors); + } + ip_rt_put(rt); + kfree_skb(skb); + return; + } + + dev_xmit_recursion_inc(); + skb_scrub_packet(skb, xnet); skb_clear_hash_if_not_l4(skb); @@ -88,6 +101,8 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, pkt_len = 0; iptunnel_xmit_stats(dev, pkt_len); } + + dev_xmit_recursion_dec(); } EXPORT_SYMBOL_GPL(iptunnel_xmit);
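iptunnel_xmit() now brackets the transmit path with a recursion counter and drops the packet beyond a fixed depth, so a tunnel whose route resolves back through itself can no longer recurse until the stack overflows. A thread-local sketch of the guard; the limit value is illustrative:

#include <stdio.h>

#define RECURSION_LIMIT 4

static _Thread_local int xmit_recursion;

static int tunnel_xmit(void)
{
    int ret;

    if (xmit_recursion > RECURSION_LIMIT) {
        fprintf(stderr, "dead loop on virtual device, dropping\n");
        return -1;                    /* the kfree_skb() point */
    }
    xmit_recursion++;                 /* dev_xmit_recursion_inc() */
    /* a tunnel whose route resolves back to itself re-enters here */
    ret = tunnel_xmit();
    xmit_recursion--;                 /* dev_xmit_recursion_dec() */
    return ret;
}

int main(void)
{
    tunnel_xmit();                    /* terminates instead of crashing */
    return 0;
}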
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 1aa2b05..c942f12 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c
@@ -2002,7 +2002,8 @@ static void nh_hthr_group_rebalance(struct nh_group *nhg) } static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, - struct nl_info *nlinfo) + struct nl_info *nlinfo, + struct list_head *deferred_free) { struct nh_grp_entry *nhges, *new_nhges; struct nexthop *nhp = nhge->nh_parent; @@ -2062,8 +2063,8 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, rcu_assign_pointer(nhp->nh_grp, newg); list_del(&nhge->nh_list); - free_percpu(nhge->stats); nexthop_put(nhge->nh); + list_add(&nhge->nh_list, deferred_free); /* Removal of a NH from a resilient group is notified through * bucket notifications. @@ -2083,6 +2084,7 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, struct nl_info *nlinfo) { struct nh_grp_entry *nhge, *tmp; + LIST_HEAD(deferred_free); /* If there is nothing to do, let's avoid the costly call to * synchronize_net() @@ -2091,10 +2093,16 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, return; list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) - remove_nh_grp_entry(net, nhge, nlinfo); + remove_nh_grp_entry(net, nhge, nlinfo, &deferred_free); /* make sure all see the newly published array before releasing rtnl */ synchronize_net(); + + /* Now safe to free percpu stats — all RCU readers have finished */ + list_for_each_entry_safe(nhge, tmp, &deferred_free, nh_list) { + list_del(&nhge->nh_list); + free_percpu(nhge->stats); + } } static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
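The nexthop change fixes a use-after-free: per-cpu stats used to be freed while RCU readers could still walk the old group array. Entries are now collected on a local list and their stats freed only after synchronize_net(). The shape of that unlink-wait-free pattern; synchronize() below is an empty stand-in for the grace-period wait, and the list layout is illustrative:

#include <stdlib.h>

struct entry { struct entry *next; void *stats; };

static void synchronize(void)
{
    /* stand-in for synchronize_net(): after this returns, no reader
     * can still hold a reference obtained before the unlink */
}

static void remove_all(struct entry **head)
{
    struct entry *deferred = NULL, *e, *n;

    /* phase 1: unpublish the entries, deferring the stats free */
    while ((e = *head)) {
        *head = e->next;
        e->next = deferred;
        deferred = e;
    }

    synchronize();

    /* phase 2: nothing can reference the stats any more */
    for (e = deferred; e; e = n) {
        n = e->next;
        free(e->stats);               /* free_percpu() in the patch */
        free(e);
    }
}

int main(void)
{
    struct entry *head = calloc(1, sizeof(*head));

    if (!head)
        return 1;
    head->stats = malloc(8);
    remove_all(&head);
    return 0;
}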
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 569befc..fc3affd 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c
@@ -203,7 +203,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, bool own_req; child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst, - NULL, &own_req); + NULL, &own_req, NULL); if (child) { refcount_set(&req->rsk_refcnt, 1); sock_rps_save_rxhash(child, skb); @@ -378,9 +378,14 @@ static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, tcp_parse_options(net, skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcp_ts_off(net, - ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr); + union tcp_seq_and_ts_off st; + + st = secure_tcp_seq_and_ts_off(net, + ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); + tsoff = st.ts_off; tcp_opt.rcv_tsecr -= tsoff; }
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 643763b..5654cc9 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c
@@ -486,7 +486,8 @@ static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed) proc_fib_multipath_hash_rand_seed), }; - WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed, new); + WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.user_seed, new.user_seed); + WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed, new.mp_seed); } static int proc_fib_multipath_hash_seed(const struct ctl_table *table, int write, @@ -500,7 +501,7 @@ static int proc_fib_multipath_hash_seed(const struct ctl_table *table, int write int ret; mphs = &net->ipv4.sysctl_fib_multipath_hash_seed; - user_seed = mphs->user_seed; + user_seed = READ_ONCE(mphs->user_seed); tmp = *table; tmp.data = &user_seed;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f84d9a4..202a4e5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c
@@ -244,6 +244,7 @@ #define pr_fmt(fmt) "TCP: " fmt #include <crypto/md5.h> +#include <crypto/utils.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> @@ -1446,7 +1447,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) err = sk_stream_error(sk, flags, err); /* make sure we wake any epoll edge trigger waiter */ if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) { - sk->sk_write_space(sk); + READ_ONCE(sk->sk_write_space)(sk); tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); } if (binding) @@ -4181,7 +4182,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, break; case TCP_NOTSENT_LOWAT: WRITE_ONCE(tp->notsent_lowat, val); - sk->sk_write_space(sk); + READ_ONCE(sk->sk_write_space)(sk); break; case TCP_INQ: if (val > 1 || val < 0) @@ -4970,7 +4971,7 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, tcp_v4_md5_hash_skb(newhash, key, NULL, skb); else tp->af_specific->calc_md5_hash(newhash, key, NULL, skb); - if (memcmp(hash_location, newhash, 16) != 0) { + if (crypto_memneq(hash_location, newhash, 16)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); trace_tcp_hash_md5_mismatch(sk, skb); return SKB_DROP_REASON_TCP_MD5FAILURE;
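Both TCP-MD5 hunks here (and the tcp_ao.c one below) replace memcmp() with crypto_memneq() for digest comparison: memcmp() returns at the first mismatching byte, so its timing leaks how long an attacker-controlled prefix matched. A constant-time comparison accumulates all differences before branching, along these lines:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* returns nonzero iff the buffers differ, in time independent of where
 * (or whether) they first disagree */
static int ct_memneq(const void *a, const void *b, size_t n)
{
    const uint8_t *pa = a, *pb = b;
    uint8_t diff = 0;

    for (size_t i = 0; i < n; i++)
        diff |= pa[i] ^ pb[i];        /* no early exit, no data branch */
    return diff != 0;
}

int main(void)
{
    const uint8_t good[16] = { 1 }, bad[16] = { 2 };

    printf("%d %d\n", ct_memneq(good, good, 16),
           ct_memneq(good, bad, 16));                /* prints: 0 1 */
    return 0;
}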
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 4980cad..a97cdf3 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c
@@ -10,6 +10,7 @@ #define pr_fmt(fmt) "TCP: " fmt #include <crypto/hash.h> +#include <crypto/utils.h> #include <linux/inetdevice.h> #include <linux/tcp.h> @@ -922,7 +923,7 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, /* XXX: make it per-AF callback? */ tcp_ao_hash_skb(family, hash_buf, key, sk, skb, traffic_key, (phash - (u8 *)th), sne); - if (memcmp(phash, hash_buf, maclen)) { + if (crypto_memneq(phash, hash_buf, maclen)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); atomic64_inc(&info->counters.pkt_bad); atomic64_inc(&key->pkt_bad);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index c449a04..813d2e4 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c
@@ -725,7 +725,7 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) WRITE_ONCE(sk->sk_prot->unhash, psock->saved_unhash); tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); } else { - sk->sk_write_space = psock->saved_write_space; + WRITE_ONCE(sk->sk_write_space, psock->saved_write_space); /* Pairs with lockless read in sk_clone_lock() */ sock_replace_proto(sk, psock->sk_proto); }
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index d83efd9..7935702 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c
@@ -509,7 +509,7 @@ static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) goto next_normal; - if (r->id.idiag_sport != htons(sk->sk_num) && + if (r->id.idiag_sport != htons(READ_ONCE(sk->sk_num)) && r->id.idiag_sport) goto next_normal; if (r->id.idiag_dport != sk->sk_dport &&
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 1b7ba2c..9fdc19a 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c
@@ -333,7 +333,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, bool own_req; child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, - NULL, &own_req); + NULL, &own_req, NULL); if (!child) return NULL;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 65a7a5e..cba8973 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c
@@ -4858,15 +4858,24 @@ static enum skb_drop_reason tcp_disordered_ack_check(const struct sock *sk, */ static enum skb_drop_reason tcp_sequence(const struct sock *sk, - u32 seq, u32 end_seq) + u32 seq, u32 end_seq, + const struct tcphdr *th) { const struct tcp_sock *tp = tcp_sk(sk); + u32 seq_limit; if (before(end_seq, tp->rcv_wup)) return SKB_DROP_REASON_TCP_OLD_SEQUENCE; - if (after(end_seq, tp->rcv_nxt + tcp_receive_window(tp))) { - if (after(seq, tp->rcv_nxt + tcp_receive_window(tp))) + seq_limit = tp->rcv_nxt + tcp_receive_window(tp); + if (unlikely(after(end_seq, seq_limit))) { + /* Some stacks are known to handle FIN incorrectly; allow the + * FIN to extend beyond the window and check it in detail later. + */ + if (!after(end_seq - th->fin, seq_limit)) + return SKB_NOT_DROPPED_YET; + + if (after(seq, seq_limit)) return SKB_DROP_REASON_TCP_INVALID_SEQUENCE; /* Only accept this packet if receive queue is empty. */ @@ -5365,25 +5374,11 @@ static void tcp_ofo_queue(struct sock *sk) static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb); static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb); -/* Check if this incoming skb can be added to socket receive queues - * while satisfying sk->sk_rcvbuf limit. - * - * In theory we should use skb->truesize, but this can cause problems - * when applications use too small SO_RCVBUF values. - * When LRO / hw gro is used, the socket might have a high tp->scaling_ratio, - * allowing RWIN to be close to available space. - * Whenever the receive queue gets full, we can receive a small packet - * filling RWIN, but with a high skb->truesize, because most NIC use 4K page - * plus sk_buff metadata even when receiving less than 1500 bytes of payload. - * - * Note that we use skb->len to decide to accept or drop this packet, - * but sk->sk_rmem_alloc is the sum of all skb->truesize. 
- */ static bool tcp_can_ingest(const struct sock *sk, const struct sk_buff *skb) { unsigned int rmem = atomic_read(&sk->sk_rmem_alloc); - return rmem + skb->len <= sk->sk_rcvbuf; + return rmem <= sk->sk_rcvbuf; } static int tcp_try_rmem_schedule(struct sock *sk, const struct sk_buff *skb, @@ -5416,7 +5411,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP); - sk->sk_data_ready(sk); + READ_ONCE(sk->sk_data_ready)(sk); tcp_drop_reason(sk, skb, SKB_DROP_REASON_PROTO_MEM); return; } @@ -5626,7 +5621,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) void tcp_data_ready(struct sock *sk) { if (tcp_epollin_ready(sk, sk->sk_rcvlowat) || sock_flag(sk, SOCK_DONE)) - sk->sk_data_ready(sk); + READ_ONCE(sk->sk_data_ready)(sk); } static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) @@ -5682,7 +5677,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) inet_csk(sk)->icsk_ack.pending |= (ICSK_ACK_NOMEM | ICSK_ACK_NOW); inet_csk_schedule_ack(sk); - sk->sk_data_ready(sk); + READ_ONCE(sk->sk_data_ready)(sk); if (skb_queue_len(&sk->sk_receive_queue) && skb->len) { reason = SKB_DROP_REASON_PROTO_MEM; @@ -6105,7 +6100,9 @@ static void tcp_new_space(struct sock *sk) tp->snd_cwnd_stamp = tcp_jiffies32; } - INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk); + INDIRECT_CALL_1(READ_ONCE(sk->sk_write_space), + sk_stream_write_space, + sk); } /* Caller made space either from: @@ -6316,7 +6313,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t BUG(); WRITE_ONCE(tp->urg_data, TCP_URG_VALID | tmp); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); + READ_ONCE(sk->sk_data_ready)(sk); } } } @@ -6379,7 +6376,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, step1: /* Step 1: check sequence number */ - reason = tcp_sequence(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + reason = tcp_sequence(sk, TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq, th); if (reason) { /* RFC793, page 37: "In all states except SYN-SENT, all reset * (RST) segments are validated by checking their SEQ-fields." @@ -7648,6 +7646,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct sock *fastopen_sk = NULL; + union tcp_seq_and_ts_off st; struct request_sock *req; bool want_cookie = false; struct dst_entry *dst; @@ -7717,9 +7716,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (!dst) goto drop_and_free; + if (tmp_opt.tstamp_ok || (!want_cookie && !isn)) + st = af_ops->init_seq_and_ts_off(net, skb); + if (tmp_opt.tstamp_ok) { tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); - tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); + tcp_rsk(req)->ts_off = st.ts_off; } if (!want_cookie && !isn) { int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog); @@ -7741,7 +7743,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_release; } - isn = af_ops->init_seq(skb); + isn = st.seq; } tcp_ecn_create_request(req, skb, sk, dst); @@ -7782,7 +7784,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, sock_put(fastopen_sk); goto drop_and_free; } - sk->sk_data_ready(sk); + READ_ONCE(sk->sk_data_ready)(sk); bh_unlock_sock(fastopen_sk); sock_put(fastopen_sk); } else {
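The tcp_sequence() change leans on TCP's before()/after() helpers, which compare 32-bit sequence numbers modulo 2^32 through a signed subtraction so ordering survives wraparound; the new `end_seq - th->fin` test reuses that arithmetic to tolerate a FIN one byte past the window. The helpers, with a wraparound example:

#include <stdint.h>
#include <stdio.h>

/* TCP's modular sequence comparison: correct across 2^32 wraparound
 * because the difference is interpreted as signed */
static int before(uint32_t seq1, uint32_t seq2)
{
    return (int32_t)(seq1 - seq2) < 0;
}
#define after(seq2, seq1) before(seq1, seq2)

int main(void)
{
    /* 10 is "after" 0xfffffff0 once the sequence space wraps, even
     * though it is numerically smaller */
    printf("%d %d\n", after(10u, 0xfffffff0u),
           before(0xfffffff0u, 10u));               /* prints: 1 1 */
    return 0;
}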
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 63a8b17..c7b2463 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c
@@ -88,6 +88,7 @@ #include <linux/skbuff_ref.h> #include <crypto/md5.h> +#include <crypto/utils.h> #include <trace/events/tcp.h> @@ -104,17 +105,14 @@ static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = { static DEFINE_MUTEX(tcp_exit_batch_mutex); -static u32 tcp_v4_init_seq(const struct sk_buff *skb) +static union tcp_seq_and_ts_off +tcp_v4_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb) { - return secure_tcp_seq(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source); -} - -static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb) -{ - return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); + return secure_tcp_seq_and_ts_off(net, + ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); } int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -326,15 +324,16 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len rt = NULL; if (likely(!tp->repair)) { + union tcp_seq_and_ts_off st; + + st = secure_tcp_seq_and_ts_off(net, + inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + usin->sin_port); if (!tp->write_seq) - WRITE_ONCE(tp->write_seq, - secure_tcp_seq(inet->inet_saddr, - inet->inet_daddr, - inet->inet_sport, - usin->sin_port)); - WRITE_ONCE(tp->tsoffset, - secure_tcp_ts_off(net, inet->inet_saddr, - inet->inet_daddr)); + WRITE_ONCE(tp->write_seq, st.seq); + WRITE_ONCE(tp->tsoffset, st.ts_off); } atomic_set(&inet->inet_id, get_random_u16()); @@ -839,7 +838,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, goto out; tcp_v4_md5_hash_skb(newhash, key, NULL, skb); - if (memcmp(md5_hash_location, newhash, 16) != 0) + if (crypto_memneq(md5_hash_location, newhash, 16)) goto out; } @@ -1676,8 +1675,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .cookie_init_seq = cookie_v4_init_sequence, #endif .route_req = tcp_v4_route_req, - .init_seq = tcp_v4_init_seq, - .init_ts_off = tcp_v4_init_ts_off, + .init_seq_and_ts_off = tcp_v4_init_seq_and_ts_off, .send_synack = tcp_v4_send_synack, }; @@ -1705,7 +1703,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req) + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)) { struct inet_request_sock *ireq; bool found_dup_sk = false; @@ -1757,6 +1757,10 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, } sk_setup_caps(newsk, dst); +#if IS_ENABLED(CONFIG_IPV6) + if (opt_child_init) + opt_child_init(newsk, sk); +#endif tcp_ca_openreq_child(newsk, dst); tcp_sync_mss(newsk, dst4_mtu(dst));
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ec12886..dafb63b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c
@@ -925,7 +925,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * socket is created, wait for troubles. */ child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, - req, &own_req); + req, &own_req, NULL); if (!child) goto listen_overflow; @@ -1004,7 +1004,7 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, reason = tcp_rcv_state_process(child, skb); /* Wakeup parent, send SIGIO */ if (state == TCP_SYN_RECV && child->sk_state != state) - parent->sk_data_ready(parent); + READ_ONCE(parent->sk_data_ready)(parent); } else { /* Alas, it is possible again, because we do lookup * in main socket hash table and lock on listening
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6c6b68a..cb99a3c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c
@@ -287,7 +287,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, } else { hslot = udp_hashslot(udptable, net, snum); spin_lock_bh(&hslot->lock); - if (hslot->count > 10) { + if (inet_use_hash2_on_bind(sk) && hslot->count > 10) { int exist; unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum; @@ -1787,7 +1787,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) * using prepare_to_wait_exclusive(). */ while (nb) { - INDIRECT_CALL_1(sk->sk_data_ready, + INDIRECT_CALL_1(READ_ONCE(sk->sk_data_ready), sock_def_readable, sk); nb--; } @@ -2287,7 +2287,6 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) udp_sk(sk)->udp_port_hash); hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); nhslot2 = udp_hashslot2(udptable, newhash); - udp_sk(sk)->udp_portaddr_hash = newhash; if (hslot2 != nhslot2 || rcu_access_pointer(sk->sk_reuseport_cb)) { @@ -2321,19 +2320,25 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) if (udp_hashed4(sk)) { spin_lock_bh(&hslot->lock); - udp_rehash4(udptable, sk, newhash4); - if (hslot2 != nhslot2) { - spin_lock(&hslot2->lock); - udp_hash4_dec(hslot2); - spin_unlock(&hslot2->lock); + if (inet_rcv_saddr_any(sk)) { + udp_unhash4(udptable, sk); + } else { + udp_rehash4(udptable, sk, newhash4); + if (hslot2 != nhslot2) { + spin_lock(&hslot2->lock); + udp_hash4_dec(hslot2); + spin_unlock(&hslot2->lock); - spin_lock(&nhslot2->lock); - udp_hash4_inc(nhslot2); - spin_unlock(&nhslot2->lock); + spin_lock(&nhslot2->lock); + udp_hash4_inc(nhslot2); + spin_unlock(&nhslot2->lock); + } } spin_unlock_bh(&hslot->lock); } + + udp_sk(sk)->udp_portaddr_hash = newhash; } } EXPORT_IPV6_MOD(udp_lib_rehash);
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index 91233e3..779a3a0 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c
@@ -158,7 +158,7 @@ int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6; if (restore) { - sk->sk_write_space = psock->saved_write_space; + WRITE_ONCE(sk->sk_write_space, psock->saved_write_space); sock_replace_proto(sk, psock->sk_proto); return 0; }
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index d3e621a..826e9e7 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c
@@ -20,10 +20,9 @@ EXPORT_SYMBOL(udplite_table); /* Designate sk as UDP-Lite socket */ static int udplite_sk_init(struct sock *sk) { - udp_init_sock(sk); pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); - return 0; + return udp_init_sock(sk); } static int udplite_rcv(struct sk_buff *skb)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0e55f13..dd0b4d8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c
@@ -2862,7 +2862,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) fib6_add_gc_list(rt); } else { fib6_clean_expires(rt); - fib6_remove_gc_list(rt); + fib6_may_remove_gc_list(net, rt); } spin_unlock_bh(&table->tb6_lock); @@ -3625,12 +3625,12 @@ static void addrconf_permanent_addr(struct net *net, struct net_device *dev) if ((ifp->flags & IFA_F_PERMANENT) && fixup_permanent_addr(net, idev, ifp) < 0) { write_unlock_bh(&idev->lock); - in6_ifa_hold(ifp); - ipv6_del_addr(ifp); - write_lock_bh(&idev->lock); net_info_ratelimited("%s: Failed to add prefix route for address %pI6c; dropping\n", idev->dev->name, &ifp->addr); + in6_ifa_hold(ifp); + ipv6_del_addr(ifp); + write_lock_bh(&idev->lock); } } @@ -4840,7 +4840,7 @@ static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp, if (!(flags & RTF_EXPIRES)) { fib6_clean_expires(f6i); - fib6_remove_gc_list(f6i); + fib6_may_remove_gc_list(net, f6i); } else { fib6_set_expires(f6i, expires); fib6_add_gc_list(f6i);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 23cc9b4..4cbd45b 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c
@@ -86,8 +86,6 @@ struct ipv6_params ipv6_defaults = { .autoconf = 1, }; -static int disable_ipv6_mod; - module_param_named(disable, disable_ipv6_mod, int, 0444); MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional"); @@ -97,12 +95,6 @@ MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces"); module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444); MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces"); -bool ipv6_mod_enabled(void) -{ - return disable_ipv6_mod == 0; -} -EXPORT_SYMBOL_GPL(ipv6_mod_enabled); - static struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) { const int offset = sk->sk_prot->ipv6_pinfo_offset;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index c564b68..993e2d7 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c
@@ -763,6 +763,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, { struct in6_pktinfo *src_info; struct cmsghdr *cmsg; + struct ipv6_rt_hdr *orthdr; struct ipv6_rt_hdr *rthdr; struct ipv6_opt_hdr *hdr; struct ipv6_txoptions *opt = ipc6->opt; @@ -924,9 +925,13 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, goto exit_f; } if (cmsg->cmsg_type == IPV6_DSTOPTS) { + if (opt->dst1opt) + opt->opt_flen -= ipv6_optlen(opt->dst1opt); opt->opt_flen += len; opt->dst1opt = hdr; } else { + if (opt->dst0opt) + opt->opt_nflen -= ipv6_optlen(opt->dst0opt); opt->opt_nflen += len; opt->dst0opt = hdr; } @@ -969,12 +974,17 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, goto exit_f; } + orthdr = opt->srcrt; + if (orthdr) + opt->opt_nflen -= ((orthdr->hdrlen + 1) << 3); opt->opt_nflen += len; opt->srcrt = rthdr; if (cmsg->cmsg_type == IPV6_2292RTHDR && opt->dst1opt) { int dsthdrlen = ((opt->dst1opt->hdrlen+1)<<3); + if (opt->dst0opt) + opt->opt_nflen -= ipv6_optlen(opt->dst0opt); opt->opt_nflen += dsthdrlen; opt->dst0opt = opt->dst1opt; opt->dst1opt = NULL;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e75da98..9f75313 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c
@@ -271,10 +271,13 @@ static void esp_output_done(void *data, int err) xfrm_dev_resume(skb); } else { if (!err && - x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) - esp_output_tail_tcp(x, skb); - else + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) { + err = esp_output_tail_tcp(x, skb); + if (err != -EINPROGRESS) + kfree_skb(skb); + } else { xfrm_output_resume(skb_to_full_sk(skb), skb, err); + } } }
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 5e3610a..95558fd6 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c
@@ -379,6 +379,10 @@ static int ipv6_srh_rcv(struct sk_buff *skb) hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); idev = __in6_dev_get(skb->dev); + if (!idev) { + kfree_skb(skb); + return -1; + } accept_seg6 = min(READ_ONCE(net->ipv6.devconf_all->seg6_enabled), READ_ONCE(idev->cnf.seg6_enabled));
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 813d2e9..d5d23a9 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c
@@ -875,6 +875,9 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, if (!skb2) return 1; + /* Remove debris left by IPv4 stack. */ + memset(IP6CB(skb2), 0, sizeof(*IP6CB(skb2))); + skb_dst_drop(skb2); skb_pull(skb2, nhs); skb_reset_network_header(skb2);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 5e1da08..182d38e 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c
@@ -95,7 +95,8 @@ static inline int compute_score(struct sock *sk, const struct net *net, { int score = -1; - if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum && + if (net_eq(sock_net(sk), net) && + READ_ONCE(inet_sk(sk)->inet_num) == hnum && sk->sk_family == PF_INET6) { if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1;
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index b76f89d..3978773 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c
@@ -708,7 +708,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_namespace *ns, struct ioam6_trace_hdr *trace, struct ioam6_schema *sc, - u8 sclen, bool is_input) + unsigned int sclen, bool is_input) { struct net_device *dev = skb_dst_dev(skb); struct timespec64 ts; @@ -939,7 +939,7 @@ void ioam6_fill_trace_data(struct sk_buff *skb, bool is_input) { struct ioam6_schema *sc; - u8 sclen = 0; + unsigned int sclen = 0; /* Skip if Overflow flag is set */
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 9058e71..45ef4d6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c
@@ -727,20 +727,28 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val) { + struct dst_metrics *m; + if (!f6i) return; - if (f6i->fib6_metrics == &dst_default_metrics) { + if (READ_ONCE(f6i->fib6_metrics) == &dst_default_metrics) { + struct dst_metrics *dflt = (struct dst_metrics *)&dst_default_metrics; struct dst_metrics *p = kzalloc_obj(*p, GFP_ATOMIC); if (!p) return; + p->metrics[metric - 1] = val; refcount_set(&p->refcnt, 1); - f6i->fib6_metrics = p; + if (cmpxchg(&f6i->fib6_metrics, dflt, p) != dflt) + kfree(p); + else + return; } - f6i->fib6_metrics->metrics[metric - 1] = val; + m = READ_ONCE(f6i->fib6_metrics); + WRITE_ONCE(m->metrics[metric - 1], val); } /* @@ -1133,7 +1141,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, return -EEXIST; if (!(rt->fib6_flags & RTF_EXPIRES)) { fib6_clean_expires(iter); - fib6_remove_gc_list(iter); + fib6_may_remove_gc_list(info->nl_net, iter); } else { fib6_set_expires(iter, rt->expires); fib6_add_gc_list(iter); @@ -2348,6 +2356,17 @@ static void fib6_flush_trees(struct net *net) /* * Garbage collection */ +void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, + unsigned long now) +{ + bool may_expire = rt->fib6_flags & RTF_EXPIRES && rt->expires; + int old_more = gc_args->more; + + rt6_age_exceptions(rt, gc_args, now); + + if (!may_expire && old_more == gc_args->more) + fib6_remove_gc_list(rt); +} static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) { @@ -2370,7 +2389,7 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) * Note, that clones are aged out * only if they are not in use now. */ - rt6_age_exceptions(rt, gc_args, now); + fib6_age_exceptions(rt, gc_args, now); return 0; }
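The fib6_metric_set() change above replaces a plain write to a possibly-shared default with a standard lockless publish: allocate a private copy, install it with cmpxchg(), and free the copy if another CPU raced in first. A minimal sketch of that pattern, with hypothetical names (struct cfg, shared_cfg, default_cfg), not the actual fib6 code:

	#include <linux/atomic.h>
	#include <linux/refcount.h>
	#include <linux/slab.h>

	struct cfg {
		refcount_t refcnt;
		u32 metrics[4];
	};

	static struct cfg default_cfg;                 /* shared, read-only default */
	static struct cfg *shared_cfg = &default_cfg;

	/* Swap the read-only default for a writable private copy, exactly once.
	 * Concurrent callers may both allocate; cmpxchg() picks one winner and
	 * the loser frees its copy, so no lock is needed.
	 */
	static struct cfg *cfg_make_writable(void)
	{
		struct cfg *p, *old;

		old = READ_ONCE(shared_cfg);
		if (old != &default_cfg)
			return old;                    /* already writable */

		p = kzalloc(sizeof(*p), GFP_ATOMIC);
		if (!p)
			return NULL;
		refcount_set(&p->refcnt, 1);

		old = cmpxchg(&shared_cfg, &default_cfg, p);
		if (old != &default_cfg) {             /* lost the race */
			kfree(p);
			return old;
		}
		return p;
	}

Subsequent stores to the winner's fields then go through WRITE_ONCE(), pairing with READ_ONCE() on the reader side, as the hunk does for m->metrics[].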
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 7c12bf7..c92f98c 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c
@@ -133,11 +133,6 @@ static void fl_release(struct ip6_flowlabel *fl) if (time_after(ttd, fl->expires)) fl->expires = ttd; ttd = fl->expires; - if (fl->opt && fl->share == IPV6_FL_S_EXCL) { - struct ipv6_txoptions *opt = fl->opt; - fl->opt = NULL; - kfree(opt); - } if (!timer_pending(&ip6_fl_gc_timer) || time_after(ip6_fl_gc_timer.expires, ttd)) mod_timer(&ip6_fl_gc_timer, ttd);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 4c29aa9..0b53488 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c
@@ -601,11 +601,16 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!skb2) return 0; + /* Remove debris left by IPv6 stack. */ + memset(IPCB(skb2), 0, sizeof(*IPCB(skb2))); + skb_dst_drop(skb2); skb_pull(skb2, offset); skb_reset_network_header(skb2); eiph = ip_hdr(skb2); + if (eiph->version != 4 || eiph->ihl < 5) + goto out; /* Try to guess incoming interface */ rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f6a5d8c..186e60c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c
@@ -1209,6 +1209,9 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type; ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code; ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3; + ndmsg->nduseropt_pad1 = 0; + ndmsg->nduseropt_pad2 = 0; + ndmsg->nduseropt_pad3 = 0; memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 4ad8b20..5561bd9c 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -157,6 +157,10 @@ static int rt_mt6_check(const struct xt_mtchk_param *par) pr_debug("unknown flags %X\n", rtinfo->invflags); return -EINVAL; } + if (rtinfo->addrnr > IP6T_RT_HOPS) { + pr_debug("too many addresses specified\n"); + return -EINVAL; + } if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) && (!(rtinfo->flags & IP6T_RT_TYP) || (rtinfo->rt_type != 0) ||
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 85df25c..cb52170 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c
@@ -1033,7 +1033,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, if (!addrconf_finite_timeout(lifetime)) { fib6_clean_expires(rt); - fib6_remove_gc_list(rt); + fib6_may_remove_gc_list(net, rt); } else { fib6_set_expires(rt, jiffies + HZ * lifetime); fib6_add_gc_list(rt); @@ -1063,7 +1063,8 @@ static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res) */ if (netif_is_l3_slave(dev) && !rt6_need_strict(&res->f6i->fib6_dst.addr)) - dev = l3mdev_master_dev_rcu(dev); + dev = l3mdev_master_dev_rcu(dev) ? : + dev_net(dev)->loopback_dev; else if (!netif_is_l3_master(dev)) dev = dev_net(dev)->loopback_dev; /* last case is netif_is_l3_master(dev) is true in which @@ -3582,7 +3583,6 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, netdevice_tracker *dev_tracker = &fib6_nh->fib_nh_dev_tracker; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; - int addr_type; int err; fib6_nh->fib_nh_family = AF_INET6; @@ -3624,11 +3624,10 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, fib6_nh->fib_nh_weight = 1; - /* We cannot add true routes via loopback here, - * they would result in kernel looping; promote them to reject routes + /* Reset the nexthop device to the loopback device in case of reject + * routes. */ - addr_type = ipv6_addr_type(&cfg->fc_dst); - if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) { + if (cfg->fc_flags & RTF_REJECT) { /* hold loopback dev/idev if we haven't done so. */ if (dev != net->loopback_dev) { if (dev) {
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index ee6bac0..e6964c6 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c
@@ -184,6 +184,8 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb) int require_hmac; idev = __in6_dev_get(skb->dev); + if (!idev) + return false; srh = (struct ipv6_sr_hdr *)skb_transport_header(skb);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 7e007f0..4f6f0d7 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c
@@ -151,9 +151,14 @@ static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, tcp_parse_options(net, skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcpv6_ts_off(net, - ipv6_hdr(skb)->daddr.s6_addr32, - ipv6_hdr(skb)->saddr.s6_addr32); + union tcp_seq_and_ts_off st; + + st = secure_tcpv6_seq_and_ts_off(net, + ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); + tsoff = st.ts_off; tcp_opt.rcv_tsecr -= tsoff; }
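cookie_tcp_check() and the TCP connect/ACK paths now obtain the initial sequence number and the timestamp offset from one secure_tcpv6_seq_and_ts_off() call instead of two separate keyed hashes. The union's definition is not part of this diff; given how st.seq and st.ts_off are used here, a plausible shape would be the following, shown purely as an assumption:

	/* Hypothetical sketch: both values carved from a single keyed-hash
	 * output over {saddr, daddr, sport, dport}. The real definition
	 * lives in a header outside this diff.
	 */
	union tcp_seq_and_ts_off {
		struct {
			u32 seq;     /* initial sequence number */
			u32 ts_off;  /* per-flow TCP timestamp offset */
		};
		u64 raw;
	};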
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d10487b..bb09d5c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c
@@ -68,6 +68,7 @@ #include <linux/seq_file.h> #include <crypto/md5.h> +#include <crypto/utils.h> #include <trace/events/tcp.h> @@ -104,18 +105,14 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) } } -static u32 tcp_v6_init_seq(const struct sk_buff *skb) +static union tcp_seq_and_ts_off +tcp_v6_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb) { - return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, - ipv6_hdr(skb)->saddr.s6_addr32, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source); -} - -static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) -{ - return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, - ipv6_hdr(skb)->saddr.s6_addr32); + return secure_tcpv6_seq_and_ts_off(net, + ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); } static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, @@ -319,14 +316,16 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, sk_set_txhash(sk); if (likely(!tp->repair)) { + union tcp_seq_and_ts_off st; + + st = secure_tcpv6_seq_and_ts_off(net, + np->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, + inet->inet_sport, + inet->inet_dport); if (!tp->write_seq) - WRITE_ONCE(tp->write_seq, - secure_tcpv6_seq(np->saddr.s6_addr32, - sk->sk_v6_daddr.s6_addr32, - inet->inet_sport, - inet->inet_dport)); - tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32, - sk->sk_v6_daddr.s6_addr32); + WRITE_ONCE(tp->write_seq, st.seq); + tp->tsoffset = st.ts_off; } if (tcp_fastopen_defer_connect(sk, &err)) @@ -816,8 +815,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .cookie_init_seq = cookie_v6_init_sequence, #endif .route_req = tcp_v6_route_req, - .init_seq = tcp_v6_init_seq, - .init_ts_off = tcp_v6_init_ts_off, + .init_seq_and_ts_off = tcp_v6_init_seq_and_ts_off, .send_synack = tcp_v6_send_synack, }; @@ -1048,7 +1046,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, key.type = TCP_KEY_MD5; tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); - if (memcmp(md5_hash_location, newhash, 16) != 0) + if (crypto_memneq(md5_hash_location, newhash, 16)) goto out; } #endif @@ -1312,11 +1310,48 @@ static void tcp_v6_restore_cb(struct sk_buff *skb) sizeof(struct inet6_skb_parm)); } +/* Called from tcp_v4_syn_recv_sock() for v6_mapped children. */ +static void tcp_v6_mapped_child_init(struct sock *newsk, const struct sock *sk) +{ + struct inet_sock *newinet = inet_sk(newsk); + struct ipv6_pinfo *newnp; + + newinet->pinet6 = newnp = tcp_inet6_sk(newsk); + newinet->ipv6_fl_list = NULL; + + memcpy(newnp, tcp_inet6_sk(sk), sizeof(struct ipv6_pinfo)); + + newnp->saddr = newsk->sk_v6_rcv_saddr; + + inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; + if (sk_is_mptcp(newsk)) + mptcpv6_handle_mapped(newsk, true); + newsk->sk_backlog_rcv = tcp_v4_do_rcv; +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) + tcp_sk(newsk)->af_specific = &tcp_sock_ipv6_mapped_specific; +#endif + + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->pktoptions = NULL; + newnp->opt = NULL; + + /* tcp_v4_syn_recv_sock() has initialized newinet->mc_{index,ttl} */ + newnp->mcast_oif = newinet->mc_index; + newnp->mcast_hops = newinet->mc_ttl; + + newnp->rcv_flowinfo = 0; + if (inet6_test_bit(REPFLOW, sk)) + newnp->flow_label = 0; +} + static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req) + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)) { const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct inet_request_sock *ireq; @@ -1332,61 +1367,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * #endif struct flowi6 fl6; - if (skb->protocol == htons(ETH_P_IP)) { - /* - * v6 mapped - */ - - newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, - req_unhash, own_req); - - if (!newsk) - return NULL; - - newinet = inet_sk(newsk); - newinet->pinet6 = tcp_inet6_sk(newsk); - newinet->ipv6_fl_list = NULL; - - newnp = tcp_inet6_sk(newsk); - newtp = tcp_sk(newsk); - - memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - - newnp->saddr = newsk->sk_v6_rcv_saddr; - - inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; - if (sk_is_mptcp(newsk)) - mptcpv6_handle_mapped(newsk, true); - newsk->sk_backlog_rcv = tcp_v4_do_rcv; -#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) - newtp->af_specific = &tcp_sock_ipv6_mapped_specific; -#endif - - newnp->ipv6_mc_list = NULL; - newnp->ipv6_ac_list = NULL; - newnp->pktoptions = NULL; - newnp->opt = NULL; - newnp->mcast_oif = inet_iif(skb); - newnp->mcast_hops = ip_hdr(skb)->ttl; - newnp->rcv_flowinfo = 0; - if (inet6_test_bit(REPFLOW, sk)) - newnp->flow_label = 0; - - /* - * No need to charge this sock to the relevant IPv6 refcnt debug socks count - * here, tcp_create_openreq_child now does this for us, see the comment in - * that function for the gory details. -acme - */ - - /* It is tricky place. Until this moment IPv4 tcp - worked with IPv6 icsk.icsk_af_ops. - Sync it now. - */ - tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); - - return newsk; - } - + if (skb->protocol == htons(ETH_P_IP)) + return tcp_v4_syn_recv_sock(sk, skb, req, dst, + req_unhash, own_req, + tcp_v6_mapped_child_init); ireq = inet_rsk(req); if (sk_acceptq_is_full(sk))
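The tcp_v6_send_reset() hunk above swaps memcmp() for crypto_memneq() when validating the incoming TCP-MD5 digest. memcmp() may return as soon as the first byte differs, so its latency leaks how long the correct prefix of an attacker-supplied MAC is; crypto_memneq() from <crypto/utils.h> compares all bytes unconditionally. A minimal sketch with a hypothetical wrapper name:

	#include <crypto/utils.h>

	#define MD5_DIGEST_LEN 16	/* hypothetical constant; the hunk hardcodes 16 */

	/* Constant-time MAC check: true iff the digests differ. */
	static bool digest_mismatch(const u8 *wire_mac, const u8 *local_mac)
	{
		return crypto_memneq(wire_mac, local_mac, MD5_DIGEST_LEN);
	}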
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 2cec542..e867721 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c
@@ -16,10 +16,9 @@ static int udplitev6_sk_init(struct sock *sk) { - udpv6_init_sock(sk); pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); - return 0; + return udpv6_init_sock(sk); } static int udplitev6_rcv(struct sk_buff *skb)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 1f19b6f..125ea9a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c
@@ -57,6 +57,7 @@ static int xfrm6_get_saddr(xfrm_address_t *saddr, struct dst_entry *dst; struct net_device *dev; struct inet6_dev *idev; + int err; dst = xfrm6_dst_lookup(params); if (IS_ERR(dst)) @@ -68,9 +69,11 @@ static int xfrm6_get_saddr(xfrm_address_t *saddr, return -EHOSTUNREACH; } dev = idev->dev; - ipv6_dev_get_saddr(dev_net(dev), dev, &params->daddr->in6, 0, - &saddr->in6); + err = ipv6_dev_get_saddr(dev_net(dev), dev, &params->daddr->in6, 0, + &saddr->in6); dst_release(dst); + if (err) + return -EHOSTUNREACH; return 0; }
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 5dd7e05..3912e75 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c
@@ -628,7 +628,7 @@ static int kcm_write_msgs(struct kcm_sock *kcm) skb = txm->frag_skb; } - if (WARN_ON(!skb_shinfo(skb)->nr_frags) || + if (WARN_ON_ONCE(!skb_shinfo(skb)->nr_frags) || WARN_ON_ONCE(!skb_frag_page(&skb_shinfo(skb)->frags[0]))) { ret = -EINVAL; goto out; @@ -749,7 +749,7 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct kcm_sock *kcm = kcm_sk(sk); - struct sk_buff *skb = NULL, *head = NULL; + struct sk_buff *skb = NULL, *head = NULL, *frag_prev = NULL; size_t copy, copied = 0; long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); int eor = (sock->type == SOCK_DGRAM) ? @@ -824,6 +824,7 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) else skb->next = tskb; + frag_prev = skb; skb = tskb; skb->ip_summed = CHECKSUM_UNNECESSARY; continue; @@ -933,6 +934,22 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) out_error: kcm_push(kcm); + /* When MAX_SKB_FRAGS was reached, a new skb was allocated and + * linked into the frag_list before data copy. If the copy + * subsequently failed, this skb has zero frags. Remove it from + * the frag_list to prevent kcm_write_msgs from later hitting + * WARN_ON(!skb_shinfo(skb)->nr_frags). + */ + if (frag_prev && !skb_shinfo(skb)->nr_frags) { + if (head == frag_prev) + skb_shinfo(head)->frag_list = NULL; + else + frag_prev->next = NULL; + kfree_skb(skb); + /* Update skb as it may be saved in partial_message via goto */ + skb = frag_prev; + } + if (sock->type == SOCK_SEQPACKET) { /* Wrote some bytes before encountering an * error, return partial success.
diff --git a/net/key/af_key.c b/net/key/af_key.c index 0756bac..72ac2ac 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c
@@ -3518,7 +3518,7 @@ static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress * static int set_ipsecrequest(struct sk_buff *skb, uint8_t proto, uint8_t mode, int level, - uint32_t reqid, uint8_t family, + uint32_t reqid, sa_family_t family, const xfrm_address_t *src, const xfrm_address_t *dst) { struct sadb_x_ipsecrequest *rq; @@ -3583,12 +3583,17 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, /* ipsecrequests */ for (i = 0, mp = m; i < num_bundles; i++, mp++) { - /* old locator pair */ - size_pol += sizeof(struct sadb_x_ipsecrequest) + - pfkey_sockaddr_pair_size(mp->old_family); - /* new locator pair */ - size_pol += sizeof(struct sadb_x_ipsecrequest) + - pfkey_sockaddr_pair_size(mp->new_family); + int pair_size; + + pair_size = pfkey_sockaddr_pair_size(mp->old_family); + if (!pair_size) + return -EINVAL; + size_pol += sizeof(struct sadb_x_ipsecrequest) + pair_size; + + pair_size = pfkey_sockaddr_pair_size(mp->new_family); + if (!pair_size) + return -EINVAL; + size_pol += sizeof(struct sadb_x_ipsecrequest) + pair_size; } size += sizeof(struct sadb_msg) + size_pol;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b92b4a5..b85375c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c
@@ -1904,12 +1904,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, __sta_info_flush(sdata, true, link_id, NULL); - ieee80211_remove_link_keys(link, &keys); - if (!list_empty(&keys)) { - synchronize_net(); - ieee80211_free_key_list(local, &keys); - } - ieee80211_stop_mbssid(sdata); RCU_INIT_POINTER(link_conf->tx_bss_conf, NULL); @@ -1921,6 +1915,12 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_BEACON_ENABLED); + ieee80211_remove_link_keys(link, &keys); + if (!list_empty(&keys)) { + synchronize_net(); + ieee80211_free_key_list(local, &keys); + } + if (sdata->wdev.links[link_id].cac_started) { chandef = link_conf->chanreq.oper; wiphy_hrtimer_work_cancel(wiphy, &link->dfs_cac_timer_work);
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 4447cf0..05f45e6 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c
@@ -561,14 +561,16 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local, rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) { - struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_sub_if_data *sdata; enum ieee80211_sta_rx_bandwidth new_sta_bw; unsigned int link_id; if (!ieee80211_sdata_running(sta->sdata)) continue; - for (link_id = 0; link_id < ARRAY_SIZE(sta->sdata->link); link_id++) { + sdata = get_bss_sdata(sta->sdata); + + for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { struct ieee80211_link_data *link = rcu_dereference(sdata->link[link_id]); struct ieee80211_bss_conf *link_conf;
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index d02f073..687a66c 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c
@@ -320,7 +320,6 @@ static ssize_t aql_enable_read(struct file *file, char __user *user_buf, static ssize_t aql_enable_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { - bool aql_disabled = static_key_false(&aql_disable.key); char buf[3]; size_t len; @@ -335,15 +334,12 @@ static ssize_t aql_enable_write(struct file *file, const char __user *user_buf, if (len > 0 && buf[len - 1] == '\n') buf[len - 1] = 0; - if (buf[0] == '0' && buf[1] == '\0') { - if (!aql_disabled) - static_branch_inc(&aql_disable); - } else if (buf[0] == '1' && buf[1] == '\0') { - if (aql_disabled) - static_branch_dec(&aql_disable); - } else { + if (buf[0] == '0' && buf[1] == '\0') + static_branch_enable(&aql_disable); + else if (buf[0] == '1' && buf[1] == '\0') + static_branch_disable(&aql_disable); + else return -EINVAL; - } return count; }
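The aql_enable_write() cleanup above leans on static_branch_enable()/static_branch_disable() forcing the key into a definite state regardless of its current value, unlike static_branch_inc()/static_branch_dec(), which are reference-counted and must stay balanced (hence the old code's snapshot of the key state). A sketch of the idiom, with a hypothetical feature_disable key:

	#include <linux/jump_label.h>

	static DEFINE_STATIC_KEY_FALSE(feature_disable);

	static void feature_set_enabled(bool enable)
	{
		/* enable/disable are idempotent: calling either twice in a
		 * row is safe, unlike paired static_branch_inc()/dec().
		 */
		if (enable)
			static_branch_disable(&feature_disable);
		else
			static_branch_enable(&feature_disable);
	}

	static bool feature_active(void)
	{
		/* Patched to a straight-line fall-through while the key
		 * stays false, so the fast path pays no branch cost.
		 */
		return !static_branch_unlikely(&feature_disable);
	}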
diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c index 75096b2..078e1e2 100644 --- a/net/mac80211/eht.c +++ b/net/mac80211/eht.c
@@ -154,6 +154,7 @@ void ieee80211_rx_eml_op_mode_notif(struct ieee80211_sub_if_data *sdata, u8 *ptr = mgmt->u.action.u.eml_omn.variable; struct ieee80211_eml_params eml_params = { .link_id = status->link_id, + .control = control, }; struct sta_info *sta; int opt_len = 0;
diff --git a/net/mac80211/link.c b/net/mac80211/link.c index f589450..03bfca2 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c
@@ -281,6 +281,7 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]; struct ieee80211_link_data *old_data[IEEE80211_MLD_MAX_NUM_LINKS]; bool use_deflink = old_links == 0; /* set for error case */ + bool non_sta = sdata->vif.type != NL80211_IFTYPE_STATION; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -337,6 +338,7 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, link = links[link_id]; ieee80211_link_init(sdata, link_id, &link->data, &link->conf); ieee80211_link_setup(&link->data); + ieee80211_set_wmm_default(&link->data, true, non_sta); } if (new_links == 0)
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index a7e71ee..8fdbdf9 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c
@@ -79,6 +79,9 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, * - MDA enabled * - Power management control on fc */ + if (!ie->mesh_config) + return false; + if (!(ifmsh->mesh_id_len == ie->mesh_id_len && memcmp(ifmsh->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 && (ifmsh->mesh_pp_id == ie->mesh_config->meshconf_psel) && @@ -1635,6 +1638,9 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, if (!mesh_matches_local(sdata, elems)) goto free; + if (!elems->mesh_chansw_params_ie) + goto free; + ifmsh->chsw_ttl = elems->mesh_chansw_params_ie->mesh_ttl; if (!--ifmsh->chsw_ttl) fwd_csa = false;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e8edcee..810bea1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c
@@ -7085,6 +7085,9 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, control = le16_to_cpu(prof->control); link_id = control & IEEE80211_MLE_STA_RECONF_CONTROL_LINK_ID; + if (link_id >= IEEE80211_MLD_MAX_NUM_LINKS) + continue; + removed_links |= BIT(link_id); /* the MAC address should not be included, but handle it */
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 6dc22f1..dd51a57 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c
@@ -2782,7 +2782,9 @@ static void sta_set_link_sinfo(struct sta_info *sta, } link_sinfo->inactive_time = - jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta, link_id)); + jiffies_delta_to_msecs(jiffies - + ieee80211_sta_last_active(sta, + link_id)); if (!(link_sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES64) | BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) { @@ -3015,7 +3017,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->connected_time = ktime_get_seconds() - sta->last_connected; sinfo->assoc_at = sta->assoc_at; sinfo->inactive_time = - jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta, -1)); + jiffies_delta_to_msecs(jiffies - + ieee80211_sta_last_active(sta, -1)); if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES64) | BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) {
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index dbbfe2d..1dca2fa 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c
@@ -1449,7 +1449,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, } sta = sta_info_get(sdata, peer); - if (!sta) + if (!sta || !sta->sta.tdls) return -ENOLINK; iee80211_tdls_recalc_chanctx(sdata, sta);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8cdbd41..b7aedaa 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c
@@ -1899,8 +1899,10 @@ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw, struct ieee80211_tx_data tx; struct sk_buff *skb2; - if (ieee80211_tx_prepare(sdata, &tx, NULL, skb) == TX_DROP) + if (ieee80211_tx_prepare(sdata, &tx, NULL, skb) == TX_DROP) { + kfree_skb(skb); return false; + } info->band = band; info->control.vif = vif;
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 9e4631f..000be60 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c
@@ -469,7 +469,9 @@ static int mac802154_header_create(struct sk_buff *skb, } static int -mac802154_header_parse(const struct sk_buff *skb, unsigned char *haddr) +mac802154_header_parse(const struct sk_buff *skb, + const struct net_device *dev, + unsigned char *haddr) { struct ieee802154_hdr hdr;
diff --git a/net/mctp/route.c b/net/mctp/route.c index 0381377..59ad60b 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c
@@ -359,6 +359,7 @@ static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) { struct mctp_sk_key *key; struct mctp_flow *flow; + unsigned long flags; flow = skb_ext_find(skb, SKB_EXT_MCTP); if (!flow) @@ -366,12 +367,14 @@ static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) key = flow->key; - if (key->dev) { - WARN_ON(key->dev != dev); - return; - } + spin_lock_irqsave(&key->lock, flags); - mctp_dev_set_key(dev, key); + if (!key->dev) + mctp_dev_set_key(dev, key); + else + WARN_ON(key->dev != dev); + + spin_unlock_irqrestore(&key->lock, flags); } #else static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {}
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index ef9e749..18d3da8 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c
@@ -83,14 +83,30 @@ static struct mpls_route *mpls_route_input(struct net *net, unsigned int index) return mpls_dereference(net, platform_label[index]); } +static struct mpls_route __rcu **mpls_platform_label_rcu(struct net *net, size_t *platform_labels) +{ + struct mpls_route __rcu **platform_label; + unsigned int sequence; + + do { + sequence = read_seqcount_begin(&net->mpls.platform_label_seq); + platform_label = rcu_dereference(net->mpls.platform_label); + *platform_labels = net->mpls.platform_labels; + } while (read_seqcount_retry(&net->mpls.platform_label_seq, sequence)); + + return platform_label; +} + static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned int index) { struct mpls_route __rcu **platform_label; + size_t platform_labels; - if (index >= net->mpls.platform_labels) + platform_label = mpls_platform_label_rcu(net, &platform_labels); + + if (index >= platform_labels) return NULL; - platform_label = rcu_dereference(net->mpls.platform_label); return rcu_dereference(platform_label[index]); } @@ -2240,8 +2256,7 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) if (index < MPLS_LABEL_FIRST_UNRESERVED) index = MPLS_LABEL_FIRST_UNRESERVED; - platform_label = rcu_dereference(net->mpls.platform_label); - platform_labels = net->mpls.platform_labels; + platform_label = mpls_platform_label_rcu(net, &platform_labels); if (filter.filter_set) flags |= NLM_F_DUMP_FILTERED; @@ -2645,8 +2660,12 @@ static int resize_platform_label_table(struct net *net, size_t limit) } /* Update the global pointers */ + local_bh_disable(); + write_seqcount_begin(&net->mpls.platform_label_seq); net->mpls.platform_labels = limit; rcu_assign_pointer(net->mpls.platform_label, labels); + write_seqcount_end(&net->mpls.platform_label_seq); + local_bh_enable(); mutex_unlock(&net->mpls.platform_mutex); @@ -2728,6 +2747,8 @@ static __net_init int mpls_net_init(struct net *net) int i; mutex_init(&net->mpls.platform_mutex); + seqcount_mutex_init(&net->mpls.platform_label_seq, &net->mpls.platform_mutex); + net->mpls.platform_labels = 0; net->mpls.platform_label = NULL; net->mpls.ip_ttl_propagate = 1; @@ -2854,6 +2875,7 @@ static int __init mpls_init(void) rtnl_af_unregister(&mpls_af_ops); out_unregister_dev_type: dev_remove_pack(&mpls_packet_type); + unregister_netdevice_notifier(&mpls_dev_notifier); out_unregister_pernet: unregister_pernet_subsys(&mpls_net_ops); goto out;
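resize_platform_label_table() must update net->mpls.platform_label and net->mpls.platform_labels as one unit, so the patch above wraps the pair in a seqcount (serialized by platform_mutex via seqcount_mutex_init()) and makes lockless readers retry until both values come from the same update. A generic sketch of the pairing, with hypothetical tbl/tbl_len state rather than the mpls fields:

	#include <linux/mutex.h>
	#include <linux/rcupdate.h>
	#include <linux/seqlock.h>

	struct item;

	static DEFINE_MUTEX(tbl_mutex);
	static seqcount_mutex_t tbl_seq;	/* seqcount_mutex_init(&tbl_seq, &tbl_mutex) at init */
	static struct item __rcu **tbl;
	static size_t tbl_len;

	/* Lockless reader: loop until pointer and length are consistent. */
	static struct item __rcu **tbl_read(size_t *len)
	{
		struct item __rcu **t;
		unsigned int seq;

		do {
			seq = read_seqcount_begin(&tbl_seq);
			t = rcu_dereference(tbl);
			*len = tbl_len;
		} while (read_seqcount_retry(&tbl_seq, seq));

		return t;
	}

	/* Writer: the mutex serializes writers; BH is disabled because
	 * readers may run from packet processing in softirq context.
	 */
	static void tbl_publish(struct item __rcu **new_tbl, size_t new_len)
	{
		mutex_lock(&tbl_mutex);
		local_bh_disable();
		write_seqcount_begin(&tbl_seq);
		tbl_len = new_len;
		rcu_assign_pointer(tbl, new_tbl);
		write_seqcount_end(&tbl_seq);
		local_bh_enable();
		mutex_unlock(&tbl_mutex);
	}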
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 7298836..57a4566 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c
@@ -212,9 +212,24 @@ void mptcp_pm_send_ack(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); } -void mptcp_pm_addr_send_ack(struct mptcp_sock *msk) +static bool subflow_in_rm_list(const struct mptcp_subflow_context *subflow, + const struct mptcp_rm_list *rm_list) { - struct mptcp_subflow_context *subflow, *alt = NULL; + u8 i, id = subflow_get_local_id(subflow); + + for (i = 0; i < rm_list->nr; i++) { + if (rm_list->ids[i] == id) + return true; + } + + return false; +} + +static void +mptcp_pm_addr_send_ack_avoid_list(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list) +{ + struct mptcp_subflow_context *subflow, *stale = NULL, *same_id = NULL; msk_owned_by_me(msk); lockdep_assert_held(&msk->pm.lock); @@ -224,19 +239,35 @@ void mptcp_pm_addr_send_ack(struct mptcp_sock *msk) return; mptcp_for_each_subflow(msk, subflow) { - if (__mptcp_subflow_active(subflow)) { - if (!subflow->stale) { - mptcp_pm_send_ack(msk, subflow, false, false); - return; - } + if (!__mptcp_subflow_active(subflow)) + continue; - if (!alt) - alt = subflow; + if (unlikely(subflow->stale)) { + if (!stale) + stale = subflow; + } else if (unlikely(rm_list && + subflow_in_rm_list(subflow, rm_list))) { + if (!same_id) + same_id = subflow; + } else { + goto send_ack; } } - if (alt) - mptcp_pm_send_ack(msk, alt, false, false); + if (same_id) + subflow = same_id; + else if (stale) + subflow = stale; + else + return; + +send_ack: + mptcp_pm_send_ack(msk, subflow, false, false); +} + +void mptcp_pm_addr_send_ack(struct mptcp_sock *msk) +{ + mptcp_pm_addr_send_ack_avoid_list(msk, NULL); } int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, @@ -470,7 +501,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ msk->pm.rm_list_tx = *rm_list; rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL); WRITE_ONCE(msk->pm.addr_signal, rm_addr); - mptcp_pm_addr_send_ack(msk); + mptcp_pm_addr_send_ack_avoid_list(msk, rm_list); return 0; }
diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index b5316a6..82e59f9 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c
@@ -418,6 +418,15 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) } exit: + /* If an endpoint has both the signal and subflow flags, but it is not + * possible to create subflows -- the 'while' loop body above never + * executed -- then still mark the endpoint as used, which is effectively + * the case. This avoids issues later when removing the endpoint and + * calling __mark_subflow_endp_available(), which expects the increment + * here. + */ + if (signal_and_subflow && local.addr.id != msk->mpc_endpoint_id) + msk->pm.local_addr_used++; + mptcp_pm_nl_check_work_pending(msk); } @@ -829,7 +838,7 @@ static struct lock_class_key mptcp_keys[2]; static int mptcp_pm_nl_create_listen_socket(struct sock *sk, struct mptcp_pm_addr_entry *entry) { - bool is_ipv6 = sk->sk_family == AF_INET6; + bool is_ipv6 = entry->addr.family == AF_INET6; int addrlen = sizeof(struct sockaddr_in); struct sockaddr_storage addr; struct sock *newsk, *ssk;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index cf1852b..65c3bb8 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c
@@ -2006,7 +2006,7 @@ static void mptcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, size_t len, int flags, int copied_total, struct scm_timestamping_internal *tss, - int *cmsg_flags) + int *cmsg_flags, struct sk_buff **last) { struct mptcp_sock *msk = mptcp_sk(sk); struct sk_buff *skb, *tmp; @@ -2023,6 +2023,7 @@ static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, /* skip already peeked skbs */ if (total_data_len + data_len <= copied_total) { total_data_len += data_len; + *last = skb; continue; } @@ -2058,6 +2059,8 @@ static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, } mptcp_eat_recv_skb(sk, skb); + } else { + *last = skb; } if (copied >= len) @@ -2288,10 +2291,12 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, cmsg_flags = MPTCP_CMSG_INQ; while (copied < len) { + struct sk_buff *last = NULL; int err, bytes_read; bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, - copied, &tss, &cmsg_flags); + copied, &tss, &cmsg_flags, + &last); if (unlikely(bytes_read < 0)) { if (!copied) copied = bytes_read; @@ -2343,7 +2348,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, pr_debug("block timeout %ld\n", timeo); mptcp_cleanup_rbuf(msk, copied); - err = sk_wait_data(sk, &timeo, NULL); + err = sk_wait_data(sk, &timeo, last); if (err < 0) { err = copied ? : err; goto out_err;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 7ef1d2b..6716970 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c
@@ -808,7 +808,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req) + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)) { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk); struct mptcp_subflow_request_sock *subflow_req; @@ -855,7 +857,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, create_child: child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst, - req_unhash, own_req); + req_unhash, own_req, opt_child_init); if (child && *own_req) { struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index 62fb103..040a315 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c
@@ -224,7 +224,8 @@ int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb) if (!nah) { netdev_warn(ndp->ndev.dev, "Invalid AEN (0x%x) received\n", h->type); - return -ENOENT; + ret = -ENOENT; + goto out; } ret = ncsi_validate_aen_pkt(h, nah->payload);
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 271ec6c..fbd84bc 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c
@@ -1176,8 +1176,10 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, /* Find the NCSI device */ nd = ncsi_find_dev(orig_dev); ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL; - if (!ndp) - return -ENODEV; + if (!ndp) { + ret = -ENODEV; + goto err_free_skb; + } /* Check if it is AEN packet */ hdr = (struct ncsi_pkt_hdr *)skb_network_header(skb); @@ -1199,7 +1201,8 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, if (!nrh) { netdev_err(nd->dev, "Received unrecognized packet (0x%x)\n", hdr->type); - return -ENOENT; + ret = -ENOENT; + goto err_free_skb; } /* Associate with the request */ @@ -1207,7 +1210,8 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, nr = &ndp->requests[hdr->id]; if (!nr->used) { spin_unlock_irqrestore(&ndp->lock, flags); - return -ENODEV; + ret = -ENODEV; + goto err_free_skb; } nr->rsp = skb; @@ -1261,4 +1265,8 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, out: ncsi_free_request(nr); return ret; + +err_free_skb: + kfree_skb(skb); + return ret; }
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index a2fe711..d0c9fe5 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c
@@ -821,7 +821,7 @@ EXPORT_SYMBOL_GPL(ip_set_del); * */ ip_set_id_t -ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) +ip_set_get_byname(struct net *net, const struct nlattr *name, struct ip_set **set) { ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; @@ -830,7 +830,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) rcu_read_lock(); for (i = 0; i < inst->ip_set_max; i++) { s = rcu_dereference(inst->ip_set_list)[i]; - if (s && STRNCMP(s->name, name)) { + if (s && nla_strcmp(name, s->name) == 0) { __ip_set_get(s); index = i; *set = s;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 181daa9..b79e5dd 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -1098,7 +1098,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (!test_bit(i, n->used)) k++; } - if (n->pos == 0 && k == 0) { + if (k == n->pos) { t->hregion[r].ext_size -= ext_size(n->size, dsize); rcu_assign_pointer(hbucket(t, key), NULL); kfree_rcu(n, rcu);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 98b91b3..1cef84f 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -367,7 +367,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s); + e.id = ip_set_get_byname(map->net, tb[IPSET_ATTR_NAME], &s); if (e.id == IPSET_INVALID_ID) return -IPSET_ERR_NAME; /* "Loop detection" */ @@ -389,7 +389,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_NAMEREF]) { e.refid = ip_set_get_byname(map->net, - nla_data(tb[IPSET_ATTR_NAMEREF]), + tb[IPSET_ATTR_NAMEREF], &s); if (e.refid == IPSET_INVALID_ID) { ret = -IPSET_ERR_NAMEREF;
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index 6f3a641..c200318 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c
@@ -170,7 +170,7 @@ static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog, static const struct bpf_link_ops bpf_nf_link_lops = { .release = bpf_nf_link_release, - .dealloc = bpf_nf_link_dealloc, + .dealloc_deferred = bpf_nf_link_dealloc, .detach = bpf_nf_link_detach, .show_fdinfo = bpf_nf_link_show_info, .fill_link_info = bpf_nf_link_fill_link_info,
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c index a7552a4..4f39bf7 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -21,6 +21,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, unsigned int timeout) { const struct nf_conntrack_helper *helper; + struct net *net = read_pnet(&ct->ct_net); struct nf_conntrack_expect *exp; struct iphdr *iph = ip_hdr(skb); struct rtable *rt = skb_rtable(skb); @@ -70,8 +71,11 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, exp->expectfn = NULL; exp->flags = NF_CT_EXPECT_PERMANENT; exp->class = NF_CT_EXPECT_CLASS_DEFAULT; - exp->helper = NULL; - + rcu_assign_pointer(exp->helper, helper); + write_pnet(&exp->net, net); +#ifdef CONFIG_NF_CONNTRACK_ZONES + exp->zone = ct->zone; +#endif nf_ct_expect_related(exp, 0); nf_ct_expect_put(exp);
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 81baf20..9df1594 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c
@@ -247,6 +247,8 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; + lockdep_nfct_expect_lock_held(); + rcu_read_lock(); notify = rcu_dereference(net->ct.nf_conntrack_event_cb); if (!notify)
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index cfc2daa..24d0576 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c
@@ -51,6 +51,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, struct net *net = nf_ct_exp_net(exp); struct nf_conntrack_net *cnet; + lockdep_nfct_expect_lock_held(); WARN_ON(!master_help); WARN_ON(timer_pending(&exp->timeout)); @@ -112,12 +113,14 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple, const struct net *net) { return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && - net_eq(net, nf_ct_net(i->master)) && - nf_ct_zone_equal_any(i->master, zone); + net_eq(net, read_pnet(&i->net)) && + nf_ct_exp_zone_equal_any(i, zone); } bool nf_ct_remove_expect(struct nf_conntrack_expect *exp) { + lockdep_nfct_expect_lock_held(); + if (timer_delete(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); @@ -177,6 +180,8 @@ nf_ct_find_expectation(struct net *net, struct nf_conntrack_expect *i, *exp = NULL; unsigned int h; + lockdep_nfct_expect_lock_held(); + if (!cnet->expect_count) return NULL; @@ -309,12 +314,20 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) } EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); +/* This function can only be used from packet path, where accessing + * master's helper is safe, because the packet holds a reference on + * the conntrack object. Never use it from control plane. + */ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, u_int8_t family, const union nf_inet_addr *saddr, const union nf_inet_addr *daddr, u_int8_t proto, const __be16 *src, const __be16 *dst) { + struct nf_conntrack_helper *helper = NULL; + struct nf_conn *ct = exp->master; + struct net *net = read_pnet(&ct->ct_net); + struct nf_conn_help *help; int len; if (family == AF_INET) @@ -325,7 +338,16 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, exp->flags = 0; exp->class = class; exp->expectfn = NULL; - exp->helper = NULL; + + help = nfct_help(ct); + if (help) + helper = rcu_dereference(help->helper); + + rcu_assign_pointer(exp->helper, helper); + write_pnet(&exp->net, net); +#ifdef CONFIG_NF_CONNTRACK_ZONES + exp->zone = ct->zone; +#endif exp->tuple.src.l3num = family; exp->tuple.dst.protonum = proto; @@ -442,6 +464,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect, unsigned int h; int ret = 0; + lockdep_nfct_expect_lock_held(); + if (!master_help) { ret = -ESHUTDOWN; goto out; @@ -498,8 +522,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, nf_ct_expect_insert(expect); - spin_unlock_bh(&nf_conntrack_expect_lock); nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report); + spin_unlock_bh(&nf_conntrack_expect_lock); + return 0; out: spin_unlock_bh(&nf_conntrack_expect_lock); @@ -627,11 +652,15 @@ static int exp_seq_show(struct seq_file *s, void *v) { struct nf_conntrack_expect *expect; struct nf_conntrack_helper *helper; + struct net *net = seq_file_net(s); struct hlist_node *n = v; char *delim = ""; expect = hlist_entry(n, struct nf_conntrack_expect, hnode); + if (!net_eq(nf_ct_exp_net(expect), net)) + return 0; + if (expect->timeout.function) seq_printf(s, "%ld ", timer_pending(&expect->timeout) ? (long)(expect->timeout.expires - jiffies)/HZ : 0); @@ -654,7 +683,7 @@ static int exp_seq_show(struct seq_file *s, void *v) if (expect->flags & NF_CT_EXPECT_USERSPACE) seq_printf(s, "%sUSERSPACE", delim); - helper = rcu_dereference(nfct_help(expect->master)->helper); + helper = rcu_dereference(expect->helper); if (helper) { seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name); if (helper->expect_policy[expect->class].name[0])
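With this patch, exp->helper becomes an RCU-managed pointer independent of the master conntrack's helper: writers publish it with rcu_assign_pointer() and readers such as exp_seq_show() fetch it with rcu_dereference() inside an RCU read-side critical section, with a synchronize_rcu() before the helper can be reused (see the nf_conntrack_helper_unregister() hunk below). A stripped-down sketch of that publish/read discipline, with hypothetical types:

	#include <linux/rcupdate.h>

	struct helper;

	struct obj {
		struct helper __rcu *helper;
	};

	/* Writer side: publish h so that readers see fully-initialized
	 * contents; pairs with rcu_dereference() below.
	 */
	static void obj_set_helper(struct obj *o, struct helper *h)
	{
		rcu_assign_pointer(o->helper, h);
	}

	/* Reader side: only valid between rcu_read_lock()/unlock(). */
	static struct helper *obj_get_helper(struct obj *o)
	{
		return rcu_dereference(o->helper);
	}

	/* Unpublish, then wait for in-flight readers before freeing
	 * or reusing the helper object.
	 */
	static void obj_clear_helper(struct obj *o)
	{
		RCU_INIT_POINTER(o->helper, NULL);
		synchronize_rcu();
	}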
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 540d977..7b1497e 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -331,6 +331,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f, if (nf_h323_error_boundary(bs, 0, 2)) return H323_ERROR_BOUND; len = get_bits(bs, 2) + 1; + if (nf_h323_error_boundary(bs, len, 0)) + return H323_ERROR_BOUND; BYTE_ALIGN(bs); if (base && (f->attr & DECODE)) { /* timeToLive */ unsigned int v = get_uint(bs, len) + f->lb; @@ -796,7 +798,7 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, if (ext || (son->attr & OPEN)) { BYTE_ALIGN(bs); - if (nf_h323_error_boundary(bs, len, 0)) + if (nf_h323_error_boundary(bs, 2, 0)) return H323_ERROR_BOUND; len = get_len(bs); if (nf_h323_error_boundary(bs, len, 0)) @@ -922,6 +924,8 @@ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931) break; p++; len--; + if (len <= 0) + break; return DecodeH323_UserInformation(buf, p, len, &q931->UUIE); }
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index a2a0e22..3f5c504 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -643,7 +643,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - exp->helper = &nf_conntrack_helper_h245; + rcu_assign_pointer(exp->helper, &nf_conntrack_helper_h245); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -767,7 +767,7 @@ static int expect_callforwarding(struct sk_buff *skb, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -1234,7 +1234,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3 : NULL, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */ nathook = rcu_dereference(nfct_h323_nat_hook); @@ -1306,7 +1306,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_UDP, NULL, &port); - exp->helper = nf_conntrack_helper_ras; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_ras); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect RAS "); @@ -1523,7 +1523,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1577,7 +1577,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - exp->helper = nf_conntrack_helper_q931; + rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 ");
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index ceb48c3..a715304 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c
@@ -395,14 +395,10 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data) { - struct nf_conn_help *help = nfct_help(exp->master); const struct nf_conntrack_helper *me = data; const struct nf_conntrack_helper *this; - if (exp->helper == me) - return true; - - this = rcu_dereference_protected(help->helper, + this = rcu_dereference_protected(exp->helper, lockdep_is_held(&nf_conntrack_expect_lock)); return this == me; } @@ -419,8 +415,13 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) */ synchronize_rcu(); - nf_ct_expect_iterate_destroy(expect_iter_me, NULL); + nf_ct_expect_iterate_destroy(expect_iter_me, me); nf_ct_iterate_destroy(unhelp, me); + + /* nf_ct_iterate_destroy() does an unconditional synchronize_rcu() as + * last step, this ensures rcu readers of exp->helper are done. + * No need for another synchronize_rcu() here. + */ } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c9d725f..a20cd82 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c
@@ -910,8 +910,8 @@ struct ctnetlink_filter { }; static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = { - [CTA_FILTER_ORIG_FLAGS] = { .type = NLA_U32 }, - [CTA_FILTER_REPLY_FLAGS] = { .type = NLA_U32 }, + [CTA_FILTER_ORIG_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL), + [CTA_FILTER_REPLY_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL), }; static int ctnetlink_parse_filter(const struct nlattr *attr, @@ -925,17 +925,11 @@ static int ctnetlink_parse_filter(const struct nlattr *attr, if (ret) return ret; - if (tb[CTA_FILTER_ORIG_FLAGS]) { + if (tb[CTA_FILTER_ORIG_FLAGS]) filter->orig_flags = nla_get_u32(tb[CTA_FILTER_ORIG_FLAGS]); - if (filter->orig_flags & ~CTA_FILTER_F_ALL) - return -EOPNOTSUPP; - } - if (tb[CTA_FILTER_REPLY_FLAGS]) { + if (tb[CTA_FILTER_REPLY_FLAGS]) filter->reply_flags = nla_get_u32(tb[CTA_FILTER_REPLY_FLAGS]); - if (filter->reply_flags & ~CTA_FILTER_F_ALL) - return -EOPNOTSUPP; - } return 0; } @@ -2634,7 +2628,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN - 1 }, [CTA_EXPECT_ZONE] = { .type = NLA_U16 }, - [CTA_EXPECT_FLAGS] = { .type = NLA_U32 }, + [CTA_EXPECT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_CT_EXPECT_MASK), [CTA_EXPECT_CLASS] = { .type = NLA_U32 }, [CTA_EXPECT_NAT] = { .type = NLA_NESTED }, [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING }, @@ -2642,7 +2636,6 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { static struct nf_conntrack_expect * ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct, - struct nf_conntrack_helper *helper, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask); @@ -2871,7 +2864,6 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, { struct nlattr *cda[CTA_EXPECT_MAX+1]; struct nf_conntrack_tuple tuple, mask; - struct nf_conntrack_helper *helper = NULL; struct nf_conntrack_expect *exp; int err; @@ -2885,17 +2877,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, if (err < 0) return err; - if (cda[CTA_EXPECT_HELP_NAME]) { - const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]); - - helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), - nf_ct_protonum(ct)); - if (helper == NULL) - return -EOPNOTSUPP; - } - exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct, - helper, &tuple, &mask); + &tuple, &mask); if (IS_ERR(exp)) return PTR_ERR(exp); @@ -3012,7 +2995,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, { struct nf_conn *master = exp->master; long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ; - struct nf_conn_help *help; + struct nf_conntrack_helper *helper; #if IS_ENABLED(CONFIG_NF_NAT) struct nlattr *nest_parms; struct nf_conntrack_tuple nat_tuple = {}; @@ -3057,15 +3040,12 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) || nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class))) goto nla_put_failure; - help = nfct_help(master); - if (help) { - struct nf_conntrack_helper *helper; - helper = rcu_dereference(help->helper); - if (helper && - nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name)) - goto nla_put_failure; - } + helper = rcu_dereference(exp->helper); + if (helper && + nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name)) + goto nla_put_failure; + expfn = nf_ct_helper_expectfn_find_by_symbol(exp->expectfn); if (expfn != NULL && nla_put_string(skb, CTA_EXPECT_FN, expfn->name)) @@ 
-3212,7 +3192,7 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct nf_conn *ct = cb->data; - struct nf_conn_help *help = nfct_help(ct); + struct nf_conn_help *help; u_int8_t l3proto = nfmsg->nfgen_family; unsigned long last_id = cb->args[1]; struct nf_conntrack_expect *exp; @@ -3220,6 +3200,10 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) if (cb->args[0]) return 0; + help = nfct_help(ct); + if (!help) + return 0; + rcu_read_lock(); restart: @@ -3249,6 +3233,24 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +static int ctnetlink_dump_exp_ct_start(struct netlink_callback *cb) +{ + struct nf_conn *ct = cb->data; + + if (!refcount_inc_not_zero(&ct->ct_general.use)) + return -ENOENT; + return 0; +} + +static int ctnetlink_dump_exp_ct_done(struct netlink_callback *cb) +{ + struct nf_conn *ct = cb->data; + + if (ct) + nf_ct_put(ct); + return 0; +} + static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -3264,6 +3266,8 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, struct nf_conntrack_zone zone; struct netlink_dump_control c = { .dump = ctnetlink_exp_ct_dump_table, + .start = ctnetlink_dump_exp_ct_start, + .done = ctnetlink_dump_exp_ct_done, }; err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, @@ -3334,31 +3338,37 @@ static int ctnetlink_get_expect(struct sk_buff *skb, if (err < 0) return err; + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + spin_lock_bh(&nf_conntrack_expect_lock); exp = nf_ct_expect_find_get(info->net, &zone, &tuple); - if (!exp) + if (!exp) { + spin_unlock_bh(&nf_conntrack_expect_lock); + kfree_skb(skb2); return -ENOENT; + } if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); + spin_unlock_bh(&nf_conntrack_expect_lock); + kfree_skb(skb2); return -ENOENT; } } - skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb2) { - nf_ct_expect_put(exp); - return -ENOMEM; - } - rcu_read_lock(); err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp); rcu_read_unlock(); nf_ct_expect_put(exp); + spin_unlock_bh(&nf_conntrack_expect_lock); + if (err <= 0) { kfree_skb(skb2); return -ENOMEM; @@ -3370,12 +3380,9 @@ static int ctnetlink_get_expect(struct sk_buff *skb, static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data) { struct nf_conntrack_helper *helper; - const struct nf_conn_help *m_help; const char *name = data; - m_help = nfct_help(exp->master); - - helper = rcu_dereference(m_help->helper); + helper = rcu_dereference(exp->helper); if (!helper) return false; @@ -3408,22 +3415,26 @@ static int ctnetlink_del_expect(struct sk_buff *skb, if (err < 0) return err; + spin_lock_bh(&nf_conntrack_expect_lock); + /* bump usage count to 2 */ exp = nf_ct_expect_find_get(info->net, &zone, &tuple); - if (!exp) + if (!exp) { + spin_unlock_bh(&nf_conntrack_expect_lock); return -ENOENT; + } if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); + spin_unlock_bh(&nf_conntrack_expect_lock); return -ENOENT; } } /* after list removal, usage count == 1 */ - spin_lock_bh(&nf_conntrack_expect_lock); if (timer_delete(&exp->timeout)) { nf_ct_unlink_expect_report(exp, 
NETLINK_CB(skb).portid, nlmsg_report(info->nlh)); @@ -3465,7 +3476,7 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x, #if IS_ENABLED(CONFIG_NF_NAT) static const struct nla_policy exp_nat_nla_policy[CTA_EXPECT_NAT_MAX+1] = { - [CTA_EXPECT_NAT_DIR] = { .type = NLA_U32 }, + [CTA_EXPECT_NAT_DIR] = NLA_POLICY_MAX(NLA_BE32, IP_CT_DIR_REPLY), [CTA_EXPECT_NAT_TUPLE] = { .type = NLA_NESTED }, }; #endif @@ -3506,20 +3517,25 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, static struct nf_conntrack_expect * ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, - struct nf_conntrack_helper *helper, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask) { - u_int32_t class = 0; + struct net *net = read_pnet(&ct->ct_net); + struct nf_conntrack_helper *helper; struct nf_conntrack_expect *exp; struct nf_conn_help *help; + u32 class = 0; int err; help = nfct_help(ct); if (!help) return ERR_PTR(-EOPNOTSUPP); - if (cda[CTA_EXPECT_CLASS] && helper) { + helper = rcu_dereference(help->helper); + if (!helper) + return ERR_PTR(-EOPNOTSUPP); + + if (cda[CTA_EXPECT_CLASS]) { class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS])); if (class > helper->expect_class_max) return ERR_PTR(-EINVAL); @@ -3549,7 +3565,11 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, exp->class = class; exp->master = ct; - exp->helper = helper; + write_pnet(&exp->net, net); +#ifdef CONFIG_NF_CONNTRACK_ZONES + exp->zone = ct->zone; +#endif + rcu_assign_pointer(exp->helper, helper); exp->tuple = *tuple; exp->mask.src.u3 = mask->src.u3; exp->mask.src.u.all = mask->src.u.all; @@ -3559,6 +3579,12 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, exp, nf_ct_l3num(ct)); if (err < 0) goto err_out; +#if IS_ENABLED(CONFIG_NF_NAT) + } else { + memset(&exp->saved_addr, 0, sizeof(exp->saved_addr)); + memset(&exp->saved_proto, 0, sizeof(exp->saved_proto)); + exp->dir = 0; +#endif } return exp; err_out: @@ -3574,7 +3600,6 @@ ctnetlink_create_expect(struct net *net, { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; - struct nf_conntrack_helper *helper = NULL; struct nf_conntrack_expect *exp; struct nf_conn *ct; int err; @@ -3600,33 +3625,7 @@ ctnetlink_create_expect(struct net *net, ct = nf_ct_tuplehash_to_ctrack(h); rcu_read_lock(); - if (cda[CTA_EXPECT_HELP_NAME]) { - const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]); - - helper = __nf_conntrack_helper_find(helpname, u3, - nf_ct_protonum(ct)); - if (helper == NULL) { - rcu_read_unlock(); -#ifdef CONFIG_MODULES - if (request_module("nfct-helper-%s", helpname) < 0) { - err = -EOPNOTSUPP; - goto err_ct; - } - rcu_read_lock(); - helper = __nf_conntrack_helper_find(helpname, u3, - nf_ct_protonum(ct)); - if (helper) { - err = -EAGAIN; - goto err_rcu; - } - rcu_read_unlock(); -#endif - err = -EOPNOTSUPP; - goto err_ct; - } - } - - exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask); + exp = ctnetlink_alloc_expect(cda, ct, &tuple, &mask); if (IS_ERR(exp)) { err = PTR_ERR(exp); goto err_rcu; @@ -3636,8 +3635,8 @@ ctnetlink_create_expect(struct net *net, nf_ct_expect_put(exp); err_rcu: rcu_read_unlock(); -err_ct: nf_ct_put(ct); + return err; }
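The new ctnetlink_dump_exp_ct_start()/_done() pair closes a use-after-free window: without a reference taken before the first ->dump() call, the conntrack entry stashed in cb->data could be freed between two dump passes. A minimal sketch of the pinning pattern, assuming a hypothetical my_obj with a refcount_t and a my_obj_put() release helper:

#include <linux/netlink.h>
#include <linux/refcount.h>

static int obj_dump_start(struct netlink_callback *cb)
{
	struct my_obj *obj = cb->data;

	/* refuse to start if the object is already on its way out */
	if (!refcount_inc_not_zero(&obj->ref))
		return -ENOENT;
	return 0;
}

static int obj_dump_done(struct netlink_callback *cb)
{
	struct my_obj *obj = cb->data;

	if (obj)
		my_obj_put(obj);	/* pairs with obj_dump_start() */
	return 0;
}

The request handler wires both callbacks into struct netlink_dump_control next to .dump, as the hunk above does; netlink then invokes .done exactly once for every dump whose .start succeeded, so the reference cannot leak.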
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 7c6f7c9..645d2c4 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -582,7 +582,8 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, } static const struct nla_policy sctp_nla_policy[CTA_PROTOINFO_SCTP_MAX+1] = { - [CTA_PROTOINFO_SCTP_STATE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_SCTP_STATE] = NLA_POLICY_MAX(NLA_U8, + SCTP_CONNTRACK_HEARTBEAT_SENT), [CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] = { .type = NLA_U32 }, [CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NLA_U32 }, };
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 0c1d086..b67426c 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1385,9 +1385,9 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, } static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { - [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 }, - [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 }, - [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 }, + [CTA_PROTOINFO_TCP_STATE] = NLA_POLICY_MAX(NLA_U8, TCP_CONNTRACK_SYN_SENT2), + [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE), + [CTA_PROTOINFO_TCP_WSCALE_REPLY] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE), [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) }, [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, }; @@ -1414,10 +1414,6 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) if (err < 0) return err; - if (tb[CTA_PROTOINFO_TCP_STATE] && - nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX) - return -EINVAL; - spin_lock_bh(&ct->lock); if (tb[CTA_PROTOINFO_TCP_STATE]) ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
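This hunk and the SCTP one above replace hand-rolled range checks with declarative policy entries, so out-of-range values are rejected by nla_parse_nested() before any handler logic runs (the CTA_EXPECT_FLAGS change uses the big-endian NLA_BE32 variant for the same reason). A compressed illustration of both macros; the MY_* names are hypothetical:

static const struct nla_policy my_policy[MY_ATTR_MAX + 1] = {
	/* value must be <= MY_STATE_MAX, enforced during parsing */
	[MY_ATTR_STATE] = NLA_POLICY_MAX(NLA_U8, MY_STATE_MAX),
	/* only bits within MY_FLAGS_ALL may be set */
	[MY_ATTR_FLAGS] = NLA_POLICY_MASK(NLA_U32, MY_FLAGS_ALL),
};

With the policy doing the validation, open-coded "value >= MAX, return -EINVAL" blocks such as the one deleted from nlattr_to_tcp() become dead code.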
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index ca748f8..939502f 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c
@@ -924,7 +924,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (!exp || exp->master == ct || - nfct_help(exp->master)->helper != nfct_help(ct)->helper || + exp->helper != nfct_help(ct)->helper || exp->class != class) break; #if IS_ENABLED(CONFIG_NF_NAT) @@ -1040,6 +1040,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, unsigned int port; const struct sdp_media_type *t; int ret = NF_ACCEPT; + bool have_rtp_addr = false; hooks = rcu_dereference(nf_nat_sip_hooks); @@ -1056,8 +1057,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, caddr_len = 0; if (ct_sip_parse_sdp_addr(ct, *dptr, sdpoff, *datalen, SDP_HDR_CONNECTION, SDP_HDR_MEDIA, - &matchoff, &matchlen, &caddr) > 0) + &matchoff, &matchlen, &caddr) > 0) { caddr_len = matchlen; + memcpy(&rtp_addr, &caddr, sizeof(rtp_addr)); + have_rtp_addr = true; + } mediaoff = sdpoff; for (i = 0; i < ARRAY_SIZE(sdp_media_types); ) { @@ -1091,9 +1095,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, &matchoff, &matchlen, &maddr) > 0) { maddr_len = matchlen; memcpy(&rtp_addr, &maddr, sizeof(rtp_addr)); - } else if (caddr_len) + have_rtp_addr = true; + } else if (caddr_len) { memcpy(&rtp_addr, &caddr, sizeof(rtp_addr)); - else { + have_rtp_addr = true; + } else { nf_ct_helper_log(skb, ct, "cannot parse SDP message"); return NF_DROP; } @@ -1125,7 +1131,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, /* Update session connection and owner addresses */ hooks = rcu_dereference(nf_nat_sip_hooks); - if (hooks && ct->status & IPS_NAT_MASK) + if (hooks && ct->status & IPS_NAT_MASK && have_rtp_addr) ret = hooks->sdp_session(skb, protoff, dataoff, dptr, datalen, sdpoff, &rtp_addr); @@ -1297,7 +1303,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct), saddr, &daddr, proto, NULL, &port); exp->timeout.expires = sip_timeout * HZ; - exp->helper = helper; + rcu_assign_pointer(exp->helper, helper); exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; hooks = rcu_dereference(nf_nat_sip_hooks); @@ -1534,11 +1540,12 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, { struct tcphdr *th, _tcph; unsigned int dataoff, datalen; - unsigned int matchoff, matchlen, clen; + unsigned int matchoff, matchlen; unsigned int msglen, origlen; const char *dptr, *end; s16 diff, tdiff = 0; int ret = NF_ACCEPT; + unsigned long clen; bool term; if (ctinfo != IP_CT_ESTABLISHED && @@ -1573,6 +1580,9 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, if (dptr + matchoff == end) break; + if (clen > datalen) + break; + term = false; for (; end + strlen("\r\n\r\n") <= dptr + datalen; end++) { if (end[0] == '\r' && end[1] == '\n' &&
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 3fdb10d..fd56d66 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c
@@ -738,6 +738,7 @@ static int nf_flow_encap_push(struct sk_buff *skb, switch (tuple->encap[i].proto) { case htons(ETH_P_8021Q): case htons(ETH_P_8021AD): + skb_reset_mac_header(skb); if (skb_vlan_push(skb, tuple->encap[i].proto, tuple->encap[i].id) < 0) return -1;
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 9b677e1..93d0aa7 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c
@@ -14,6 +14,8 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_tuple.h> +#define NF_FLOW_RULE_ACTION_MAX 24 + static struct workqueue_struct *nf_flow_offload_add_wq; static struct workqueue_struct *nf_flow_offload_del_wq; static struct workqueue_struct *nf_flow_offload_stats_wq; @@ -216,7 +218,12 @@ static void flow_offload_mangle(struct flow_action_entry *entry, static inline struct flow_action_entry * flow_action_entry_next(struct nf_flow_rule *flow_rule) { - int i = flow_rule->rule->action.num_entries++; + int i; + + if (unlikely(flow_rule->rule->action.num_entries >= NF_FLOW_RULE_ACTION_MAX)) + return NULL; + + i = flow_rule->rule->action.num_entries++; return &flow_rule->rule->action.entries[i]; } @@ -234,6 +241,9 @@ static int flow_offload_eth_src(struct net *net, u32 mask, val; u16 val16; + if (!entry0 || !entry1) + return -E2BIG; + this_tuple = &flow->tuplehash[dir].tuple; switch (this_tuple->xmit_type) { @@ -284,6 +294,9 @@ static int flow_offload_eth_dst(struct net *net, u8 nud_state; u16 val16; + if (!entry0 || !entry1) + return -E2BIG; + this_tuple = &flow->tuplehash[dir].tuple; switch (this_tuple->xmit_type) { @@ -325,16 +338,19 @@ static int flow_offload_eth_dst(struct net *net, return 0; } -static void flow_offload_ipv4_snat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv4_snat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask = ~htonl(0xffffffff); __be32 addr; u32 offset; + if (!entry) + return -E2BIG; + switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; @@ -345,23 +361,27 @@ static void flow_offload_ipv4_snat(struct net *net, offset = offsetof(struct iphdr, daddr); break; default: - return; + return -EOPNOTSUPP; } flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset, &addr, &mask); + return 0; } -static void flow_offload_ipv4_dnat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv4_dnat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask = ~htonl(0xffffffff); __be32 addr; u32 offset; + if (!entry) + return -E2BIG; + switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr; @@ -372,14 +392,15 @@ static void flow_offload_ipv4_dnat(struct net *net, offset = offsetof(struct iphdr, saddr); break; default: - return; + return -EOPNOTSUPP; } flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset, &addr, &mask); + return 0; } -static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, +static int flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, unsigned int offset, const __be32 *addr, const __be32 *mask) { @@ -388,15 +409,20 @@ static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) { entry = flow_action_entry_next(flow_rule); + if (!entry) + return -E2BIG; + flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6, offset + i * sizeof(u32), &addr[i], mask); } + + return 0; } -static void flow_offload_ipv6_snat(struct net *net, 
- const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv6_snat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { u32 mask = ~htonl(0xffffffff); const __be32 *addr; @@ -412,16 +438,16 @@ static void flow_offload_ipv6_snat(struct net *net, offset = offsetof(struct ipv6hdr, daddr); break; default: - return; + return -EOPNOTSUPP; } - flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); + return flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); } -static void flow_offload_ipv6_dnat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv6_dnat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { u32 mask = ~htonl(0xffffffff); const __be32 *addr; @@ -437,10 +463,10 @@ static void flow_offload_ipv6_dnat(struct net *net, offset = offsetof(struct ipv6hdr, saddr); break; default: - return; + return -EOPNOTSUPP; } - flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); + return flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); } static int flow_offload_l4proto(const struct flow_offload *flow) @@ -462,15 +488,18 @@ static int flow_offload_l4proto(const struct flow_offload *flow) return type; } -static void flow_offload_port_snat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_port_snat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask, port; u32 offset; + if (!entry) + return -E2BIG; + switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port); @@ -485,22 +514,26 @@ static void flow_offload_port_snat(struct net *net, mask = ~htonl(0xffff); break; default: - return; + return -EOPNOTSUPP; } flow_offload_mangle(entry, flow_offload_l4proto(flow), offset, &port, &mask); + return 0; } -static void flow_offload_port_dnat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_port_dnat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask, port; u32 offset; + if (!entry) + return -E2BIG; + switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port); @@ -515,20 +548,24 @@ static void flow_offload_port_dnat(struct net *net, mask = ~htonl(0xffff0000); break; default: - return; + return -EOPNOTSUPP; } flow_offload_mangle(entry, flow_offload_l4proto(flow), offset, &port, &mask); + return 0; } -static void flow_offload_ipv4_checksum(struct net *net, - const struct flow_offload *flow, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv4_checksum(struct net *net, + const struct flow_offload *flow, + struct nf_flow_rule *flow_rule) { u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto; struct flow_action_entry *entry = flow_action_entry_next(flow_rule); + if (!entry) + return -E2BIG; + entry->id = FLOW_ACTION_CSUM; entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR; @@ 
-540,12 +577,14 @@ static void flow_offload_ipv4_checksum(struct net *net, entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP; break; } + + return 0; } -static void flow_offload_redirect(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_redirect(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *this_tuple, *other_tuple; struct flow_action_entry *entry; @@ -563,21 +602,28 @@ static void flow_offload_redirect(struct net *net, ifindex = other_tuple->iifidx; break; default: - return; + return -EOPNOTSUPP; } dev = dev_get_by_index(net, ifindex); if (!dev) - return; + return -ENODEV; entry = flow_action_entry_next(flow_rule); + if (!entry) { + dev_put(dev); + return -E2BIG; + } + entry->id = FLOW_ACTION_REDIRECT; entry->dev = dev; + + return 0; } -static void flow_offload_encap_tunnel(const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_encap_tunnel(const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *this_tuple; struct flow_action_entry *entry; @@ -585,7 +631,7 @@ static void flow_offload_encap_tunnel(const struct flow_offload *flow, this_tuple = &flow->tuplehash[dir].tuple; if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) - return; + return 0; dst = this_tuple->dst_cache; if (dst && dst->lwtstate) { @@ -594,15 +640,19 @@ static void flow_offload_encap_tunnel(const struct flow_offload *flow, tun_info = lwt_tun_info(dst->lwtstate); if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { entry = flow_action_entry_next(flow_rule); + if (!entry) + return -E2BIG; entry->id = FLOW_ACTION_TUNNEL_ENCAP; entry->tunnel = tun_info; } } + + return 0; } -static void flow_offload_decap_tunnel(const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_decap_tunnel(const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *other_tuple; struct flow_action_entry *entry; @@ -610,7 +660,7 @@ static void flow_offload_decap_tunnel(const struct flow_offload *flow, other_tuple = &flow->tuplehash[!dir].tuple; if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) - return; + return 0; dst = other_tuple->dst_cache; if (dst && dst->lwtstate) { @@ -619,9 +669,13 @@ static void flow_offload_decap_tunnel(const struct flow_offload *flow, tun_info = lwt_tun_info(dst->lwtstate); if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { entry = flow_action_entry_next(flow_rule); + if (!entry) + return -E2BIG; entry->id = FLOW_ACTION_TUNNEL_DECAP; } } + + return 0; } static int @@ -633,8 +687,9 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, const struct flow_offload_tuple *tuple; int i; - flow_offload_decap_tunnel(flow, dir, flow_rule); - flow_offload_encap_tunnel(flow, dir, flow_rule); + if (flow_offload_decap_tunnel(flow, dir, flow_rule) < 0 || + flow_offload_encap_tunnel(flow, dir, flow_rule) < 0) + return -1; if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 || flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) @@ -650,6 +705,8 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, if (tuple->encap[i].proto == htons(ETH_P_8021Q)) { entry = 
flow_action_entry_next(flow_rule); + if (!entry) + return -1; entry->id = FLOW_ACTION_VLAN_POP; } } @@ -663,6 +720,8 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, continue; entry = flow_action_entry_next(flow_rule); + if (!entry) + return -1; switch (other_tuple->encap[i].proto) { case htons(ETH_P_PPP_SES): @@ -688,18 +747,22 @@ int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow, return -1; if (test_bit(NF_FLOW_SNAT, &flow->flags)) { - flow_offload_ipv4_snat(net, flow, dir, flow_rule); - flow_offload_port_snat(net, flow, dir, flow_rule); + if (flow_offload_ipv4_snat(net, flow, dir, flow_rule) < 0 || + flow_offload_port_snat(net, flow, dir, flow_rule) < 0) + return -1; } if (test_bit(NF_FLOW_DNAT, &flow->flags)) { - flow_offload_ipv4_dnat(net, flow, dir, flow_rule); - flow_offload_port_dnat(net, flow, dir, flow_rule); + if (flow_offload_ipv4_dnat(net, flow, dir, flow_rule) < 0 || + flow_offload_port_dnat(net, flow, dir, flow_rule) < 0) + return -1; } if (test_bit(NF_FLOW_SNAT, &flow->flags) || test_bit(NF_FLOW_DNAT, &flow->flags)) - flow_offload_ipv4_checksum(net, flow, flow_rule); + if (flow_offload_ipv4_checksum(net, flow, flow_rule) < 0) + return -1; - flow_offload_redirect(net, flow, dir, flow_rule); + if (flow_offload_redirect(net, flow, dir, flow_rule) < 0) + return -1; return 0; } @@ -713,22 +776,23 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow, return -1; if (test_bit(NF_FLOW_SNAT, &flow->flags)) { - flow_offload_ipv6_snat(net, flow, dir, flow_rule); - flow_offload_port_snat(net, flow, dir, flow_rule); + if (flow_offload_ipv6_snat(net, flow, dir, flow_rule) < 0 || + flow_offload_port_snat(net, flow, dir, flow_rule) < 0) + return -1; } if (test_bit(NF_FLOW_DNAT, &flow->flags)) { - flow_offload_ipv6_dnat(net, flow, dir, flow_rule); - flow_offload_port_dnat(net, flow, dir, flow_rule); + if (flow_offload_ipv6_dnat(net, flow, dir, flow_rule) < 0 || + flow_offload_port_dnat(net, flow, dir, flow_rule) < 0) + return -1; } - flow_offload_redirect(net, flow, dir, flow_rule); + if (flow_offload_redirect(net, flow, dir, flow_rule) < 0) + return -1; return 0; } EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6); -#define NF_FLOW_RULE_ACTION_MAX 16 - static struct nf_flow_rule * nf_flow_offload_rule_alloc(struct net *net, const struct flow_offload_work *offload,
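With NF_FLOW_RULE_ACTION_MAX now enforced inside flow_action_entry_next(), every helper that appends an action has to cope with a NULL slot and unwind whatever it already holds (note the dev_put() in the redirect path). The bounded-builder pattern in isolation, as a sketch with hypothetical my_ names:

#define MY_ACTION_MAX 24	/* mirrors NF_FLOW_RULE_ACTION_MAX */

static struct flow_action_entry *
my_action_next(struct nf_flow_rule *flow_rule)
{
	struct flow_action *action = &flow_rule->rule->action;

	if (action->num_entries >= MY_ACTION_MAX)
		return NULL;	/* table full: caller must bail out */

	return &action->entries[action->num_entries++];
}

static int my_append_csum(struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = my_action_next(flow_rule);

	if (!entry)
		return -E2BIG;	/* propagate instead of writing past the array */
	entry->id = FLOW_ACTION_CSUM;
	return 0;
}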
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fd7f7e4..8c42247 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c
@@ -829,10 +829,14 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx, nft_set_elem_change_active(ctx->net, set, ext); nft_setelem_data_deactivate(ctx->net, set, catchall->elem); - break; } } +/* Use NFT_ITER_UPDATE iterator even if this may be called from the preparation + * phase, the set clone might already exist from a previous command, or it might + * be a set that is going away and does not require a clone. The netns and + * netlink release paths also need to work on the live set. + */ static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set) { struct nft_set_iter iter = { @@ -5868,7 +5872,6 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx, nft_clear(ctx->net, ext); nft_setelem_data_activate(ctx->net, set, catchall->elem); - break; } } @@ -6741,8 +6744,8 @@ static void __nft_set_elem_expr_destroy(const struct nft_ctx *ctx, } } -static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, - struct nft_set_elem_expr *elem_expr) +void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, + struct nft_set_elem_expr *elem_expr) { struct nft_expr *expr; u32 size; @@ -7153,8 +7156,7 @@ static u32 nft_set_maxsize(const struct nft_set *set) } static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, - const struct nlattr *attr, u32 nlmsg_flags, - bool last) + const struct nlattr *attr, u32 nlmsg_flags) { struct nft_expr *expr_array[NFT_SET_EXPR_MAX] = {}; struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; @@ -7170,6 +7172,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nft_data_desc desc; enum nft_registers dreg; struct nft_trans *trans; + bool set_full = false; u64 expiration; u64 timeout; int err, i; @@ -7440,11 +7443,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (flags) *nft_set_ext_flags(ext) = flags; - if (last) - elem.flags = NFT_SET_ELEM_INTERNAL_LAST; - else - elem.flags = 0; - if (obj) *nft_set_ext_obj(ext) = obj; @@ -7461,10 +7459,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_elem_free; + if (!(flags & NFT_SET_ELEM_CATCHALL)) { + unsigned int max = nft_set_maxsize(set), nelems; + + nelems = atomic_inc_return(&set->nelems); + if (nelems > max) + set_full = true; + } + trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) { err = -ENOMEM; - goto err_elem_free; + goto err_set_size; } ext->genmask = nft_genmask_cur(ctx->net); @@ -7516,7 +7522,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, ue->priv = elem_priv; nft_trans_commit_list_add_elem(ctx->net, trans); - goto err_elem_free; + goto err_set_size; } } } @@ -7534,23 +7540,16 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err_element_clash; } - if (!(flags & NFT_SET_ELEM_CATCHALL)) { - unsigned int max = nft_set_maxsize(set); - - if (!atomic_add_unless(&set->nelems, 1, max)) { - err = -ENFILE; - goto err_set_full; - } - } - nft_trans_container_elem(trans)->elems[0].priv = elem.priv; nft_trans_commit_list_add_elem(ctx->net, trans); - return 0; -err_set_full: - nft_setelem_remove(ctx->net, set, elem.priv); + return set_full ? 
-ENFILE : 0; + err_element_clash: kfree(trans); +err_set_size: + if (!(flags & NFT_SET_ELEM_CATCHALL)) + atomic_dec(&set->nelems); err_elem_free: nf_tables_set_elem_destroy(ctx, set, elem.priv); err_parse_data: @@ -7608,8 +7607,7 @@ static int nf_tables_newsetelem(struct sk_buff *skb, nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { - err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags, - nla_is_last(attr, rem)); + err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags); if (err < 0) { NL_SET_BAD_ATTR(extack, attr); return err; @@ -7733,7 +7731,7 @@ static void nft_trans_elems_destroy_abort(const struct nft_ctx *ctx, } static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, - const struct nlattr *attr, bool last) + const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_set_ext_tmpl tmpl; @@ -7801,11 +7799,6 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (flags) *nft_set_ext_flags(ext) = flags; - if (last) - elem.flags = NFT_SET_ELEM_INTERNAL_LAST; - else - elem.flags = 0; - trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); if (trans == NULL) goto fail_trans; @@ -7901,9 +7894,12 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx, static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask) { + /* The set backend might need to clone the set, do it now from the + * preparation phase, use NFT_ITER_UPDATE_CLONE iterator type. + */ struct nft_set_iter iter = { .genmask = genmask, - .type = NFT_ITER_UPDATE, + .type = NFT_ITER_UPDATE_CLONE, .fn = nft_setelem_flush, }; @@ -7953,8 +7949,7 @@ static int nf_tables_delsetelem(struct sk_buff *skb, return nft_set_flush(&ctx, set, genmask); nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { - err = nft_del_setelem(&ctx, set, attr, - nla_is_last(attr, rem)); + err = nft_del_setelem(&ctx, set, attr); if (err == -ENOENT && NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYSETELEM) continue; @@ -9208,6 +9203,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb, return 0; err_flowtable_hooks: + synchronize_rcu(); nft_trans_destroy(trans); err_flowtable_trans: nft_hooks_destroy(&flowtable->hook_list); @@ -9678,7 +9674,7 @@ static int nft_flowtable_event(unsigned long event, struct net_device *dev, break; case NETDEV_REGISTER: /* NOP if not matching or already registered */ - if (!match || (changename && ops)) + if (!match || ops) continue; ops = kzalloc_obj(struct nf_hook_ops, @@ -10483,11 +10479,6 @@ static void nft_trans_gc_queue_work(struct nft_trans_gc *trans) schedule_work(&trans_gc_work); } -static int nft_trans_gc_space(struct nft_trans_gc *trans) -{ - return NFT_TRANS_GC_BATCHCOUNT - trans->count; -} - struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, unsigned int gc_seq, gfp_t gfp) { @@ -11676,8 +11667,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, switch (data->verdict.code) { case NF_ACCEPT: case NF_DROP: - case NF_QUEUE: - break; case NFT_CONTINUE: case NFT_BREAK: case NFT_RETURN: @@ -11712,6 +11701,11 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, data->verdict.chain = chain; break; + case NF_QUEUE: + /* The nft_queue expression is used for this purpose, an + * immediate NF_QUEUE verdict should not ever be seen here. + */ + fallthrough; default: return -EINVAL; }
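The set element accounting flips from check-after-insert (atomic_add_unless() followed by nft_setelem_remove() on failure) to reserve-then-roll-back: the counter is bumped before anything can fail, decremented on every error path, and -ENFILE is reported only once the element has been queued on the transaction list. The shape of the pattern, with my_insert() standing in for the backend insertion:

static int my_add_elem(struct nft_set *set, struct my_elem *elem)
{
	unsigned int max = nft_set_maxsize(set);
	bool set_full = false;
	int err;

	if (atomic_inc_return(&set->nelems) > max)
		set_full = true;	/* over the limit: fail, but only later */

	err = my_insert(set, elem);	/* hypothetical backend insert */
	if (err < 0) {
		atomic_dec(&set->nelems);	/* release the reservation */
		return err;
	}

	return set_full ? -ENFILE : 0;
}

Failing after the element is queued lets the normal transaction abort path undo the insertion, instead of needing a bespoke removal like the old err_set_full label.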
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index d658b14..d545fa4 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c
@@ -601,10 +601,10 @@ nfnl_cthelper_dump_table(struct sk_buff *skb, struct netlink_callback *cb) goto out; } } - } - if (cb->args[1]) { - cb->args[1] = 0; - goto restart; + if (cb->args[1]) { + cb->args[1] = 0; + goto restart; + } } out: rcu_read_unlock();
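The restart check belongs inside the bucket loop: placed after it, a dump whose cursor object vanished would terminate early instead of rescanning the current bucket. An illustrative (not verbatim) version of the cb->args cursor idiom, with the my_* names hypothetical:

static int my_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct my_obj *cur, *last = (struct my_obj *)cb->args[1];

	rcu_read_lock();
	for (; cb->args[0] < MY_HASH_SIZE; cb->args[0]++) {
restart:
		hlist_for_each_entry_rcu(cur, &my_hash[cb->args[0]], node) {
			if (cb->args[1]) {
				if (cur != last)
					continue;	/* seek to the cursor */
				cb->args[1] = 0;	/* found it: resume here */
			}
			if (my_dump_one(skb, cb, cur) < 0) {
				cb->args[1] = (unsigned long)cur;
				goto out;
			}
		}
		if (cb->args[1]) {	/* cursor object vanished: rescan bucket */
			cb->args[1] = 0;
			goto restart;
		}
	}
out:
	rcu_read_unlock();
	return skb->len;
}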
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b35a909..f80978c 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c
@@ -647,15 +647,11 @@ __build_packet_message(struct nfnl_log_net *log, if (data_len) { struct nlattr *nla; - int size = nla_attr_size(data_len); - if (skb_tailroom(inst->skb) < nla_total_size(data_len)) + nla = nla_reserve(inst->skb, NFULA_PAYLOAD, data_len); + if (!nla) goto nla_put_failure; - nla = skb_put(inst->skb, nla_total_size(data_len)); - nla->nla_type = NFULA_PAYLOAD; - nla->nla_len = size; - if (skb_copy_bits(skb, 0, nla_data(nla), data_len)) BUG(); } @@ -730,7 +726,7 @@ nfulnl_log_packet(struct net *net, + nla_total_size(plen) /* prefix */ + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)) - + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ + + nlmsg_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ if (in && skb_mac_header_was_set(skb)) { size += nla_total_size(skb->dev->hard_header_len)
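The first hunk swaps a hand-built attribute (tailroom check, skb_put(), manual nla_type/nla_len setup) for nla_reserve(), which performs the same length check and header initialization in one call; the second corrects the size budget, since NLMSG_DONE is a message header and costs nlmsg_total_size(), not nla_total_size(). The reserve-and-fill shape:

struct nlattr *nla;

nla = nla_reserve(inst->skb, NFULA_PAYLOAD, data_len);
if (!nla)
	goto nla_put_failure;		/* not enough tailroom */

/* copy the payload straight into the reserved attribute */
if (skb_copy_bits(skb, 0, nla_data(nla), data_len))
	BUG();				/* length was validated above */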
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 94e3eac..45d9ad2 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c
@@ -302,7 +302,9 @@ static int nfnl_osf_add_callback(struct sk_buff *skb, { struct nf_osf_user_finger *f; struct nf_osf_finger *kf = NULL, *sf; + unsigned int tot_opt_len = 0; int err = 0; + int i; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -318,6 +320,17 @@ static int nfnl_osf_add_callback(struct sk_buff *skb, if (f->opt_num > ARRAY_SIZE(f->opt)) return -EINVAL; + for (i = 0; i < f->opt_num; i++) { + if (!f->opt[i].length || f->opt[i].length > MAX_IPOPTLEN) + return -EINVAL; + if (f->opt[i].kind == OSFOPT_MSS && f->opt[i].length < 4) + return -EINVAL; + + tot_opt_len += f->opt[i].length; + if (tot_opt_len > MAX_IPOPTLEN) + return -EINVAL; + } + if (!memchr(f->genre, 0, MAXGENRELEN) || !memchr(f->subtype, 0, MAXGENRELEN) || !memchr(f->version, 0, MAXGENRELEN))
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7f5248b..47f7f62 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c
@@ -1546,8 +1546,10 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info, if (entry->state.pf == PF_BRIDGE) { err = nfqa_parse_bridge(entry, nfqa); - if (err < 0) + if (err < 0) { + nfqnl_reinject(entry, NF_DROP); return err; + } } if (nfqa[NFQA_PAYLOAD]) {
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index b16185e..041426e 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c
@@ -344,7 +344,7 @@ static int nft_netdev_event(unsigned long event, struct net_device *dev, break; case NETDEV_REGISTER: /* NOP if not matching or already registered */ - if (!match || (changename && ops)) + if (!match || ops) continue; ops = kmemdup(&basechain->ops,
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 47d3ef1..128ff81 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c
@@ -23,6 +23,7 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_seqadj.h> +#include "nf_internals.h" struct nft_ct_helper_obj { struct nf_conntrack_helper *helper4; @@ -543,6 +544,7 @@ static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv) #endif #ifdef CONFIG_NF_CONNTRACK_ZONES case NFT_CT_ZONE: + nf_queue_nf_hook_drop(ctx->net); mutex_lock(&nft_ct_pcpu_mutex); if (--nft_ct_pcpu_template_refcnt == 0) nft_ct_tmpl_put_pcpu(); @@ -1015,6 +1017,7 @@ static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx, struct nft_ct_timeout_obj *priv = nft_obj_data(obj); struct nf_ct_timeout *timeout = priv->timeout; + nf_queue_nf_hook_drop(ctx->net); nf_ct_untimeout(ctx->net, timeout); nf_ct_netns_put(ctx->net, ctx->family); kfree(priv->timeout); @@ -1147,6 +1150,7 @@ static void nft_ct_helper_obj_destroy(const struct nft_ctx *ctx, { struct nft_ct_helper_obj *priv = nft_obj_data(obj); + nf_queue_nf_hook_drop(ctx->net); if (priv->helper4) nf_conntrack_helper_put(priv->helper4); if (priv->helper6)
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 7807d81..9123277 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c
@@ -30,18 +30,26 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv, const struct nft_set_ext *ext) { struct nft_set_elem_expr *elem_expr = nft_set_ext_expr(ext); + struct nft_ctx ctx = { + .net = read_pnet(&priv->set->net), + .family = priv->set->table->family, + }; struct nft_expr *expr; int i; for (i = 0; i < priv->num_exprs; i++) { expr = nft_setelem_expr_at(elem_expr, elem_expr->size); if (nft_expr_clone(expr, priv->expr_array[i], GFP_ATOMIC) < 0) - return -1; + goto err_out; elem_expr->size += priv->expr_array[i]->ops->size; } return 0; +err_out: + nft_set_elem_expr_destroy(&ctx, elem_expr); + + return -1; } struct nft_elem_priv *nft_dynset_new(struct nft_set *set,
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 739b992..b0e571c 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c
@@ -374,6 +374,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, { switch (iter->type) { case NFT_ITER_UPDATE: + case NFT_ITER_UPDATE_CLONE: /* only relevant for netlink dumps which use READ type */ WARN_ON_ONCE(iter->skip != 0);
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 7ef4b44..7fd24e0 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c
@@ -1640,6 +1640,7 @@ static void pipapo_drop(struct nft_pipapo_match *m, int i; nft_pipapo_for_each_field(f, i, m) { + bool last = i == m->field_count - 1; int g; for (g = 0; g < f->groups; g++) { @@ -1659,7 +1660,7 @@ static void pipapo_drop(struct nft_pipapo_match *m, } pipapo_unmap(f->mt, f->rules, rulemap[i].to, rulemap[i].n, - rulemap[i + 1].n, i == m->field_count - 1); + last ? 0 : rulemap[i + 1].n, last); if (pipapo_resize(f, f->rules, f->rules - rulemap[i].n)) { /* We can ignore this, a failure to shrink tables down * doesn't make tables invalid. @@ -1680,11 +1681,11 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set, } /** - * pipapo_gc() - Drop expired entries from set, destroy start and end elements + * pipapo_gc_scan() - Drop expired entries from set and link them to gc list * @set: nftables API set representation * @m: Matching data */ -static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) +static void pipapo_gc_scan(struct nft_set *set, struct nft_pipapo_match *m) { struct nft_pipapo *priv = nft_set_priv(set); struct net *net = read_pnet(&set->net); @@ -1697,6 +1698,8 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) if (!gc) return; + list_add(&gc->list, &priv->gc_head); + while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; const struct nft_pipapo_field *f; @@ -1724,9 +1727,13 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) * NFT_SET_ELEM_DEAD_BIT. */ if (__nft_set_elem_expired(&e->ext, tstamp)) { - gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); - if (!gc) - return; + if (!nft_trans_gc_space(gc)) { + gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); + if (!gc) + return; + + list_add(&gc->list, &priv->gc_head); + } nft_pipapo_gc_deactivate(net, set, e); pipapo_drop(m, rulemap); @@ -1740,10 +1747,30 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) } } - gc = nft_trans_gc_catchall_sync(gc); + priv->last_gc = jiffies; +} + +/** + * pipapo_gc_queue() - Free expired elements + * @set: nftables API set representation + */ +static void pipapo_gc_queue(struct nft_set *set) +{ + struct nft_pipapo *priv = nft_set_priv(set); + struct nft_trans_gc *gc, *next; + + /* always do a catchall cycle: */ + gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); if (gc) { + gc = nft_trans_gc_catchall_sync(gc); + if (gc) + nft_trans_gc_queue_sync_done(gc); + } + + /* always purge queued gc elements. */ + list_for_each_entry_safe(gc, next, &priv->gc_head, list) { + list_del(&gc->list); nft_trans_gc_queue_sync_done(gc); - priv->last_gc = jiffies; } } @@ -1797,6 +1824,10 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) * * We also need to create a new working copy for subsequent insertions and * deletions. + * + * After the live copy has been replaced by the clone, we can safely queue + * expired elements that have been collected by pipapo_gc_scan() for + * memory reclaim. 
*/ static void nft_pipapo_commit(struct nft_set *set) { @@ -1807,7 +1838,7 @@ static void nft_pipapo_commit(struct nft_set *set) return; if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set))) - pipapo_gc(set, priv->clone); + pipapo_gc_scan(set, priv->clone); old = rcu_replace_pointer(priv->match, priv->clone, nft_pipapo_transaction_mutex_held(set)); @@ -1815,6 +1846,8 @@ static void nft_pipapo_commit(struct nft_set *set) if (old) call_rcu(&old->rcu, pipapo_reclaim_match); + + pipapo_gc_queue(set); } static void nft_pipapo_abort(const struct nft_set *set) @@ -2144,13 +2177,20 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_pipapo_match *m; switch (iter->type) { - case NFT_ITER_UPDATE: + case NFT_ITER_UPDATE_CLONE: m = pipapo_maybe_clone(set); if (!m) { iter->err = -ENOMEM; return; } - + nft_pipapo_do_walk(ctx, set, m, iter); + break; + case NFT_ITER_UPDATE: + if (priv->clone) + m = priv->clone; + else + m = rcu_dereference_protected(priv->match, + nft_pipapo_transaction_mutex_held(set)); nft_pipapo_do_walk(ctx, set, m, iter); break; case NFT_ITER_READ: @@ -2272,6 +2312,7 @@ static int nft_pipapo_init(const struct nft_set *set, f->mt = NULL; } + INIT_LIST_HEAD(&priv->gc_head); rcu_assign_pointer(priv->match, m); return 0; @@ -2321,6 +2362,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m; + WARN_ON_ONCE(!list_empty(&priv->gc_head)); + m = rcu_dereference_protected(priv->match, true); if (priv->clone) {
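pipapo_gc() is split into two phases so element memory is never freed while the old live copy can still reach it: pipapo_gc_scan() unlinks expired entries from the private clone and parks the gc batches on priv->gc_head, and pipapo_gc_queue() hands them to the reclaim machinery only after the clone has been published. A condensed view of that ordering in the commit path, using the names from the hunks above:

static void my_pipapo_commit(struct nft_set *set)
{
	struct nft_pipapo *priv = nft_set_priv(set);
	struct nft_pipapo_match *old;

	if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
		pipapo_gc_scan(set, priv->clone);	/* phase 1: unlink */

	old = rcu_replace_pointer(priv->match, priv->clone,
				  nft_pipapo_transaction_mutex_held(set));
	priv->clone = NULL;
	if (old)
		call_rcu(&old->rcu, pipapo_reclaim_match);

	pipapo_gc_queue(set);		/* phase 2: reclaim, post-publish */
}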
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index eaab422..9aee9a9 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h
@@ -156,12 +156,14 @@ struct nft_pipapo_match { * @clone: Copy where pending insertions and deletions are kept * @width: Total bytes to be matched for one packet, including padding * @last_gc: Timestamp of last garbage collection run, jiffies + * @gc_head: list of nft_trans_gc to queue up for mem reclaim */ struct nft_pipapo { struct nft_pipapo_match __rcu *match; struct nft_pipapo_match *clone; int width; unsigned long last_gc; + struct list_head gc_head; }; struct nft_pipapo_elem;
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 7ff9032..6395982 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -242,7 +242,7 @@ static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -319,7 +319,7 @@ static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -414,7 +414,7 @@ static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -505,7 +505,7 @@ static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -641,7 +641,7 @@ static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -699,7 +699,7 @@ static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -764,7 +764,7 @@ static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -839,7 +839,7 @@ static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -925,7 +925,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE; @@ -1019,7 +1019,7 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill, b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); if (last) - return b; + ret = b; if (unlikely(ret == -1)) ret = b / XSAVE_YMM_SIZE;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 3f02e44..737c339 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c
@@ -304,19 +304,10 @@ static void nft_rbtree_set_start_cookie(struct nft_rbtree *priv, priv->start_rbe_cookie = (unsigned long)rbe; } -static void nft_rbtree_set_start_cookie_open(struct nft_rbtree *priv, - const struct nft_rbtree_elem *rbe, - unsigned long open_interval) -{ - priv->start_rbe_cookie = (unsigned long)rbe | open_interval; -} - -#define NFT_RBTREE_OPEN_INTERVAL 1UL - static bool nft_rbtree_cmp_start_cookie(struct nft_rbtree *priv, const struct nft_rbtree_elem *rbe) { - return (priv->start_rbe_cookie & ~NFT_RBTREE_OPEN_INTERVAL) == (unsigned long)rbe; + return priv->start_rbe_cookie == (unsigned long)rbe; } static bool nft_rbtree_insert_same_interval(const struct net *net, @@ -346,14 +337,13 @@ static bool nft_rbtree_insert_same_interval(const struct net *net, static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *new, - struct nft_elem_priv **elem_priv, u64 tstamp, bool last) + struct nft_elem_priv **elem_priv, u64 tstamp) { struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL, *rbe_prev; struct rb_node *node, *next, *parent, **p, *first = NULL; struct nft_rbtree *priv = nft_set_priv(set); u8 cur_genmask = nft_genmask_cur(net); u8 genmask = nft_genmask_next(net); - unsigned long open_interval = 0; int d; /* Descend the tree to search for an existing element greater than the @@ -459,18 +449,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, } } - if (nft_rbtree_interval_null(set, new)) { + if (nft_rbtree_interval_null(set, new)) priv->start_rbe_cookie = 0; - } else if (nft_rbtree_interval_start(new) && priv->start_rbe_cookie) { - if (nft_set_is_anonymous(set)) { - priv->start_rbe_cookie = 0; - } else if (priv->start_rbe_cookie & NFT_RBTREE_OPEN_INTERVAL) { - /* Previous element is an open interval that partially - * overlaps with an existing non-open interval. - */ - return -ENOTEMPTY; - } - } + else if (nft_rbtree_interval_start(new) && priv->start_rbe_cookie) + priv->start_rbe_cookie = 0; /* - new start element matching existing start element: full overlap * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. @@ -478,27 +460,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) && nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) { *elem_priv = &rbe_ge->priv; - - /* - Corner case: new start element of open interval (which - * comes as last element in the batch) overlaps the start of - * an existing interval with an end element: partial overlap. - */ - node = rb_first(&priv->root); - rbe = __nft_rbtree_next_active(node, genmask); - if (rbe && nft_rbtree_interval_end(rbe)) { - rbe = nft_rbtree_next_active(rbe, genmask); - if (rbe && - nft_rbtree_interval_start(rbe) && - !nft_rbtree_cmp(set, new, rbe)) { - if (last) - return -ENOTEMPTY; - - /* Maybe open interval? */ - open_interval = NFT_RBTREE_OPEN_INTERVAL; - } - } - nft_rbtree_set_start_cookie_open(priv, rbe_ge, open_interval); - + nft_rbtree_set_start_cookie(priv, rbe_ge); return -EEXIST; } @@ -553,12 +515,6 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new)) return -ENOTEMPTY; - /* - start element overlaps an open interval but end element is new: - * partial overlap, reported as -ENOEMPTY. 
- */ - if (!rbe_ge && priv->start_rbe_cookie && nft_rbtree_interval_end(new)) - return -ENOTEMPTY; - /* Accepted element: pick insertion point depending on key value */ parent = NULL; p = &priv->root.rb_node; @@ -616,14 +572,12 @@ static struct nft_array *nft_array_alloc(u32 max_intervals) return array; } -#define NFT_ARRAY_EXTRA_SIZE 10240 - /* Similar to nft_rbtree_{u,k}size to hide details to userspace, but consider * packed representation coming from userspace for anonymous sets too. */ static u32 nft_array_elems(const struct nft_set *set) { - u32 nelems = atomic_read(&set->nelems); + u32 nelems = atomic_read(&set->nelems) - set->ndeact; /* Adjacent intervals are represented with a single start element in * anonymous sets, use the current element counter as is. @@ -639,27 +593,87 @@ static u32 nft_array_elems(const struct nft_set *set) return (nelems / 2) + 2; } -static int nft_array_may_resize(const struct nft_set *set) +#define NFT_ARRAY_INITIAL_SIZE 1024 +#define NFT_ARRAY_INITIAL_ANON_SIZE 16 +#define NFT_ARRAY_INITIAL_ANON_THRESH (8192U / sizeof(struct nft_array_interval)) + +static int nft_array_may_resize(const struct nft_set *set, bool flush) { - u32 nelems = nft_array_elems(set), new_max_intervals; + u32 initial_intervals, max_intervals, new_max_intervals, delta; + u32 shrinked_max_intervals, nelems = nft_array_elems(set); struct nft_rbtree *priv = nft_set_priv(set); struct nft_array *array; - if (!priv->array_next) { - array = nft_array_alloc(nelems + NFT_ARRAY_EXTRA_SIZE); + if (nft_set_is_anonymous(set)) + initial_intervals = NFT_ARRAY_INITIAL_ANON_SIZE; + else + initial_intervals = NFT_ARRAY_INITIAL_SIZE; + + if (priv->array_next) { + max_intervals = priv->array_next->max_intervals; + new_max_intervals = priv->array_next->max_intervals; + } else { + if (priv->array) { + max_intervals = priv->array->max_intervals; + new_max_intervals = priv->array->max_intervals; + } else { + max_intervals = 0; + new_max_intervals = initial_intervals; + } + } + + if (nft_set_is_anonymous(set)) + goto maybe_grow; + + if (flush) { + /* Set flush just started, nelems still report elements.*/ + nelems = 0; + new_max_intervals = NFT_ARRAY_INITIAL_SIZE; + goto realloc_array; + } + + if (check_add_overflow(new_max_intervals, new_max_intervals, + &shrinked_max_intervals)) + return -EOVERFLOW; + + shrinked_max_intervals = DIV_ROUND_UP(shrinked_max_intervals, 3); + + if (shrinked_max_intervals > NFT_ARRAY_INITIAL_SIZE && + nelems < shrinked_max_intervals) { + new_max_intervals = shrinked_max_intervals; + goto realloc_array; + } +maybe_grow: + if (nelems > new_max_intervals) { + if (nft_set_is_anonymous(set) && + new_max_intervals < NFT_ARRAY_INITIAL_ANON_THRESH) { + new_max_intervals <<= 1; + } else { + delta = new_max_intervals >> 1; + if (check_add_overflow(new_max_intervals, delta, + &new_max_intervals)) + return -EOVERFLOW; + } + } + +realloc_array: + if (WARN_ON_ONCE(nelems > new_max_intervals)) + return -ENOMEM; + + if (priv->array_next) { + if (max_intervals == new_max_intervals) + return 0; + + if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0) + return -ENOMEM; + } else { + array = nft_array_alloc(new_max_intervals); if (!array) return -ENOMEM; priv->array_next = array; } - if (nelems < priv->array_next->max_intervals) - return 0; - - new_max_intervals = priv->array_next->max_intervals + NFT_ARRAY_EXTRA_SIZE; - if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0) - return -ENOMEM; - return 0; } @@ -668,14 +682,13 @@ static int nft_rbtree_insert(const 
struct net *net, const struct nft_set *set, struct nft_elem_priv **elem_priv) { struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv); - bool last = !!(elem->flags & NFT_SET_ELEM_INTERNAL_LAST); struct nft_rbtree *priv = nft_set_priv(set); u64 tstamp = nft_net_tstamp(net); int err; nft_rbtree_maybe_reset_start_cookie(priv, tstamp); - if (nft_array_may_resize(set) < 0) + if (nft_array_may_resize(set, false) < 0) return -ENOMEM; do { @@ -685,12 +698,8 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, cond_resched(); write_lock_bh(&priv->lock); - err = __nft_rbtree_insert(net, set, rbe, elem_priv, tstamp, last); + err = __nft_rbtree_insert(net, set, rbe, elem_priv, tstamp); write_unlock_bh(&priv->lock); - - if (nft_rbtree_interval_end(rbe)) - priv->start_rbe_cookie = 0; - } while (err == -EAGAIN); return err; @@ -778,7 +787,6 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_rbtree_elem *rbe, *this = nft_elem_priv_cast(elem->priv); - bool last = !!(elem->flags & NFT_SET_ELEM_INTERNAL_LAST); struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; u8 genmask = nft_genmask_next(net); @@ -791,7 +799,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, nft_rbtree_interval_null(set, this)) priv->start_rbe_cookie = 0; - if (nft_array_may_resize(set) < 0) + if (nft_array_may_resize(set, false) < 0) return NULL; while (parent != NULL) { @@ -819,10 +827,9 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, continue; } - if (nft_rbtree_interval_start(rbe)) { - if (!last) - nft_rbtree_set_start_cookie(priv, rbe); - } else if (!nft_rbtree_deactivate_same_interval(net, priv, rbe)) + if (nft_rbtree_interval_start(rbe)) + nft_rbtree_set_start_cookie(priv, rbe); + else if (!nft_rbtree_deactivate_same_interval(net, priv, rbe)) return NULL; nft_rbtree_flush(net, set, &rbe->priv); @@ -861,13 +868,15 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_rbtree *priv = nft_set_priv(set); switch (iter->type) { - case NFT_ITER_UPDATE: - lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex); - - if (nft_array_may_resize(set) < 0) { + case NFT_ITER_UPDATE_CLONE: + if (nft_array_may_resize(set, true) < 0) { iter->err = -ENOMEM; break; } + fallthrough; + case NFT_ITER_UPDATE: + lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex); + nft_rbtree_do_walk(ctx, set, iter); break; case NFT_ITER_READ:
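The array sizing policy changes from a fixed +10240-interval step to geometric: capacity grows by roughly half (doubling while small anonymous sets stay under NFT_ARRAY_INITIAL_ANON_THRESH) and shrinks again once occupancy falls well below the next size down, with every addition routed through check_add_overflow() instead of trusting u32 wraparound. The growth step in isolation, as a sketch:

/* Grow a capacity by ~50%, refusing instead of wrapping on overflow. */
static int my_grow(u32 cur, u32 *out)
{
	u32 delta = cur >> 1;

	if (check_add_overflow(cur, delta, out))
		return -EOVERFLOW;	/* would wrap: keep the old size */
	return 0;
}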
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index e594b3b..b39017c 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c
@@ -501,6 +501,17 @@ int xt_check_match(struct xt_mtchk_param *par, par->match->table, par->table); return -EINVAL; } + + /* NFPROTO_UNSPEC implies NF_INET_* hooks which do not overlap with + * NF_ARP_IN,OUT,FORWARD, allow explicit extensions with NFPROTO_ARP + * support. + */ + if (par->family == NFPROTO_ARP && + par->match->family != NFPROTO_ARP) { + pr_info_ratelimited("%s_tables: %s match: not valid for this family\n", + xt_prefix[par->family], par->match->name); + return -EINVAL; + } if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) { char used[64], allow[64]; @@ -1016,6 +1027,18 @@ int xt_check_target(struct xt_tgchk_param *par, par->target->table, par->table); return -EINVAL; } + + /* NFPROTO_UNSPEC implies NF_INET_* hooks which do not overlap with + * NF_ARP_IN,OUT,FORWARD, allow explicit extensions with NFPROTO_ARP + * support. + */ + if (par->family == NFPROTO_ARP && + par->target->family != NFPROTO_ARP) { + pr_info_ratelimited("%s_tables: %s target: not valid for this family\n", + xt_prefix[par->family], par->target->name); + return -EINVAL; + } + if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) { char used[64], allow[64];
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 3ba94c3..498f587 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c
@@ -16,6 +16,7 @@ #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_conntrack_zones.h> +#include "nf_internals.h" static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) { @@ -283,6 +284,9 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, struct nf_conn_help *help; if (ct) { + if (info->helper[0] || info->timeout[0]) + nf_queue_nf_hook_drop(par->net); + help = nfct_help(ct); xt_ct_put_helper(help);
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 5d93e22..5171061 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c
@@ -318,6 +318,12 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) info->timer = __idletimer_tg_find_by_label(info->label); if (info->timer) { + if (info->timer->timer_type & XT_IDLETIMER_ALARM) { + pr_debug("Adding/Replacing rule with same label and different timer type is not allowed\n"); + mutex_unlock(&list_mutex); + return -EINVAL; + } + info->timer->refcnt++; mod_timer(&info->timer->timer, secs_to_jiffies(info->timeout) + jiffies);
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index c437fbd..43d2ae2 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c
@@ -65,6 +65,9 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) info->priv = NULL; if (info->has_path) { + if (strnlen(info->path, sizeof(info->path)) >= sizeof(info->path)) + return -ENAMETOOLONG; + cgrp = cgroup_get_from_path(info->path); if (IS_ERR(cgrp)) { pr_info_ratelimited("invalid path, errno=%ld\n", @@ -102,6 +105,9 @@ static int cgroup_mt_check_v2(const struct xt_mtchk_param *par) info->priv = NULL; if (info->has_path) { + if (strnlen(info->path, sizeof(info->path)) >= sizeof(info->path)) + return -ENAMETOOLONG; + cgrp = cgroup_get_from_path(info->path); if (IS_ERR(cgrp)) { pr_info_ratelimited("invalid path, errno=%ld\n",
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index e5a13ec..037ab93 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c
@@ -62,10 +62,10 @@ dccp_find_option(u_int8_t option, return true; } - if (op[i] < 2) + if (op[i] < 2 || i == optlen - 1) i++; else - i += op[i+1]?:1; + i += op[i + 1] ? : 1; } spin_unlock_bh(&dccp_buflock);
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 72324bd9..b1d736c1 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c
@@ -91,6 +91,11 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) goto err1; } + if (strnlen(info->name1, sizeof(info->name1)) >= sizeof(info->name1)) + return -ENAMETOOLONG; + if (strnlen(info->name2, sizeof(info->name2)) >= sizeof(info->name2)) + return -ENAMETOOLONG; + ret = -ENOENT; est1 = xt_rateest_lookup(par->net, info->name1); if (!est1)
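Same fix as the xt_cgroup hunk above: these are fixed-size buffers copied verbatim from userspace, so nothing guarantees a NUL terminator. If strnlen() runs all the way to the buffer size, the terminator is missing and the name must be rejected before it is ever used as a C string:

/* Generic check for userspace-supplied fixed-size names. */
if (strnlen(info->name1, sizeof(info->name1)) >= sizeof(info->name1))
	return -ENAMETOOLONG;	/* unterminated: reject the rule */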
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index e899113..f76cf18 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c
@@ -59,8 +59,10 @@ tcp_find_option(u_int8_t option, for (i = 0; i < optlen; ) { if (op[i] == option) return !invert; - if (op[i] < 2) i++; - else i += op[i+1]?:1; + if (op[i] < 2 || i == optlen - 1) + i++; + else + i += op[i + 1] ? : 1; } return invert;
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 00319d2..d9d7401 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c
@@ -223,13 +223,13 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par) localtime_2(&current_time, stamp); - if (!(info->weekdays_match & (1 << current_time.weekday))) + if (!(info->weekdays_match & (1U << current_time.weekday))) return false; /* Do not spend time computing monthday if all days match anyway */ if (info->monthdays_match != XT_TIME_ALL_MONTHDAYS) { localtime_3(&current_time, stamp); - if (!(info->monthdays_match & (1 << current_time.monthday))) + if (!(info->monthdays_match & (1U << current_time.monthday))) return false; }
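The 1U literals matter because current_time.monthday can legitimately be 31, and shifting a signed 1 into bit 31 of a 32-bit int is undefined behaviour in C; the unsigned literal keeps the shift defined. A short demonstration of the defined form:

#include <stdio.h>

int main(void)
{
	unsigned int monthdays_match = 0xFFFFFFFFu;	/* all days set */
	int day = 31;					/* the highest bit a monthday can use */

	/* 1 << 31 would overflow a signed int (undefined behaviour);
	 * 1U << 31 is a well-defined unsigned shift. */
	unsigned int bit = 1U << day;

	printf("day %d matches: %d\n", day, !!(monthdays_match & bit));
	return 0;
}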
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index 3670bb3..7cb1e6a 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c
@@ -707,8 +707,10 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target, int rc; data_exch = kzalloc_obj(*data_exch); - if (!data_exch) + if (!data_exch) { + kfree_skb(skb); return -ENOMEM; + } data_exch->cb = cb; data_exch->cb_context = cb_context; @@ -731,8 +733,10 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target, data_exch); exit: - if (rc) + if (rc) { + kfree_skb(skb); kfree(data_exch); + } return rc; }
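Both added kfree_skb() calls enforce the convention that this transmit entry point consumes the skb: whether the context allocation fails early or the lower layer rejects the send later, ownership never silently returns to the caller, so callers can neither leak nor double-free. A userspace sketch of the "callee consumes on every path" contract (send_consuming() is an illustrative stand-in, with plain malloc()/free() for the buffer):

#include <stdlib.h>

/* On any error the callee frees the buffer itself, so the caller never
 * has to guess whether the failure happened before or after ownership
 * was handed over.
 */
static int send_consuming(char *buf, int fail_early, int fail_late)
{
	if (fail_early) {	/* e.g. the context allocation failed */
		free(buf);
		return -1;
	}
	if (fail_late) {	/* e.g. the lower layer refused the send */
		free(buf);
		return -1;
	}
	free(buf);		/* success: stands in for the lower layer taking ownership */
	return 0;
}

int main(void)
{
	/* every path is leak-free and double-free-free */
	send_consuming(malloc(16), 1, 0);
	send_consuming(malloc(16), 0, 1);
	send_consuming(malloc(16), 0, 0);
	return 0;
}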
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 6e9b76e..5f46c4b 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c
@@ -567,6 +567,10 @@ static int nci_close_device(struct nci_dev *ndev) flush_workqueue(ndev->cmd_wq); timer_delete_sync(&ndev->cmd_timer); timer_delete_sync(&ndev->data_timer); + if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) + nci_data_exchange_complete(ndev, NULL, + ndev->cur_conn_id, + -ENODEV); mutex_unlock(&ndev->req_lock); return 0; } @@ -575,8 +579,7 @@ static int nci_close_device(struct nci_dev *ndev) skb_queue_purge(&ndev->rx_q); skb_queue_purge(&ndev->tx_q); - /* Flush RX and TX wq */ - flush_workqueue(ndev->rx_wq); + /* Flush TX wq, RX wq flush can't be under the lock */ flush_workqueue(ndev->tx_wq); /* Reset device */ @@ -588,22 +591,30 @@ static int nci_close_device(struct nci_dev *ndev) msecs_to_jiffies(NCI_RESET_TIMEOUT)); /* After this point our queues are empty - * and no works are scheduled. + * rx work may be running but will see that NCI_UP was cleared */ ndev->ops->close(ndev); clear_bit(NCI_INIT, &ndev->flags); - /* Flush cmd wq */ + /* Flush cmd and tx wq */ flush_workqueue(ndev->cmd_wq); timer_delete_sync(&ndev->cmd_timer); + timer_delete_sync(&ndev->data_timer); + + if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) + nci_data_exchange_complete(ndev, NULL, ndev->cur_conn_id, + -ENODEV); /* Clear flags except NCI_UNREG */ ndev->flags &= BIT(NCI_UNREG); mutex_unlock(&ndev->req_lock); + /* rx_work may take req_lock via nci_deactivate_target */ + flush_workqueue(ndev->rx_wq); + return 0; } @@ -1035,18 +1046,23 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, struct nci_conn_info *conn_info; conn_info = ndev->rf_conn_info; - if (!conn_info) + if (!conn_info) { + kfree_skb(skb); return -EPROTO; + } pr_debug("target_idx %d, len %d\n", target->idx, skb->len); if (!ndev->target_active_prot) { pr_err("unable to exchange data, no active target\n"); + kfree_skb(skb); return -EINVAL; } - if (test_and_set_bit(NCI_DATA_EXCHANGE, &ndev->flags)) + if (test_and_set_bit(NCI_DATA_EXCHANGE, &ndev->flags)) { + kfree_skb(skb); return -EBUSY; + } /* store cb and context to be used on receiving data */ conn_info->data_exchange_cb = cb; @@ -1482,10 +1498,20 @@ static bool nci_valid_size(struct sk_buff *skb) unsigned int hdr_size = NCI_CTRL_HDR_SIZE; if (skb->len < hdr_size || - !nci_plen(skb->data) || skb->len < hdr_size + nci_plen(skb->data)) { return false; } + + if (!nci_plen(skb->data)) { + /* Allow zero length in proprietary notifications (0x20 - 0x3F). */ + if (nci_opcode_oid(nci_opcode(skb->data)) >= 0x20 && + nci_mt(skb->data) == NCI_MT_NTF_PKT) + return true; + + /* Disallow zero length otherwise. */ + return false; + } + return true; }
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index 78f4131..5f98c73 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c
@@ -33,7 +33,8 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); if (!conn_info) { kfree_skb(skb); - goto exit; + clear_bit(NCI_DATA_EXCHANGE, &ndev->flags); + return; } cb = conn_info->data_exchange_cb; @@ -45,6 +46,12 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, timer_delete_sync(&ndev->data_timer); clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); + /* Mark the exchange as done before calling the callback. + * The callback (e.g. rawsock_data_exchange_complete) may + * want to immediately queue another data exchange. + */ + clear_bit(NCI_DATA_EXCHANGE, &ndev->flags); + if (cb) { /* forward skb to nfc core */ cb(cb_context, skb, err); @@ -54,9 +61,6 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, /* no waiting callback, free skb */ kfree_skb(skb); } - -exit: - clear_bit(NCI_DATA_EXCHANGE, &ndev->flags); } /* ----------------- NCI TX Data ----------------- */
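Moving the clear_bit(NCI_DATA_EXCHANGE, ...) ahead of the callback matters because the callback may immediately queue the next transfer; with the bit still set, that re-queue would fail nci_transceive()'s test_and_set_bit() with -EBUSY. A userspace sketch of the re-entrancy (the flag and function names are illustrative):

#include <stdbool.h>
#include <stdio.h>

static bool busy;		/* models the NCI_DATA_EXCHANGE bit */
static void (*pending_cb)(void);

static int start_exchange(void (*cb)(void))
{
	if (busy)
		return -1;	/* models test_and_set_bit() finding the bit set */
	busy = true;
	pending_cb = cb;
	return 0;
}

static void complete_exchange(void)
{
	void (*cb)(void) = pending_cb;

	/* Clear the busy flag *before* running the callback, so the
	 * callback is free to start the next exchange right away;
	 * clearing it afterwards would make that re-queue fail. */
	busy = false;
	if (cb)
		cb();
}

static void on_done(void)
{
	printf("re-queue %s\n", start_exchange(NULL) == 0 ? "accepted" : "rejected");
}

int main(void)
{
	start_exchange(on_done);
	complete_exchange();	/* prints "re-queue accepted" */
	return 0;
}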
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index b049022..f7d7a59 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c
@@ -67,6 +67,17 @@ static int rawsock_release(struct socket *sock) if (sock->type == SOCK_RAW) nfc_sock_unlink(&raw_sk_list, sk); + if (sk->sk_state == TCP_ESTABLISHED) { + /* Prevent rawsock_tx_work from starting new transmits and + * wait for any in-progress work to finish. This must happen + * before the socket is orphaned to avoid a race where + * rawsock_tx_work runs after the NCI device has been freed. + */ + sk->sk_shutdown |= SEND_SHUTDOWN; + cancel_work_sync(&nfc_rawsock(sk)->tx_work); + rawsock_write_queue_purge(sk); + } + sock_orphan(sk); sock_put(sk);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 67fbf6e..1305240 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c
@@ -2953,6 +2953,8 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_MPLS: if (!eth_p_mpls(eth_type)) return -EINVAL; + if (key_len != sizeof(struct ovs_key_mpls)) + return -EINVAL; break; case OVS_KEY_ATTR_SCTP:
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 6574f9b..12055af 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c
@@ -151,11 +151,15 @@ static void vport_netdev_free(struct rcu_head *rcu) void ovs_netdev_detach_dev(struct vport *vport) { ASSERT_RTNL(); - vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; netdev_rx_handler_unregister(vport->dev); netdev_upper_dev_unlink(vport->dev, netdev_master_upper_dev_get(vport->dev)); dev_set_promiscuity(vport->dev, -1); + + /* paired with smp_mb() in netdev_destroy() */ + smp_wmb(); + + vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; } static void netdev_destroy(struct vport *vport) @@ -174,6 +178,9 @@ static void netdev_destroy(struct vport *vport) rtnl_unlock(); } + /* paired with smp_wmb() in ovs_netdev_detach_dev() */ + smp_mb(); + call_rcu(&vport->rcu, vport_netdev_free); } @@ -189,8 +196,6 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) */ if (vport->dev->reg_state == NETREG_REGISTERED) rtnl_delete_link(vport->dev, 0, NULL); - netdev_put(vport->dev, &vport->dev_tracker); - vport->dev = NULL; rtnl_unlock(); call_rcu(&vport->rcu, vport_netdev_free);
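The detach path now clears IFF_OVS_DATAPATH only after its other teardown stores, and the smp_wmb()/smp_mb() pair orders those stores against netdev_destroy()'s later reads. A simplified userspace analogue of this publish-after-stores shape using C11 fences (the kernel primitives are not identical to these and the real code also involves RCU; this shows only the ordering idea, compile with -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static int teardown_done;	/* plain data written by the detach side */
static atomic_int detached;	/* the flag written last, like IFF_OVS_DATAPATH */

static void *detach_side(void *arg)
{
	(void)arg;
	teardown_done = 1;				/* unwind state first */
	atomic_thread_fence(memory_order_release);	/* analogue of smp_wmb() */
	atomic_store_explicit(&detached, 1, memory_order_relaxed);
	return NULL;
}

static void *destroy_side(void *arg)
{
	(void)arg;
	while (!atomic_load_explicit(&detached, memory_order_relaxed))
		;					/* wait for the flag */
	atomic_thread_fence(memory_order_acquire);	/* analogue of the paired smp_mb() */
	printf("teardown_done = %d\n", teardown_done);	/* guaranteed to print 1 */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&b, NULL, destroy_side, NULL);
	pthread_create(&a, NULL, detach_side, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}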
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 72d0935..bb2d882 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c
@@ -3135,6 +3135,7 @@ static int packet_release(struct socket *sock) spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); + WRITE_ONCE(po->num, 0); packet_cached_dev_reset(po); if (po->prot_hook.dev) {
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 238a963..d89225d6 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c
@@ -129,9 +129,12 @@ static int pn_header_create(struct sk_buff *skb, struct net_device *dev, return 1; } -static int pn_header_parse(const struct sk_buff *skb, unsigned char *haddr) +static int pn_header_parse(const struct sk_buff *skb, + const struct net_device *dev, + unsigned char *haddr) { const u8 *media = skb_mac_header(skb); + *haddr = *media; return 1; }
diff --git a/net/psp/psp_main.c b/net/psp/psp_main.c index c5c42b5..d4c04c9 100644 --- a/net/psp/psp_main.c +++ b/net/psp/psp_main.c
@@ -166,9 +166,46 @@ static void psp_write_headers(struct net *net, struct sk_buff *skb, __be32 spi, { struct udphdr *uh = udp_hdr(skb); struct psphdr *psph = (struct psphdr *)(uh + 1); + const struct sock *sk = skb->sk; uh->dest = htons(PSP_DEFAULT_UDP_PORT); - uh->source = udp_flow_src_port(net, skb, 0, 0, false); + + /* A bit of theory: Selection of the source port. + * + * We need some entropy, so that multiple flows use different + * source ports for better RSS spreading at the receiver. + * + * We also need that all packets belonging to one TCP flow + * use the same source port through their duration, + * so that all these packets land in the same receive queue. + * + * udp_flow_src_port() is using sk_txhash, inherited from + * skb_set_hash_from_sk() call in __tcp_transmit_skb(). + * This field is subject to reshuffling, thanks to + * sk_rethink_txhash() calls in various TCP functions. + * + * Instead, use sk->sk_hash which is constant through + * the whole flow duration. + */ + if (likely(sk)) { + u32 hash = sk->sk_hash; + int min, max; + + /* These operations are cheap, no need to cache the result + * in another socket field. + */ + inet_get_local_port_range(net, &min, &max); + /* Since this is being sent on the wire obfuscate hash a bit + * to minimize possibility that any useful information to an + * attacker is leaked. Only upper 16 bits are relevant in the + * computation for 16 bit port value because we use a + * reciprocal divide. + */ + hash ^= hash << 16; + uh->source = htons((((u64)hash * (max - min)) >> 32) + min); + } else { + uh->source = udp_flow_src_port(net, skb, 0, 0, false); + } uh->check = 0; uh->len = htons(udp_len);
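The port computation maps the folded 32-bit hash onto [min, max) with a multiply-and-shift instead of a modulo: the product of a 32-bit value and the range size, shifted right by 32, always lands inside the range, and only the hash's upper bits influence the result, which is why the hash ^= hash << 16 fold mixes the lower half upward first. A runnable sketch of the arithmetic (32768..60999 is only sample input, the Linux default local port range):

#include <stdint.h>
#include <stdio.h>

static uint16_t hash_to_port(uint32_t hash, int min, int max)
{
	hash ^= hash << 16;	/* fold low-half entropy into the upper bits,
				 * the only bits the scaling below keeps */
	/* (hash * range) >> 32 scales [0, 2^32) down to [0, range) */
	return (uint16_t)((((uint64_t)hash * (uint32_t)(max - min)) >> 32) + min);
}

int main(void)
{
	int min = 32768, max = 60999;

	/* every result lies in [min, max) */
	printf("%u\n", (unsigned)hash_to_port(0x00000000u, min, max));
	printf("%u\n", (unsigned)hash_to_port(0x12345678u, min, max));
	printf("%u\n", (unsigned)hash_to_port(0xdeadbeefu, min, max));
	return 0;
}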
diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index 55fd2dd..d77e9c8 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c
@@ -118,7 +118,7 @@ static DEFINE_XARRAY_ALLOC(qrtr_ports); * @ep: endpoint * @ref: reference count for node * @nid: node id - * @qrtr_tx_flow: tree of qrtr_tx_flow, keyed by node << 32 | port + * @qrtr_tx_flow: xarray of qrtr_tx_flow, keyed by node << 32 | port * @qrtr_tx_lock: lock for qrtr_tx_flow inserts * @rx_queue: receive queue * @item: list item for broadcast list @@ -129,7 +129,7 @@ struct qrtr_node { struct kref ref; unsigned int nid; - struct radix_tree_root qrtr_tx_flow; + struct xarray qrtr_tx_flow; struct mutex qrtr_tx_lock; /* for qrtr_tx_flow */ struct sk_buff_head rx_queue; @@ -172,6 +172,7 @@ static void __qrtr_node_release(struct kref *kref) struct qrtr_tx_flow *flow; unsigned long flags; void __rcu **slot; + unsigned long index; spin_lock_irqsave(&qrtr_nodes_lock, flags); /* If the node is a bridge for other nodes, there are possibly @@ -189,11 +190,9 @@ static void __qrtr_node_release(struct kref *kref) skb_queue_purge(&node->rx_queue); /* Free tx flow counters */ - radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) { - flow = *slot; - radix_tree_iter_delete(&node->qrtr_tx_flow, &iter, slot); + xa_for_each(&node->qrtr_tx_flow, index, flow) kfree(flow); - } + xa_destroy(&node->qrtr_tx_flow); kfree(node); } @@ -228,9 +227,7 @@ static void qrtr_tx_resume(struct qrtr_node *node, struct sk_buff *skb) key = remote_node << 32 | remote_port; - rcu_read_lock(); - flow = radix_tree_lookup(&node->qrtr_tx_flow, key); - rcu_read_unlock(); + flow = xa_load(&node->qrtr_tx_flow, key); if (flow) { spin_lock(&flow->resume_tx.lock); flow->pending = 0; @@ -269,12 +266,13 @@ static int qrtr_tx_wait(struct qrtr_node *node, int dest_node, int dest_port, return 0; mutex_lock(&node->qrtr_tx_lock); - flow = radix_tree_lookup(&node->qrtr_tx_flow, key); + flow = xa_load(&node->qrtr_tx_flow, key); if (!flow) { flow = kzalloc_obj(*flow); if (flow) { init_waitqueue_head(&flow->resume_tx); - if (radix_tree_insert(&node->qrtr_tx_flow, key, flow)) { + if (xa_err(xa_store(&node->qrtr_tx_flow, key, flow, + GFP_KERNEL))) { kfree(flow); flow = NULL; } @@ -326,9 +324,7 @@ static void qrtr_tx_flow_failed(struct qrtr_node *node, int dest_node, unsigned long key = (u64)dest_node << 32 | dest_port; struct qrtr_tx_flow *flow; - rcu_read_lock(); - flow = radix_tree_lookup(&node->qrtr_tx_flow, key); - rcu_read_unlock(); + flow = xa_load(&node->qrtr_tx_flow, key); if (flow) { spin_lock_irq(&flow->resume_tx.lock); flow->tx_failed = 1; @@ -599,7 +595,7 @@ int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid) node->nid = QRTR_EP_NID_AUTO; node->ep = ep; - INIT_RADIX_TREE(&node->qrtr_tx_flow, GFP_KERNEL); + xa_init(&node->qrtr_tx_flow); mutex_init(&node->qrtr_tx_lock); qrtr_node_assign(node, nid); @@ -627,6 +623,7 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) struct qrtr_tx_flow *flow; struct sk_buff *skb; unsigned long flags; + unsigned long index; void __rcu **slot; mutex_lock(&node->ep_lock); @@ -649,10 +646,8 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) /* Wake up any transmitters waiting for resume-tx from the node */ mutex_lock(&node->qrtr_tx_lock); - radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) { - flow = *slot; + xa_for_each(&node->qrtr_tx_flow, index, flow) wake_up_interruptible_all(&flow->resume_tx); - } mutex_unlock(&node->qrtr_tx_lock); qrtr_node_release(node);
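The xarray performs stores under its own internal lock and does lookups under RCU internally, which is why the conversion can drop the explicit rcu_read_lock()/rcu_read_unlock() pairs that surrounded the old radix_tree_lookup() calls. A kernel-context sketch of the calls the new code relies on (not a standalone program; flow_add() and friends are illustrative names mirroring the qrtr usage):

/* illustrative kernel-context sketch, not the qrtr code itself */
#include <linux/slab.h>
#include <linux/xarray.h>

struct flow { int pending; };

static DEFINE_XARRAY(flows);	/* like xa_init() on an embedded struct xarray */

static int flow_add(unsigned long key)
{
	struct flow *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (!f)
		return -ENOMEM;
	/* xa_store() locks internally and returns the old entry, or an
	 * xa_err()-encoded pointer on allocation failure */
	if (xa_err(xa_store(&flows, key, f, GFP_KERNEL))) {
		kfree(f);
		return -ENOMEM;
	}
	return 0;
}

static struct flow *flow_get(unsigned long key)
{
	return xa_load(&flows, key);	/* takes rcu_read_lock() itself */
}

static void flows_free(void)
{
	struct flow *f;
	unsigned long index;

	xa_for_each(&flows, index, f)	/* iterate live entries */
		kfree(f);
	xa_destroy(&flows);		/* then drop the xarray's own nodes */
}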
diff --git a/net/rds/connection.c b/net/rds/connection.c index e23fd9a..412441a 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c
@@ -455,6 +455,9 @@ void rds_conn_shutdown(struct rds_conn_path *cp) rcu_read_unlock(); } + /* we do not hold the socket lock here but it is safe because + * fan-out is disabled when calling conn_slots_available() + */ if (conn->c_trans->conn_slots_available) conn->c_trans->conn_slots_available(conn, false); }
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 077f704..2cfec25 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c
@@ -604,8 +604,13 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, return ibmr; } - if (conn) + if (conn) { ic = conn->c_transport_data; + if (!ic || !ic->i_cm_id || !ic->i_cm_id->qp) { + ret = -ENODEV; + goto out; + } + } if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) { ret = -ENODEV;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 04f3102..654e23d 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c
@@ -490,18 +490,24 @@ bool rds_tcp_tune(struct socket *sock) struct rds_tcp_net *rtn; tcp_sock_set_nodelay(sock->sk); - lock_sock(sk); /* TCP timer functions might access net namespace even after * a process which created this net namespace terminated. */ if (!sk->sk_net_refcnt) { - if (!maybe_get_net(net)) { - release_sock(sk); + if (!maybe_get_net(net)) return false; - } + /* + * sk_net_refcnt_upgrade() must be called before lock_sock() + * because it does a GFP_KERNEL allocation, which can trigger + * fs_reclaim and create a circular lock dependency with the + * socket lock. The fields it modifies (sk_net_refcnt, + * ns_tracker) are not accessed by any concurrent code path + * at this point. + */ sk_net_refcnt_upgrade(sk); put_net(net); } + lock_sock(sk); rtn = net_generic(net, rds_tcp_netid); if (rtn->sndbuf_size > 0) { sk->sk_sndbuf = rtn->sndbuf_size;
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index b4ab68a..08a506a 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c
@@ -59,30 +59,12 @@ void rds_tcp_keepalive(struct socket *sock) static int rds_tcp_get_peer_sport(struct socket *sock) { - union { - struct sockaddr_storage storage; - struct sockaddr addr; - struct sockaddr_in sin; - struct sockaddr_in6 sin6; - } saddr; - int sport; + struct sock *sk = sock->sk; - if (kernel_getpeername(sock, &saddr.addr) >= 0) { - switch (saddr.addr.sa_family) { - case AF_INET: - sport = ntohs(saddr.sin.sin_port); - break; - case AF_INET6: - sport = ntohs(saddr.sin6.sin6_port); - break; - default: - sport = -1; - } - } else { - sport = -1; - } + if (!sk) + return -1; - return sport; + return ntohs(READ_ONCE(inet_sk(sk)->inet_dport)); } /* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 841d624..ba56213 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c
@@ -811,6 +811,11 @@ static int rose_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int goto out_release; } + if (sk->sk_state == TCP_SYN_SENT) { + err = -EALREADY; + goto out_release; + } + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED;
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 0c2c68c..0f90272 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c
@@ -267,12 +267,13 @@ static int rxrpc_listen(struct socket *sock, int backlog) * Lookup or create a remote transport endpoint record for the specified * address. * - * Return: The peer record found with a reference, %NULL if no record is found - * or a negative error code if the address is invalid or unsupported. + * Return: The peer record found with a reference or a negative error code if + * the address is invalid or unsupported. */ struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock, struct sockaddr_rxrpc *srx, gfp_t gfp) { + struct rxrpc_peer *peer; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); int ret; @@ -280,7 +281,8 @@ struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock, if (ret < 0) return ERR_PTR(ret); - return rxrpc_lookup_peer(rx->local, srx, gfp); + peer = rxrpc_lookup_peer(rx->local, srx, gfp); + return peer ?: ERR_PTR(-ENOMEM); } EXPORT_SYMBOL(rxrpc_kernel_lookup_peer);
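The peer ?: ERR_PTR(-ENOMEM) fallback (and the corrected kerneldoc) exists because callers of an ERR_PTR-convention function test IS_ERR() and never NULL, so a bare NULL on allocation failure would be dereferenced. A userspace re-implementation of the convention showing why NULL slips through the check (the macros mirror the kernel's and assume a flat address space where the top 4095 values are never valid pointers):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

static void *lookup_peer(int simulate_oom)
{
	void *peer = simulate_oom ? NULL : malloc(32);

	/* map allocation failure onto the convention the caller checks;
	 * returning the bare NULL would defeat IS_ERR() below */
	return peer ? peer : ERR_PTR(-ENOMEM);
}

int main(void)
{
	void *p = lookup_peer(1);

	if (IS_ERR(p))	/* true for ERR_PTR(-ENOMEM); false for NULL */
		printf("lookup failed: %ld\n", PTR_ERR(p));
	else
		free(p);
	return 0;
}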
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index bd51522..7d5e50c 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c
@@ -1360,6 +1360,12 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, return -EINVAL; } + if (bind && !(flags & TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT)) { + NL_SET_ERR_MSG_MOD(extack, + "Attaching ct to a non ingress/clsact qdisc is unsupported"); + return -EOPNOTSUPP; + } + err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack); if (err < 0) return err;
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 686eaed..fdbfcaa 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c
@@ -32,9 +32,12 @@ static ktime_t gate_get_time(struct tcf_gate *gact) return KTIME_MAX; } -static void gate_get_start_time(struct tcf_gate *gact, ktime_t *start) +static void tcf_gate_params_free_rcu(struct rcu_head *head); + +static void gate_get_start_time(struct tcf_gate *gact, + const struct tcf_gate_params *param, + ktime_t *start) { - struct tcf_gate_params *param = &gact->param; ktime_t now, base, cycle; u64 n; @@ -69,12 +72,14 @@ static enum hrtimer_restart gate_timer_func(struct hrtimer *timer) { struct tcf_gate *gact = container_of(timer, struct tcf_gate, hitimer); - struct tcf_gate_params *p = &gact->param; struct tcfg_gate_entry *next; + struct tcf_gate_params *p; ktime_t close_time, now; spin_lock(&gact->tcf_lock); + p = rcu_dereference_protected(gact->param, + lockdep_is_held(&gact->tcf_lock)); next = gact->next_entry; /* cycle start, clear pending bit, clear total octets */ @@ -225,6 +230,35 @@ static void release_entry_list(struct list_head *entries) } } +static int tcf_gate_copy_entries(struct tcf_gate_params *dst, + const struct tcf_gate_params *src, + struct netlink_ext_ack *extack) +{ + struct tcfg_gate_entry *entry; + int i = 0; + + list_for_each_entry(entry, &src->entries, list) { + struct tcfg_gate_entry *new; + + new = kzalloc(sizeof(*new), GFP_ATOMIC); + if (!new) { + NL_SET_ERR_MSG(extack, "Not enough memory for entry"); + return -ENOMEM; + } + + new->index = entry->index; + new->gate_state = entry->gate_state; + new->interval = entry->interval; + new->ipv = entry->ipv; + new->maxoctets = entry->maxoctets; + list_add_tail(&new->list, &dst->entries); + i++; + } + + dst->num_entries = i; + return 0; +} + static int parse_gate_list(struct nlattr *list_attr, struct tcf_gate_params *sched, struct netlink_ext_ack *extack) @@ -270,24 +304,44 @@ static int parse_gate_list(struct nlattr *list_attr, return err; } -static void gate_setup_timer(struct tcf_gate *gact, u64 basetime, - enum tk_offsets tko, s32 clockid, - bool do_init) +static bool gate_timer_needs_cancel(u64 basetime, u64 old_basetime, + enum tk_offsets tko, + enum tk_offsets old_tko, + s32 clockid, s32 old_clockid) { - if (!do_init) { - if (basetime == gact->param.tcfg_basetime && - tko == gact->tk_offset && - clockid == gact->param.tcfg_clockid) - return; + return basetime != old_basetime || + clockid != old_clockid || + tko != old_tko; +} - spin_unlock_bh(&gact->tcf_lock); - hrtimer_cancel(&gact->hitimer); - spin_lock_bh(&gact->tcf_lock); +static int gate_clock_resolve(s32 clockid, enum tk_offsets *tko, + struct netlink_ext_ack *extack) +{ + switch (clockid) { + case CLOCK_REALTIME: + *tko = TK_OFFS_REAL; + return 0; + case CLOCK_MONOTONIC: + *tko = TK_OFFS_MAX; + return 0; + case CLOCK_BOOTTIME: + *tko = TK_OFFS_BOOT; + return 0; + case CLOCK_TAI: + *tko = TK_OFFS_TAI; + return 0; + default: + NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); + return -EINVAL; } - gact->param.tcfg_basetime = basetime; - gact->param.tcfg_clockid = clockid; - gact->tk_offset = tko; - hrtimer_setup(&gact->hitimer, gate_timer_func, clockid, HRTIMER_MODE_ABS_SOFT); +} + +static void gate_setup_timer(struct tcf_gate *gact, s32 clockid, + enum tk_offsets tko) +{ + WRITE_ONCE(gact->tk_offset, tko); + hrtimer_setup(&gact->hitimer, gate_timer_func, clockid, + HRTIMER_MODE_ABS_SOFT); } static int tcf_gate_init(struct net *net, struct nlattr *nla, @@ -296,15 +350,22 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id); - enum 
tk_offsets tk_offset = TK_OFFS_TAI; + u64 cycletime = 0, basetime = 0, cycletime_ext = 0; + struct tcf_gate_params *p = NULL, *old_p = NULL; + enum tk_offsets old_tk_offset = TK_OFFS_TAI; + const struct tcf_gate_params *cur_p = NULL; bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GATE_MAX + 1]; + enum tk_offsets tko = TK_OFFS_TAI; struct tcf_chain *goto_ch = NULL; - u64 cycletime = 0, basetime = 0; - struct tcf_gate_params *p; + s32 timer_clockid = CLOCK_TAI; + bool use_old_entries = false; + s32 old_clockid = CLOCK_TAI; + bool need_cancel = false; s32 clockid = CLOCK_TAI; struct tcf_gate *gact; struct tc_gate *parm; + u64 old_basetime = 0; int ret = 0, err; u32 gflags = 0; s32 prio = -1; @@ -321,26 +382,8 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (!tb[TCA_GATE_PARMS]) return -EINVAL; - if (tb[TCA_GATE_CLOCKID]) { + if (tb[TCA_GATE_CLOCKID]) clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]); - switch (clockid) { - case CLOCK_REALTIME: - tk_offset = TK_OFFS_REAL; - break; - case CLOCK_MONOTONIC: - tk_offset = TK_OFFS_MAX; - break; - case CLOCK_BOOTTIME: - tk_offset = TK_OFFS_BOOT; - break; - case CLOCK_TAI: - tk_offset = TK_OFFS_TAI; - break; - default: - NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); - return -EINVAL; - } - } parm = nla_data(tb[TCA_GATE_PARMS]); index = parm->index; @@ -366,6 +409,60 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, return -EEXIST; } + gact = to_gate(*a); + + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + err = -ENOMEM; + goto chain_put; + } + INIT_LIST_HEAD(&p->entries); + + use_old_entries = !tb[TCA_GATE_ENTRY_LIST]; + if (!use_old_entries) { + err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack); + if (err < 0) + goto err_free; + use_old_entries = !err; + } + + if (ret == ACT_P_CREATED && use_old_entries) { + NL_SET_ERR_MSG(extack, "The entry list is empty"); + err = -EINVAL; + goto err_free; + } + + if (ret != ACT_P_CREATED) { + rcu_read_lock(); + cur_p = rcu_dereference(gact->param); + + old_basetime = cur_p->tcfg_basetime; + old_clockid = cur_p->tcfg_clockid; + old_tk_offset = READ_ONCE(gact->tk_offset); + + basetime = old_basetime; + cycletime_ext = cur_p->tcfg_cycletime_ext; + prio = cur_p->tcfg_priority; + gflags = cur_p->tcfg_flags; + + if (!tb[TCA_GATE_CLOCKID]) + clockid = old_clockid; + + err = 0; + if (use_old_entries) { + err = tcf_gate_copy_entries(p, cur_p, extack); + if (!err && !tb[TCA_GATE_CYCLE_TIME]) + cycletime = cur_p->tcfg_cycletime; + } + rcu_read_unlock(); + if (err) + goto err_free; + } + if (tb[TCA_GATE_PRIORITY]) prio = nla_get_s32(tb[TCA_GATE_PRIORITY]); @@ -375,25 +472,26 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (tb[TCA_GATE_FLAGS]) gflags = nla_get_u32(tb[TCA_GATE_FLAGS]); - gact = to_gate(*a); - if (ret == ACT_P_CREATED) - INIT_LIST_HEAD(&gact->param.entries); - - err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); - if (err < 0) - goto release_idr; - - spin_lock_bh(&gact->tcf_lock); - p = &gact->param; - if (tb[TCA_GATE_CYCLE_TIME]) cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]); - if (tb[TCA_GATE_ENTRY_LIST]) { - err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack); - if (err < 0) - goto chain_put; - } + if (tb[TCA_GATE_CYCLE_TIME_EXT]) + cycletime_ext = nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]); + + err = gate_clock_resolve(clockid, &tko, extack); + if (err) + goto err_free; + timer_clockid = clockid; + + 
need_cancel = ret != ACT_P_CREATED && + gate_timer_needs_cancel(basetime, old_basetime, + tko, old_tk_offset, + timer_clockid, old_clockid); + + if (need_cancel) + hrtimer_cancel(&gact->hitimer); + + spin_lock_bh(&gact->tcf_lock); if (!cycletime) { struct tcfg_gate_entry *entry; @@ -402,22 +500,20 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, list_for_each_entry(entry, &p->entries, list) cycle = ktime_add_ns(cycle, entry->interval); cycletime = cycle; - if (!cycletime) { - err = -EINVAL; - goto chain_put; - } } p->tcfg_cycletime = cycletime; + p->tcfg_cycletime_ext = cycletime_ext; - if (tb[TCA_GATE_CYCLE_TIME_EXT]) - p->tcfg_cycletime_ext = - nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]); - - gate_setup_timer(gact, basetime, tk_offset, clockid, - ret == ACT_P_CREATED); + if (need_cancel || ret == ACT_P_CREATED) + gate_setup_timer(gact, timer_clockid, tko); p->tcfg_priority = prio; p->tcfg_flags = gflags; - gate_get_start_time(gact, &start); + p->tcfg_basetime = basetime; + p->tcfg_clockid = timer_clockid; + gate_get_start_time(gact, p, &start); + + old_p = rcu_replace_pointer(gact->param, p, + lockdep_is_held(&gact->tcf_lock)); gact->current_close_time = start; gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING; @@ -434,11 +530,15 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); + if (old_p) + call_rcu(&old_p->rcu, tcf_gate_params_free_rcu); + return ret; +err_free: + release_entry_list(&p->entries); + kfree(p); chain_put: - spin_unlock_bh(&gact->tcf_lock); - if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: @@ -446,21 +546,29 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, * without taking tcf_lock. */ if (ret == ACT_P_CREATED) - gate_setup_timer(gact, gact->param.tcfg_basetime, - gact->tk_offset, gact->param.tcfg_clockid, - true); + gate_setup_timer(gact, timer_clockid, tko); + tcf_idr_release(*a, bind); return err; } +static void tcf_gate_params_free_rcu(struct rcu_head *head) +{ + struct tcf_gate_params *p = container_of(head, struct tcf_gate_params, rcu); + + release_entry_list(&p->entries); + kfree(p); +} + static void tcf_gate_cleanup(struct tc_action *a) { struct tcf_gate *gact = to_gate(a); struct tcf_gate_params *p; - p = &gact->param; hrtimer_cancel(&gact->hitimer); - release_entry_list(&p->entries); + p = rcu_dereference_protected(gact->param, 1); + if (p) + call_rcu(&p->rcu, tcf_gate_params_free_rcu); } static int dumping_entry(struct sk_buff *skb, @@ -509,10 +617,9 @@ static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a, struct nlattr *entry_list; struct tcf_t t; - spin_lock_bh(&gact->tcf_lock); - opt.action = gact->tcf_action; - - p = &gact->param; + rcu_read_lock(); + opt.action = READ_ONCE(gact->tcf_action); + p = rcu_dereference(gact->param); if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -552,12 +659,12 @@ static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a, tcf_tm_dump(&t, &gact->tcf_tm); if (nla_put_64bit(skb, TCA_GATE_TM, sizeof(t), &t, TCA_GATE_PAD)) goto nla_put_failure; - spin_unlock_bh(&gact->tcf_lock); + rcu_read_unlock(); return skb->len; nla_put_failure: - spin_unlock_bh(&gact->tcf_lock); + rcu_read_unlock(); nlmsg_trim(skb, b); return -1; }
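Structurally, this act_gate rework is a standard RCU-managed parameter block: the writer assembles a complete tcf_gate_params copy off to the side, publishes it with rcu_replace_pointer() under the action lock, and defers freeing the old copy with call_rcu(), while the dump path now reads under rcu_read_lock() with no spinlock at all. A kernel-context sketch of that shape (struct params and update_prio() are illustrative, not the actual act_gate layout):

/* illustrative kernel-context sketch of the RCU swap pattern */
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct params {
	int prio;
	struct rcu_head rcu;
};

static struct params __rcu *cur_params;
static DEFINE_SPINLOCK(writer_lock);

static void params_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct params, rcu));
}

static int update_prio(int prio)
{
	struct params *p, *old;

	p = kzalloc(sizeof(*p), GFP_KERNEL);	/* build the copy off to the side */
	if (!p)
		return -ENOMEM;
	p->prio = prio;

	spin_lock_bh(&writer_lock);
	old = rcu_replace_pointer(cur_params, p,
				  lockdep_is_held(&writer_lock));
	spin_unlock_bh(&writer_lock);

	if (old)	/* readers may still be using the old copy */
		call_rcu(&old->rcu, params_free_rcu);
	return 0;
}

static int read_prio(void)
{
	struct params *p;
	int prio;

	rcu_read_lock();	/* lockless reader, like the dump path */
	p = rcu_dereference(cur_params);
	prio = p ? p->prio : -1;
	rcu_read_unlock();
	return prio;
}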
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 79df81d..d5e8a91 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c
@@ -293,8 +293,8 @@ static int load_metaops_and_vet(u32 metaid, void *val, int len, bool rtnl_held) /* called when adding new meta information */ static int __add_metainfo(const struct tcf_meta_ops *ops, - struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len, bool atomic, bool exists) + struct tcf_ife_params *p, u32 metaid, void *metaval, + int len, bool atomic) { struct tcf_meta_info *mi = NULL; int ret = 0; @@ -313,45 +313,40 @@ static int __add_metainfo(const struct tcf_meta_ops *ops, } } - if (exists) - spin_lock_bh(&ife->tcf_lock); - list_add_tail(&mi->metalist, &ife->metalist); - if (exists) - spin_unlock_bh(&ife->tcf_lock); + list_add_tail(&mi->metalist, &p->metalist); return ret; } static int add_metainfo_and_get_ops(const struct tcf_meta_ops *ops, - struct tcf_ife_info *ife, u32 metaid, - bool exists) + struct tcf_ife_params *p, u32 metaid) { int ret; if (!try_module_get(ops->owner)) return -ENOENT; - ret = __add_metainfo(ops, ife, metaid, NULL, 0, true, exists); + ret = __add_metainfo(ops, p, metaid, NULL, 0, true); if (ret) module_put(ops->owner); return ret; } -static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len, bool exists) +static int add_metainfo(struct tcf_ife_params *p, u32 metaid, void *metaval, + int len) { const struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret; if (!ops) return -ENOENT; - ret = __add_metainfo(ops, ife, metaid, metaval, len, false, exists); + ret = __add_metainfo(ops, p, metaid, metaval, len, false); if (ret) /*put back what find_ife_oplist took */ module_put(ops->owner); return ret; } -static int use_all_metadata(struct tcf_ife_info *ife, bool exists) +static int use_all_metadata(struct tcf_ife_params *p) { struct tcf_meta_ops *o; int rc = 0; @@ -359,7 +354,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists) read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo_and_get_ops(o, ife, o->metaid, exists); + rc = add_metainfo_and_get_ops(o, p, o->metaid); if (rc == 0) installed += 1; } @@ -371,7 +366,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists) return -EINVAL; } -static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife) +static int dump_metalist(struct sk_buff *skb, struct tcf_ife_params *p) { struct tcf_meta_info *e; struct nlattr *nest; @@ -379,14 +374,14 @@ static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife) int total_encoded = 0; /*can only happen on decode */ - if (list_empty(&ife->metalist)) + if (list_empty(&p->metalist)) return 0; nest = nla_nest_start_noflag(skb, TCA_IFE_METALST); if (!nest) goto out_nlmsg_trim; - list_for_each_entry(e, &ife->metalist, metalist) { + list_for_each_entry(e, &p->metalist, metalist) { if (!e->ops->get(skb, e)) total_encoded += 1; } @@ -403,13 +398,11 @@ static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife) return -1; } -/* under ife->tcf_lock */ -static void _tcf_ife_cleanup(struct tc_action *a) +static void __tcf_ife_cleanup(struct tcf_ife_params *p) { - struct tcf_ife_info *ife = to_ife(a); struct tcf_meta_info *e, *n; - list_for_each_entry_safe(e, n, &ife->metalist, metalist) { + list_for_each_entry_safe(e, n, &p->metalist, metalist) { list_del(&e->metalist); if (e->metaval) { if (e->ops->release) @@ -422,18 +415,23 @@ static void _tcf_ife_cleanup(struct tc_action *a) } } +static void tcf_ife_cleanup_params(struct rcu_head *head) +{ + struct tcf_ife_params *p = container_of(head, struct tcf_ife_params, + rcu); + + 
__tcf_ife_cleanup(p); + kfree(p); +} + static void tcf_ife_cleanup(struct tc_action *a) { struct tcf_ife_info *ife = to_ife(a); struct tcf_ife_params *p; - spin_lock_bh(&ife->tcf_lock); - _tcf_ife_cleanup(a); - spin_unlock_bh(&ife->tcf_lock); - p = rcu_dereference_protected(ife->params, 1); if (p) - kfree_rcu(p, rcu); + call_rcu(&p->rcu, tcf_ife_cleanup_params); } static int load_metalist(struct nlattr **tb, bool rtnl_held) @@ -455,8 +453,7 @@ static int load_metalist(struct nlattr **tb, bool rtnl_held) return 0; } -static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, - bool exists, bool rtnl_held) +static int populate_metalist(struct tcf_ife_params *p, struct nlattr **tb) { int len = 0; int rc = 0; @@ -468,7 +465,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = add_metainfo(ife, i, val, len, exists); + rc = add_metainfo(p, i, val, len); if (rc) return rc; } @@ -523,6 +520,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, p = kzalloc_obj(*p); if (!p) return -ENOMEM; + INIT_LIST_HEAD(&p->metalist); if (tb[TCA_IFE_METALST]) { err = nla_parse_nested_deprecated(tb2, IFE_META_MAX, @@ -567,8 +565,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } ife = to_ife(*a); - if (ret == ACT_P_CREATED) - INIT_LIST_HEAD(&ife->metalist); err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) @@ -600,8 +596,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } if (tb[TCA_IFE_METALST]) { - err = populate_metalist(ife, tb2, exists, - !(flags & TCA_ACT_FLAGS_NO_RTNL)); + err = populate_metalist(p, tb2); if (err) goto metadata_parse_err; } else { @@ -610,7 +605,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, * as we can. 
You better have at least one else we are * going to bail out */ - err = use_all_metadata(ife, exists); + err = use_all_metadata(p); if (err) goto metadata_parse_err; } @@ -626,13 +621,14 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); if (p) - kfree_rcu(p, rcu); + call_rcu(&p->rcu, tcf_ife_cleanup_params); return ret; metadata_parse_err: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: + __tcf_ife_cleanup(p); kfree(p); tcf_idr_release(*a, bind); return err; @@ -679,7 +675,7 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, if (nla_put(skb, TCA_IFE_TYPE, 2, &p->eth_type)) goto nla_put_failure; - if (dump_metalist(skb, ife)) { + if (dump_metalist(skb, p)) { /*ignore failure to dump metalist */ pr_info("Failed to dump metalist\n"); } @@ -693,13 +689,13 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, return -1; } -static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife, +static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_params *p, u16 metaid, u16 mlen, void *mdata) { struct tcf_meta_info *e; /* XXX: use hash to speed up */ - list_for_each_entry(e, &ife->metalist, metalist) { + list_for_each_entry_rcu(e, &p->metalist, metalist) { if (metaid == e->metaid) { if (e->ops) { /* We check for decode presence already */ @@ -716,10 +712,13 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, { struct tcf_ife_info *ife = to_ife(a); int action = ife->tcf_action; + struct tcf_ife_params *p; u8 *ifehdr_end; u8 *tlv_data; u16 metalen; + p = rcu_dereference_bh(ife->params); + bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb); tcf_lastuse_update(&ife->tcf_tm); @@ -745,7 +744,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, return TC_ACT_SHOT; } - if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) { + if (find_decode_metaid(skb, p, mtype, dlen, curr_data)) { /* abuse overlimits to count when we receive metadata * but dont have an ops for it */ @@ -769,12 +768,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, /*XXX: check if we can do this at install time instead of current * send data path **/ -static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife) +static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_params *p) { - struct tcf_meta_info *e, *n; + struct tcf_meta_info *e; int tot_run_sz = 0, run_sz = 0; - list_for_each_entry_safe(e, n, &ife->metalist, metalist) { + list_for_each_entry_rcu(e, &p->metalist, metalist) { if (e->ops->check_presence) { run_sz = e->ops->check_presence(skb, e); tot_run_sz += run_sz; @@ -795,7 +794,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, OUTERHDR:TOTMETALEN:{TLVHDR:Metadatum:TLVHDR..}:ORIGDATA where ORIGDATA = original ethernet header ... */ - u16 metalen = ife_get_sz(skb, ife); + u16 metalen = ife_get_sz(skb, p); int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN; unsigned int skboff = 0; int new_len = skb->len + hdrm; @@ -833,25 +832,21 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, if (!ife_meta) goto drop; - spin_lock(&ife->tcf_lock); - /* XXX: we dont have a clever way of telling encode to * not repeat some of the computations that are done by * ops->presence_check... 
*/ - list_for_each_entry(e, &ife->metalist, metalist) { + list_for_each_entry_rcu(e, &p->metalist, metalist) { if (e->ops->encode) { err = e->ops->encode(skb, (void *)(ife_meta + skboff), e); } if (err < 0) { /* too corrupt to keep around if overwritten */ - spin_unlock(&ife->tcf_lock); goto drop; } skboff += err; } - spin_unlock(&ife->tcf_lock); oethh = (struct ethhdr *)skb->data; if (!is_zero_ether_addr(p->eth_src))
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 343309e..20f7f9e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c
@@ -2228,6 +2228,11 @@ static bool is_qdisc_ingress(__u32 classid) return (TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS)); } +static bool is_ingress_or_clsact(struct tcf_block *block, struct Qdisc *q) +{ + return tcf_block_shared(block) || (q && !!(q->flags & TCQ_F_INGRESS)); +} + static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -2420,6 +2425,8 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, flags |= TCA_ACT_FLAGS_NO_RTNL; if (is_qdisc_ingress(parent)) flags |= TCA_ACT_FLAGS_AT_INGRESS; + if (is_ingress_or_clsact(block, q)) + flags |= TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT; err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, flags, extack); if (err == 0) { @@ -2962,6 +2969,7 @@ static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops, tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; tcm->tcm_handle = 0; + tcm->tcm_info = 0; if (block->q) { tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex; tcm->tcm_parent = block->q->handle;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 339c664..ab364e4 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c
@@ -503,8 +503,16 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, } if (TC_H_MAJ(baseclass) == 0) { - struct Qdisc *q = tcf_block_q(tp->chain->block); + struct tcf_block *block = tp->chain->block; + struct Qdisc *q; + if (tcf_block_shared(block)) { + NL_SET_ERR_MSG(extack, + "Must specify baseclass when attaching flow filter to block"); + goto err2; + } + + q = tcf_block_q(block); baseclass = TC_H_MAKE(q->handle, baseclass); } if (TC_H_MIN(baseclass) == 0)
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index be81c10..23884ef 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c
@@ -247,8 +247,18 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, struct nlattr *tb[TCA_FW_MAX + 1]; int err; - if (!opt) - return handle ? -EINVAL : 0; /* Succeed if it is old method. */ + if (!opt) { + if (handle) + return -EINVAL; + + if (tcf_block_shared(tp->chain->block)) { + NL_SET_ERR_MSG(extack, + "Must specify mark when attaching fw filter to block"); + return -EINVAL; + } + + return 0; /* Succeed if it is old method. */ + } err = nla_parse_nested_deprecated(tb, TCA_FW_MAX, opt, fw_policy, NULL);
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index a01f14b..9efe23f 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c
@@ -391,8 +391,8 @@ static const u32 inv_sqrt_cache[REC_INV_SQRT_CACHE] = { 1239850263, 1191209601, 1147878294, 1108955788 }; -static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, - u64 target_ns, u64 rtt_est_ns); +static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust); + /* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) * @@ -2013,7 +2013,8 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) u64 delay; u32 len; - if (q->config->is_shared && now - q->last_checked_active >= q->config->sync_time) { + if (q->config->is_shared && q->rate_ns && + now - q->last_checked_active >= q->config->sync_time) { struct net_device *dev = qdisc_dev(sch); struct cake_sched_data *other_priv; u64 new_rate = q->config->rate_bps; @@ -2039,12 +2040,9 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) if (num_active_qs > 1) new_rate = div64_u64(q->config->rate_bps, num_active_qs); - /* mtu = 0 is used to only update the rate and not mess with cobalt params */ - cake_set_rate(b, new_rate, 0, 0, 0); + cake_configure_rates(sch, new_rate, true); q->last_checked_active = now; q->active_queues = num_active_qs; - q->rate_ns = b->tin_rate_ns; - q->rate_shft = b->tin_rate_shft; } begin: @@ -2361,12 +2359,10 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, b->cparams.p_dec = 1 << 20; /* 1/4096 */ } -static int cake_config_besteffort(struct Qdisc *sch) +static int cake_config_besteffort(struct Qdisc *sch, u64 rate, u32 mtu) { struct cake_sched_data *q = qdisc_priv(sch); struct cake_tin_data *b = &q->tins[0]; - u32 mtu = psched_mtu(qdisc_dev(sch)); - u64 rate = q->config->rate_bps; q->tin_cnt = 1; @@ -2380,12 +2376,10 @@ static int cake_config_besteffort(struct Qdisc *sch) return 0; } -static int cake_config_precedence(struct Qdisc *sch) +static int cake_config_precedence(struct Qdisc *sch, u64 rate, u32 mtu) { /* convert high-level (user visible) parameters into internal format */ struct cake_sched_data *q = qdisc_priv(sch); - u32 mtu = psched_mtu(qdisc_dev(sch)); - u64 rate = q->config->rate_bps; u32 quantum = 256; u32 i; @@ -2456,7 +2450,7 @@ static int cake_config_precedence(struct Qdisc *sch) * Total 12 traffic classes. */ -static int cake_config_diffserv8(struct Qdisc *sch) +static int cake_config_diffserv8(struct Qdisc *sch, u64 rate, u32 mtu) { /* Pruned list of traffic classes for typical applications: * @@ -2473,8 +2467,6 @@ static int cake_config_diffserv8(struct Qdisc *sch) */ struct cake_sched_data *q = qdisc_priv(sch); - u32 mtu = psched_mtu(qdisc_dev(sch)); - u64 rate = q->config->rate_bps; u32 quantum = 256; u32 i; @@ -2504,7 +2496,7 @@ static int cake_config_diffserv8(struct Qdisc *sch) return 0; } -static int cake_config_diffserv4(struct Qdisc *sch) +static int cake_config_diffserv4(struct Qdisc *sch, u64 rate, u32 mtu) { /* Further pruned list of traffic classes for four-class system: * @@ -2517,8 +2509,6 @@ static int cake_config_diffserv4(struct Qdisc *sch) */ struct cake_sched_data *q = qdisc_priv(sch); - u32 mtu = psched_mtu(qdisc_dev(sch)); - u64 rate = q->config->rate_bps; u32 quantum = 1024; q->tin_cnt = 4; @@ -2546,7 +2536,7 @@ static int cake_config_diffserv4(struct Qdisc *sch) return 0; } -static int cake_config_diffserv3(struct Qdisc *sch) +static int cake_config_diffserv3(struct Qdisc *sch, u64 rate, u32 mtu) { /* Simplified Diffserv structure with 3 tins. 
* Latency Sensitive (CS7, CS6, EF, VA, TOS4) @@ -2554,8 +2544,6 @@ static int cake_config_diffserv3(struct Qdisc *sch) * Low Priority (LE, CS1) */ struct cake_sched_data *q = qdisc_priv(sch); - u32 mtu = psched_mtu(qdisc_dev(sch)); - u64 rate = q->config->rate_bps; u32 quantum = 1024; q->tin_cnt = 3; @@ -2580,32 +2568,33 @@ static int cake_config_diffserv3(struct Qdisc *sch) return 0; } -static void cake_reconfigure(struct Qdisc *sch) +static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust) { + u32 mtu = likely(rate_adjust) ? 0 : psched_mtu(qdisc_dev(sch)); struct cake_sched_data *qd = qdisc_priv(sch); struct cake_sched_config *q = qd->config; int c, ft; switch (q->tin_mode) { case CAKE_DIFFSERV_BESTEFFORT: - ft = cake_config_besteffort(sch); + ft = cake_config_besteffort(sch, rate, mtu); break; case CAKE_DIFFSERV_PRECEDENCE: - ft = cake_config_precedence(sch); + ft = cake_config_precedence(sch, rate, mtu); break; case CAKE_DIFFSERV_DIFFSERV8: - ft = cake_config_diffserv8(sch); + ft = cake_config_diffserv8(sch, rate, mtu); break; case CAKE_DIFFSERV_DIFFSERV4: - ft = cake_config_diffserv4(sch); + ft = cake_config_diffserv4(sch, rate, mtu); break; case CAKE_DIFFSERV_DIFFSERV3: default: - ft = cake_config_diffserv3(sch); + ft = cake_config_diffserv3(sch, rate, mtu); break; } @@ -2616,6 +2605,14 @@ static void cake_reconfigure(struct Qdisc *sch) qd->rate_ns = qd->tins[ft].tin_rate_ns; qd->rate_shft = qd->tins[ft].tin_rate_shft; +} + +static void cake_reconfigure(struct Qdisc *sch) +{ + struct cake_sched_data *qd = qdisc_priv(sch); + struct cake_sched_config *q = qd->config; + + cake_configure_rates(sch, qd->config->rate_bps, false); if (q->buffer_config_limit) { qd->buffer_limit = q->buffer_config_limit;
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 306e046..a4b07b6 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c
@@ -115,12 +115,12 @@ static void ets_offload_change(struct Qdisc *sch) struct ets_sched *q = qdisc_priv(sch); struct tc_ets_qopt_offload qopt; unsigned int w_psum_prev = 0; - unsigned int q_psum = 0; - unsigned int q_sum = 0; unsigned int quantum; unsigned int w_psum; unsigned int weight; unsigned int i; + u64 q_psum = 0; + u64 q_sum = 0; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return; @@ -138,8 +138,12 @@ static void ets_offload_change(struct Qdisc *sch) for (i = 0; i < q->nbands; i++) { quantum = q->classes[i].quantum; - q_psum += quantum; - w_psum = quantum ? q_psum * 100 / q_sum : 0; + if (quantum) { + q_psum += quantum; + w_psum = div64_u64(q_psum * 100, q_sum); + } else { + w_psum = 0; + } weight = w_psum - w_psum_prev; w_psum_prev = w_psum;
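Widening q_psum and q_sum to u64 matters because both the running sum of quantums and the q_psum * 100 product can exceed 32 bits once per-band quantums are large, and the hfsc hunk further down (dsm widened to u64, do_div() replaced by div64_u64()) fixes the same class of truncation. A runnable demonstration of the wraparound (the quantum value is arbitrary, chosen to overflow):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t quantum = 300u * 1000 * 1000;	/* large per-band quantum */
	uint32_t q_sum32 = 0;
	uint64_t q_sum64 = 0;
	int i;

	for (i = 0; i < 16; i++) {	/* sum across the bands */
		q_sum32 += quantum;	/* wraps: 16 * 3e8 > 2^32 */
		q_sum64 += quantum;	/* exact */
	}

	/* the final prefix sum equals the total, so the weight should be
	 * exactly 100; 32-bit arithmetic wraps twice and reports nonsense */
	printf("u32: %" PRIu32 "\n", q_sum32 * 100 / q_sum32);	/* prints 6   */
	printf("u64: %" PRIu64 "\n", q_sum64 * 100 / q_sum64);	/* prints 100 */
	return 0;
}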
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 80235e8..05084c9 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c
@@ -827,6 +827,7 @@ static void fq_reset(struct Qdisc *sch) for (idx = 0; idx < FQ_BANDS; idx++) { q->band_flows[idx].new_flows.first = NULL; q->band_flows[idx].old_flows.first = NULL; + q->band_pkt_count[idx] = 0; } q->delayed = RB_ROOT; q->flows = 0;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 98ffe64..9e726c3 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c
@@ -1288,33 +1288,6 @@ static void dev_deactivate_queue(struct net_device *dev, } } -static void dev_reset_queue(struct net_device *dev, - struct netdev_queue *dev_queue, - void *_unused) -{ - struct Qdisc *qdisc; - bool nolock; - - qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); - if (!qdisc) - return; - - nolock = qdisc->flags & TCQ_F_NOLOCK; - - if (nolock) - spin_lock_bh(&qdisc->seqlock); - spin_lock_bh(qdisc_lock(qdisc)); - - qdisc_reset(qdisc); - - spin_unlock_bh(qdisc_lock(qdisc)); - if (nolock) { - clear_bit(__QDISC_STATE_MISSED, &qdisc->state); - clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); - spin_unlock_bh(&qdisc->seqlock); - } -} - static bool some_qdisc_is_busy(struct net_device *dev) { unsigned int i;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b5657ff..83b2ca2 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c
@@ -555,7 +555,7 @@ static void rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) { u64 y1, y2, dx, dy; - u32 dsm; + u64 dsm; if (isc->sm1 <= isc->sm2) { /* service curve is convex */ @@ -598,7 +598,7 @@ rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) */ dx = (y1 - y) << SM_SHIFT; dsm = isc->sm1 - isc->sm2; - do_div(dx, dsm); + dx = div64_u64(dx, dsm); /* * check if (x, y1) belongs to the 1st segment of rtsc. * if so, add the offset.
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index cc6051d..c3e18ba 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c
@@ -113,14 +113,15 @@ static void ingress_destroy(struct Qdisc *sch) { struct ingress_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); - struct bpf_mprog_entry *entry = rtnl_dereference(dev->tcx_ingress); + struct bpf_mprog_entry *entry; if (sch->parent != TC_H_INGRESS) return; tcf_block_put_ext(q->block, sch, &q->block_info); - if (entry) { + if (mini_qdisc_pair_inited(&q->miniqp)) { + entry = rtnl_dereference(dev->tcx_ingress); tcx_miniq_dec(entry); if (!tcx_entry_is_active(entry)) { tcx_entry_update(dev, NULL, true); @@ -290,10 +291,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, static void clsact_destroy(struct Qdisc *sch) { + struct bpf_mprog_entry *ingress_entry, *egress_entry; struct clsact_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); - struct bpf_mprog_entry *ingress_entry = rtnl_dereference(dev->tcx_ingress); - struct bpf_mprog_entry *egress_entry = rtnl_dereference(dev->tcx_egress); if (sch->parent != TC_H_CLSACT) return; @@ -301,7 +301,8 @@ static void clsact_destroy(struct Qdisc *sch) tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); - if (ingress_entry) { + if (mini_qdisc_pair_inited(&q->miniqp_ingress)) { + ingress_entry = rtnl_dereference(dev->tcx_ingress); tcx_miniq_dec(ingress_entry); if (!tcx_entry_is_active(ingress_entry)) { tcx_entry_update(dev, NULL, true); @@ -309,7 +310,8 @@ static void clsact_destroy(struct Qdisc *sch) } } - if (egress_entry) { + if (mini_qdisc_pair_inited(&q->miniqp_egress)) { + egress_entry = rtnl_dereference(dev->tcx_egress); tcx_miniq_dec(egress_entry); if (!tcx_entry_is_active(egress_entry)) { tcx_entry_update(dev, NULL, false);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 5de1c93..20df1c0 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c
@@ -519,8 +519,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, goto finish_segs; } - skb->data[get_random_u32_below(skb_headlen(skb))] ^= - 1<<get_random_u32_below(8); + if (skb_headlen(skb)) + skb->data[get_random_u32_below(skb_headlen(skb))] ^= + 1 << get_random_u32_below(8); } if (unlikely(q->t_len >= sch->limit)) {
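The new skb_headlen() guard prevents drawing a random index from an empty range: with no linear data, get_random_u32_below(0) has no valid result. The same shape in portable C, with rand() % len standing in for the kernel helper (a modulo by zero is equally undefined):

#include <stdio.h>
#include <stdlib.h>

/* Flip one random bit in buf, but only if there is a byte to corrupt. */
static void corrupt_one_bit(unsigned char *buf, size_t len)
{
	if (!len)
		return;		/* the guard the patch adds */
	buf[rand() % len] ^= 1u << (rand() % 8);
}

int main(void)
{
	unsigned char pkt[4] = { 0 };

	corrupt_one_bit(pkt, sizeof(pkt));
	corrupt_one_bit(NULL, 0);	/* safe: guarded before indexing */
	printf("%02x %02x %02x %02x\n", pkt[0], pkt[1], pkt[2], pkt[3]);
	return 0;
}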
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 6e4bdaa..ec4039a 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c
@@ -146,15 +146,12 @@ teql_destroy(struct Qdisc *sch) master->slaves = NEXT_SLAVE(q); if (q == master->slaves) { struct netdev_queue *txq; - spinlock_t *root_lock; txq = netdev_get_tx_queue(master->dev, 0); master->slaves = NULL; - root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc)); - spin_lock_bh(root_lock); - qdisc_reset(rtnl_dereference(txq->qdisc)); - spin_unlock_bh(root_lock); + dev_reset_queue(master->dev, + txq, NULL); } } skb_queue_purge(&dat->q); @@ -315,6 +312,7 @@ static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) if (__netif_tx_trylock(slave_txq)) { unsigned int length = qdisc_pkt_len(skb); + skb->dev = slave; if (!netif_xmit_frozen_or_stopped(slave_txq) && netdev_start_xmit(skb, slave, slave_txq, false) == NETDEV_TX_OK) {
diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index 005bfc7..94bc9c7 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c
@@ -36,24 +36,6 @@ static struct net_shaper_binding *net_shaper_binding_from_ctx(void *ctx) return &((struct net_shaper_nl_ctx *)ctx)->binding; } -static void net_shaper_lock(struct net_shaper_binding *binding) -{ - switch (binding->type) { - case NET_SHAPER_BINDING_TYPE_NETDEV: - netdev_lock(binding->netdev); - break; - } -} - -static void net_shaper_unlock(struct net_shaper_binding *binding) -{ - switch (binding->type) { - case NET_SHAPER_BINDING_TYPE_NETDEV: - netdev_unlock(binding->netdev); - break; - } -} - static struct net_shaper_hierarchy * net_shaper_hierarchy(struct net_shaper_binding *binding) { @@ -65,6 +47,21 @@ net_shaper_hierarchy(struct net_shaper_binding *binding) return NULL; } +static struct net_shaper_hierarchy * +net_shaper_hierarchy_rcu(struct net_shaper_binding *binding) +{ + /* Readers look up the device and take a ref, then take RCU lock + * later at which point netdev may have been unregistered and flushed. + * READ_ONCE() pairs with WRITE_ONCE() in net_shaper_hierarchy_setup. + */ + if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV && + READ_ONCE(binding->netdev->reg_state) <= NETREG_REGISTERED) + return READ_ONCE(binding->netdev->net_shaper_hierarchy); + + /* No other type supported yet. */ + return NULL; +} + static const struct net_shaper_ops * net_shaper_ops(struct net_shaper_binding *binding) { @@ -204,12 +201,49 @@ static int net_shaper_ctx_setup(const struct genl_info *info, int type, return 0; } +/* Like net_shaper_ctx_setup(), but for "write" handlers (never for dumps!) + * Acquires the lock protecting the hierarchy (instance lock for netdev). + */ +static int net_shaper_ctx_setup_lock(const struct genl_info *info, int type, + struct net_shaper_nl_ctx *ctx) +{ + struct net *ns = genl_info_net(info); + struct net_device *dev; + int ifindex; + + if (GENL_REQ_ATTR_CHECK(info, type)) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[type]); + dev = netdev_get_by_index_lock(ns, ifindex); + if (!dev) { + NL_SET_BAD_ATTR(info->extack, info->attrs[type]); + return -ENOENT; + } + + if (!dev->netdev_ops->net_shaper_ops) { + NL_SET_BAD_ATTR(info->extack, info->attrs[type]); + netdev_unlock(dev); + return -EOPNOTSUPP; + } + + ctx->binding.type = NET_SHAPER_BINDING_TYPE_NETDEV; + ctx->binding.netdev = dev; + return 0; +} + static void net_shaper_ctx_cleanup(struct net_shaper_nl_ctx *ctx) { if (ctx->binding.type == NET_SHAPER_BINDING_TYPE_NETDEV) netdev_put(ctx->binding.netdev, &ctx->dev_tracker); } +static void net_shaper_ctx_cleanup_unlock(struct net_shaper_nl_ctx *ctx) +{ + if (ctx->binding.type == NET_SHAPER_BINDING_TYPE_NETDEV) + netdev_unlock(ctx->binding.netdev); +} + static u32 net_shaper_handle_to_index(const struct net_shaper_handle *handle) { return FIELD_PREP(NET_SHAPER_SCOPE_MASK, handle->scope) | @@ -251,9 +285,10 @@ static struct net_shaper * net_shaper_lookup(struct net_shaper_binding *binding, const struct net_shaper_handle *handle) { - struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); u32 index = net_shaper_handle_to_index(handle); + struct net_shaper_hierarchy *hierarchy; + hierarchy = net_shaper_hierarchy_rcu(binding); if (!hierarchy || xa_get_mark(&hierarchy->shapers, index, NET_SHAPER_NOT_VALID)) return NULL; @@ -262,7 +297,7 @@ net_shaper_lookup(struct net_shaper_binding *binding, } /* Allocate on demand the per device shaper's hierarchy container. 
- * Called under the net shaper lock + * Called under the lock protecting the hierarchy (instance lock for netdev) */ static struct net_shaper_hierarchy * net_shaper_hierarchy_setup(struct net_shaper_binding *binding) @@ -681,6 +716,22 @@ void net_shaper_nl_post_doit(const struct genl_split_ops *ops, net_shaper_generic_post(info); } +int net_shaper_nl_pre_doit_write(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)info->ctx; + + BUILD_BUG_ON(sizeof(*ctx) > sizeof(info->ctx)); + + return net_shaper_ctx_setup_lock(info, NET_SHAPER_A_IFINDEX, ctx); +} + +void net_shaper_nl_post_doit_write(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + net_shaper_ctx_cleanup_unlock((struct net_shaper_nl_ctx *)info->ctx); +} + int net_shaper_nl_pre_dumpit(struct netlink_callback *cb) { struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; @@ -759,11 +810,7 @@ int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) if (ret) goto free_msg; - ret = genlmsg_reply(msg, info); - if (ret) - goto free_msg; - - return 0; + return genlmsg_reply(msg, info); free_msg: nlmsg_free(msg); @@ -782,17 +829,19 @@ int net_shaper_nl_get_dumpit(struct sk_buff *skb, /* Don't error out dumps performed before any set operation. */ binding = net_shaper_binding_from_ctx(ctx); - hierarchy = net_shaper_hierarchy(binding); - if (!hierarchy) - return 0; rcu_read_lock(); + hierarchy = net_shaper_hierarchy_rcu(binding); + if (!hierarchy) + goto out_unlock; + for (; (shaper = xa_find(&hierarchy->shapers, &ctx->start_index, U32_MAX, XA_PRESENT)); ctx->start_index++) { ret = net_shaper_fill_one(skb, binding, shaper, info); if (ret) break; } +out_unlock: rcu_read_unlock(); return ret; @@ -810,45 +859,38 @@ int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info) binding = net_shaper_binding_from_ctx(info->ctx); - net_shaper_lock(binding); ret = net_shaper_parse_info(binding, info->attrs, info, &shaper, &exists); if (ret) - goto unlock; + return ret; if (!exists) net_shaper_default_parent(&shaper.handle, &shaper.parent); hierarchy = net_shaper_hierarchy_setup(binding); - if (!hierarchy) { - ret = -ENOMEM; - goto unlock; - } + if (!hierarchy) + return -ENOMEM; /* The 'set' operation can't create node-scope shapers. 
*/ handle = shaper.handle; if (handle.scope == NET_SHAPER_SCOPE_NODE && - !net_shaper_lookup(binding, &handle)) { - ret = -ENOENT; - goto unlock; - } + !net_shaper_lookup(binding, &handle)) + return -ENOENT; ret = net_shaper_pre_insert(binding, &handle, info->extack); if (ret) - goto unlock; + return ret; ops = net_shaper_ops(binding); ret = ops->set(binding, &shaper, info->extack); if (ret) { net_shaper_rollback(binding); - goto unlock; + return ret; } net_shaper_commit(binding, 1, &shaper); -unlock: - net_shaper_unlock(binding); - return ret; + return 0; } static int __net_shaper_delete(struct net_shaper_binding *binding, @@ -1076,35 +1118,26 @@ int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info) binding = net_shaper_binding_from_ctx(info->ctx); - net_shaper_lock(binding); ret = net_shaper_parse_handle(info->attrs[NET_SHAPER_A_HANDLE], info, &handle); if (ret) - goto unlock; + return ret; hierarchy = net_shaper_hierarchy(binding); - if (!hierarchy) { - ret = -ENOENT; - goto unlock; - } + if (!hierarchy) + return -ENOENT; shaper = net_shaper_lookup(binding, &handle); - if (!shaper) { - ret = -ENOENT; - goto unlock; - } + if (!shaper) + return -ENOENT; if (handle.scope == NET_SHAPER_SCOPE_NODE) { ret = net_shaper_pre_del_node(binding, shaper, info->extack); if (ret) - goto unlock; + return ret; } - ret = __net_shaper_delete(binding, shaper, info->extack); - -unlock: - net_shaper_unlock(binding); - return ret; + return __net_shaper_delete(binding, shaper, info->extack); } static int net_shaper_group_send_reply(struct net_shaper_binding *binding, @@ -1153,21 +1186,17 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) if (!net_shaper_ops(binding)->group) return -EOPNOTSUPP; - net_shaper_lock(binding); leaves_count = net_shaper_list_len(info, NET_SHAPER_A_LEAVES); if (!leaves_count) { NL_SET_BAD_ATTR(info->extack, info->attrs[NET_SHAPER_A_LEAVES]); - ret = -EINVAL; - goto unlock; + return -EINVAL; } leaves = kcalloc(leaves_count, sizeof(struct net_shaper) + sizeof(struct net_shaper *), GFP_KERNEL); - if (!leaves) { - ret = -ENOMEM; - goto unlock; - } + if (!leaves) + return -ENOMEM; old_nodes = (void *)&leaves[leaves_count]; ret = net_shaper_parse_node(binding, info->attrs, info, &node); @@ -1244,9 +1273,6 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) free_leaves: kfree(leaves); - -unlock: - net_shaper_unlock(binding); return ret; free_msg: @@ -1313,10 +1339,7 @@ int net_shaper_nl_cap_get_doit(struct sk_buff *skb, struct genl_info *info) if (ret) goto free_msg; - ret = genlmsg_reply(msg, info); - if (ret) - goto free_msg; - return 0; + return genlmsg_reply(msg, info); free_msg: nlmsg_free(msg); @@ -1359,14 +1382,12 @@ static void net_shaper_flush(struct net_shaper_binding *binding) if (!hierarchy) return; - net_shaper_lock(binding); xa_lock(&hierarchy->shapers); xa_for_each(&hierarchy->shapers, index, cur) { __xa_erase(&hierarchy->shapers, index); kfree(cur); } xa_unlock(&hierarchy->shapers); - net_shaper_unlock(binding); kfree(hierarchy); }
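The shaper rework above replaces the driver-local net_shaper_lock()/net_shaper_unlock() helpers with the netdev instance lock, taken once in the new pre_doit_write handler and dropped in post_doit_write, while lock-free dumpers observe the hierarchy pointer through a READ_ONCE()/WRITE_ONCE() pair. A minimal sketch of that publication pattern follows; foo/foo_hierarchy and the helper names are hypothetical, and the sketch assumes (as shaper.c does) that the published object's contents are protected by their own locking, since READ_ONCE()/WRITE_ONCE() only prevent torn accesses:

    /* Writer side: runs with the instance lock held. */
    static struct foo_hierarchy *foo_hierarchy_setup(struct foo *obj)
    {
            struct foo_hierarchy *h = obj->hierarchy;

            if (h)
                    return h;

            h = kzalloc(sizeof(*h), GFP_KERNEL);
            if (!h)
                    return NULL;

            /* Pairs with READ_ONCE() in foo_hierarchy_peek(). */
            WRITE_ONCE(obj->hierarchy, h);
            return h;
    }

    /* Reader side: may run without the instance lock, e.g. from a dump. */
    static struct foo_hierarchy *foo_hierarchy_peek(struct foo *obj)
    {
            /* May observe NULL or the newly published hierarchy. */
            return READ_ONCE(obj->hierarchy);
    }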
diff --git a/net/shaper/shaper_nl_gen.c b/net/shaper/shaper_nl_gen.c index e8cccc4..9b29be3 100644 --- a/net/shaper/shaper_nl_gen.c +++ b/net/shaper/shaper_nl_gen.c
@@ -99,27 +99,27 @@ static const struct genl_split_ops net_shaper_nl_ops[] = { }, { .cmd = NET_SHAPER_CMD_SET, - .pre_doit = net_shaper_nl_pre_doit, + .pre_doit = net_shaper_nl_pre_doit_write, .doit = net_shaper_nl_set_doit, - .post_doit = net_shaper_nl_post_doit, + .post_doit = net_shaper_nl_post_doit_write, .policy = net_shaper_set_nl_policy, .maxattr = NET_SHAPER_A_IFINDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { .cmd = NET_SHAPER_CMD_DELETE, - .pre_doit = net_shaper_nl_pre_doit, + .pre_doit = net_shaper_nl_pre_doit_write, .doit = net_shaper_nl_delete_doit, - .post_doit = net_shaper_nl_post_doit, + .post_doit = net_shaper_nl_post_doit_write, .policy = net_shaper_delete_nl_policy, .maxattr = NET_SHAPER_A_IFINDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { .cmd = NET_SHAPER_CMD_GROUP, - .pre_doit = net_shaper_nl_pre_doit, + .pre_doit = net_shaper_nl_pre_doit_write, .doit = net_shaper_nl_group_doit, - .post_doit = net_shaper_nl_post_doit, + .post_doit = net_shaper_nl_post_doit_write, .policy = net_shaper_group_nl_policy, .maxattr = NET_SHAPER_A_LEAVES, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
diff --git a/net/shaper/shaper_nl_gen.h b/net/shaper/shaper_nl_gen.h index ec41c90..42c46c5 100644 --- a/net/shaper/shaper_nl_gen.h +++ b/net/shaper/shaper_nl_gen.h
@@ -18,12 +18,17 @@ extern const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGH int net_shaper_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_pre_doit_write(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); int net_shaper_nl_cap_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); void net_shaper_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); void +net_shaper_nl_post_doit_write(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); +void net_shaper_nl_cap_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); int net_shaper_nl_pre_dumpit(struct netlink_callback *cb);
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 7072556..1a56509 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c
@@ -124,12 +124,21 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req) + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)) { struct smc_sock *smc; struct sock *child; - smc = smc_clcsock_user_data(sk); + rcu_read_lock(); + smc = smc_clcsock_user_data_rcu(sk); + if (!smc || !refcount_inc_not_zero(&smc->sk.sk_refcnt)) { + rcu_read_unlock(); + smc = NULL; + goto drop; + } + rcu_read_unlock(); if (READ_ONCE(sk->sk_ack_backlog) + atomic_read(&smc->queued_smc_hs) > sk->sk_max_ack_backlog) @@ -142,7 +151,7 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk, /* passthrough to original syn recv sock fct */ child = smc->ori_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash, - own_req); + own_req, opt_child_init); /* child must not inherit smc or its ops */ if (child) { rcu_assign_sk_user_data(child, NULL); @@ -151,11 +160,14 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk, if (inet_csk(child)->icsk_af_ops == inet_csk(sk)->icsk_af_ops) inet_csk(child)->icsk_af_ops = smc->ori_af_ops; } + sock_put(&smc->sk); return child; drop: dst_release(dst); tcp_listendrop(sk); + if (smc) + sock_put(&smc->sk); return NULL; } @@ -252,7 +264,7 @@ static void smc_fback_restore_callbacks(struct smc_sock *smc) struct sock *clcsk = smc->clcsock->sk; write_lock_bh(&clcsk->sk_callback_lock); - clcsk->sk_user_data = NULL; + rcu_assign_sk_user_data(clcsk, NULL); smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change); smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready); @@ -900,7 +912,7 @@ static void smc_fback_replace_callbacks(struct smc_sock *smc) struct sock *clcsk = smc->clcsock->sk; write_lock_bh(&clcsk->sk_callback_lock); - clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + __rcu_assign_sk_user_data_with_flags(clcsk, smc, SK_USER_DATA_NOCOPY); smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change, &smc->clcsk_state_change); @@ -2663,8 +2675,8 @@ int smc_listen(struct socket *sock, int backlog) * smc-specific sk_data_ready function */ write_lock_bh(&smc->clcsock->sk->sk_callback_lock); - smc->clcsock->sk->sk_user_data = - (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + __rcu_assign_sk_user_data_with_flags(smc->clcsock->sk, smc, + SK_USER_DATA_NOCOPY); smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready, smc_clcsock_data_ready, &smc->clcsk_data_ready); write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); @@ -2685,10 +2697,11 @@ int smc_listen(struct socket *sock, int backlog) write_lock_bh(&smc->clcsock->sk->sk_callback_lock); smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, &smc->clcsk_data_ready); - smc->clcsock->sk->sk_user_data = NULL; + rcu_assign_sk_user_data(smc->clcsock->sk, NULL); write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); goto out; } + sock_set_flag(sk, SOCK_RCU_FREE); sk->sk_max_ack_backlog = backlog; sk->sk_ack_backlog = 0; sk->sk_state = SMC_LISTEN;
diff --git a/net/smc/smc.h b/net/smc/smc.h index 9e6af72..52145df 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h
@@ -346,6 +346,11 @@ static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk) ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY); } +static inline struct smc_sock *smc_clcsock_user_data_rcu(const struct sock *clcsk) +{ + return (struct smc_sock *)rcu_dereference_sk_user_data(clcsk); +} + /* save target_cb in saved_cb, and replace target_cb with new_cb */ static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *), void (*new_cb)(struct sock *),
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 10219f55..bb0313e 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c
@@ -218,7 +218,7 @@ int smc_close_active(struct smc_sock *smc) write_lock_bh(&smc->clcsock->sk->sk_callback_lock); smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, &smc->clcsk_data_ready); - smc->clcsock->sk->sk_user_data = NULL; + rcu_assign_sk_user_data(smc->clcsock->sk, NULL); write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); }
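The common thread in the af_smc hunks: sk_user_data on the clcsock now behaves as an RCU-protected backpointer. It is only assigned and cleared through rcu_assign_sk_user_data() under sk_callback_lock, the listening smc sock is freed via RCU (SOCK_RCU_FREE), and the TCP syn-recv callback dereferences it under rcu_read_lock() and takes a reference with refcount_inc_not_zero() before leaving the critical section. A condensed sketch of the reader side, using a helper name (smc_listener_get) that does not exist in the patch:

    /* Borrow the RCU-protected backpointer stored in sk_user_data. */
    static struct smc_sock *smc_listener_get(const struct sock *clcsk)
    {
            struct smc_sock *smc;

            rcu_read_lock();
            smc = rcu_dereference_sk_user_data(clcsk);
            /* The listener may be closing; use it only if a ref is obtained. */
            if (smc && !refcount_inc_not_zero(&smc->sk.sk_refcnt))
                    smc = NULL;
            rcu_read_unlock();

            return smc;     /* caller releases with sock_put(&smc->sk) */
    }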
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index d833e36..c1d9b92 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c
@@ -135,9 +135,16 @@ static void smc_rx_pipe_buf_release(struct pipe_inode_info *pipe, sock_put(sk); } +static bool smc_rx_pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + /* smc_spd_priv in buf->private is not shareable; disallow cloning. */ + return false; +} + static const struct pipe_buf_operations smc_pipe_ops = { .release = smc_rx_pipe_buf_release, - .get = generic_pipe_buf_get + .get = smc_rx_pipe_buf_get, }; static void smc_rx_spd_release(struct splice_pipe_desc *spd,
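For context on the smc_rx change: a pipe buffer's .get callback is what tee() and splice() use when duplicating a buffer into a second pipe, and generic_pipe_buf_get() only takes an extra page reference. That duplication would also copy buf->private, the smc_spd_priv pointer whose lifetime the .release callback manages, so two pipes would end up releasing the same private data. Making .get return false causes the duplication attempt to fail cleanly instead of sharing unshareable state.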
diff --git a/net/socket.c b/net/socket.c index 136b98c..0595218 100644 --- a/net/socket.c +++ b/net/socket.c
@@ -674,7 +674,7 @@ static void __sock_release(struct socket *sock, struct inode *inode) iput(SOCK_INODE(sock)); return; } - sock->file = NULL; + WRITE_ONCE(sock->file, NULL); } /**
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 237f67a..ef8b7e8 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c
@@ -1062,14 +1062,25 @@ static int cache_release(struct inode *inode, struct file *filp, struct cache_reader *rp = filp->private_data; if (rp) { + struct cache_request *rq = NULL; + spin_lock(&queue_lock); if (rp->offset) { struct cache_queue *cq; - for (cq= &rp->q; &cq->list != &cd->queue; - cq = list_entry(cq->list.next, struct cache_queue, list)) + for (cq = &rp->q; &cq->list != &cd->queue; + cq = list_entry(cq->list.next, + struct cache_queue, list)) if (!cq->reader) { - container_of(cq, struct cache_request, q) - ->readers--; + struct cache_request *cr = + container_of(cq, + struct cache_request, q); + cr->readers--; + if (cr->readers == 0 && + !test_bit(CACHE_PENDING, + &cr->item->flags)) { + list_del(&cr->q.list); + rq = cr; + } break; } rp->offset = 0; @@ -1077,9 +1088,14 @@ static int cache_release(struct inode *inode, struct file *filp, list_del(&rp->q.list); spin_unlock(&queue_lock); + if (rq) { + cache_put(rq->item, cd); + kfree(rq->buf); + kfree(rq); + } + filp->private_data = NULL; kfree(rp); - } if (filp->f_mode & FMODE_WRITE) { atomic_dec(&cd->writers);
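Beyond the refcount bookkeeping, the cache_release() fix follows the standard defer-free shape: the orphaned request is only unlinked while queue_lock is held, and the cache_put()/kfree() calls run after the lock is dropped, keeping the heavier release work out of the spinlock. A generic sketch of that shape with made-up types, not the sunrpc code itself:

    struct item {
            struct list_head list;
            int readers;
            bool pending;
            void *buf;
    };

    static void release_orphan(struct list_head *queue, spinlock_t *lock)
    {
            struct item *it, *victim = NULL;

            spin_lock(lock);
            list_for_each_entry(it, queue, list) {
                    if (!it->readers && !it->pending) {
                            list_del(&it->list);    /* unlink under the lock */
                            victim = it;
                            break;
                    }
            }
            spin_unlock(lock);

            if (victim) {                           /* free outside the lock */
                    kfree(victim->buf);
                    kfree(victim);
            }
    }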
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 15bbf953..b51a162 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1362,7 +1362,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed) needed += RPCRDMA_MAX_RECV_BATCH; if (atomic_inc_return(&ep->re_receiving) > 1) - goto out; + goto out_dec; /* fast path: all needed reps can be found on the free list */ wr = NULL; @@ -1385,7 +1385,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed) ++count; } if (!wr) - goto out; + goto out_dec; rc = ib_post_recv(ep->re_id->qp, wr, (const struct ib_recv_wr **)&bad_wr); @@ -1400,9 +1400,10 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed) --count; } } + +out_dec: if (atomic_dec_return(&ep->re_receiving) > 0) complete(&ep->re_done); - out: trace_xprtrdma_post_recvs(r_xprt, count); ep->re_receive_count += count;
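The verbs.c fix is entirely about exit paths: re_receiving is incremented unconditionally on entry, so every way out — including the "another poster is active" and "nothing to post" early returns — must pass through the decrement, otherwise the counter stays elevated and whoever waits on re_done blocks forever. The control flow reduced to a skeleton; struct ep abbreviates rpcrdma_ep, and build_recv_list()/post_to_hw() are hypothetical stand-ins:

    static void post_recvs(struct ep *ep, int needed)
    {
            if (atomic_inc_return(&ep->re_receiving) > 1)
                    goto out_dec;           /* another poster is active */

            if (!build_recv_list(ep, needed))
                    goto out_dec;           /* nothing to post */

            post_to_hw(ep);

    out_dec:
            /* Wake a waiter that raced with us. */
            if (atomic_dec_return(&ep->re_receiving) > 0)
                    complete(&ep->re_done);
    }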
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 8e129cf..253c72d 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c
@@ -348,7 +348,8 @@ static bool tipc_service_insert_publ(struct net *net, /* Return if the publication already exists */ list_for_each_entry(_p, &sr->all_publ, all_publ) { - if (_p->key == key && (!_p->sk.node || _p->sk.node == node)) { + if (_p->key == key && _p->sk.ref == p->sk.ref && + (!_p->sk.node || _p->sk.node == node)) { pr_debug("Failed to bind duplicate %u,%u,%u/%u:%u/%u\n", p->sr.type, p->sr.lower, p->sr.upper, node, p->sk.ref, key); @@ -388,7 +389,8 @@ static struct publication *tipc_service_remove_publ(struct service_range *r, u32 node = sk->node; list_for_each_entry(p, &r->all_publ, all_publ) { - if (p->key != key || (node && node != p->sk.node)) + if (p->key != key || p->sk.ref != sk->ref || + (node && node != p->sk.node)) continue; list_del(&p->all_publ); list_del(&p->local_publ);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 4c618c2..9329919 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c
@@ -2233,6 +2233,8 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb, if (skb_queue_empty(&sk->sk_write_queue)) break; get_random_bytes(&delay, 2); + if (tsk->conn_timeout < 4) + tsk->conn_timeout = 4; delay %= (tsk->conn_timeout / 4); delay = msecs_to_jiffies(delay + 100); sk_reset_timer(sk, &sk->sk_timer, jiffies + delay);
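The tipc hunk guards a modulo operation: when the connect timeout is below 4, tsk->conn_timeout / 4 truncates to zero and "delay %= 0" is a divide-by-zero. Clamping the timeout to 4 keeps the divisor at least 1. An equivalent, more local formulation of the same guard (a sketch, not the committed fix):

    u16 rnd;
    unsigned long delay;

    get_random_bytes(&rnd, sizeof(rnd));
    delay = rnd % max_t(u32, tsk->conn_timeout / 4, 1);
    delay = msecs_to_jiffies(delay + 100);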
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 7877c23..dd9dda7 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c
@@ -246,6 +246,7 @@ static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); atomic_inc(&ctx->decrypt_pending); + __skb_queue_purge(&ctx->async_hold); return ctx->async_wait.err; } @@ -2225,7 +2226,6 @@ int tls_sw_recvmsg(struct sock *sk, /* Wait for all previously submitted records to be decrypted */ ret = tls_decrypt_async_wait(ctx); - __skb_queue_purge(&ctx->async_hold); if (ret) { if (err >= 0 || err == -EINPROGRESS) @@ -2533,7 +2533,7 @@ void tls_sw_cancel_work_tx(struct tls_context *tls_ctx) set_bit(BIT_TX_CLOSING, &ctx->tx_bitmask); set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask); - cancel_delayed_work_sync(&ctx->tx_work.work); + disable_delayed_work_sync(&ctx->tx_work.work); } void tls_sw_release_resources_tx(struct sock *sk)
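Several conversions in this batch (tls_sw, espintcp, xfrm nat-keepalive, xfrm policy rehashing) replace cancel_work_sync()/cancel_delayed_work_sync() with the disable_*() variants. The difference matters on teardown: cancel only removes the currently pending instance, so a racing path can requeue the work right after it was "cancelled", whereas disable additionally marks the item so subsequent queueing attempts are rejected until it is re-enabled. Sketched on the tls case:

    /* Teardown: after this returns, tx_work neither runs nor can be queued. */
    disable_delayed_work_sync(&ctx->tx_work.work);

    /* A racing transmit path trying to kick the work later is a no-op: */
    schedule_delayed_work(&ctx->tx_work.work, 1);   /* returns false, ignored */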
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3756a93..b23c33d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c
@@ -1785,7 +1785,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr_unsized *uad __skb_queue_tail(&other->sk_receive_queue, skb); spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); - other->sk_data_ready(other); + READ_ONCE(other->sk_data_ready)(other); sock_put(other); return 0; @@ -1958,6 +1958,8 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) { scm->fp = scm_fp_dup(UNIXCB(skb).fp); + + unix_peek_fpl(scm->fp); } static void unix_destruct_scm(struct sk_buff *skb) @@ -2278,7 +2280,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, scm_stat_add(other, skb); skb_queue_tail(&other->sk_receive_queue, skb); unix_state_unlock(other); - other->sk_data_ready(other); + READ_ONCE(other->sk_data_ready)(other); sock_put(other); scm_destroy(&scm); return len; @@ -2351,7 +2353,7 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other, sk_send_sigurg(other); unix_state_unlock(other); - other->sk_data_ready(other); + READ_ONCE(other->sk_data_ready)(other); return 0; out_unlock: @@ -2477,7 +2479,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); - other->sk_data_ready(other); + READ_ONCE(other->sk_data_ready)(other); sent += size; }
diff --git a/net/unix/af_unix.h b/net/unix/af_unix.h index c4f1b2d..8119dbe 100644 --- a/net/unix/af_unix.h +++ b/net/unix/af_unix.h
@@ -29,6 +29,7 @@ void unix_del_edges(struct scm_fp_list *fpl); void unix_update_edges(struct unix_sock *receiver); int unix_prepare_fpl(struct scm_fp_list *fpl); void unix_destroy_fpl(struct scm_fp_list *fpl); +void unix_peek_fpl(struct scm_fp_list *fpl); void unix_schedule_gc(struct user_struct *user); /* SOCK_DIAG */
diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 816e8fa..a7967a3 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c
@@ -318,6 +318,25 @@ void unix_destroy_fpl(struct scm_fp_list *fpl) unix_free_vertices(fpl); } +static bool gc_in_progress; +static seqcount_t unix_peek_seq = SEQCNT_ZERO(unix_peek_seq); + +void unix_peek_fpl(struct scm_fp_list *fpl) +{ + static DEFINE_SPINLOCK(unix_peek_lock); + + if (!fpl || !fpl->count_unix) + return; + + if (!READ_ONCE(gc_in_progress)) + return; + + /* Invalidate the final refcnt check in unix_vertex_dead(). */ + spin_lock(&unix_peek_lock); + raw_write_seqcount_barrier(&unix_peek_seq); + spin_unlock(&unix_peek_lock); +} + static bool unix_vertex_dead(struct unix_vertex *vertex) { struct unix_edge *edge; @@ -351,6 +370,36 @@ static bool unix_vertex_dead(struct unix_vertex *vertex) return true; } +static LIST_HEAD(unix_visited_vertices); +static unsigned long unix_vertex_grouped_index = UNIX_VERTEX_INDEX_MARK2; + +static bool unix_scc_dead(struct list_head *scc, bool fast) +{ + struct unix_vertex *vertex; + bool scc_dead = true; + unsigned int seq; + + seq = read_seqcount_begin(&unix_peek_seq); + + list_for_each_entry_reverse(vertex, scc, scc_entry) { + /* Don't restart DFS from this vertex. */ + list_move_tail(&vertex->entry, &unix_visited_vertices); + + /* Mark vertex as off-stack for __unix_walk_scc(). */ + if (!fast) + vertex->index = unix_vertex_grouped_index; + + if (scc_dead) + scc_dead = unix_vertex_dead(vertex); + } + + /* If MSG_PEEK intervened, defer this SCC to the next round. */ + if (read_seqcount_retry(&unix_peek_seq, seq)) + return false; + + return scc_dead; +} + static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist) { struct unix_vertex *vertex; @@ -404,9 +453,6 @@ static bool unix_scc_cyclic(struct list_head *scc) return false; } -static LIST_HEAD(unix_visited_vertices); -static unsigned long unix_vertex_grouped_index = UNIX_VERTEX_INDEX_MARK2; - static unsigned long __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_index, struct sk_buff_head *hitlist) @@ -474,9 +520,7 @@ static unsigned long __unix_walk_scc(struct unix_vertex *vertex, } if (vertex->index == vertex->scc_index) { - struct unix_vertex *v; struct list_head scc; - bool scc_dead = true; /* SCC finalised. * @@ -485,18 +529,7 @@ static unsigned long __unix_walk_scc(struct unix_vertex *vertex, */ __list_cut_position(&scc, &vertex_stack, &vertex->scc_entry); - list_for_each_entry_reverse(v, &scc, scc_entry) { - /* Don't restart DFS from this vertex in unix_walk_scc(). */ - list_move_tail(&v->entry, &unix_visited_vertices); - - /* Mark vertex as off-stack. */ - v->index = unix_vertex_grouped_index; - - if (scc_dead) - scc_dead = unix_vertex_dead(v); - } - - if (scc_dead) { + if (unix_scc_dead(&scc, false)) { unix_collect_skb(&scc, hitlist); } else { if (unix_vertex_max_scc_index < vertex->scc_index) @@ -550,19 +583,11 @@ static void unix_walk_scc_fast(struct sk_buff_head *hitlist) while (!list_empty(&unix_unvisited_vertices)) { struct unix_vertex *vertex; struct list_head scc; - bool scc_dead = true; vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry); list_add(&scc, &vertex->scc_entry); - list_for_each_entry_reverse(vertex, &scc, scc_entry) { - list_move_tail(&vertex->entry, &unix_visited_vertices); - - if (scc_dead) - scc_dead = unix_vertex_dead(vertex); - } - - if (scc_dead) { + if (unix_scc_dead(&scc, true)) { cyclic_sccs--; unix_collect_skb(&scc, hitlist); } @@ -577,8 +602,6 @@ static void unix_walk_scc_fast(struct sk_buff_head *hitlist) cyclic_sccs ? 
UNIX_GRAPH_CYCLIC : UNIX_GRAPH_NOT_CYCLIC); } -static bool gc_in_progress; - static void unix_gc(struct work_struct *work) { struct sk_buff_head hitlist;
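The af_unix garbage-collector change closes a race between MSG_PEEK — which duplicates in-flight fds and bumps their refcounts without taking the GC lock — and the final refcount comparison in unix_vertex_dead(). The peek side issues a write-seqcount barrier; unix_scc_dead() samples the seqcount before scanning and, if it changed, distrusts the result and defers the SCC to the next GC round. The core shape, stripped of the graph walk; struct obj and compute_liveness() are placeholders:

    static seqcount_t peek_seq = SEQCNT_ZERO(peek_seq);
    static DEFINE_SPINLOCK(peek_lock);

    /* Writer fast path: flip the sequence so concurrent readers retry. */
    static void note_peek(void)
    {
            spin_lock(&peek_lock);          /* serialize writers */
            raw_write_seqcount_barrier(&peek_seq);
            spin_unlock(&peek_lock);
    }

    /* Reader: trust the computed answer only if no peek intervened. */
    static bool object_is_dead(struct obj *o)
    {
            unsigned int seq = read_seqcount_begin(&peek_seq);
            bool dead = compute_liveness(o);        /* refcount heuristic */

            if (read_seqcount_retry(&peek_seq, seq))
                    return false;   /* stale: treat as live, retry later */
            return dead;
    }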
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9880756d..d912ed2 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c
@@ -90,16 +90,20 @@ * * - /proc/sys/net/vsock/ns_mode (read-only) reports the current namespace's * mode, which is set at namespace creation and immutable thereafter. - * - /proc/sys/net/vsock/child_ns_mode (writable) controls what mode future + * - /proc/sys/net/vsock/child_ns_mode (write-once) controls what mode future * child namespaces will inherit when created. The initial value matches * the namespace's own ns_mode. * * Changing child_ns_mode only affects newly created namespaces, not the * current namespace or existing children. A "local" namespace cannot set - * child_ns_mode to "global". At namespace creation, ns_mode is inherited - * from the parent's child_ns_mode. + * child_ns_mode to "global". child_ns_mode is write-once, so that it may be + * configured and locked down by a namespace manager. Writing a different + * value after the first write returns -EBUSY. At namespace creation, ns_mode + * is inherited from the parent's child_ns_mode. * - * The init_net mode is "global" and cannot be modified. + * The init_net mode is "global" and cannot be modified. The init_net + * child_ns_mode is also write-once, so an init process (e.g. systemd) can + * set it to "local" to ensure all new namespaces inherit local mode. * * The modes affect the allocation and accessibility of CIDs as follows: * @@ -2825,7 +2829,7 @@ static int vsock_net_mode_string(const struct ctl_table *table, int write, if (write) return -EPERM; - net = current->nsproxy->net_ns; + net = container_of(table->data, struct net, vsock.mode); return __vsock_net_mode_string(table, write, buffer, lenp, ppos, vsock_net_mode(net), NULL); @@ -2838,7 +2842,7 @@ static int vsock_net_child_mode_string(const struct ctl_table *table, int write, struct net *net; int ret; - net = current->nsproxy->net_ns; + net = container_of(table->data, struct net, vsock.child_ns_mode); ret = __vsock_net_mode_string(table, write, buffer, lenp, ppos, vsock_net_child_mode(net), &new_mode); @@ -2853,7 +2857,8 @@ static int vsock_net_child_mode_string(const struct ctl_table *table, int write, new_mode == VSOCK_NET_MODE_GLOBAL) return -EPERM; - vsock_net_set_child_mode(net, new_mode); + if (!vsock_net_set_child_mode(net, new_mode)) + return -EBUSY; } return 0; @@ -2923,6 +2928,7 @@ static void vsock_net_init(struct net *net) net->vsock.mode = vsock_net_child_mode(current->nsproxy->net_ns); net->vsock.child_ns_mode = net->vsock.mode; + net->vsock.child_ns_mode_locked = 0; } static __net_init int vsock_sysctl_init_net(struct net *net)
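Note the sysctl-handler fix folded into the vsock patch: resolving the namespace via current->nsproxy->net_ns trusts the caller's context, which can differ from the netns that owns the table being accessed. Since each per-netns table registers data pointing at a field inside its struct net, container_of() recovers the owning net unambiguously. The pattern, with a made-up per-namespace struct:

    struct myns_state {
            int mode;       /* ctl_table.data points here at registration */
    };

    static int mode_handler(const struct ctl_table *table, int write,
                            void *buffer, size_t *lenp, loff_t *ppos)
    {
            struct myns_state *st =
                    container_of(table->data, struct myns_state, mode);

            /* operate on st; never consult current->nsproxy here */
            return 0;
    }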
diff --git a/net/wireless/core.c b/net/wireless/core.c index 4b0645d..28ca429 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c
@@ -1211,6 +1211,7 @@ void wiphy_unregister(struct wiphy *wiphy) /* this has nothing to do now but make sure it's gone */ cancel_work_sync(&rdev->wiphy_work); + cancel_work_sync(&rdev->rfkill_block); cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk);
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 44bd88c..50e8e19 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c
@@ -664,6 +664,7 @@ void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev) } spin_unlock_bh(&wdev->pmsr_lock); + cancel_work_sync(&wdev->pmsr_free_wk); if (found) cfg80211_pmsr_process_abort(wdev);
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index 326faea..c85eaa5 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c
@@ -239,14 +239,14 @@ int ieee80211_radiotap_iterator_next( default: if (!iterator->current_namespace || iterator->_arg_index >= iterator->current_namespace->n_bits) { - if (iterator->current_namespace == &radiotap_ns) - return -ENOENT; align = 0; } else { align = iterator->current_namespace->align_size[iterator->_arg_index].align; size = iterator->current_namespace->align_size[iterator->_arg_index].size; } if (!align) { + if (iterator->current_namespace == &radiotap_ns) + return -ENOENT; /* skip all subsequent data */ iterator->_arg = iterator->_next_ns_data; /* give up on this namespace */
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 55783a7..5a70a01 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c
@@ -683,7 +683,7 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev, idx = erq->flags & IW_ENCODE_INDEX; if (cipher == WLAN_CIPHER_SUITE_AES_CMAC) { - if (idx < 4 || idx > 5) { + if (idx < 5 || idx > 6) { idx = wdev->wext.default_mgmt_key; if (idx < 0) return -EINVAL;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index b981a48..e47ebd8 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c
@@ -34,6 +34,10 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) struct sk_buff *skbo, *skbn = skb; struct x25_sock *x25 = x25_sk(sk); + /* make sure we don't overflow */ + if (x25->fraglen + skb->len > USHRT_MAX) + return 1; + if (more) { x25->fraglen += skb->len; skb_queue_tail(&x25->fragment_queue, skb); @@ -44,10 +48,9 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) if (x25->fraglen > 0) { /* End of fragment */ int len = x25->fraglen + skb->len; - if ((skbn = alloc_skb(len, GFP_ATOMIC)) == NULL){ - kfree_skb(skb); + skbn = alloc_skb(len, GFP_ATOMIC); + if (!skbn) return 1; - } skb_queue_tail(&x25->fragment_queue, skb);
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 0285aaa..159708d 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c
@@ -40,6 +40,7 @@ void x25_clear_queues(struct sock *sk) skb_queue_purge(&x25->interrupt_in_queue); skb_queue_purge(&x25->interrupt_out_queue); skb_queue_purge(&x25->fragment_queue); + x25->fraglen = 0; }
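These two x25 hunks are halves of one fix: the input path refuses any fragment that would push the running reassembly length past USHRT_MAX before accumulating it, and x25_clear_queues() now resets fraglen alongside purging fragment_queue so a stale total cannot poison the next connection. The bounded-accumulator shape in isolation (generic names, not the x25 code):

    /* Check before accumulating so the total can never exceed the limit;
     * reset the accumulator whenever the backing queue is purged. */
    static bool frag_account(unsigned int *total, unsigned int add,
                             unsigned int limit)
    {
            if (*total + add > limit)
                    return false;
            *total += add;
            return true;
    }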
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 3b46bc6..6149f6a 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c
@@ -167,26 +167,32 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) struct xdp_buff_xsk *pos, *tmp; struct list_head *xskb_list; u32 contd = 0; + u32 num_desc; int err; - if (frags) - contd = XDP_PKT_CONTD; - - err = __xsk_rcv_zc(xs, xskb, len, contd); - if (err) - goto err; - if (likely(!frags)) + if (likely(!frags)) { + err = __xsk_rcv_zc(xs, xskb, len, contd); + if (err) + goto err; return 0; + } + contd = XDP_PKT_CONTD; + num_desc = xdp_get_shared_info_from_buff(xdp)->nr_frags + 1; + if (xskq_prod_nb_free(xs->rx, num_desc) < num_desc) { + xs->rx_queue_full++; + err = -ENOBUFS; + goto err; + } + + __xsk_rcv_zc(xs, xskb, len, contd); xskb_list = &xskb->pool->xskb_list; list_for_each_entry_safe(pos, tmp, xskb_list, list_node) { if (list_is_singular(xskb_list)) contd = 0; len = pos->xdp.data_end - pos->xdp.data; - err = __xsk_rcv_zc(xs, pos, len, contd); - if (err) - goto err; - list_del(&pos->list_node); + __xsk_rcv_zc(xs, pos, len, contd); + list_del_init(&pos->list_node); } return 0;
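The xsk rework converts "write descriptors one at a time and unwind on failure" into "verify capacity for the whole multi-buffer packet, then write unconditionally": xskq_prod_nb_free() is asked for nr_frags + 1 slots before the first __xsk_rcv_zc(), after which the per-fragment calls cannot fail mid-packet and no partially enqueued packet is ever visible. The all-or-nothing shape with stand-in ring helpers:

    /* ring_free()/ring_push() stand in for xskq_prod_nb_free() and
     * __xsk_rcv_zc(); never leave a partial packet in the ring. */
    static int enqueue_pkt(struct ring *r, struct frag *frags, unsigned int n)
    {
            unsigned int i;

            if (ring_free(r, n) < n)
                    return -ENOBUFS;        /* reject the whole packet */

            for (i = 0; i < n; i++)
                    ring_push(r, &frags[i]);        /* cannot fail now */
            return 0;
    }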
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 61f1cef8..e1b11ab 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c
@@ -536,7 +536,7 @@ static void espintcp_close(struct sock *sk, long timeout) sk->sk_prot = &tcp_prot; barrier(); - cancel_work_sync(&ctx->work); + disable_work_sync(&ctx->work); strp_done(&ctx->strp); skb_queue_purge(&ctx->out_queue);
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 52ae0e0..550457e 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c
@@ -544,6 +544,14 @@ static int xfrm_dev_down(struct net_device *dev) return NOTIFY_DONE; } +static int xfrm_dev_unregister(struct net_device *dev) +{ + xfrm_dev_state_flush(dev_net(dev), dev, true); + xfrm_dev_policy_flush(dev_net(dev), dev, true); + + return NOTIFY_DONE; +} + static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); @@ -556,8 +564,10 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void return xfrm_api_check(dev); case NETDEV_DOWN: - case NETDEV_UNREGISTER: return xfrm_dev_down(dev); + + case NETDEV_UNREGISTER: + return xfrm_dev_unregister(dev); } return NOTIFY_DONE; }
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 4ed346e..dc1312e 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c
@@ -75,7 +75,10 @@ int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo) spin_lock_bh(&xfrm_input_afinfo_lock); if (likely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family])) { - if (unlikely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family] != afinfo)) + const struct xfrm_input_afinfo *cur; + + cur = rcu_access_pointer(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family]); + if (unlikely(cur != afinfo)) err = -EINVAL; else RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family], NULL);
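The xfrm_input change (and the analogous one in xfrm_policy.c below) is about sparse-correct access to __rcu pointers: rcu_dereference() is for actually dereferencing and belongs inside a read-side critical section, while rcu_access_pointer() merely fetches the pointer value and is the right accessor when the pointer is only compared or NULL-checked, as in an unregister path already serialized by a lock. In isolation, with afinfo_table as a placeholder for the real two-dimensional array:

    /* Unregister: only the pointer value is inspected, never dereferenced,
     * so rcu_access_pointer() suffices and no rcu_read_lock() is implied. */
    if (rcu_access_pointer(afinfo_table[family]) != afinfo)
            return -EINVAL;
    RCU_INIT_POINTER(afinfo_table[family], NULL);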
diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 050a821..97bc979e5 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c
@@ -901,6 +901,12 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq, iptfs_skb_can_add_frags(newskb, fragwalk, data, copylen)) { iptfs_skb_add_frags(newskb, fragwalk, data, copylen); } else { + if (skb_linearize(newskb)) { + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINBUFFERERROR); + goto abandon; + } + /* copy fragment data into newskb */ if (skb_copy_seq_read(st, data, skb_put(newskb, copylen), copylen)) { @@ -991,6 +997,11 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, iplen = be16_to_cpu(iph->tot_len); iphlen = iph->ihl << 2; + if (iplen < iphlen || iphlen < sizeof(*iph)) { + XFRM_INC_STATS(net, + LINUX_MIB_XFRMINHDRERROR); + goto done; + } protocol = cpu_to_be16(ETH_P_IP); XFRM_MODE_SKB_CB(skbseq->root_skb)->tos = iph->tos; } else if (iph->version == 0x6) { @@ -2653,9 +2664,6 @@ static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig) if (!xtfs) return -ENOMEM; - x->mode_data = xtfs; - xtfs->x = x; - xtfs->ra_newskb = NULL; if (xtfs->cfg.reorder_win_size) { xtfs->w_saved = kzalloc_objs(*xtfs->w_saved, @@ -2666,6 +2674,9 @@ static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig) } } + x->mode_data = xtfs; + xtfs->x = x; + return 0; }
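The second iptfs hunk adds the classic IPv4 length sanity check before trusting header-derived arithmetic: ihl counts 32-bit words, so the header must be at least sizeof(struct iphdr), and tot_len must cover the header, or later "payload = iplen - iphlen" style computations underflow. The check in isolation:

    const struct iphdr *iph = data;                 /* 'data' points at the header */
    unsigned int iphlen = iph->ihl << 2;            /* header bytes */
    unsigned int iplen = be16_to_cpu(iph->tot_len); /* total bytes */

    if (iphlen < sizeof(*iph) || iplen < iphlen)
            return -EINVAL;                 /* malformed, drop early */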
diff --git a/net/xfrm/xfrm_nat_keepalive.c b/net/xfrm/xfrm_nat_keepalive.c index ebf95d4..1856bee 100644 --- a/net/xfrm/xfrm_nat_keepalive.c +++ b/net/xfrm/xfrm_nat_keepalive.c
@@ -261,7 +261,7 @@ int __net_init xfrm_nat_keepalive_net_init(struct net *net) int xfrm_nat_keepalive_net_fini(struct net *net) { - cancel_delayed_work_sync(&net->xfrm.nat_keepalive_work); + disable_delayed_work_sync(&net->xfrm.nat_keepalive_work); return 0; }
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ee8bb44..362939aa 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c
@@ -3801,8 +3801,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; struct xfrm_tmpl **tpp = tp; + int i, k = 0; int ti = 0; - int i, k; sp = skb_sec_path(skb); if (!sp) @@ -3828,6 +3828,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, tpp = stp; } + if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET && sp == &dummy) + /* This policy template was already checked by HW + * and secpath was removed in __xfrm_policy_check2. + */ + goto out; + /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. * Order is _important_. Later we will implement @@ -3837,7 +3843,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, * verified to allow them to be skipped in future policy * checks (e.g. nested tunnels). */ - for (i = xfrm_nr-1, k = 0; i >= 0; i--) { + for (i = xfrm_nr - 1; i >= 0; i--) { k = xfrm_policy_ok(tpp[i], sp, k, family, if_id); if (k < 0) { if (k < -1) @@ -3853,6 +3859,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, goto reject; } +out: xfrm_pols_put(pols, npols); sp->verified_cnt = k; @@ -4149,7 +4156,7 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo) int i; for (i = 0; i < ARRAY_SIZE(xfrm_policy_afinfo); i++) { - if (xfrm_policy_afinfo[i] != afinfo) + if (rcu_access_pointer(xfrm_policy_afinfo[i]) != afinfo) continue; RCU_INIT_POINTER(xfrm_policy_afinfo[i], NULL); break; @@ -4235,7 +4242,7 @@ static int __net_init xfrm_policy_init(struct net *net) net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0; htab = &net->xfrm.policy_bydst[dir]; - htab->table = xfrm_hash_alloc(sz); + rcu_assign_pointer(htab->table, xfrm_hash_alloc(sz)); if (!htab->table) goto out_bydst; htab->hmask = hmask; @@ -4262,7 +4269,7 @@ static int __net_init xfrm_policy_init(struct net *net) struct xfrm_policy_hash *htab; htab = &net->xfrm.policy_bydst[dir]; - xfrm_hash_free(htab->table, sz); + xfrm_hash_free(rcu_dereference_protected(htab->table, true), sz); } xfrm_hash_free(net->xfrm.policy_byidx, sz); out_byidx: @@ -4275,6 +4282,8 @@ static void xfrm_policy_fini(struct net *net) unsigned int sz; int dir; + disable_work_sync(&net->xfrm.policy_hthresh.work); + flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false); @@ -4288,8 +4297,8 @@ static void xfrm_policy_fini(struct net *net) htab = &net->xfrm.policy_bydst[dir]; sz = (htab->hmask + 1) * sizeof(struct hlist_head); - WARN_ON(!hlist_empty(htab->table)); - xfrm_hash_free(htab->table, sz); + WARN_ON(!hlist_empty(rcu_dereference_protected(htab->table, true))); + xfrm_hash_free(rcu_dereference_protected(htab->table, true), sz); } sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 98b362d..1748d37 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c
@@ -53,7 +53,7 @@ static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); static HLIST_HEAD(xfrm_state_gc_list); static HLIST_HEAD(xfrm_state_dev_gc_list); -static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) +static inline bool xfrm_state_hold_rcu(struct xfrm_state *x) { return refcount_inc_not_zero(&x->refcnt); } @@ -870,7 +870,7 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) for (i = 0; i <= net->xfrm.state_hmask; i++) { struct xfrm_state *x; - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { xfrm_audit_state_delete(x, 0, task_valid); @@ -891,7 +891,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool struct xfrm_state *x; struct xfrm_dev_offload *xso; - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { xso = &x->xso; if (xso->dev == dev && @@ -931,7 +931,7 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) for (i = 0; i <= net->xfrm.state_hmask; i++) { struct xfrm_state *x; restart: - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -973,7 +973,7 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali err = -ESRCH; for (i = 0; i <= net->xfrm.state_hmask; i++) { restart: - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) { xso = &x->xso; if (!xfrm_state_kern(x) && xso->dev == dev) { @@ -1563,23 +1563,23 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, list_add(&x->km.all, &net->xfrm.state_all); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); XFRM_STATE_INSERT(bydst, &x->bydst, - net->xfrm.state_bydst + h, + xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, x->xso.type); h = xfrm_src_hash(net, daddr, saddr, encap_family); XFRM_STATE_INSERT(bysrc, &x->bysrc, - net->xfrm.state_bysrc + h, + xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, x->xso.type); INIT_HLIST_NODE(&x->state_cache); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); XFRM_STATE_INSERT(byspi, &x->byspi, - net->xfrm.state_byspi + h, + xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h, x->xso.type); } if (x->km.seq) { h = xfrm_seq_hash(net, x->km.seq); XFRM_STATE_INSERT(byseq, &x->byseq, - net->xfrm.state_byseq + h, + xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, x->xso.type); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; @@ -1652,7 +1652,7 @@ xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id, spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, reqid, family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.family == family && x->props.reqid == reqid && (mark & x->mark.m) == x->mark.v && @@ -1703,18 +1703,12 @@ static struct xfrm_state *xfrm_state_lookup_spi_proto(struct net *net, __be32 sp struct xfrm_state *x; unsigned int i; - rcu_read_lock(); for (i = 0; 
i <= net->xfrm.state_hmask; i++) { - hlist_for_each_entry_rcu(x, &net->xfrm.state_byspi[i], byspi) { - if (x->id.spi == spi && x->id.proto == proto) { - if (!xfrm_state_hold_rcu(x)) - continue; - rcu_read_unlock(); + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_byspi, net) + i, byspi) { + if (x->id.spi == spi && x->id.proto == proto) return x; - } } } - rcu_read_unlock(); return NULL; } @@ -1730,25 +1724,29 @@ static void __xfrm_state_insert(struct xfrm_state *x) h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); - XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h, + XFRM_STATE_INSERT(bydst, &x->bydst, + xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, x->xso.type); h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family); - XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h, + XFRM_STATE_INSERT(bysrc, &x->bysrc, + xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, x->xso.type); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, + XFRM_STATE_INSERT(byspi, &x->byspi, + xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h, x->xso.type); } if (x->km.seq) { h = xfrm_seq_hash(net, x->km.seq); - XFRM_STATE_INSERT(byseq, &x->byseq, net->xfrm.state_byseq + h, + XFRM_STATE_INSERT(byseq, &x->byseq, + xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, x->xso.type); } @@ -1775,7 +1773,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) u32 cpu_id = xnew->pcpu_num; h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.family == family && x->props.reqid == reqid && x->if_id == if_id && @@ -1811,7 +1809,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_state *x; u32 mark = m->v & m->m; - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -1868,10 +1866,12 @@ static struct xfrm_state *__find_acq_core(struct net *net, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL_SOFT); list_add(&x->km.all, &net->xfrm.state_all); - XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h, + XFRM_STATE_INSERT(bydst, &x->bydst, + xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, x->xso.type); h = xfrm_src_hash(net, daddr, saddr, family); - XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h, + XFRM_STATE_INSERT(bysrc, &x->bysrc, + xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, x->xso.type); net->xfrm.state_num++; @@ -2091,7 +2091,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n if (m->reqid) { h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr, m->reqid, m->old_family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + h, bydst) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -2110,7 +2110,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n } else { h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr, m->old_family); - hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { + 
hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bysrc, net) + h, bysrc) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -2264,6 +2264,7 @@ int xfrm_state_update(struct xfrm_state *x) err = 0; x->km.state = XFRM_STATE_DEAD; + xfrm_dev_state_delete(x); __xfrm_state_put(x); } @@ -2312,7 +2313,7 @@ void xfrm_state_update_stats(struct net *net) spin_lock_bh(&net->xfrm.xfrm_state_lock); for (i = 0; i <= net->xfrm.state_hmask; i++) { - hlist_for_each_entry(x, net->xfrm.state_bydst + i, bydst) + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_bydst, net) + i, bydst) xfrm_dev_state_update_stats(x); } spin_unlock_bh(&net->xfrm.xfrm_state_lock); @@ -2503,7 +2504,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s unsigned int h = xfrm_seq_hash(net, seq); struct xfrm_state *x; - hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) { + hlist_for_each_entry(x, xfrm_state_deref_prot(net->xfrm.state_byseq, net) + h, byseq) { if (x->km.seq == seq && (mark & x->mark.m) == x->mark.v && x->pcpu_num == pcpu_num && @@ -2602,12 +2603,13 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high, if (!x0) { x->id.spi = newspi; h = xfrm_spi_hash(net, &x->id.daddr, newspi, x->id.proto, x->props.family); - XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, x->xso.type); + XFRM_STATE_INSERT(byspi, &x->byspi, + xfrm_state_deref_prot(net->xfrm.state_byspi, net) + h, + x->xso.type); spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = 0; goto unlock; } - xfrm_state_put(x0); spin_unlock_bh(&net->xfrm.xfrm_state_lock); next: @@ -3258,6 +3260,7 @@ EXPORT_SYMBOL(xfrm_init_state); int __net_init xfrm_state_init(struct net *net) { + struct hlist_head *ndst, *nsrc, *nspi, *nseq; unsigned int sz; if (net_eq(net, &init_net)) @@ -3268,18 +3271,25 @@ int __net_init xfrm_state_init(struct net *net) sz = sizeof(struct hlist_head) * 8; - net->xfrm.state_bydst = xfrm_hash_alloc(sz); - if (!net->xfrm.state_bydst) + ndst = xfrm_hash_alloc(sz); + if (!ndst) goto out_bydst; - net->xfrm.state_bysrc = xfrm_hash_alloc(sz); - if (!net->xfrm.state_bysrc) + rcu_assign_pointer(net->xfrm.state_bydst, ndst); + + nsrc = xfrm_hash_alloc(sz); + if (!nsrc) goto out_bysrc; - net->xfrm.state_byspi = xfrm_hash_alloc(sz); - if (!net->xfrm.state_byspi) + rcu_assign_pointer(net->xfrm.state_bysrc, nsrc); + + nspi = xfrm_hash_alloc(sz); + if (!nspi) goto out_byspi; - net->xfrm.state_byseq = xfrm_hash_alloc(sz); - if (!net->xfrm.state_byseq) + rcu_assign_pointer(net->xfrm.state_byspi, nspi); + + nseq = xfrm_hash_alloc(sz); + if (!nseq) goto out_byseq; + rcu_assign_pointer(net->xfrm.state_byseq, nseq); net->xfrm.state_cache_input = alloc_percpu(struct hlist_head); if (!net->xfrm.state_cache_input) @@ -3295,17 +3305,19 @@ int __net_init xfrm_state_init(struct net *net) return 0; out_state_cache_input: - xfrm_hash_free(net->xfrm.state_byseq, sz); + xfrm_hash_free(nseq, sz); out_byseq: - xfrm_hash_free(net->xfrm.state_byspi, sz); + xfrm_hash_free(nspi, sz); out_byspi: - xfrm_hash_free(net->xfrm.state_bysrc, sz); + xfrm_hash_free(nsrc, sz); out_bysrc: - xfrm_hash_free(net->xfrm.state_bydst, sz); + xfrm_hash_free(ndst, sz); out_bydst: return -ENOMEM; } +#define xfrm_state_deref_netexit(table) \ + rcu_dereference_protected((table), true /* netns is going away */) void xfrm_state_fini(struct net *net) { unsigned int sz; @@ -3318,17 +3330,17 @@ void xfrm_state_fini(struct net *net) WARN_ON(!list_empty(&net->xfrm.state_all)); for (i = 0; i <= net->xfrm.state_hmask; 
i++) { - WARN_ON(!hlist_empty(net->xfrm.state_byseq + i)); - WARN_ON(!hlist_empty(net->xfrm.state_byspi + i)); - WARN_ON(!hlist_empty(net->xfrm.state_bysrc + i)); - WARN_ON(!hlist_empty(net->xfrm.state_bydst + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_byseq) + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_byspi) + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_bysrc) + i)); + WARN_ON(!hlist_empty(xfrm_state_deref_netexit(net->xfrm.state_bydst) + i)); } sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head); - xfrm_hash_free(net->xfrm.state_byseq, sz); - xfrm_hash_free(net->xfrm.state_byspi, sz); - xfrm_hash_free(net->xfrm.state_bysrc, sz); - xfrm_hash_free(net->xfrm.state_bydst, sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_byseq), sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_byspi), sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_bysrc), sz); + xfrm_hash_free(xfrm_state_deref_netexit(net->xfrm.state_bydst), sz); free_percpu(net->xfrm.state_cache_input); }
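Both xfrm_policy.c and xfrm_state.c apply the same annotation discipline to their hash-table base pointers once those become __rcu: writers publish a freshly allocated table with rcu_assign_pointer(), walkers that hold the protecting lock (or run when the netns is provably dead) use rcu_dereference_protected() with a condition documenting why, and only genuinely lock-free readers would use rcu_dereference(). The lifecycle in miniature, with placeholder names:

    struct hlist_head __rcu *tbl;

    /* init: publish only after successful allocation */
    struct hlist_head *n = xfrm_hash_alloc(sz);
    if (!n)
            return -ENOMEM;
    rcu_assign_pointer(tbl, n);

    /* walker holding the protecting lock: */
    struct hlist_head *h =
            rcu_dereference_protected(tbl, lockdep_is_held(&state_lock));

    /* teardown with no concurrent users left: */
    xfrm_hash_free(rcu_dereference_protected(tbl, true), sz);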
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 403b5ec..1656b48 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c
@@ -35,6 +35,15 @@ #endif #include <linux/unaligned.h> +static struct sock *xfrm_net_nlsk(const struct net *net, const struct sk_buff *skb) +{ + /* get the source of this request, see netlink_unicast_kernel */ + const struct sock *sk = NETLINK_CB(skb).sk; + + /* sk is refcounted, the netns stays alive and nlsk with it */ + return rcu_dereference_protected(net->xfrm.nlsk, sk->sk_net_refcnt); +} + static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type, struct netlink_ext_ack *extack) { @@ -1727,7 +1736,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_spdinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); + return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, sportid); } static inline unsigned int xfrm_sadinfo_msgsize(void) @@ -1787,7 +1796,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_sadinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); + return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, sportid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1807,7 +1816,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: @@ -1850,6 +1859,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); if (pcpu_num >= num_possible_cpus()) { err = -EINVAL; + NL_SET_ERR_MSG(extack, "pCPU number too big"); goto out_noput; } } @@ -1897,7 +1907,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, } } - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); @@ -2542,7 +2552,7 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh, r_up->out = net->xfrm.policy_default[XFRM_POLICY_OUT]; nlmsg_end(r_skb, r_nlh); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid); + return nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, portid); } static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2608,7 +2618,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), resp_skb, NETLINK_CB(skb).portid); } } else { @@ -2781,7 +2791,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_aevent(r_skb, x, &c); BUG_ON(err < 0); - err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; @@ -3001,8 +3011,10 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_SA_PCPU]) { x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); err = -EINVAL; - if (x->pcpu_num >= num_possible_cpus()) + if (x->pcpu_num >= num_possible_cpus()) { + NL_SET_ERR_MSG(extack, "pCPU number too big"); goto free_state; + } } err = verify_newpolicy_info(&ua->policy, extack); @@ -3483,7 +3495,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct 
nlmsghdr *nlh, goto err; } - err = netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c); + err = netlink_dump_start(xfrm_net_nlsk(net, skb), skb, nlh, &c); goto err; } @@ -3673,7 +3685,7 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) } if (x->if_id) l += nla_total_size(sizeof(x->if_id)); - if (x->pcpu_num) + if (x->pcpu_num != UINT_MAX) l += nla_total_size(sizeof(x->pcpu_num)); /* Must count x->lastused as it may become non-zero behind our back. */
diff --git a/rust/Makefile b/rust/Makefile index 629b3bd..9801af2 100644 --- a/rust/Makefile +++ b/rust/Makefile
@@ -148,7 +148,8 @@ quiet_cmd_rustdoc = RUSTDOC $(if $(rustdoc_host),H, ) $< cmd_rustdoc = \ OBJTREE=$(abspath $(objtree)) \ - $(RUSTDOC) $(filter-out $(skip_flags) --remap-path-prefix=%,$(if $(rustdoc_host),$(rust_common_flags),$(rust_flags))) \ + $(RUSTDOC) $(filter-out $(skip_flags) --remap-path-prefix=% --remap-path-scope=%, \ + $(if $(rustdoc_host),$(rust_common_flags),$(rust_flags))) \ $(rustc_target_flags) -L$(objtree)/$(obj) \ -Zunstable-options --generate-link-to-definition \ --output $(rustdoc_output) \ @@ -334,7 +335,7 @@ rm -rf $(objtree)/$(obj)/test/doctests/kernel; \ mkdir -p $(objtree)/$(obj)/test/doctests/kernel; \ OBJTREE=$(abspath $(objtree)) \ - $(RUSTDOC) --test $(filter-out --remap-path-prefix=%,$(rust_flags)) \ + $(RUSTDOC) --test $(filter-out --remap-path-prefix=% --remap-path-scope=%,$(rust_flags)) \ -L$(objtree)/$(obj) --extern ffi --extern pin_init \ --extern kernel --extern build_error --extern macros \ --extern bindings --extern uapi \ @@ -526,11 +527,9 @@ cmd_rustc_procmacrolibrary = \ $(if $(skip_clippy),$(RUSTC),$(RUSTC_OR_CLIPPY)) \ $(filter-out $(skip_flags),$(rust_common_flags) $(rustc_target_flags)) \ - --emit=dep-info,link --crate-type rlib -O \ + --emit=dep-info=$(depfile) --emit=link=$@ --crate-type rlib -O \ --out-dir $(objtree)/$(obj) -L$(objtree)/$(obj) \ - --crate-name $(patsubst lib%.rlib,%,$(notdir $@)) $<; \ - mv $(objtree)/$(obj)/$(patsubst lib%.rlib,%,$(notdir $@)).d $(depfile); \ - sed -i '/^\#/d' $(depfile) + --crate-name $(patsubst lib%.rlib,%,$(notdir $@)) $< $(obj)/libproc_macro2.rlib: private skip_clippy = 1 $(obj)/libproc_macro2.rlib: private rustc_target_flags = $(proc_macro2-flags)
diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index 76faa1a..f5adee4 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs
@@ -401,6 +401,7 @@ pub fn to_table(mut self) -> Result<TableBox> { /// ``` /// use kernel::cpufreq::{DEFAULT_TRANSITION_LATENCY_NS, Policy}; /// +/// #[allow(clippy::double_parens, reason = "False positive before 1.92.0")] /// fn update_policy(policy: &mut Policy) { /// policy /// .set_dvfs_possible_from_any_cpu(true)
diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index 909d56f..a396f84 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs
@@ -461,6 +461,19 @@ pub fn size(&self) -> usize { self.count * core::mem::size_of::<T>() } + /// Returns the raw pointer to the allocated region in the CPU's virtual address space. + #[inline] + pub fn as_ptr(&self) -> *const [T] { + core::ptr::slice_from_raw_parts(self.cpu_addr.as_ptr(), self.count) + } + + /// Returns the raw pointer to the allocated region in the CPU's virtual address space as + /// a mutable pointer. + #[inline] + pub fn as_mut_ptr(&self) -> *mut [T] { + core::ptr::slice_from_raw_parts_mut(self.cpu_addr.as_ptr(), self.count) + } + /// Returns the base address to the allocated region in the CPU's virtual address space. pub fn start_ptr(&self) -> *const T { self.cpu_addr.as_ptr() @@ -581,23 +594,6 @@ pub unsafe fn write(&mut self, src: &[T], offset: usize) -> Result { Ok(()) } - /// Returns a pointer to an element from the region with bounds checking. `offset` is in - /// units of `T`, not the number of bytes. - /// - /// Public but hidden since it should only be used from [`dma_read`] and [`dma_write`] macros. - #[doc(hidden)] - pub fn item_from_index(&self, offset: usize) -> Result<*mut T> { - if offset >= self.count { - return Err(EINVAL); - } - // SAFETY: - // - The pointer is valid due to type invariant on `CoherentAllocation` - // and we've just checked that the range and index is within bounds. - // - `offset` can't overflow since it is smaller than `self.count` and we've checked - // that `self.count` won't overflow early in the constructor. - Ok(unsafe { self.cpu_addr.as_ptr().add(offset) }) - } - /// Reads the value of `field` and ensures that its type is [`FromBytes`]. /// /// # Safety @@ -670,6 +666,9 @@ unsafe impl<T: AsBytes + FromBytes + Send> Send for CoherentAllocation<T> {} /// Reads a field of an item from an allocated region of structs. /// +/// The syntax is of the form `kernel::dma_read!(dma, proj)` where `dma` is an expression evaluating +/// to a [`CoherentAllocation`] and `proj` is a [projection specification](kernel::ptr::project!). +/// /// # Examples /// /// ``` @@ -684,36 +683,29 @@ unsafe impl<T: AsBytes + FromBytes + Send> Send for CoherentAllocation<T> {} /// unsafe impl kernel::transmute::AsBytes for MyStruct{}; /// /// # fn test(alloc: &kernel::dma::CoherentAllocation<MyStruct>) -> Result { -/// let whole = kernel::dma_read!(alloc[2]); -/// let field = kernel::dma_read!(alloc[1].field); +/// let whole = kernel::dma_read!(alloc, [2]?); +/// let field = kernel::dma_read!(alloc, [1]?.field); /// # Ok::<(), Error>(()) } /// ``` #[macro_export] macro_rules! dma_read { - ($dma:expr, $idx: expr, $($field:tt)*) => {{ - (|| -> ::core::result::Result<_, $crate::error::Error> { - let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?; - // SAFETY: `item_from_index` ensures that `item` is always a valid pointer and can be - // dereferenced. The compiler also further validates the expression on whether `field` - // is a member of `item` when expanded by the macro. - unsafe { - let ptr_field = ::core::ptr::addr_of!((*item) $($field)*); - ::core::result::Result::Ok( - $crate::dma::CoherentAllocation::field_read(&$dma, ptr_field) - ) - } - })() + ($dma:expr, $($proj:tt)*) => {{ + let dma = &$dma; + let ptr = $crate::ptr::project!( + $crate::dma::CoherentAllocation::as_ptr(dma), $($proj)* + ); + // SAFETY: The pointer created by the projection is within the DMA region. 
+ unsafe { $crate::dma::CoherentAllocation::field_read(dma, ptr) } }}; - ($dma:ident [ $idx:expr ] $($field:tt)* ) => { - $crate::dma_read!($dma, $idx, $($field)*) - }; - ($($dma:ident).* [ $idx:expr ] $($field:tt)* ) => { - $crate::dma_read!($($dma).*, $idx, $($field)*) - }; } /// Writes to a field of an item from an allocated region of structs. /// +/// The syntax is of the form `kernel::dma_write!(dma, proj, val)` where `dma` is an expression +/// evaluating to a [`CoherentAllocation`], `proj` is a +/// [projection specification](kernel::ptr::project!), and `val` is the value to be written to the +/// projected location. +/// /// # Examples /// /// ``` @@ -728,37 +720,31 @@ macro_rules! dma_read { /// unsafe impl kernel::transmute::AsBytes for MyStruct{}; /// /// # fn test(alloc: &kernel::dma::CoherentAllocation<MyStruct>) -> Result { -/// kernel::dma_write!(alloc[2].member = 0xf); -/// kernel::dma_write!(alloc[1] = MyStruct { member: 0xf }); +/// kernel::dma_write!(alloc, [2]?.member, 0xf); +/// kernel::dma_write!(alloc, [1]?, MyStruct { member: 0xf }); /// # Ok::<(), Error>(()) } /// ``` #[macro_export] macro_rules! dma_write { - ($dma:ident [ $idx:expr ] $($field:tt)*) => {{ - $crate::dma_write!($dma, $idx, $($field)*) + (@parse [$dma:expr] [$($proj:tt)*] [, $val:expr]) => {{ + let dma = &$dma; + let ptr = $crate::ptr::project!( + mut $crate::dma::CoherentAllocation::as_mut_ptr(dma), $($proj)* + ); + let val = $val; + // SAFETY: The pointer created by the projection is within the DMA region. + unsafe { $crate::dma::CoherentAllocation::field_write(dma, ptr, val) } }}; - ($($dma:ident).* [ $idx:expr ] $($field:tt)* ) => {{ - $crate::dma_write!($($dma).*, $idx, $($field)*) - }}; - ($dma:expr, $idx: expr, = $val:expr) => { - (|| -> ::core::result::Result<_, $crate::error::Error> { - let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?; - // SAFETY: `item_from_index` ensures that `item` is always a valid item. - unsafe { $crate::dma::CoherentAllocation::field_write(&$dma, item, $val) } - ::core::result::Result::Ok(()) - })() + (@parse [$dma:expr] [$($proj:tt)*] [.$field:tt $($rest:tt)*]) => { + $crate::dma_write!(@parse [$dma] [$($proj)* .$field] [$($rest)*]) }; - ($dma:expr, $idx: expr, $(.$field:ident)* = $val:expr) => { - (|| -> ::core::result::Result<_, $crate::error::Error> { - let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?; - // SAFETY: `item_from_index` ensures that `item` is always a valid pointer and can be - // dereferenced. The compiler also further validates the expression on whether `field` - // is a member of `item` when expanded by the macro. - unsafe { - let ptr_field = ::core::ptr::addr_of_mut!((*item) $(.$field)*); - $crate::dma::CoherentAllocation::field_write(&$dma, ptr_field, $val) - } - ::core::result::Result::Ok(()) - })() + (@parse [$dma:expr] [$($proj:tt)*] [[$index:expr]? $($rest:tt)*]) => { + $crate::dma_write!(@parse [$dma] [$($proj)* [$index]?] [$($rest)*]) + }; + (@parse [$dma:expr] [$($proj:tt)*] [[$index:expr] $($rest:tt)*]) => { + $crate::dma_write!(@parse [$dma] [$($proj)* [$index]] [$($rest)*]) + }; + ($dma:expr, $($rest:tt)*) => { + $crate::dma_write!(@parse [$dma] [] [$($rest)*]) }; }
diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index c1cca7b..e5fba6b 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs
@@ -139,9 +139,9 @@ pub fn maxsize(&self) -> usize { /// Internal helper macros used to invoke C MMIO read functions. /// -/// This macro is intended to be used by higher-level MMIO access macros (define_read) and provides -/// a unified expansion for infallible vs. fallible read semantics. It emits a direct call into the -/// corresponding C helper and performs the required cast to the Rust return type. +/// This macro is intended to be used by higher-level MMIO access macros (io_define_read) and +/// provides a unified expansion for infallible vs. fallible read semantics. It emits a direct call +/// into the corresponding C helper and performs the required cast to the Rust return type. /// /// # Parameters /// @@ -166,9 +166,9 @@ macro_rules! call_mmio_read { /// Internal helper macros used to invoke C MMIO write functions. /// -/// This macro is intended to be used by higher-level MMIO access macros (define_write) and provides -/// a unified expansion for infallible vs. fallible write semantics. It emits a direct call into the -/// corresponding C helper and performs the required cast to the Rust return type. +/// This macro is intended to be used by higher-level MMIO access macros (io_define_write) and +/// provides a unified expansion for infallible vs. fallible write semantics. It emits a direct call +/// into the corresponding C helper and performs the required cast to the Rust return type. /// /// # Parameters /// @@ -193,7 +193,30 @@ macro_rules! call_mmio_write { }}; } -macro_rules! define_read { +/// Generates an accessor method for reading from an I/O backend. +/// +/// This macro reduces boilerplate by automatically generating either compile-time bounds-checked +/// (infallible) or runtime bounds-checked (fallible) read methods. It abstracts the address +/// calculation and bounds checking, and delegates the actual I/O read operation to a specified +/// helper macro, making it generic over different I/O backends. +/// +/// # Parameters +/// +/// * `infallible` / `fallible` - Determines the bounds-checking strategy. `infallible` relies on +/// `IoKnownSize` for compile-time checks and returns the value directly. `fallible` performs +/// runtime checks against `maxsize()` and returns a `Result<T>`. +/// * `$(#[$attr:meta])*` - Optional attributes to apply to the generated method (e.g., +/// `#[cfg(CONFIG_64BIT)]` or inline directives). +/// * `$vis:vis` - The visibility of the generated method (e.g., `pub`). +/// * `$name:ident` / `$try_name:ident` - The name of the generated method (e.g., `read32`, +/// `try_read8`). +/// * `$call_macro:ident` - The backend-specific helper macro used to emit the actual I/O call +/// (e.g., `call_mmio_read`). +/// * `$c_fn:ident` - The backend-specific C function or identifier to be passed into the +/// `$call_macro`. +/// * `$type_name:ty` - The Rust type of the value being read (e.g., `u8`, `u32`). +#[macro_export] +macro_rules! io_define_read { (infallible, $(#[$attr:meta])* $vis:vis $name:ident, $call_macro:ident($c_fn:ident) -> $type_name:ty) => { /// Read IO data from a given offset known at compile time. @@ -226,9 +249,33 @@ macro_rules! define_read { } }; } -pub(crate) use define_read; +pub use io_define_read; -macro_rules! define_write { +/// Generates an accessor method for writing to an I/O backend. +/// +/// This macro reduces boilerplate by automatically generating either compile-time bounds-checked +/// (infallible) or runtime bounds-checked (fallible) write methods. 
It abstracts the address +/// calculation and bounds checking, and delegates the actual I/O write operation to a specified +/// helper macro, making it generic over different I/O backends. +/// +/// # Parameters +/// +/// * `infallible` / `fallible` - Determines the bounds-checking strategy. `infallible` relies on +/// `IoKnownSize` for compile-time checks and returns `()`. `fallible` performs runtime checks +/// against `maxsize()` and returns a `Result`. +/// * `$(#[$attr:meta])*` - Optional attributes to apply to the generated method (e.g., +/// `#[cfg(CONFIG_64BIT)]` or inline directives). +/// * `$vis:vis` - The visibility of the generated method (e.g., `pub`). +/// * `$name:ident` / `$try_name:ident` - The name of the generated method (e.g., `write32`, +/// `try_write8`). +/// * `$call_macro:ident` - The backend-specific helper macro used to emit the actual I/O call +/// (e.g., `call_mmio_write`). +/// * `$c_fn:ident` - The backend-specific C function or identifier to be passed into the +/// `$call_macro`. +/// * `$type_name:ty` - The Rust type of the value being written (e.g., `u8`, `u32`). Note the use +/// of `<-` before the type to denote a write operation. +#[macro_export] +macro_rules! io_define_write { (infallible, $(#[$attr:meta])* $vis:vis $name:ident, $call_macro:ident($c_fn:ident) <- $type_name:ty) => { /// Write IO data from a given offset known at compile time. @@ -259,7 +306,7 @@ macro_rules! define_write { } }; } -pub(crate) use define_write; +pub use io_define_write; /// Checks whether an access of type `U` at the given `offset` /// is valid within this region. @@ -509,40 +556,40 @@ fn maxsize(&self) -> usize { self.0.maxsize() } - define_read!(fallible, try_read8, call_mmio_read(readb) -> u8); - define_read!(fallible, try_read16, call_mmio_read(readw) -> u16); - define_read!(fallible, try_read32, call_mmio_read(readl) -> u32); - define_read!( + io_define_read!(fallible, try_read8, call_mmio_read(readb) -> u8); + io_define_read!(fallible, try_read16, call_mmio_read(readw) -> u16); + io_define_read!(fallible, try_read32, call_mmio_read(readl) -> u32); + io_define_read!( fallible, #[cfg(CONFIG_64BIT)] try_read64, call_mmio_read(readq) -> u64 ); - define_write!(fallible, try_write8, call_mmio_write(writeb) <- u8); - define_write!(fallible, try_write16, call_mmio_write(writew) <- u16); - define_write!(fallible, try_write32, call_mmio_write(writel) <- u32); - define_write!( + io_define_write!(fallible, try_write8, call_mmio_write(writeb) <- u8); + io_define_write!(fallible, try_write16, call_mmio_write(writew) <- u16); + io_define_write!(fallible, try_write32, call_mmio_write(writel) <- u32); + io_define_write!( fallible, #[cfg(CONFIG_64BIT)] try_write64, call_mmio_write(writeq) <- u64 ); - define_read!(infallible, read8, call_mmio_read(readb) -> u8); - define_read!(infallible, read16, call_mmio_read(readw) -> u16); - define_read!(infallible, read32, call_mmio_read(readl) -> u32); - define_read!( + io_define_read!(infallible, read8, call_mmio_read(readb) -> u8); + io_define_read!(infallible, read16, call_mmio_read(readw) -> u16); + io_define_read!(infallible, read32, call_mmio_read(readl) -> u32); + io_define_read!( infallible, #[cfg(CONFIG_64BIT)] read64, call_mmio_read(readq) -> u64 ); - define_write!(infallible, write8, call_mmio_write(writeb) <- u8); - define_write!(infallible, write16, call_mmio_write(writew) <- u16); - define_write!(infallible, write32, call_mmio_write(writel) <- u32); - define_write!( + io_define_write!(infallible, write8, call_mmio_write(writeb) <- 
u8); + io_define_write!(infallible, write16, call_mmio_write(writew) <- u16); + io_define_write!(infallible, write32, call_mmio_write(writel) <- u32); + io_define_write!( infallible, #[cfg(CONFIG_64BIT)] write64, @@ -566,40 +613,40 @@ pub unsafe fn from_raw(raw: &MmioRaw<SIZE>) -> &Self { unsafe { &*core::ptr::from_ref(raw).cast() } } - define_read!(infallible, pub read8_relaxed, call_mmio_read(readb_relaxed) -> u8); - define_read!(infallible, pub read16_relaxed, call_mmio_read(readw_relaxed) -> u16); - define_read!(infallible, pub read32_relaxed, call_mmio_read(readl_relaxed) -> u32); - define_read!( + io_define_read!(infallible, pub read8_relaxed, call_mmio_read(readb_relaxed) -> u8); + io_define_read!(infallible, pub read16_relaxed, call_mmio_read(readw_relaxed) -> u16); + io_define_read!(infallible, pub read32_relaxed, call_mmio_read(readl_relaxed) -> u32); + io_define_read!( infallible, #[cfg(CONFIG_64BIT)] pub read64_relaxed, call_mmio_read(readq_relaxed) -> u64 ); - define_read!(fallible, pub try_read8_relaxed, call_mmio_read(readb_relaxed) -> u8); - define_read!(fallible, pub try_read16_relaxed, call_mmio_read(readw_relaxed) -> u16); - define_read!(fallible, pub try_read32_relaxed, call_mmio_read(readl_relaxed) -> u32); - define_read!( + io_define_read!(fallible, pub try_read8_relaxed, call_mmio_read(readb_relaxed) -> u8); + io_define_read!(fallible, pub try_read16_relaxed, call_mmio_read(readw_relaxed) -> u16); + io_define_read!(fallible, pub try_read32_relaxed, call_mmio_read(readl_relaxed) -> u32); + io_define_read!( fallible, #[cfg(CONFIG_64BIT)] pub try_read64_relaxed, call_mmio_read(readq_relaxed) -> u64 ); - define_write!(infallible, pub write8_relaxed, call_mmio_write(writeb_relaxed) <- u8); - define_write!(infallible, pub write16_relaxed, call_mmio_write(writew_relaxed) <- u16); - define_write!(infallible, pub write32_relaxed, call_mmio_write(writel_relaxed) <- u32); - define_write!( + io_define_write!(infallible, pub write8_relaxed, call_mmio_write(writeb_relaxed) <- u8); + io_define_write!(infallible, pub write16_relaxed, call_mmio_write(writew_relaxed) <- u16); + io_define_write!(infallible, pub write32_relaxed, call_mmio_write(writel_relaxed) <- u32); + io_define_write!( infallible, #[cfg(CONFIG_64BIT)] pub write64_relaxed, call_mmio_write(writeq_relaxed) <- u64 ); - define_write!(fallible, pub try_write8_relaxed, call_mmio_write(writeb_relaxed) <- u8); - define_write!(fallible, pub try_write16_relaxed, call_mmio_write(writew_relaxed) <- u16); - define_write!(fallible, pub try_write32_relaxed, call_mmio_write(writel_relaxed) <- u32); - define_write!( + io_define_write!(fallible, pub try_write8_relaxed, call_mmio_write(writeb_relaxed) <- u8); + io_define_write!(fallible, pub try_write16_relaxed, call_mmio_write(writew_relaxed) <- u16); + io_define_write!(fallible, pub try_write32_relaxed, call_mmio_write(writel_relaxed) <- u32); + io_define_write!( fallible, #[cfg(CONFIG_64BIT)] pub try_write64_relaxed,
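From a caller's perspective, the infallible/fallible split these macros generate is purely a signature difference; a rough sketch, where `Region` is a hypothetical stand-in for any type carrying the generated accessors (such as the MMIO region types above):

    // `Region` is assumed to expose the generated `read32`/`try_read32`.
    fn sample(io: &Region, off: usize) -> Result<u32> {
        // Constant offset: bounds-checked at build time against the known
        // region size (`IoKnownSize`), so the value comes back directly.
        let _status: u32 = io.read32(0x04);

        // Runtime offset: checked against `maxsize()`, so the result is a
        // `Result<u32>` and composes with `?`.
        io.try_read32(off)
    }

The write accessors mirror the same split (`write32` vs. `try_write32`).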
diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs index f93f24a..a1edf74 100644 --- a/rust/kernel/kunit.rs +++ b/rust/kernel/kunit.rs
@@ -14,6 +14,10 @@ /// Public but hidden since it should only be used from KUnit generated code. #[doc(hidden)] pub fn err(args: fmt::Arguments<'_>) { + // `args` is unused if `CONFIG_PRINTK` is not set - this avoids a build-time warning. + #[cfg(not(CONFIG_PRINTK))] + let _ = args; + // SAFETY: The format string is null-terminated and the `%pA` specifier matches the argument we // are passing. #[cfg(CONFIG_PRINTK)] @@ -30,6 +34,10 @@ pub fn err(args: fmt::Arguments<'_>) { /// Public but hidden since it should only be used from KUnit generated code. #[doc(hidden)] pub fn info(args: fmt::Arguments<'_>) { + // `args` is unused if `CONFIG_PRINTK` is not set - this avoids a build-time warning. + #[cfg(not(CONFIG_PRINTK))] + let _ = args; + // SAFETY: The format string is null-terminated and the `%pA` specifier matches the argument we // are passing. #[cfg(CONFIG_PRINTK)]
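The `let _ = args;` dance generalizes to any parameter that is only consumed under a `cfg` gate; a standalone sketch, using a made-up `logging` cargo feature in place of `CONFIG_PRINTK`:

    fn report(msg: &str) {
        // Mark `msg` as used when the backend is compiled out, so the
        // build does not warn about an unused parameter.
        #[cfg(not(feature = "logging"))]
        let _ = msg;

        #[cfg(feature = "logging")]
        println!("{msg}");
    }

An `#[allow(unused_variables)]` on the parameter would also silence the warning, but the explicit `let _ = msg;` scopes the suppression to the one configuration that actually needs it.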
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 3da92f18f..d93292d 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs
@@ -20,6 +20,7 @@ #![feature(generic_nonzero)] #![feature(inline_const)] #![feature(pointer_is_aligned)] +#![feature(slice_ptr_len)] // // Stable since Rust 1.80.0. #![feature(slice_flatten)] @@ -37,6 +38,9 @@ #![feature(const_ptr_write)] #![feature(const_refs_to_cell)] // +// Stable since Rust 1.84.0. +#![feature(strict_provenance)] +// // Expected to become stable. #![feature(arbitrary_self_types)] //
diff --git a/rust/kernel/pci/io.rs b/rust/kernel/pci/io.rs index 6ca4cf7..fb6edab 100644 --- a/rust/kernel/pci/io.rs +++ b/rust/kernel/pci/io.rs
@@ -8,8 +8,8 @@ device, devres::Devres, io::{ - define_read, - define_write, + io_define_read, + io_define_write, Io, IoCapable, IoKnownSize, @@ -88,7 +88,7 @@ pub struct ConfigSpace<'a, S: ConfigSpaceKind = Extended> { /// Internal helper macros used to invoke C PCI configuration space read functions. /// /// This macro is intended to be used by higher-level PCI configuration space access macros -/// (define_read) and provides a unified expansion for infallible vs. fallible read semantics. It +/// (io_define_read) and provides a unified expansion for infallible vs. fallible read semantics. It /// emits a direct call into the corresponding C helper and performs the required cast to the Rust /// return type. /// @@ -117,9 +117,9 @@ macro_rules! call_config_read { /// Internal helper macros used to invoke C PCI configuration space write functions. /// /// This macro is intended to be used by higher-level PCI configuration space access macros -/// (define_write) and provides a unified expansion for infallible vs. fallible read semantics. It -/// emits a direct call into the corresponding C helper and performs the required cast to the Rust -/// return type. +/// (io_define_write) and provides a unified expansion for infallible vs. fallible write semantics. +/// It emits a direct call into the corresponding C helper and performs the required cast to the +/// Rust return type. /// /// # Parameters /// @@ -163,13 +163,13 @@ fn maxsize(&self) -> usize { // PCI configuration space does not support fallible operations. // The default implementations from the Io trait are not used. - define_read!(infallible, read8, call_config_read(pci_read_config_byte) -> u8); - define_read!(infallible, read16, call_config_read(pci_read_config_word) -> u16); - define_read!(infallible, read32, call_config_read(pci_read_config_dword) -> u32); + io_define_read!(infallible, read8, call_config_read(pci_read_config_byte) -> u8); + io_define_read!(infallible, read16, call_config_read(pci_read_config_word) -> u16); + io_define_read!(infallible, read32, call_config_read(pci_read_config_dword) -> u32); - define_write!(infallible, write8, call_config_write(pci_write_config_byte) <- u8); - define_write!(infallible, write16, call_config_write(pci_write_config_word) <- u16); - define_write!(infallible, write32, call_config_write(pci_write_config_dword) <- u32); + io_define_write!(infallible, write8, call_config_write(pci_write_config_byte) <- u8); + io_define_write!(infallible, write16, call_config_write(pci_write_config_word) <- u16); + io_define_write!(infallible, write32, call_config_write(pci_write_config_dword) <- u32); } impl<'a, S: ConfigSpaceKind> IoKnownSize for ConfigSpace<'a, S> {
diff --git a/rust/kernel/ptr.rs b/rust/kernel/ptr.rs index 5b6a382..bdc2d79 100644 --- a/rust/kernel/ptr.rs +++ b/rust/kernel/ptr.rs
@@ -2,7 +2,13 @@ //! Types and functions to work with pointers and addresses. -use core::mem::align_of; +pub mod projection; +pub use crate::project_pointer as project; + +use core::mem::{ + align_of, + size_of, // +}; use core::num::NonZero; /// Type representing an alignment, which is always a power of two. @@ -225,3 +231,25 @@ fn align_up(self, alignment: Alignment) -> Option<Self> { } impl_alignable_uint!(u8, u16, u32, u64, usize); + +/// Trait to represent compile-time known size information. +/// +/// This is a generalization of [`size_of`] that works for dynamically sized types. +pub trait KnownSize { + /// Get the size of an object of this type in bytes, with the metadata of the given pointer. + fn size(p: *const Self) -> usize; +} + +impl<T> KnownSize for T { + #[inline(always)] + fn size(_: *const Self) -> usize { + size_of::<T>() + } +} + +impl<T> KnownSize for [T] { + #[inline(always)] + fn size(p: *const Self) -> usize { + p.len() * size_of::<T>() + } +}
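What the trait adds over plain `size_of` shows up with a slice pointer: the element count lives in the fat pointer's metadata, so no dereference happens and even a null pointer works. A small sketch (assuming the `kernel::ptr::KnownSize` path introduced above):

    use kernel::ptr::KnownSize;

    fn demo() {
        // Sized types ignore the pointer entirely and reduce to size_of.
        let p: *const u64 = core::ptr::null();
        assert_eq!(<u64 as KnownSize>::size(p), 8);

        // For slices, the length is read from the (null, never
        // dereferenced) pointer's metadata, not from the pointee.
        let s: *const [u32] =
            core::ptr::slice_from_raw_parts(core::ptr::null::<u32>(), 4);
        assert_eq!(<[u32] as KnownSize>::size(s), 16);
    }

This is also why the `slice_ptr_len` feature gate appears in lib.rs above: `<*const [T]>::len()` was not yet stable on the minimum supported compiler.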
diff --git a/rust/kernel/ptr/projection.rs b/rust/kernel/ptr/projection.rs new file mode 100644 index 0000000..140ea8e --- /dev/null +++ b/rust/kernel/ptr/projection.rs
@@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Infrastructure for handling projections. + +use core::{ + mem::MaybeUninit, + ops::Deref, // +}; + +use crate::prelude::*; + +/// Error raised when a projection is attempted on an array or slice out of bounds. +pub struct OutOfBound; + +impl From<OutOfBound> for Error { + #[inline(always)] + fn from(_: OutOfBound) -> Self { + ERANGE + } +} + +/// A helper trait to perform index projection. +/// +/// This is similar to [`core::slice::SliceIndex`], but operates on raw pointers safely and +/// fallibly. +/// +/// # Safety +/// +/// The implementation of `index` and `get` (if [`Some`] is returned) must ensure that, given the +/// input pointer `slice` and the returned pointer `output`: +/// - `output` has the same provenance as `slice`; +/// - `output.byte_offset_from(slice)` is between 0 and +/// `KnownSize::size(slice) - KnownSize::size(output)`. +/// +/// This means that if the input pointer is valid, then the pointer returned by `get` or `index` +/// is also valid. +#[diagnostic::on_unimplemented(message = "`{Self}` cannot be used to index `{T}`")] +#[doc(hidden)] +pub unsafe trait ProjectIndex<T: ?Sized>: Sized { + type Output: ?Sized; + + /// Returns an index-projected pointer, if in bounds. + fn get(self, slice: *mut T) -> Option<*mut Self::Output>; + + /// Returns an index-projected pointer; fails the build if it cannot be proved to be in bounds. + #[inline(always)] + fn index(self, slice: *mut T) -> *mut Self::Output { + Self::get(self, slice).unwrap_or_else(|| build_error!()) + } +} + +// Forward array impl to slice impl. +// +// SAFETY: Safety requirement guaranteed by the forwarded impl. +unsafe impl<T, I, const N: usize> ProjectIndex<[T; N]> for I +where + I: ProjectIndex<[T]>, +{ + type Output = <I as ProjectIndex<[T]>>::Output; + + #[inline(always)] + fn get(self, slice: *mut [T; N]) -> Option<*mut Self::Output> { + <I as ProjectIndex<[T]>>::get(self, slice) + } + + #[inline(always)] + fn index(self, slice: *mut [T; N]) -> *mut Self::Output { + <I as ProjectIndex<[T]>>::index(self, slice) + } +} + +// SAFETY: `get`-returned pointer has the same provenance as `slice` and the offset is checked to +// not exceed the required bound. +unsafe impl<T> ProjectIndex<[T]> for usize { + type Output = T; + + #[inline(always)] + fn get(self, slice: *mut [T]) -> Option<*mut T> { + if self >= slice.len() { + None + } else { + Some(slice.cast::<T>().wrapping_add(self)) + } + } +} + +// SAFETY: `get`-returned pointer has the same provenance as `slice` and the offset is checked to +// not exceed the required bound. +unsafe impl<T> ProjectIndex<[T]> for core::ops::Range<usize> { + type Output = [T]; + + #[inline(always)] + fn get(self, slice: *mut [T]) -> Option<*mut [T]> { + let new_len = self.end.checked_sub(self.start)?; + if self.end > slice.len() { + return None; + } + Some(core::ptr::slice_from_raw_parts_mut( + slice.cast::<T>().wrapping_add(self.start), + new_len, + )) + } +} + +// SAFETY: Safety requirement guaranteed by the forwarded impl. +unsafe impl<T> ProjectIndex<[T]> for core::ops::RangeTo<usize> { + type Output = [T]; + + #[inline(always)] + fn get(self, slice: *mut [T]) -> Option<*mut [T]> { + (0..self.end).get(slice) + } +} + +// SAFETY: Safety requirement guaranteed by the forwarded impl. 
+unsafe impl<T> ProjectIndex<[T]> for core::ops::RangeFrom<usize> { + type Output = [T]; + + #[inline(always)] + fn get(self, slice: *mut [T]) -> Option<*mut [T]> { + (self.start..slice.len()).get(slice) + } +} + +// SAFETY: `get` returns the pointer as is, so it always has the same provenance and offset of 0. +unsafe impl<T> ProjectIndex<[T]> for core::ops::RangeFull { + type Output = [T]; + + #[inline(always)] + fn get(self, slice: *mut [T]) -> Option<*mut [T]> { + Some(slice) + } +} + +/// A helper trait to perform field projection. +/// +/// This trait has a `DEREF` generic parameter so it can be implemented twice for types that +/// implement [`Deref`]. This will cause an ambiguity error and thus blocks [`Deref`] types from +/// being used as the base of a projection, as they can inject unsoundness. Users therefore must +/// not specify `DEREF` and should always leave it to be inferred. +/// +/// # Safety +/// +/// `proj` may only invoke `f` with a valid allocation, as the documentation of [`Self::proj`] +/// describes. +#[doc(hidden)] +pub unsafe trait ProjectField<const DEREF: bool> { + /// Project a pointer to a type to a pointer of a field. + /// + /// `f` may only be invoked with a valid allocation so it can safely obtain raw pointers to + /// fields using `&raw mut`. + /// + /// This is needed because `base` might not point to a valid allocation, while `&raw mut` + /// requires pointers to be in bounds of a valid allocation. + /// + /// # Safety + /// + /// `f` must return a pointer in bounds of the provided pointer. + unsafe fn proj<F>(base: *mut Self, f: impl FnOnce(*mut Self) -> *mut F) -> *mut F; +} + +// NOTE: in theory, this API should work for `T: ?Sized` and `F: ?Sized`, too. However, we cannot +// currently support that as we need to obtain a valid allocation that `&raw const` can operate on. +// +// SAFETY: `proj` invokes `f` with a valid allocation. +unsafe impl<T> ProjectField<false> for T { + #[inline(always)] + unsafe fn proj<F>(base: *mut Self, f: impl FnOnce(*mut Self) -> *mut F) -> *mut F { + // Create a valid allocation to start projection, as `base` is not necessarily so. The + // memory is never actually used, so it will be optimized out; this should work even for + // very large `T` (the `memoffset` crate also relies on this). To be extra certain, we also + // annotate the `f` closure with `#[inline(always)]` in the macro. + let mut place = MaybeUninit::uninit(); + let place_base = place.as_mut_ptr(); + let field = f(place_base); + // SAFETY: `field` is in bounds from `base` per safety requirement. + let offset = unsafe { field.byte_offset_from(place_base) }; + // Use `wrapping_byte_offset` as `base` does not need to point to a valid allocation. + base.wrapping_byte_offset(offset).cast() + } +} + +// SAFETY: Vacuously satisfied. +unsafe impl<T: Deref> ProjectField<true> for T { + #[inline(always)] + unsafe fn proj<F>(_: *mut Self, _: impl FnOnce(*mut Self) -> *mut F) -> *mut F { + build_error!("this function is a guard against `Deref` impl and is never invoked"); + } +} + +/// Create a projection from a raw pointer. +/// +/// The projected pointer is within the memory region marked by the input pointer. There is no +/// requirement that the input raw pointer be valid, so this macro may be used for +/// projecting pointers outside normal address space, e.g. I/O pointers. However, if the input +/// pointer is valid, the projected pointer is also valid. +/// +/// Supported projections include field projections and index projections. 
+/// It is not allowed to project into types that implement custom [`Deref`] or +/// [`Index`](core::ops::Index). +/// +/// The macro has the basic syntax `kernel::ptr::project!(ptr, projection)`, where `ptr` is an +/// expression that evaluates to a raw pointer which serves as the base of the projection. +/// `projection` can be a projection expression of the form `.field` (normally an identifier, or a +/// numeral in the case of tuple structs) or of the form `[index]`. +/// +/// If a mutable pointer is needed, the macro input can be prefixed with the `mut` keyword, i.e. +/// `kernel::ptr::project!(mut ptr, projection)`. By default, a const pointer is created. +/// +/// The `ptr::project!` macro can perform both fallible indexing and build-time checked indexing. +/// The `[index]` form performs build-time bounds checking; if the compiler fails to prove that +/// `[index]` is in bounds, compilation will fail. `[index]?` can be used to perform runtime +/// bounds checking; an `OutOfBound` error is raised via `?` if the index is out of bounds. +/// +/// # Examples +/// +/// Field projections are performed with `.field_name`: +/// +/// ``` +/// struct MyStruct { field: u32, } +/// let ptr: *const MyStruct = core::ptr::dangling(); +/// let field_ptr: *const u32 = kernel::ptr::project!(ptr, .field); +/// +/// struct MyTupleStruct(u32, u32); +/// +/// fn proj(ptr: *const MyTupleStruct) { +/// let field_ptr: *const u32 = kernel::ptr::project!(ptr, .1); +/// } +/// ``` +/// +/// Index projections are performed with `[index]`: +/// +/// ``` +/// fn proj(ptr: *const [u8; 32]) -> Result { +/// let field_ptr: *const u8 = kernel::ptr::project!(ptr, [1]); +/// // The following invocation, if uncommented, would fail the build. +/// // +/// // kernel::ptr::project!(ptr, [128]); +/// +/// // This will raise an `OutOfBound` error (which is convertible to `ERANGE`). +/// kernel::ptr::project!(ptr, [128]?); +/// Ok(()) +/// } +/// ``` +/// +/// If you need to match on the error instead of propagating it, put the invocation inside a +/// closure: +/// +/// ``` +/// let ptr: *const [u8; 32] = core::ptr::dangling(); +/// let field_ptr: Result<*const u8> = (|| -> Result<_> { +/// Ok(kernel::ptr::project!(ptr, [128]?)) +/// })(); +/// assert!(field_ptr.is_err()); +/// ``` +/// +/// For mutable pointers, put `mut` as the first token in the macro invocation. +/// +/// ``` +/// let ptr: *mut [(u8, u16); 32] = core::ptr::dangling_mut(); +/// let field_ptr: *mut u16 = kernel::ptr::project!(mut ptr, [1].1); +/// ``` +#[macro_export] +macro_rules! project_pointer { + (@gen $ptr:ident, ) => {}; + // Field projection. `$field` needs to be `tt` to support tuple index like `.0`. + (@gen $ptr:ident, .$field:tt $($rest:tt)*) => { + // SAFETY: The provided closure always returns an in-bounds pointer. + let $ptr = unsafe { + $crate::ptr::projection::ProjectField::proj($ptr, #[inline(always)] |ptr| { + // Check unaligned field. Not all users (e.g. DMA) can handle unaligned + // projections. + if false { + let _ = &(*ptr).$field; + } + // SAFETY: `$field` is in bounds, and no implicit `Deref` is possible (if the + // type implements `Deref`, Rust cannot infer the generic parameter `DEREF`). + &raw mut (*ptr).$field + }) + }; + $crate::ptr::project!(@gen $ptr, $($rest)*) + }; + // Fallible index projection. + (@gen $ptr:ident, [$index:expr]? $($rest:tt)*) => { + let $ptr = $crate::ptr::projection::ProjectIndex::get($index, $ptr) + .ok_or($crate::ptr::projection::OutOfBound)?; + $crate::ptr::project!(@gen $ptr, $($rest)*) + }; + // Build-time checked index projection. 
+ (@gen $ptr:ident, [$index:expr] $($rest:tt)*) => { + let $ptr = $crate::ptr::projection::ProjectIndex::index($index, $ptr); + $crate::ptr::project!(@gen $ptr, $($rest)*) + }; + (mut $ptr:expr, $($proj:tt)*) => {{ + let ptr: *mut _ = $ptr; + $crate::ptr::project!(@gen ptr, $($proj)*); + ptr + }}; + ($ptr:expr, $($proj:tt)*) => {{ + let ptr = <*const _>::cast_mut($ptr); + // We currently always project using a mutable pointer, as it has not been decided whether + // `&raw const` allows the resulting pointer to be mutated (see the documentation of + // `addr_of!`). + $crate::ptr::project!(@gen ptr, $($proj)*); + ptr.cast_const() + }}; +}
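The subtle part of `ProjectField` above is computing a field offset without ever having a valid object; it is the same trick the `memoffset` crate uses. Stripped of the macro machinery, it looks roughly like this (a sketch, not the kernel API):

    use core::mem::MaybeUninit;

    struct Pair {
        a: u8,
        b: u32,
    }

    /// Compute the offset of `Pair::b` without having a valid `Pair`.
    fn offset_of_b() -> isize {
        // An uninitialized stack slot gives `&raw mut` the in-bounds
        // allocation it requires; the memory is never read or written.
        let mut place = MaybeUninit::<Pair>::uninit();
        let base = place.as_mut_ptr();
        let field = unsafe { &raw mut (*base).b };
        // SAFETY: `field` and `base` lie within the same allocation.
        unsafe { field.byte_offset_from(base) }
    }

    /// Apply that offset to an arbitrary base, e.g. an I/O pointer, using
    /// wrapping arithmetic since the base need not be a real allocation.
    fn project_b(base: *mut Pair) -> *mut u32 {
        base.wrapping_byte_offset(offset_of_b()).cast()
    }

The `if false { let _ = &(*ptr).$field; }` line in the macro then layers a compile-time alignment check on top (taking a reference to an unaligned field is rejected by the compiler), since consumers such as DMA cannot handle unaligned projections.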
diff --git a/rust/kernel/regulator.rs b/rust/kernel/regulator.rs index 4f7837c..41e730c 100644 --- a/rust/kernel/regulator.rs +++ b/rust/kernel/regulator.rs
@@ -23,7 +23,10 @@ prelude::*, }; -use core::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull}; +use core::{ + marker::PhantomData, + mem::ManuallyDrop, // +}; mod private { pub trait Sealed {} @@ -229,15 +232,17 @@ pub fn devm_enable_optional(dev: &Device<Bound>, name: &CStr) -> Result { /// /// # Invariants /// -/// - `inner` is a non-null wrapper over a pointer to a `struct -/// regulator` obtained from [`regulator_get()`]. +/// - `inner` is a pointer obtained from a successful call to +/// [`regulator_get()`]. It is treated as an opaque token that may only be +/// accessed using C API methods (e.g., it may be `NULL` if the C API returns +/// `NULL`). /// /// [`regulator_get()`]: https://docs.kernel.org/driver-api/regulator.html#c.regulator_get pub struct Regulator<State> where State: RegulatorState, { - inner: NonNull<bindings::regulator>, + inner: *mut bindings::regulator, _phantom: PhantomData<State>, } @@ -249,7 +254,7 @@ pub fn set_voltage(&self, min_voltage: Voltage, max_voltage: Voltage) -> Result // SAFETY: Safe as per the type invariants of `Regulator`. to_result(unsafe { bindings::regulator_set_voltage( - self.inner.as_ptr(), + self.inner, min_voltage.as_microvolts(), max_voltage.as_microvolts(), ) @@ -259,7 +264,7 @@ pub fn set_voltage(&self, min_voltage: Voltage, max_voltage: Voltage) -> Result /// Gets the current voltage of the regulator. pub fn get_voltage(&self) -> Result<Voltage> { // SAFETY: Safe as per the type invariants of `Regulator`. - let voltage = unsafe { bindings::regulator_get_voltage(self.inner.as_ptr()) }; + let voltage = unsafe { bindings::regulator_get_voltage(self.inner) }; to_result(voltage).map(|()| Voltage::from_microvolts(voltage)) } @@ -270,10 +275,8 @@ fn get_internal(dev: &Device, name: &CStr) -> Result<Regulator<T>> { // received from the C code. from_err_ptr(unsafe { bindings::regulator_get(dev.as_raw(), name.as_char_ptr()) })?; - // SAFETY: We can safely trust `inner` to be a pointer to a valid - // regulator if `ERR_PTR` was not returned. - let inner = unsafe { NonNull::new_unchecked(inner) }; - + // INVARIANT: `inner` is a pointer obtained from `regulator_get()`, and + // the call was successful. Ok(Self { inner, _phantom: PhantomData, @@ -282,12 +285,12 @@ fn get_internal(dev: &Device, name: &CStr) -> Result<Regulator<T>> { fn enable_internal(&self) -> Result { // SAFETY: Safe as per the type invariants of `Regulator`. - to_result(unsafe { bindings::regulator_enable(self.inner.as_ptr()) }) + to_result(unsafe { bindings::regulator_enable(self.inner) }) } fn disable_internal(&self) -> Result { // SAFETY: Safe as per the type invariants of `Regulator`. - to_result(unsafe { bindings::regulator_disable(self.inner.as_ptr()) }) + to_result(unsafe { bindings::regulator_disable(self.inner) }) } } @@ -349,7 +352,7 @@ impl<T: IsEnabled> Regulator<T> { /// Checks if the regulator is enabled. pub fn is_enabled(&self) -> bool { // SAFETY: Safe as per the type invariants of `Regulator`. - unsafe { bindings::regulator_is_enabled(self.inner.as_ptr()) != 0 } + unsafe { bindings::regulator_is_enabled(self.inner) != 0 } } } @@ -359,11 +362,11 @@ fn drop(&mut self) { // SAFETY: By the type invariants, we know that `self` owns a // reference on the enabled refcount, so it is safe to relinquish it // now. - unsafe { bindings::regulator_disable(self.inner.as_ptr()) }; + unsafe { bindings::regulator_disable(self.inner) }; } // SAFETY: By the type invariants, we know that `self` owns a reference, // so it is safe to relinquish it now. 
- unsafe { bindings::regulator_put(self.inner.as_ptr()) }; + unsafe { bindings::regulator_put(self.inner) }; } }
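The shape of this change, reduced to its pattern: when the C API itself tolerates `NULL`, a `NonNull` wrapper over-promises, and a plain raw pointer plus a documented type invariant is the more honest encoding. A distilled sketch with made-up names (`CWidget` and `c_widget_get` are illustrative, not real bindings):

    enum CWidget {}

    // Stub standing in for a C constructor that may return NULL.
    fn c_widget_get() -> *mut CWidget {
        core::ptr::null_mut()
    }

    struct Widget {
        // INVARIANT: `inner` was returned by a successful `c_widget_get()`.
        // It is an opaque token: possibly NULL, never dereferenced on the
        // Rust side, only ever handed back to the C API.
        inner: *mut CWidget,
    }

    impl Widget {
        fn get() -> Self {
            let inner = c_widget_get();
            // INVARIANT: `inner` just came from `c_widget_get()`.
            Widget { inner }
        }
    }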
diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index fa87779..3f891876 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs
@@ -664,13 +664,13 @@ fn write_str(&mut self, s: &str) -> fmt::Result { /// /// * The first byte of `buffer` is always zero. /// * The length of `buffer` is at least 1. -pub(crate) struct NullTerminatedFormatter<'a> { +pub struct NullTerminatedFormatter<'a> { buffer: &'a mut [u8], } impl<'a> NullTerminatedFormatter<'a> { /// Create a new [`Self`] instance. - pub(crate) fn new(buffer: &'a mut [u8]) -> Option<NullTerminatedFormatter<'a>> { + pub fn new(buffer: &'a mut [u8]) -> Option<NullTerminatedFormatter<'a>> { *(buffer.first_mut()?) = 0; // INVARIANT:
diff --git a/rust/pin-init/internal/src/init.rs b/rust/pin-init/internal/src/init.rs index 42936f9..2fe918f 100644 --- a/rust/pin-init/internal/src/init.rs +++ b/rust/pin-init/internal/src/init.rs
@@ -62,7 +62,6 @@ fn ident(&self) -> Option<&Ident> { enum InitializerAttribute { DefaultError(DefaultErrorAttribute), - DisableInitializedFieldAccess, } struct DefaultErrorAttribute { @@ -86,6 +85,7 @@ pub(crate) fn expand( let error = error.map_or_else( || { if let Some(default_error) = attrs.iter().fold(None, |acc, attr| { + #[expect(irrefutable_let_patterns)] if let InitializerAttribute::DefaultError(DefaultErrorAttribute { ty }) = attr { Some(ty.clone()) } else { @@ -145,22 +145,9 @@ fn assert_zeroable<T: ?::core::marker::Sized>(_: *mut T) }; // `mixed_site` ensures that the data is not accessible to the user-controlled code. let data = Ident::new("__data", Span::mixed_site()); - let init_fields = init_fields( - &fields, - pinned, - !attrs - .iter() - .any(|attr| matches!(attr, InitializerAttribute::DisableInitializedFieldAccess)), - &data, - &slot, - ); + let init_fields = init_fields(&fields, pinned, &data, &slot); let field_check = make_field_check(&fields, init_kind, &path); Ok(quote! {{ - // We do not want to allow arbitrary returns, so we declare this type as the `Ok` return - // type and shadow it later when we insert the arbitrary user code. That way there will be - // no possibility of returning without `unsafe`. - struct __InitOk; - // Get the data about fields from the supplied type. // SAFETY: TODO let #data = unsafe { @@ -170,18 +157,15 @@ fn assert_zeroable<T: ?::core::marker::Sized>(_: *mut T) #path::#get_data() }; // Ensure that `#data` really is of type `#data` and help with type inference: - let init = ::pin_init::__internal::#data_trait::make_closure::<_, __InitOk, #error>( + let init = ::pin_init::__internal::#data_trait::make_closure::<_, #error>( #data, move |slot| { - { - // Shadow the structure so it cannot be used to return early. - struct __InitOk; - #zeroable_check - #this - #init_fields - #field_check - } - Ok(__InitOk) + #zeroable_check + #this + #init_fields + #field_check + // SAFETY: we are the `init!` macro that is allowed to call this. + Ok(unsafe { ::pin_init::__internal::InitOk::new() }) } ); let init = move |slot| -> ::core::result::Result<(), #error> { @@ -236,7 +220,6 @@ fn get_init_kind(rest: Option<(Token![..], Expr)>, dcx: &mut DiagCtxt) -> InitKi fn init_fields( fields: &Punctuated<InitializerField, Token![,]>, pinned: bool, - generate_initialized_accessors: bool, data: &Ident, slot: &Ident, ) -> TokenStream { @@ -260,6 +243,10 @@ fn init_fields( }); // Again span for better diagnostics let write = quote_spanned!(ident.span()=> ::core::ptr::write); + // NOTE: the field accessor ensures that the initialized field is properly aligned. + // Unaligned fields will cause the compiler to emit E0793. We do not support + // unaligned fields since `Init::__init` requires an aligned pointer; the call to + // `ptr::write` below has the same requirement. let accessor = if pinned { let project_ident = format_ident!("__project_{ident}"); quote! { @@ -272,13 +259,6 @@ fn init_fields( unsafe { &mut (*#slot).#ident } } }; - let accessor = generate_initialized_accessors.then(|| { - quote! { - #(#cfgs)* - #[allow(unused_variables)] - let #ident = #accessor; - } - }); quote! { #(#attrs)* { @@ -286,12 +266,18 @@ fn init_fields( // SAFETY: TODO unsafe { #write(::core::ptr::addr_of_mut!((*#slot).#ident), #value_ident) }; } - #accessor + #(#cfgs)* + #[allow(unused_variables)] + let #ident = #accessor; } } InitializerKind::Init { ident, value, .. 
} => { // Again span for better diagnostics let init = format_ident!("init", span = value.span()); + // NOTE: the field accessor ensures that the initialized field is properly aligned. + // Unaligned fields will cause the compiler to emit E0793. We do not support + // unaligned fields since `Init::__init` requires an aligned pointer; the call to + // `ptr::write` below has the same requirement. let (value_init, accessor) = if pinned { let project_ident = format_ident!("__project_{ident}"); ( @@ -326,20 +312,15 @@ fn init_fields( }, ) }; - let accessor = generate_initialized_accessors.then(|| { - quote! { - #(#cfgs)* - #[allow(unused_variables)] - let #ident = #accessor; - } - }); quote! { #(#attrs)* { let #init = #value; #value_init } - #accessor + #(#cfgs)* + #[allow(unused_variables)] + let #ident = #accessor; } } InitializerKind::Code { block: value, .. } => quote! { @@ -466,10 +447,6 @@ fn parse(input: syn::parse::ParseStream<'_>) -> syn::Result<Self> { if a.path().is_ident("default_error") { a.parse_args::<DefaultErrorAttribute>() .map(InitializerAttribute::DefaultError) - } else if a.path().is_ident("disable_initialized_field_access") { - a.meta - .require_path_only() - .map(|_| InitializerAttribute::DisableInitializedFieldAccess) } else { Err(syn::Error::new_spanned(a, "unknown initializer attribute")) }
diff --git a/rust/pin-init/src/__internal.rs b/rust/pin-init/src/__internal.rs index 90f18e9..90adbdc 100644 --- a/rust/pin-init/src/__internal.rs +++ b/rust/pin-init/src/__internal.rs
@@ -46,6 +46,24 @@ unsafe fn __pinned_init(self, slot: *mut T) -> Result<(), E> { } } +/// Token type to signify successful initialization. +/// +/// Can only be constructed via the unsafe [`Self::new`] function. The initializer macros use this +/// token type to prevent returning `Ok` from an initializer without initializing all fields. +pub struct InitOk(()); + +impl InitOk { + /// Creates a new token. + /// + /// # Safety + /// + /// This function may only be called from the `init!` macro in `../internal/src/init.rs`. + #[inline(always)] + pub unsafe fn new() -> Self { + Self(()) + } +} + /// This trait is only implemented via the `#[pin_data]` proc-macro. It is used to facilitate /// the pin projections within the initializers. /// @@ -68,9 +86,10 @@ pub unsafe trait PinData: Copy { type Datee: ?Sized + HasPinData; /// Type inference helper function. - fn make_closure<F, O, E>(self, f: F) -> F + #[inline(always)] + fn make_closure<F, E>(self, f: F) -> F where - F: FnOnce(*mut Self::Datee) -> Result<O, E>, + F: FnOnce(*mut Self::Datee) -> Result<InitOk, E>, { f } @@ -98,9 +117,10 @@ pub unsafe trait InitData: Copy { type Datee: ?Sized + HasInitData; /// Type inference helper function. - fn make_closure<F, O, E>(self, f: F) -> F + #[inline(always)] + fn make_closure<F, E>(self, f: F) -> F where - F: FnOnce(*mut Self::Datee) -> Result<O, E>, + F: FnOnce(*mut Self::Datee) -> Result<InitOk, E>, { f }
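The token is an instance of a reusable pattern: make the only way to produce the `Ok` value a privileged constructor, and arbitrary user code inside the closure can no longer return `Ok` early, while `Err` and `?` keep working. Distilled outside pin-init (all names illustrative):

    mod token {
        /// Proof of completion; the private unit field makes it
        /// unconstructible outside this module except via `new`.
        pub struct Done(());

        impl Done {
            /// # Safety
            ///
            /// Only call once the protected steps have actually run.
            pub unsafe fn new() -> Self {
                Done(())
            }
        }
    }

    fn run_init(f: impl FnOnce() -> Result<token::Done, i32>) -> Result<(), i32> {
        f().map(|_| ())
    }

Inside a closure passed to `run_init`, a bare `return Ok(...)` needs a `token::Done`, which safe code cannot mint; this is exactly what lets the `init!` expansion above drop the earlier `__InitOk` shadowing trick.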
diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c index e7af02f..9f21088 100644 --- a/samples/landlock/sandboxer.c +++ b/samples/landlock/sandboxer.c
@@ -299,7 +299,7 @@ static bool check_ruleset_scope(const char *const env_var, /* clang-format on */ -#define LANDLOCK_ABI_LAST 7 +#define LANDLOCK_ABI_LAST 8 #define XSTR(s) #s #define STR(s) XSTR(s) @@ -436,7 +436,8 @@ int main(const int argc, char *const argv[], char *const *const envp) /* Removes LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON for ABI < 7 */ supported_restrict_flags &= ~LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; - + __attribute__((fallthrough)); + case 7: /* Must be printed for any ABI < LANDLOCK_ABI_LAST. */ fprintf(stderr, "Hint: You should update the running kernel "
diff --git a/samples/rust/rust_dma.rs b/samples/rust/rust_dma.rs index 9c45851..ce39b55 100644 --- a/samples/rust/rust_dma.rs +++ b/samples/rust/rust_dma.rs
@@ -68,7 +68,7 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, E CoherentAllocation::alloc_coherent(pdev.as_ref(), TEST_VALUES.len(), GFP_KERNEL)?; for (i, value) in TEST_VALUES.into_iter().enumerate() { - kernel::dma_write!(ca[i] = MyStruct::new(value.0, value.1))?; + kernel::dma_write!(ca, [i]?, MyStruct::new(value.0, value.1)); } let size = 4 * page::PAGE_SIZE; @@ -85,24 +85,26 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, E } } +impl DmaSampleDriver { + fn check_dma(&self) -> Result { + for (i, value) in TEST_VALUES.into_iter().enumerate() { + let val0 = kernel::dma_read!(self.ca, [i]?.h); + let val1 = kernel::dma_read!(self.ca, [i]?.b); + + assert_eq!(val0, value.0); + assert_eq!(val1, value.1); + } + + Ok(()) + } +} + #[pinned_drop] impl PinnedDrop for DmaSampleDriver { fn drop(self: Pin<&mut Self>) { dev_info!(self.pdev, "Unload DMA test driver.\n"); - for (i, value) in TEST_VALUES.into_iter().enumerate() { - let val0 = kernel::dma_read!(self.ca[i].h); - let val1 = kernel::dma_read!(self.ca[i].b); - assert!(val0.is_ok()); - assert!(val1.is_ok()); - - if let Ok(val0) = val0 { - assert_eq!(val0, value.0); - } - if let Ok(val1) = val1 { - assert_eq!(val1, value.1); - } - } + assert!(self.check_dma().is_ok()); for (i, entry) in self.sgt.iter().enumerate() { dev_info!(
diff --git a/samples/workqueue/stall_detector/Makefile b/samples/workqueue/stall_detector/Makefile new file mode 100644 index 0000000..8849e85 --- /dev/null +++ b/samples/workqueue/stall_detector/Makefile
@@ -0,0 +1 @@ +obj-m += wq_stall.o
diff --git a/samples/workqueue/stall_detector/wq_stall.c b/samples/workqueue/stall_detector/wq_stall.c new file mode 100644 index 0000000..6f4a497 --- /dev/null +++ b/samples/workqueue/stall_detector/wq_stall.c
@@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * wq_stall - Test module for the workqueue stall detector. + * + * Deliberately creates a workqueue stall so the watchdog fires and + * prints diagnostic output. Useful for verifying that the stall + * detector correctly identifies stuck workers and produces useful + * backtraces. + * + * The stall is triggered by clearing PF_WQ_WORKER before sleeping, + * which hides the worker from the concurrency manager. A second + * work item queued on the same pool then sits in the worklist with + * no worker available to process it. + * + * After ~30s the workqueue watchdog fires: + * BUG: workqueue lockup - pool cpus=N ... + * + * Build: + * make -C <kernel tree> M=samples/workqueue/stall_detector modules + * + * Copyright (c) 2026 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2026 Breno Leitao <leitao@debian.org> + */ + +#include <linux/module.h> +#include <linux/workqueue.h> +#include <linux/wait.h> +#include <linux/atomic.h> +#include <linux/sched.h> + +static DECLARE_WAIT_QUEUE_HEAD(stall_wq_head); +static atomic_t wake_condition = ATOMIC_INIT(0); +static struct work_struct stall_work1; +static struct work_struct stall_work2; + +static void stall_work2_fn(struct work_struct *work) +{ + pr_info("wq_stall: second work item finally ran\n"); +} + +static void stall_work1_fn(struct work_struct *work) +{ + pr_info("wq_stall: first work item running on cpu %d\n", + raw_smp_processor_id()); + + /* + * Queue second item while we're still counted as running + * (pool->nr_running > 0). Since schedule_work() on a per-CPU + * workqueue targets raw_smp_processor_id(), item 2 lands on the + * same pool. __queue_work -> kick_pool -> need_more_worker() + * sees nr_running > 0 and does NOT wake a new worker. + */ + schedule_work(&stall_work2); + + /* + * Hide from the workqueue concurrency manager. Without + * PF_WQ_WORKER, schedule() won't call wq_worker_sleeping(), + * so nr_running is never decremented and no replacement + * worker is created. Item 2 stays stuck in pool->worklist. + */ + current->flags &= ~PF_WQ_WORKER; + + pr_info("wq_stall: entering wait_event_idle (PF_WQ_WORKER cleared)\n"); + pr_info("wq_stall: expect 'BUG: workqueue lockup' in ~30-60s\n"); + wait_event_idle(stall_wq_head, atomic_read(&wake_condition) != 0); + + /* Restore so process_one_work() cleanup works correctly */ + current->flags |= PF_WQ_WORKER; + pr_info("wq_stall: woke up, PF_WQ_WORKER restored\n"); +} + +static int __init wq_stall_init(void) +{ + pr_info("wq_stall: loading\n"); + + INIT_WORK(&stall_work1, stall_work1_fn); + INIT_WORK(&stall_work2, stall_work2_fn); + schedule_work(&stall_work1); + + return 0; +} + +static void __exit wq_stall_exit(void) +{ + pr_info("wq_stall: unloading\n"); + atomic_set(&wake_condition, 1); + wake_up(&stall_wq_head); + flush_work(&stall_work1); + flush_work(&stall_work2); + pr_info("wq_stall: all work flushed, module unloaded\n"); +} + +module_init(wq_stall_init); +module_exit(wq_stall_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Reproduce workqueue stall caused by PF_WQ_WORKER misuse"); +MODULE_AUTHOR("Breno Leitao <leitao@debian.org>");
diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 32e209b..3652b85 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build
@@ -310,16 +310,18 @@ # The features in this list are the ones allowed for non-`rust/` code. # +# - Stable since Rust 1.79.0: `feature(slice_ptr_len)`. # - Stable since Rust 1.81.0: `feature(lint_reasons)`. # - Stable since Rust 1.82.0: `feature(asm_const)`, # `feature(offset_of_nested)`, `feature(raw_ref_op)`. +# - Stable since Rust 1.84.0: `feature(strict_provenance)`. # - Stable since Rust 1.87.0: `feature(asm_goto)`. # - Expected to become stable: `feature(arbitrary_self_types)`. # - To be determined: `feature(used_with_arg)`. # # Please see https://github.com/Rust-for-Linux/linux/issues/2 for details on # the unstable features in use. -rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,lint_reasons,offset_of_nested,raw_ref_op,used_with_arg +rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,lint_reasons,offset_of_nested,raw_ref_op,slice_ptr_len,strict_provenance,used_with_arg # `--out-dir` is required to avoid temporaries being created by `rustc` in the # current working directory, which may be not accessible in the out-of-tree
diff --git a/scripts/coccinelle/api/kmalloc_objs.cocci b/scripts/coccinelle/api/kmalloc_objs.cocci index db12b7b..e9a415b 100644 --- a/scripts/coccinelle/api/kmalloc_objs.cocci +++ b/scripts/coccinelle/api/kmalloc_objs.cocci
@@ -122,3 +122,14 @@ - ALLOC(struct_size_t(TYPE, FLEX, COUNT), GFP) + ALLOC_FLEX(TYPE, FLEX, COUNT, GFP) ) + +@drop_gfp_kernel depends on patch && !(file in "tools") && !(file in "samples")@ +identifier ALLOC = {kmalloc_obj,kmalloc_objs,kmalloc_flex, + kzalloc_obj,kzalloc_objs,kzalloc_flex, + kvmalloc_obj,kvmalloc_objs,kvmalloc_flex, + kvzalloc_obj,kvzalloc_objs,kvzalloc_flex}; +@@ + + ALLOC(... +- , GFP_KERNEL + )
diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y index efdcf07..cabcd14 100644 --- a/scripts/genksyms/parse.y +++ b/scripts/genksyms/parse.y
@@ -325,8 +325,8 @@ { $$ = $4; } | direct_declarator BRACKET_PHRASE { $$ = $2; } - | '(' declarator ')' - { $$ = $3; } + | '(' attribute_opt declarator ')' + { $$ = $4; } ; /* Nested declarators differ from regular declarators in that they do
diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh index 735e1de..f08e086 100755 --- a/scripts/kconfig/merge_config.sh +++ b/scripts/kconfig/merge_config.sh
@@ -151,6 +151,7 @@ if ! "$AWK" -v prefix="$CONFIG_PREFIX" \ -v warnoverride="$WARNOVERRIDE" \ -v strict="$STRICT" \ + -v outfile="$TMP_FILE.new" \ -v builtin="$BUILTIN" \ -v warnredun="$WARNREDUN" ' BEGIN { @@ -195,7 +196,7 @@ # First pass: read merge file, store all lines and index FILENAME == ARGV[1] { - mergefile = FILENAME + mergefile = FILENAME merge_lines[FNR] = $0 merge_total = FNR cfg = get_cfg($0) @@ -212,17 +213,17 @@ # Not a config or not in merge file - keep it if (cfg == "" || !(cfg in merge_cfg)) { - print $0 >> ARGV[3] + print $0 >> outfile next } - prev_val = $0 + prev_val = $0 new_val = merge_cfg[cfg] # BUILTIN: do not demote y to m if (builtin == "true" && new_val ~ /=m$/ && prev_val ~ /=y$/) { warn_builtin(cfg, prev_val, new_val) - print $0 >> ARGV[3] + print $0 >> outfile skip_merge[merge_cfg_line[cfg]] = 1 next } @@ -235,7 +236,7 @@ # "=n" is the same as "is not set" if (prev_val ~ /=n$/ && new_val ~ / is not set$/) { - print $0 >> ARGV[3] + print $0 >> outfile next } @@ -246,25 +247,20 @@ } } - # output file, skip all lines - FILENAME == ARGV[3] { - nextfile - } - END { # Newline in case base file lacks trailing newline - print "" >> ARGV[3] + print "" >> outfile # Append merge file, skipping lines marked for builtin preservation for (i = 1; i <= merge_total; i++) { if (!(i in skip_merge)) { - print merge_lines[i] >> ARGV[3] + print merge_lines[i] >> outfile } } if (strict_violated) { exit 1 } }' \ - "$ORIG_MERGE_FILE" "$TMP_FILE" "$TMP_FILE.new"; then + "$ORIG_MERGE_FILE" "$TMP_FILE"; then # awk exited non-zero, strict mode was violated STRICT_MODE_VIOLATED=true fi @@ -381,7 +377,7 @@ STRICT_MODE_VIOLATED=true fi -if [ "$STRICT" == "true" ] && [ "$STRICT_MODE_VIOLATED" == "true" ]; then +if [ "$STRICT" = "true" ] && [ "$STRICT_MODE_VIOLATED" = "true" ]; then echo "Requested and effective config differ" exit 1 fi
diff --git a/scripts/livepatch/klp-build b/scripts/livepatch/klp-build index 809e198..7b82c75 100755 --- a/scripts/livepatch/klp-build +++ b/scripts/livepatch/klp-build
@@ -285,15 +285,14 @@ # application from appending it with '+' due to a dirty git working tree. set_kernelversion() { local file="$SRC/scripts/setlocalversion" - local localversion + local kernelrelease stash_file "$file" - localversion="$(cd "$SRC" && make --no-print-directory kernelversion)" - localversion="$(cd "$SRC" && KERNELVERSION="$localversion" ./scripts/setlocalversion)" - [[ -z "$localversion" ]] && die "setlocalversion failed" + kernelrelease="$(cd "$SRC" && make syncconfig &>/dev/null && make -s kernelrelease)" + [[ -z "$kernelrelease" ]] && die "failed to get kernel version" - sed -i "2i echo $localversion; exit 0" scripts/setlocalversion + sed -i "2i echo $kernelrelease; exit 0" scripts/setlocalversion } get_patch_files() {
diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index 2576cf7..f12e1ff 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build
@@ -32,6 +32,10 @@ echo tools/objtool/objtool fi + if is_enabled CONFIG_DEBUG_INFO_BTF_MODULES; then + echo tools/bpf/resolve_btfids/resolve_btfids + fi + echo Module.symvers echo "arch/${SRCARCH}/include/generated" echo include/config/auto.conf
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 2f84bd2..242c71b 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c
@@ -32,6 +32,7 @@ #include "include/crypto.h" #include "include/ipc.h" #include "include/label.h" +#include "include/lib.h" #include "include/policy.h" #include "include/policy_ns.h" #include "include/resource.h" @@ -62,6 +63,7 @@ * securityfs and apparmorfs filesystems. */ +#define IREF_POISON 101 /* * support fns @@ -79,7 +81,7 @@ static void rawdata_f_data_free(struct rawdata_f_data *private) if (!private) return; - aa_put_loaddata(private->loaddata); + aa_put_i_loaddata(private->loaddata); kvfree(private); } @@ -153,6 +155,71 @@ static int aafs_show_path(struct seq_file *seq, struct dentry *dentry) return 0; } +static struct aa_ns *get_ns_common_ref(struct aa_common_ref *ref) +{ + if (ref) { + struct aa_label *reflabel = container_of(ref, struct aa_label, + count); + return aa_get_ns(labels_ns(reflabel)); + } + + return NULL; +} + +static struct aa_proxy *get_proxy_common_ref(struct aa_common_ref *ref) +{ + if (ref) + return aa_get_proxy(container_of(ref, struct aa_proxy, count)); + + return NULL; +} + +static struct aa_loaddata *get_loaddata_common_ref(struct aa_common_ref *ref) +{ + if (ref) + return aa_get_i_loaddata(container_of(ref, struct aa_loaddata, + count)); + return NULL; +} + +static void aa_put_common_ref(struct aa_common_ref *ref) +{ + if (!ref) + return; + + switch (ref->reftype) { + case REF_RAWDATA: + aa_put_i_loaddata(container_of(ref, struct aa_loaddata, + count)); + break; + case REF_PROXY: + aa_put_proxy(container_of(ref, struct aa_proxy, + count)); + break; + case REF_NS: + /* ns count is held on its unconfined label */ + aa_put_ns(labels_ns(container_of(ref, struct aa_label, count))); + break; + default: + AA_BUG(true, "unknown refcount type"); + break; + } +} + +static void aa_get_common_ref(struct aa_common_ref *ref) +{ + kref_get(&ref->count); +} + +static void aafs_evict(struct inode *inode) +{ + struct aa_common_ref *ref = inode->i_private; + + clear_inode(inode); + aa_put_common_ref(ref); + inode->i_private = (void *) IREF_POISON; +} + static void aafs_free_inode(struct inode *inode) { if (S_ISLNK(inode->i_mode)) @@ -162,6 +229,7 @@ static void aafs_free_inode(struct inode *inode) static const struct super_operations aafs_super_ops = { .statfs = simple_statfs, + .evict_inode = aafs_evict, .free_inode = aafs_free_inode, .show_path = aafs_show_path, }; @@ -262,7 +330,8 @@ static int __aafs_setup_d_inode(struct inode *dir, struct dentry *dentry, * aafs_remove(). Will return ERR_PTR on failure. 
*/ static struct dentry *aafs_create(const char *name, umode_t mode, - struct dentry *parent, void *data, void *link, + struct dentry *parent, + struct aa_common_ref *data, void *link, const struct file_operations *fops, const struct inode_operations *iops) { @@ -299,6 +368,9 @@ static struct dentry *aafs_create(const char *name, umode_t mode, goto fail_dentry; inode_unlock(dir); + if (data) + aa_get_common_ref(data); + return dentry; fail_dentry: @@ -323,7 +395,8 @@ static struct dentry *aafs_create(const char *name, umode_t mode, * see aafs_create */ static struct dentry *aafs_create_file(const char *name, umode_t mode, - struct dentry *parent, void *data, + struct dentry *parent, + struct aa_common_ref *data, const struct file_operations *fops) { return aafs_create(name, mode, parent, data, NULL, fops, NULL); @@ -409,7 +482,8 @@ static struct aa_loaddata *aa_simple_write_to_buffer(const char __user *userbuf, data->size = copy_size; if (copy_from_user(data->data, userbuf, copy_size)) { - aa_put_loaddata(data); + /* trigger free - don't need to put pcount */ + aa_put_i_loaddata(data); return ERR_PTR(-EFAULT); } @@ -417,7 +491,8 @@ static struct aa_loaddata *aa_simple_write_to_buffer(const char __user *userbuf, } static ssize_t policy_update(u32 mask, const char __user *buf, size_t size, - loff_t *pos, struct aa_ns *ns) + loff_t *pos, struct aa_ns *ns, + const struct cred *ocred) { struct aa_loaddata *data; struct aa_label *label; @@ -428,7 +503,7 @@ static ssize_t policy_update(u32 mask, const char __user *buf, size_t size, /* high level check about policy management - fine grained in * below after unpack */ - error = aa_may_manage_policy(current_cred(), label, ns, mask); + error = aa_may_manage_policy(current_cred(), label, ns, ocred, mask); if (error) goto end_section; @@ -436,7 +511,10 @@ static ssize_t policy_update(u32 mask, const char __user *buf, size_t size, error = PTR_ERR(data); if (!IS_ERR(data)) { error = aa_replace_profiles(ns, label, mask, data); - aa_put_loaddata(data); + /* put pcount, which will put count and free if no + * profiles referencing it. 
+ */ + aa_put_profile_loaddata(data); } end_section: end_current_label_crit_section(label); @@ -448,8 +526,9 @@ static ssize_t policy_update(u32 mask, const char __user *buf, size_t size, static ssize_t profile_load(struct file *f, const char __user *buf, size_t size, loff_t *pos) { - struct aa_ns *ns = aa_get_ns(f->f_inode->i_private); - int error = policy_update(AA_MAY_LOAD_POLICY, buf, size, pos, ns); + struct aa_ns *ns = get_ns_common_ref(f->f_inode->i_private); + int error = policy_update(AA_MAY_LOAD_POLICY, buf, size, pos, ns, + f->f_cred); aa_put_ns(ns); @@ -465,9 +544,9 @@ static const struct file_operations aa_fs_profile_load = { static ssize_t profile_replace(struct file *f, const char __user *buf, size_t size, loff_t *pos) { - struct aa_ns *ns = aa_get_ns(f->f_inode->i_private); + struct aa_ns *ns = get_ns_common_ref(f->f_inode->i_private); int error = policy_update(AA_MAY_LOAD_POLICY | AA_MAY_REPLACE_POLICY, - buf, size, pos, ns); + buf, size, pos, ns, f->f_cred); aa_put_ns(ns); return error; @@ -485,14 +564,14 @@ static ssize_t profile_remove(struct file *f, const char __user *buf, struct aa_loaddata *data; struct aa_label *label; ssize_t error; - struct aa_ns *ns = aa_get_ns(f->f_inode->i_private); + struct aa_ns *ns = get_ns_common_ref(f->f_inode->i_private); label = begin_current_label_crit_section(); /* high level check about policy management - fine grained in * below after unpack */ error = aa_may_manage_policy(current_cred(), label, ns, - AA_MAY_REMOVE_POLICY); + f->f_cred, AA_MAY_REMOVE_POLICY); if (error) goto out; @@ -506,7 +585,7 @@ static ssize_t profile_remove(struct file *f, const char __user *buf, if (!IS_ERR(data)) { data->data[size] = 0; error = aa_remove_profiles(ns, label, data->data, size); - aa_put_loaddata(data); + aa_put_profile_loaddata(data); } out: end_current_label_crit_section(label); @@ -575,7 +654,7 @@ static int ns_revision_open(struct inode *inode, struct file *file) if (!rev) return -ENOMEM; - rev->ns = aa_get_ns(inode->i_private); + rev->ns = get_ns_common_ref(inode->i_private); if (!rev->ns) rev->ns = aa_get_current_ns(); file->private_data = rev; @@ -1061,7 +1140,7 @@ static const struct file_operations seq_profile_ ##NAME ##_fops = { \ static int seq_profile_open(struct inode *inode, struct file *file, int (*show)(struct seq_file *, void *)) { - struct aa_proxy *proxy = aa_get_proxy(inode->i_private); + struct aa_proxy *proxy = get_proxy_common_ref(inode->i_private); int error = single_open(file, show, proxy); if (error) { @@ -1253,18 +1332,17 @@ static const struct file_operations seq_rawdata_ ##NAME ##_fops = { \ static int seq_rawdata_open(struct inode *inode, struct file *file, int (*show)(struct seq_file *, void *)) { - struct aa_loaddata *data = __aa_get_loaddata(inode->i_private); + struct aa_loaddata *data = get_loaddata_common_ref(inode->i_private); int error; if (!data) - /* lost race this ent is being reaped */ return -ENOENT; error = single_open(file, show, data); if (error) { AA_BUG(file->private_data && ((struct seq_file *)file->private_data)->private); - aa_put_loaddata(data); + aa_put_i_loaddata(data); } return error; @@ -1275,7 +1353,7 @@ static int seq_rawdata_release(struct inode *inode, struct file *file) struct seq_file *seq = (struct seq_file *) file->private_data; if (seq) - aa_put_loaddata(seq->private); + aa_put_i_loaddata(seq->private); return single_release(inode, file); } @@ -1387,9 +1465,8 @@ static int rawdata_open(struct inode *inode, struct file *file) if (!aa_current_policy_view_capable(NULL)) return 
-EACCES; - loaddata = __aa_get_loaddata(inode->i_private); + loaddata = get_loaddata_common_ref(inode->i_private); if (!loaddata) - /* lost race: this entry is being reaped */ return -ENOENT; private = rawdata_f_data_alloc(loaddata->size); @@ -1414,7 +1491,7 @@ static int rawdata_open(struct inode *inode, struct file *file) return error; fail_private_alloc: - aa_put_loaddata(loaddata); + aa_put_i_loaddata(loaddata); return error; } @@ -1431,7 +1508,6 @@ static void remove_rawdata_dents(struct aa_loaddata *rawdata) for (i = 0; i < AAFS_LOADDATA_NDENTS; i++) { if (!IS_ERR_OR_NULL(rawdata->dents[i])) { - /* no refcounts on i_private */ aafs_remove(rawdata->dents[i]); rawdata->dents[i] = NULL; } @@ -1474,35 +1550,37 @@ int __aa_fs_create_rawdata(struct aa_ns *ns, struct aa_loaddata *rawdata) return PTR_ERR(dir); rawdata->dents[AAFS_LOADDATA_DIR] = dir; - dent = aafs_create_file("abi", S_IFREG | 0444, dir, rawdata, + dent = aafs_create_file("abi", S_IFREG | 0444, dir, &rawdata->count, &seq_rawdata_abi_fops); if (IS_ERR(dent)) goto fail; rawdata->dents[AAFS_LOADDATA_ABI] = dent; - dent = aafs_create_file("revision", S_IFREG | 0444, dir, rawdata, - &seq_rawdata_revision_fops); + dent = aafs_create_file("revision", S_IFREG | 0444, dir, + &rawdata->count, + &seq_rawdata_revision_fops); if (IS_ERR(dent)) goto fail; rawdata->dents[AAFS_LOADDATA_REVISION] = dent; if (aa_g_hash_policy) { dent = aafs_create_file("sha256", S_IFREG | 0444, dir, - rawdata, &seq_rawdata_hash_fops); + &rawdata->count, + &seq_rawdata_hash_fops); if (IS_ERR(dent)) goto fail; rawdata->dents[AAFS_LOADDATA_HASH] = dent; } dent = aafs_create_file("compressed_size", S_IFREG | 0444, dir, - rawdata, + &rawdata->count, &seq_rawdata_compressed_size_fops); if (IS_ERR(dent)) goto fail; rawdata->dents[AAFS_LOADDATA_COMPRESSED_SIZE] = dent; - dent = aafs_create_file("raw_data", S_IFREG | 0444, - dir, rawdata, &rawdata_fops); + dent = aafs_create_file("raw_data", S_IFREG | 0444, dir, + &rawdata->count, &rawdata_fops); if (IS_ERR(dent)) goto fail; rawdata->dents[AAFS_LOADDATA_DATA] = dent; @@ -1510,13 +1588,11 @@ int __aa_fs_create_rawdata(struct aa_ns *ns, struct aa_loaddata *rawdata) rawdata->ns = aa_get_ns(ns); list_add(&rawdata->list, &ns->rawdata_list); - /* no refcount on inode rawdata */ return 0; fail: remove_rawdata_dents(rawdata); - return PTR_ERR(dent); } #endif /* CONFIG_SECURITY_APPARMOR_EXPORT_BINARY */ @@ -1540,13 +1616,10 @@ void __aafs_profile_rmdir(struct aa_profile *profile) __aafs_profile_rmdir(child); for (i = AAFS_PROF_SIZEOF - 1; i >= 0; --i) { - struct aa_proxy *proxy; if (!profile->dents[i]) continue; - proxy = d_inode(profile->dents[i])->i_private; aafs_remove(profile->dents[i]); - aa_put_proxy(proxy); profile->dents[i] = NULL; } } @@ -1580,14 +1653,7 @@ static struct dentry *create_profile_file(struct dentry *dir, const char *name, struct aa_profile *profile, const struct file_operations *fops) { - struct aa_proxy *proxy = aa_get_proxy(profile->label.proxy); - struct dentry *dent; - - dent = aafs_create_file(name, S_IFREG | 0444, dir, proxy, fops); - if (IS_ERR(dent)) - aa_put_proxy(proxy); - - return dent; + return aafs_create_file(name, S_IFREG | 0444, dir, &profile->label.proxy->count, fops); } #ifdef CONFIG_SECURITY_APPARMOR_EXPORT_BINARY @@ -1637,7 +1703,8 @@ static const char *rawdata_get_link_base(struct dentry *dentry, struct delayed_call *done, const char *name) { - struct aa_proxy *proxy = inode->i_private; + struct aa_common_ref *ref = inode->i_private; + struct aa_proxy *proxy = container_of(ref, struct 
aa_proxy, count); struct aa_label *label; struct aa_profile *profile; char *target; @@ -1779,27 +1846,24 @@ int __aafs_profile_mkdir(struct aa_profile *profile, struct dentry *parent) if (profile->rawdata) { if (aa_g_hash_policy) { dent = aafs_create("raw_sha256", S_IFLNK | 0444, dir, - profile->label.proxy, NULL, NULL, - &rawdata_link_sha256_iops); + &profile->label.proxy->count, NULL, + NULL, &rawdata_link_sha256_iops); if (IS_ERR(dent)) goto fail; - aa_get_proxy(profile->label.proxy); profile->dents[AAFS_PROF_RAW_HASH] = dent; } dent = aafs_create("raw_abi", S_IFLNK | 0444, dir, - profile->label.proxy, NULL, NULL, + &profile->label.proxy->count, NULL, NULL, &rawdata_link_abi_iops); if (IS_ERR(dent)) goto fail; - aa_get_proxy(profile->label.proxy); profile->dents[AAFS_PROF_RAW_ABI] = dent; dent = aafs_create("raw_data", S_IFLNK | 0444, dir, - profile->label.proxy, NULL, NULL, + &profile->label.proxy->count, NULL, NULL, &rawdata_link_data_iops); if (IS_ERR(dent)) goto fail; - aa_get_proxy(profile->label.proxy); profile->dents[AAFS_PROF_RAW_DATA] = dent; } #endif /*CONFIG_SECURITY_APPARMOR_EXPORT_BINARY */ @@ -1830,13 +1894,13 @@ static struct dentry *ns_mkdir_op(struct mnt_idmap *idmap, struct inode *dir, int error; label = begin_current_label_crit_section(); - error = aa_may_manage_policy(current_cred(), label, NULL, + error = aa_may_manage_policy(current_cred(), label, NULL, NULL, AA_MAY_LOAD_POLICY); end_current_label_crit_section(label); if (error) return ERR_PTR(error); - parent = aa_get_ns(dir->i_private); + parent = get_ns_common_ref(dir->i_private); AA_BUG(d_inode(ns_subns_dir(parent)) != dir); /* we have to unlock and then relock to get locking order right @@ -1880,13 +1944,13 @@ static int ns_rmdir_op(struct inode *dir, struct dentry *dentry) int error; label = begin_current_label_crit_section(); - error = aa_may_manage_policy(current_cred(), label, NULL, + error = aa_may_manage_policy(current_cred(), label, NULL, NULL, AA_MAY_LOAD_POLICY); end_current_label_crit_section(label); if (error) return error; - parent = aa_get_ns(dir->i_private); + parent = get_ns_common_ref(dir->i_private); /* rmdir calls the generic securityfs functions to remove files * from the apparmor dir. It is up to the apparmor ns locking * to avoid races. 
@@ -1956,27 +2020,6 @@ void __aafs_ns_rmdir(struct aa_ns *ns) __aa_fs_list_remove_rawdata(ns); - if (ns_subns_dir(ns)) { - sub = d_inode(ns_subns_dir(ns))->i_private; - aa_put_ns(sub); - } - if (ns_subload(ns)) { - sub = d_inode(ns_subload(ns))->i_private; - aa_put_ns(sub); - } - if (ns_subreplace(ns)) { - sub = d_inode(ns_subreplace(ns))->i_private; - aa_put_ns(sub); - } - if (ns_subremove(ns)) { - sub = d_inode(ns_subremove(ns))->i_private; - aa_put_ns(sub); - } - if (ns_subrevision(ns)) { - sub = d_inode(ns_subrevision(ns))->i_private; - aa_put_ns(sub); - } - for (i = AAFS_NS_SIZEOF - 1; i >= 0; --i) { aafs_remove(ns->dents[i]); ns->dents[i] = NULL; @@ -2001,40 +2044,40 @@ static int __aafs_ns_mkdir_entries(struct aa_ns *ns, struct dentry *dir) return PTR_ERR(dent); ns_subdata_dir(ns) = dent; - dent = aafs_create_file("revision", 0444, dir, ns, + dent = aafs_create_file("revision", 0444, dir, + &ns->unconfined->label.count, &aa_fs_ns_revision_fops); if (IS_ERR(dent)) return PTR_ERR(dent); - aa_get_ns(ns); ns_subrevision(ns) = dent; - dent = aafs_create_file(".load", 0640, dir, ns, - &aa_fs_profile_load); + dent = aafs_create_file(".load", 0640, dir, + &ns->unconfined->label.count, + &aa_fs_profile_load); if (IS_ERR(dent)) return PTR_ERR(dent); - aa_get_ns(ns); ns_subload(ns) = dent; - dent = aafs_create_file(".replace", 0640, dir, ns, - &aa_fs_profile_replace); + dent = aafs_create_file(".replace", 0640, dir, + &ns->unconfined->label.count, + &aa_fs_profile_replace); if (IS_ERR(dent)) return PTR_ERR(dent); - aa_get_ns(ns); ns_subreplace(ns) = dent; - dent = aafs_create_file(".remove", 0640, dir, ns, - &aa_fs_profile_remove); + dent = aafs_create_file(".remove", 0640, dir, + &ns->unconfined->label.count, + &aa_fs_profile_remove); if (IS_ERR(dent)) return PTR_ERR(dent); - aa_get_ns(ns); ns_subremove(ns) = dent; /* use create_dentry so we can supply private data */ - dent = aafs_create("namespaces", S_IFDIR | 0755, dir, ns, NULL, NULL, - &ns_dir_inode_operations); + dent = aafs_create("namespaces", S_IFDIR | 0755, dir, + &ns->unconfined->label.count, + NULL, NULL, &ns_dir_inode_operations); if (IS_ERR(dent)) return PTR_ERR(dent); - aa_get_ns(ns); ns_subns_dir(ns) = dent; return 0;
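The apparmorfs hunks above route every i_private through a single struct aa_common_ref and recover the owning object with container_of() plus the reftype tag. A minimal sketch of one of the lookup helpers the diff calls (get_proxy_common_ref() and friends); the body here is an assumption about their shape, not the patch's actual implementation:

	/* Illustrative only: recover an aa_proxy from the common ref stored
	 * in i_private, taking a reference when the type tag matches.
	 */
	static struct aa_proxy *get_proxy_common_ref(struct aa_common_ref *ref)
	{
		if (!ref || ref->reftype != REF_PROXY)
			return NULL;
		return aa_get_proxy(container_of(ref, struct aa_proxy, count));
	}

The same shape presumably applies to get_ns_common_ref() and get_loaddata_common_ref(), dispatching on REF_NS and REF_RAWDATA respectively.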
diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h index c0812db..335f219 100644 --- a/security/apparmor/include/label.h +++ b/security/apparmor/include/label.h
@@ -102,7 +102,7 @@ enum label_flags { struct aa_label; struct aa_proxy { - struct kref count; + struct aa_common_ref count; struct aa_label __rcu *label; }; @@ -125,7 +125,7 @@ struct label_it { * vec: vector of profiles comprising the compound label */ struct aa_label { - struct kref count; + struct aa_common_ref count; struct rb_node node; struct rcu_head rcu; struct aa_proxy *proxy; @@ -357,7 +357,7 @@ int aa_label_match(struct aa_profile *profile, struct aa_ruleset *rules, */ static inline struct aa_label *__aa_get_label(struct aa_label *l) { - if (l && kref_get_unless_zero(&l->count)) + if (l && kref_get_unless_zero(&l->count.count)) return l; return NULL; @@ -366,7 +366,7 @@ static inline struct aa_label *__aa_get_label(struct aa_label *l) static inline struct aa_label *aa_get_label(struct aa_label *l) { if (l) - kref_get(&(l->count)); + kref_get(&(l->count.count)); return l; } @@ -386,7 +386,7 @@ static inline struct aa_label *aa_get_label_rcu(struct aa_label __rcu **l) rcu_read_lock(); do { c = rcu_dereference(*l); - } while (c && !kref_get_unless_zero(&c->count)); + } while (c && !kref_get_unless_zero(&c->count.count)); rcu_read_unlock(); return c; @@ -426,7 +426,7 @@ static inline struct aa_label *aa_get_newest_label(struct aa_label *l) static inline void aa_put_label(struct aa_label *l) { if (l) - kref_put(&l->count, aa_label_kref); + kref_put(&l->count.count, aa_label_kref); } /* wrapper fn to indicate semantics of the check */ @@ -443,7 +443,7 @@ void aa_proxy_kref(struct kref *kref); static inline struct aa_proxy *aa_get_proxy(struct aa_proxy *proxy) { if (proxy) - kref_get(&(proxy->count)); + kref_get(&(proxy->count.count)); return proxy; } @@ -451,7 +451,7 @@ static inline struct aa_proxy *aa_get_proxy(struct aa_proxy *proxy) static inline void aa_put_proxy(struct aa_proxy *proxy) { if (proxy) - kref_put(&proxy->count, aa_proxy_kref); + kref_put(&proxy->count.count, aa_proxy_kref); } void __aa_proxy_redirect(struct aa_label *orig, struct aa_label *new);
diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index 1c5d1f6..8c6ce84 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h
@@ -102,6 +102,18 @@ void aa_info_message(const char *str); /* Security blob offsets */ extern struct lsm_blob_sizes apparmor_blob_sizes; +enum reftype { + REF_NS, + REF_PROXY, + REF_RAWDATA, +}; + +/* common reference count used by data that shows up in aafs */ +struct aa_common_ref { + struct kref count; + enum reftype reftype; +}; + /** * aa_strneq - compare null terminated @str to a non null terminated substring * @str: a null terminated string
diff --git a/security/apparmor/include/match.h b/security/apparmor/include/match.h index 0dde8ed..7accb1c3 100644 --- a/security/apparmor/include/match.h +++ b/security/apparmor/include/match.h
@@ -185,6 +185,7 @@ static inline void aa_put_dfa(struct aa_dfa *dfa) #define MATCH_FLAG_DIFF_ENCODE 0x80000000 #define MARK_DIFF_ENCODE 0x40000000 #define MATCH_FLAG_OOB_TRANSITION 0x20000000 +#define MARK_DIFF_ENCODE_VERIFIED 0x10000000 #define MATCH_FLAGS_MASK 0xff000000 #define MATCH_FLAGS_VALID (MATCH_FLAG_DIFF_ENCODE | MATCH_FLAG_OOB_TRANSITION) #define MATCH_FLAGS_INVALID (MATCH_FLAGS_MASK & ~MATCH_FLAGS_VALID)
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 5115eba..3895f87 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h
@@ -379,7 +379,7 @@ static inline bool profile_mediates_safe(struct aa_profile *profile, static inline struct aa_profile *aa_get_profile(struct aa_profile *p) { if (p) - kref_get(&(p->label.count)); + kref_get(&(p->label.count.count)); return p; } @@ -393,7 +393,7 @@ static inline struct aa_profile *aa_get_profile(struct aa_profile *p) */ static inline struct aa_profile *aa_get_profile_not0(struct aa_profile *p) { - if (p && kref_get_unless_zero(&p->label.count)) + if (p && kref_get_unless_zero(&p->label.count.count)) return p; return NULL; @@ -413,7 +413,7 @@ static inline struct aa_profile *aa_get_profile_rcu(struct aa_profile __rcu **p) rcu_read_lock(); do { c = rcu_dereference(*p); - } while (c && !kref_get_unless_zero(&c->label.count)); + } while (c && !kref_get_unless_zero(&c->label.count.count)); rcu_read_unlock(); return c; @@ -426,7 +426,7 @@ static inline struct aa_profile *aa_get_profile_rcu(struct aa_profile __rcu **p) static inline void aa_put_profile(struct aa_profile *p) { if (p) - kref_put(&p->label.count, aa_label_kref); + kref_put(&p->label.count.count, aa_label_kref); } static inline int AUDIT_MODE(struct aa_profile *profile) @@ -443,7 +443,7 @@ bool aa_policy_admin_capable(const struct cred *subj_cred, struct aa_label *label, struct aa_ns *ns); int aa_may_manage_policy(const struct cred *subj_cred, struct aa_label *label, struct aa_ns *ns, - u32 mask); + const struct cred *ocred, u32 mask); bool aa_current_policy_view_capable(struct aa_ns *ns); bool aa_current_policy_admin_capable(struct aa_ns *ns);
diff --git a/security/apparmor/include/policy_ns.h b/security/apparmor/include/policy_ns.h index d646070..cc6e841 100644 --- a/security/apparmor/include/policy_ns.h +++ b/security/apparmor/include/policy_ns.h
@@ -18,6 +18,8 @@ #include "label.h" #include "policy.h" +/* Match max depth of user namespaces */ +#define MAX_NS_DEPTH 32 /* struct aa_ns_acct - accounting of profiles in namespace * @max_size: maximum space allowed for all profiles in namespace
diff --git a/security/apparmor/include/policy_unpack.h b/security/apparmor/include/policy_unpack.h index a6f4611..e5a95dc 100644 --- a/security/apparmor/include/policy_unpack.h +++ b/security/apparmor/include/policy_unpack.h
@@ -87,17 +87,29 @@ struct aa_ext { u32 version; }; -/* - * struct aa_loaddata - buffer of policy raw_data set +/* struct aa_loaddata - buffer of policy raw_data set + * @count: inode/filesystem refcount - use aa_get_i_loaddata() + * @pcount: profile refcount - use aa_get_profile_loaddata() + * @list: list the loaddata is on + * @work: used to do a delayed cleanup + * @dents: refs to dents created in aafs + * @ns: the namespace this loaddata was loaded into + * @name: + * @size: the size of the data that was loaded + * @compressed_size: the size of the data when it is compressed + * @revision: unique revision count that this data was loaded as + * @abi: the abi number the loaddata uses + * @hash: a hash of the loaddata, used to help dedup data * - * there is no loaddata ref for being on ns list, nor a ref from - * d_inode(@dentry) when grab a ref from these, @ns->lock must be held - * && __aa_get_loaddata() needs to be used, and the return value - * checked, if NULL the loaddata is already being reaped and should be - * considered dead. + * There is no loaddata ref for being on ns->rawdata_list, so + * @ns->lock must be held when walking the list. Dentries and + * inode opens hold refs on @count; profiles hold refs on @pcount. + * When the last @pcount drops, do_ploaddata_rmfs() removes the + * fs entries and drops the associated @count ref. */ struct aa_loaddata { - struct kref count; + struct aa_common_ref count; + struct kref pcount; struct list_head list; struct work_struct work; struct dentry *dents[AAFS_LOADDATA_NDENTS]; @@ -119,50 +131,53 @@ struct aa_loaddata { int aa_unpack(struct aa_loaddata *udata, struct list_head *lh, const char **ns); /** - * __aa_get_loaddata - get a reference count to uncounted data reference - * @data: reference to get a count on - * - * Returns: pointer to reference OR NULL if race is lost and reference is - * being repeated. - * Requires: @data->ns->lock held, and the return code MUST be checked - * - * Use only from inode->i_private and @data->list found references - */ -static inline struct aa_loaddata * -__aa_get_loaddata(struct aa_loaddata *data) -{ - if (data && kref_get_unless_zero(&(data->count))) - return data; - - return NULL; -} - -/** * aa_get_i_loaddata - get a reference count from a counted data reference * @data: reference to get a count on * - * Returns: point to reference + * Returns: pointer to reference * Requires: @data to have a valid reference count on it. It is a bug * if the race to reap can be encountered when it is used. */ static inline struct aa_loaddata * -aa_get_loaddata(struct aa_loaddata *data) +aa_get_i_loaddata(struct aa_loaddata *data) { - struct aa_loaddata *tmp = __aa_get_loaddata(data); - AA_BUG(data && !tmp); + if (data) + kref_get(&(data->count.count)); + return data; +} - return tmp; + +/** + * aa_get_profile_loaddata - get a profile reference count on loaddata + * @data: reference to get a count on + * + * Returns: pointer to reference + * Requires: @data to have a valid reference count on it. 
+ */ +static inline struct aa_loaddata * +aa_get_profile_loaddata(struct aa_loaddata *data) +{ + if (data) + kref_get(&(data->pcount)); + return data; } void __aa_loaddata_update(struct aa_loaddata *data, long revision); bool aa_rawdata_eq(struct aa_loaddata *l, struct aa_loaddata *r); void aa_loaddata_kref(struct kref *kref); +void aa_ploaddata_kref(struct kref *kref); struct aa_loaddata *aa_loaddata_alloc(size_t size); -static inline void aa_put_loaddata(struct aa_loaddata *data) +static inline void aa_put_i_loaddata(struct aa_loaddata *data) { if (data) - kref_put(&data->count, aa_loaddata_kref); + kref_put(&data->count.count, aa_loaddata_kref); +} + +static inline void aa_put_profile_loaddata(struct aa_loaddata *data) +{ + if (data) + kref_put(&data->pcount, aa_ploaddata_kref); } #if IS_ENABLED(CONFIG_KUNIT)
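The @count/@pcount split documented above gives a loaddata two lifetimes: aafs dentries and open files pin the memory via @count, while profiles pin the aafs presence via @pcount. A hedged usage sketch of the intended sequence (the counter annotations are my reading of the header comment, not verified values):

	struct aa_loaddata *data = aa_loaddata_alloc(size); /* count=1, pcount=1 */

	profile->rawdata = aa_get_profile_loaddata(data); /* profiles take pcount */
	__aa_fs_create_rawdata(ns, data);	/* each dentry takes a count ref */

	aa_put_profile_loaddata(data);	/* drop the allocation's pcount ref */
	/* ... later, when the profile is freed ... */
	aa_put_profile_loaddata(profile->rawdata);
	/* last pcount ref gone: do_ploaddata_rmfs() removes the aafs entries
	 * and puts the count ref owned by the pcount side; the memory itself
	 * is freed only when the final count ref (e.g. a still-open seq file)
	 * is put.
	 */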
diff --git a/security/apparmor/label.c b/security/apparmor/label.c index e478283..3a721fd 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c
@@ -52,7 +52,8 @@ static void free_proxy(struct aa_proxy *proxy) void aa_proxy_kref(struct kref *kref) { - struct aa_proxy *proxy = container_of(kref, struct aa_proxy, count); + struct aa_proxy *proxy = container_of(kref, struct aa_proxy, + count.count); free_proxy(proxy); } @@ -63,7 +64,8 @@ struct aa_proxy *aa_alloc_proxy(struct aa_label *label, gfp_t gfp) new = kzalloc_obj(struct aa_proxy, gfp); if (new) { - kref_init(&new->count); + kref_init(&new->count.count); + new->count.reftype = REF_PROXY; rcu_assign_pointer(new->label, aa_get_label(label)); } return new; @@ -375,7 +377,8 @@ static void label_free_rcu(struct rcu_head *head) void aa_label_kref(struct kref *kref) { - struct aa_label *label = container_of(kref, struct aa_label, count); + struct aa_label *label = container_of(kref, struct aa_label, + count.count); struct aa_ns *ns = labels_ns(label); if (!ns) { @@ -412,7 +415,8 @@ bool aa_label_init(struct aa_label *label, int size, gfp_t gfp) label->size = size; /* doesn't include null */ label->vec[size] = NULL; /* null terminate */ - kref_init(&label->count); + kref_init(&label->count.count); + label->count.reftype = REF_NS; /* for aafs purposes */ RB_CLEAR_NODE(&label->node); return true;
diff --git a/security/apparmor/match.c b/security/apparmor/match.c index 8fa0a14..e9fac67 100644 --- a/security/apparmor/match.c +++ b/security/apparmor/match.c
@@ -160,9 +160,10 @@ static int verify_dfa(struct aa_dfa *dfa) if (state_count == 0) goto out; for (i = 0; i < state_count; i++) { - if (!(BASE_TABLE(dfa)[i] & MATCH_FLAG_DIFF_ENCODE) && - (DEFAULT_TABLE(dfa)[i] >= state_count)) + if (DEFAULT_TABLE(dfa)[i] >= state_count) { + pr_err("AppArmor DFA default state out of bounds"); goto out; + } if (BASE_TABLE(dfa)[i] & MATCH_FLAGS_INVALID) { pr_err("AppArmor DFA state with invalid match flags"); goto out; @@ -201,16 +202,31 @@ static int verify_dfa(struct aa_dfa *dfa) size_t j, k; for (j = i; - (BASE_TABLE(dfa)[j] & MATCH_FLAG_DIFF_ENCODE) && - !(BASE_TABLE(dfa)[j] & MARK_DIFF_ENCODE); + ((BASE_TABLE(dfa)[j] & MATCH_FLAG_DIFF_ENCODE) && + !(BASE_TABLE(dfa)[j] & MARK_DIFF_ENCODE_VERIFIED)); j = k) { + if (BASE_TABLE(dfa)[j] & MARK_DIFF_ENCODE) + /* loop in current chain */ + goto out; k = DEFAULT_TABLE(dfa)[j]; if (j == k) + /* self loop */ goto out; - if (k < j) - break; /* already verified */ BASE_TABLE(dfa)[j] |= MARK_DIFF_ENCODE; } + /* move mark to verified */ + for (j = i; + (BASE_TABLE(dfa)[j] & MATCH_FLAG_DIFF_ENCODE); + j = k) { + k = DEFAULT_TABLE(dfa)[j]; + if (j < i) + /* jumps to state/chain that has been + * verified + */ + break; + BASE_TABLE(dfa)[j] &= ~MARK_DIFF_ENCODE; + BASE_TABLE(dfa)[j] |= MARK_DIFF_ENCODE_VERIFIED; + } } error = 0; @@ -463,13 +479,18 @@ aa_state_t aa_dfa_match_len(struct aa_dfa *dfa, aa_state_t start, if (dfa->tables[YYTD_ID_EC]) { /* Equivalence class table defined */ u8 *equiv = EQUIV_TABLE(dfa); - for (; len; len--) - match_char(state, def, base, next, check, - equiv[(u8) *str++]); + for (; len; len--) { + u8 c = equiv[(u8) *str]; + + match_char(state, def, base, next, check, c); + str++; + } } else { /* default is direct to next state */ - for (; len; len--) - match_char(state, def, base, next, check, (u8) *str++); + for (; len; len--) { + match_char(state, def, base, next, check, (u8) *str); + str++; + } } return state; @@ -503,13 +524,18 @@ aa_state_t aa_dfa_match(struct aa_dfa *dfa, aa_state_t start, const char *str) /* Equivalence class table defined */ u8 *equiv = EQUIV_TABLE(dfa); /* default is direct to next state */ - while (*str) - match_char(state, def, base, next, check, - equiv[(u8) *str++]); + while (*str) { + u8 c = equiv[(u8) *str]; + + match_char(state, def, base, next, check, c); + str++; + } } else { /* default is direct to next state */ - while (*str) - match_char(state, def, base, next, check, (u8) *str++); + while (*str) { + match_char(state, def, base, next, check, (u8) *str); + str++; + } } return state;
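The verify_dfa() rework above splits the walk of a diff-encode default chain into an in-progress mark (MARK_DIFF_ENCODE) and a sticky MARK_DIFF_ENCODE_VERIFIED, so loops are caught without prematurely trusting half-walked chains. A standalone sketch of the same two-mark technique with simplified flag names (an illustration of the approach, not the kernel's tables or exact loop):

	#define F_DIFF		0x1	/* state is diff-encoded */
	#define F_VISITING	0x2	/* on the chain currently being walked */
	#define F_VERIFIED	0x4	/* chain from here is known loop-free */

	static bool chain_is_loop_free(unsigned int *flags, const size_t *def,
				       size_t i)
	{
		size_t j, k;

		/* first pass: mark the chain as "visiting"; a revisit is a loop */
		for (j = i; (flags[j] & F_DIFF) && !(flags[j] & F_VERIFIED); j = k) {
			if (flags[j] & F_VISITING)
				return false;	/* loop back into current chain */
			k = def[j];
			if (k == j)
				return false;	/* self loop */
			flags[j] |= F_VISITING;
		}
		/* second pass: the chain ended cleanly, so upgrade the marks;
		 * later chains that merge into this one then stop immediately.
		 * On failure the caller rejects the whole DFA, so any stale
		 * F_VISITING marks are never consulted again.
		 */
		for (j = i; flags[j] & F_VISITING; j = def[j]) {
			flags[j] &= ~F_VISITING;
			flags[j] |= F_VERIFIED;
		}
		return true;
	}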
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 9108d74..b6a5eb4 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c
@@ -191,19 +191,43 @@ static void __list_remove_profile(struct aa_profile *profile) } /** - * __remove_profile - remove old profile, and children - * @profile: profile to be replaced (NOT NULL) + * __remove_profile - remove profile, and children + * @profile: profile to be removed (NOT NULL) * * Requires: namespace list lock be held, or list not be shared */ static void __remove_profile(struct aa_profile *profile) { + struct aa_profile *curr, *to_remove; + AA_BUG(!profile); AA_BUG(!profile->ns); AA_BUG(!mutex_is_locked(&profile->ns->lock)); /* release any children lists first */ - __aa_profile_list_release(&profile->base.profiles); + if (!list_empty(&profile->base.profiles)) { + curr = list_first_entry(&profile->base.profiles, struct aa_profile, base.list); + + while (curr != profile) { + + while (!list_empty(&curr->base.profiles)) + curr = list_first_entry(&curr->base.profiles, + struct aa_profile, base.list); + + to_remove = curr; + if (!list_is_last(&to_remove->base.list, + &aa_deref_parent(curr)->base.profiles)) + curr = list_next_entry(to_remove, base.list); + else + curr = aa_deref_parent(curr); + + /* released by free_profile */ + aa_label_remove(&to_remove->label); + __aafs_profile_rmdir(to_remove); + __list_remove_profile(to_remove); + } + } + /* released by free_profile */ aa_label_remove(&profile->label); __aafs_profile_rmdir(profile); @@ -326,7 +350,7 @@ void aa_free_profile(struct aa_profile *profile) } kfree_sensitive(profile->hash); - aa_put_loaddata(profile->rawdata); + aa_put_profile_loaddata(profile->rawdata); aa_label_destroy(&profile->label); kfree_sensitive(profile); @@ -918,17 +942,44 @@ bool aa_current_policy_admin_capable(struct aa_ns *ns) return res; } +static bool is_subset_of_obj_privilege(const struct cred *cred, + struct aa_label *label, + const struct cred *ocred) +{ + if (cred == ocred) + return true; + + if (!aa_label_is_subset(label, cred_label(ocred))) + return false; + /* don't allow crossing userns for now */ + if (cred->user_ns != ocred->user_ns) + return false; + if (!cap_issubset(cred->cap_inheritable, ocred->cap_inheritable)) + return false; + if (!cap_issubset(cred->cap_permitted, ocred->cap_permitted)) + return false; + if (!cap_issubset(cred->cap_effective, ocred->cap_effective)) + return false; + if (!cap_issubset(cred->cap_bset, ocred->cap_bset)) + return false; + if (!cap_issubset(cred->cap_ambient, ocred->cap_ambient)) + return false; + return true; +} + + /** * aa_may_manage_policy - can the current task manage policy * @subj_cred: subjects cred * @label: label to check if it can manage policy * @ns: namespace being managed by @label (may be NULL if @label's ns) + * @ocred: object cred if request is coming from an open object * @mask: contains the policy manipulation operation being done * * Returns: 0 if the task is allowed to manipulate policy else error */ int aa_may_manage_policy(const struct cred *subj_cred, struct aa_label *label, - struct aa_ns *ns, u32 mask) + struct aa_ns *ns, const struct cred *ocred, u32 mask) { const char *op; @@ -944,6 +995,11 @@ int aa_may_manage_policy(const struct cred *subj_cred, struct aa_label *label, return audit_policy(label, op, NULL, NULL, "policy_locked", -EACCES); + if (ocred && !is_subset_of_obj_privilege(subj_cred, label, ocred)) + return audit_policy(label, op, NULL, NULL, + "not privileged for target profile", + -EACCES); + if (!aa_policy_admin_capable(subj_cred, label, ns)) return audit_policy(label, op, NULL, NULL, "not policy admin", -EACCES); @@ -1115,7 +1171,7 @@ ssize_t 
aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label, LIST_HEAD(lh); op = mask & AA_MAY_REPLACE_POLICY ? OP_PROF_REPL : OP_PROF_LOAD; - aa_get_loaddata(udata); + aa_get_profile_loaddata(udata); /* released below */ error = aa_unpack(udata, &lh, &ns_name); if (error) @@ -1142,6 +1198,7 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label, goto fail; } ns_name = ent->ns_name; + ent->ns_name = NULL; } else count++; } @@ -1166,10 +1223,10 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label, if (aa_rawdata_eq(rawdata_ent, udata)) { struct aa_loaddata *tmp; - tmp = __aa_get_loaddata(rawdata_ent); + tmp = aa_get_profile_loaddata(rawdata_ent); /* check we didn't fail the race */ if (tmp) { - aa_put_loaddata(udata); + aa_put_profile_loaddata(udata); udata = tmp; break; } @@ -1182,7 +1239,7 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label, struct aa_profile *p; if (aa_g_export_binary) - ent->new->rawdata = aa_get_loaddata(udata); + ent->new->rawdata = aa_get_profile_loaddata(udata); error = __lookup_replace(ns, ent->new->base.hname, !(mask & AA_MAY_REPLACE_POLICY), &ent->old, &info); @@ -1315,7 +1372,7 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label, out: aa_put_ns(ns); - aa_put_loaddata(udata); + aa_put_profile_loaddata(udata); kfree(ns_name); if (error)
diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c index 01b653a..5a907a8 100644 --- a/security/apparmor/policy_ns.c +++ b/security/apparmor/policy_ns.c
@@ -223,6 +223,8 @@ static struct aa_ns *__aa_create_ns(struct aa_ns *parent, const char *name, AA_BUG(!name); AA_BUG(!mutex_is_locked(&parent->lock)); + if (parent->level > MAX_NS_DEPTH) + return ERR_PTR(-ENOSPC); ns = alloc_ns(parent->base.hname, name); if (!ns) return ERR_PTR(-ENOMEM);
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 1769417..076d3ff 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c
@@ -109,22 +109,8 @@ bool aa_rawdata_eq(struct aa_loaddata *l, struct aa_loaddata *r) return memcmp(l->data, r->data, r->compressed_size ?: r->size) == 0; } -/* - * need to take the ns mutex lock which is NOT safe most places that - * put_loaddata is called, so we have to delay freeing it - */ -static void do_loaddata_free(struct work_struct *work) +static void do_loaddata_free(struct aa_loaddata *d) { - struct aa_loaddata *d = container_of(work, struct aa_loaddata, work); - struct aa_ns *ns = aa_get_ns(d->ns); - - if (ns) { - mutex_lock_nested(&ns->lock, ns->level); - __aa_fs_remove_rawdata(d); - mutex_unlock(&ns->lock); - aa_put_ns(ns); - } - kfree_sensitive(d->hash); kfree_sensitive(d->name); kvfree(d->data); @@ -133,10 +119,38 @@ static void do_loaddata_free(struct work_struct *work) void aa_loaddata_kref(struct kref *kref) { - struct aa_loaddata *d = container_of(kref, struct aa_loaddata, count); + struct aa_loaddata *d = container_of(kref, struct aa_loaddata, + count.count); + + do_loaddata_free(d); +} + +/* + * need to take the ns mutex lock which is NOT safe most places that + * put_loaddata is called, so we have to delay freeing it + */ +static void do_ploaddata_rmfs(struct work_struct *work) +{ + struct aa_loaddata *d = container_of(work, struct aa_loaddata, work); + struct aa_ns *ns = aa_get_ns(d->ns); + + if (ns) { + mutex_lock_nested(&ns->lock, ns->level); + /* remove fs ref to loaddata */ + __aa_fs_remove_rawdata(d); + mutex_unlock(&ns->lock); + aa_put_ns(ns); + } + /* called by dropping last pcount, so drop its associated icount */ + aa_put_i_loaddata(d); +} + +void aa_ploaddata_kref(struct kref *kref) +{ + struct aa_loaddata *d = container_of(kref, struct aa_loaddata, pcount); if (d) { - INIT_WORK(&d->work, do_loaddata_free); + INIT_WORK(&d->work, do_ploaddata_rmfs); schedule_work(&d->work); } } @@ -153,7 +167,9 @@ struct aa_loaddata *aa_loaddata_alloc(size_t size) kfree(d); return ERR_PTR(-ENOMEM); } - kref_init(&d->count); + kref_init(&d->count.count); + d->count.reftype = REF_RAWDATA; + kref_init(&d->pcount); INIT_LIST_HEAD(&d->list); return d; @@ -1010,7 +1026,17 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy, if (!aa_unpack_u32(e, &pdb->start[AA_CLASS_FILE], "dfa_start")) { /* default start state for xmatch and file dfa */ pdb->start[AA_CLASS_FILE] = DFA_START; - } /* setup class index */ + } + + size_t state_count = pdb->dfa->tables[YYTD_ID_BASE]->td_lolen; + + if (pdb->start[0] >= state_count || + pdb->start[AA_CLASS_FILE] >= state_count) { + *info = "invalid dfa start state"; + goto fail; + } + + /* setup class index */ for (i = AA_CLASS_FILE + 1; i <= AA_CLASS_LAST; i++) { pdb->start[i] = aa_dfa_next(pdb->dfa, pdb->start[0], i); @@ -1409,7 +1435,6 @@ static int verify_header(struct aa_ext *e, int required, const char **ns) { int error = -EPROTONOSUPPORT; const char *name = NULL; - *ns = NULL; /* get the interface version */ if (!aa_unpack_u32(e, &e->version, "version")) {
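The aa_ploaddata_kref() path above defers teardown to a workqueue because ns->lock cannot safely be taken wherever the last put may happen. This is the usual kref-release-via-work pattern; a self-contained sketch with hypothetical names (struct obj and obj_kref_release() are illustrative, not AppArmor symbols):

	struct obj {
		struct kref ref;
		struct work_struct work;
	};

	static void obj_release_work(struct work_struct *work)
	{
		struct obj *o = container_of(work, struct obj, work);

		/* process context: sleeping locks are safe to take here */
		kfree(o);
	}

	static void obj_kref_release(struct kref *ref)
	{
		struct obj *o = container_of(ref, struct obj, ref);

		INIT_WORK(&o->work, obj_release_work);
		schedule_work(&o->work);	/* defer out of the put context */
	}

Callers then drop references with kref_put(&o->ref, obj_kref_release) from any context where schedule_work() is legal.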
diff --git a/security/landlock/domain.c b/security/landlock/domain.c index f5b78d4..f0d83f4 100644 --- a/security/landlock/domain.c +++ b/security/landlock/domain.c
@@ -94,8 +94,7 @@ static struct landlock_details *get_current_details(void) * allocate with GFP_KERNEL_ACCOUNT because it is independent from the * caller. */ - details = - kzalloc_flex(*details, exe_path, path_size); + details = kzalloc_flex(*details, exe_path, path_size); if (!details) return ERR_PTR(-ENOMEM);
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c index 3198735..73018dc 100644 --- a/security/landlock/ruleset.c +++ b/security/landlock/ruleset.c
@@ -32,9 +32,8 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers) { struct landlock_ruleset *new_ruleset; - new_ruleset = - kzalloc_flex(*new_ruleset, access_masks, num_layers, - GFP_KERNEL_ACCOUNT); + new_ruleset = kzalloc_flex(*new_ruleset, access_masks, num_layers, + GFP_KERNEL_ACCOUNT); if (!new_ruleset) return ERR_PTR(-ENOMEM); refcount_set(&new_ruleset->usage, 1); @@ -559,8 +558,8 @@ landlock_merge_ruleset(struct landlock_ruleset *const parent, if (IS_ERR(new_dom)) return new_dom; - new_dom->hierarchy = kzalloc_obj(*new_dom->hierarchy, - GFP_KERNEL_ACCOUNT); + new_dom->hierarchy = + kzalloc_obj(*new_dom->hierarchy, GFP_KERNEL_ACCOUNT); if (!new_dom->hierarchy) return ERR_PTR(-ENOMEM);
diff --git a/security/landlock/tsync.c b/security/landlock/tsync.c index de01aa8..4d4427b 100644 --- a/security/landlock/tsync.c +++ b/security/landlock/tsync.c
@@ -203,6 +203,40 @@ static struct tsync_work *tsync_works_provide(struct tsync_works *s, return ctx; } +/** + * tsync_works_trim - Put the last tsync_work element + * + * @s: TSYNC works to trim. + * + * Put the last task and decrement the size of @s. + * + * This helper does not cancel a running task, but just resets the last element + * to zero. + */ +static void tsync_works_trim(struct tsync_works *s) +{ + struct tsync_work *ctx; + + if (WARN_ON_ONCE(s->size <= 0)) + return; + + ctx = s->works[s->size - 1]; + + /* + * For consistency, remove the task from ctx so that it does not look like + * we handed it a task_work. + */ + put_task_struct(ctx->task); + *ctx = (typeof(*ctx)){}; + + /* + * Cancel the tsync_works_provide() change to recycle the reserved memory + * for the next thread, if any. This also ensures that cancel_tsync_works() + * and tsync_works_release() do not see any NULL task pointers. + */ + s->size--; +} + /* * tsync_works_grow_by - preallocates space for n more contexts in s * @@ -256,13 +290,14 @@ static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags) * tsync_works_contains_task - checks for presence of task in s */ static bool tsync_works_contains_task(const struct tsync_works *s, - struct task_struct *task) + const struct task_struct *task) { size_t i; for (i = 0; i < s->size; i++) if (s->works[i]->task == task) return true; + return false; } @@ -276,7 +311,7 @@ static void tsync_works_release(struct tsync_works *s) size_t i; for (i = 0; i < s->size; i++) { - if (!s->works[i]->task) + if (WARN_ON_ONCE(!s->works[i]->task)) continue; put_task_struct(s->works[i]->task); @@ -284,6 +319,7 @@ for (i = 0; i < s->capacity; i++) kfree(s->works[i]); + kfree(s->works); s->works = NULL; s->size = 0; @@ -295,7 +331,7 @@ */ static size_t count_additional_threads(const struct tsync_works *works) { - struct task_struct *thread, *caller; + const struct task_struct *caller, *thread; size_t n = 0; caller = current; @@ -334,7 +370,8 @@ static bool schedule_task_work(struct tsync_works *works, struct tsync_shared_context *shared_ctx) { int err; - struct task_struct *thread, *caller; + const struct task_struct *caller; + struct task_struct *thread; struct tsync_work *ctx; bool found_more_threads = false; @@ -379,16 +416,14 @@ init_task_work(&ctx->work, restrict_one_thread_callback); err = task_work_add(thread, &ctx->work, TWA_SIGNAL); - if (err) { + if (unlikely(err)) { /* * task_work_add() only fails if the task is about to exit. We * checked that earlier, but it can happen as a race. Resume * without setting an error, as the task is probably gone in the - * next loop iteration. For consistency, remove the task from ctx - * so that it does not look like we handed it a task_work. + * next loop iteration. */ - put_task_struct(ctx->task); - ctx->task = NULL; + tsync_works_trim(works); atomic_dec(&shared_ctx->num_preparing); atomic_dec(&shared_ctx->num_unfinished); @@ -406,12 +441,15 @@ * shared_ctx->num_preparing and shared_ctx->num_unfinished and mark the two * completions if needed, as if the task was never scheduled. 
*/ -static void cancel_tsync_works(struct tsync_works *works, +static void cancel_tsync_works(const struct tsync_works *works, struct tsync_shared_context *shared_ctx) { - int i; + size_t i; for (i = 0; i < works->size; i++) { + if (WARN_ON_ONCE(!works->works[i]->task)) + continue; + if (!task_work_cancel(works->works[i]->task, &works->works[i]->work)) continue; @@ -448,6 +486,16 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred, shared_ctx.set_no_new_privs = task_no_new_privs(current); /* + * Serialize concurrent TSYNC operations to prevent deadlocks when + * multiple threads call landlock_restrict_self() simultaneously. + * If the lock is already held, we gracefully yield by restarting the + * syscall. This allows the current thread to process pending + * task_works before retrying. + */ + if (!down_write_trylock(&current->signal->exec_update_lock)) + return restart_syscall(); + + /* * We schedule a pseudo-signal task_work for each of the calling task's * sibling threads. In the task work, each thread: * @@ -527,24 +575,30 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred, -ERESTARTNOINTR); /* - * Cancel task works for tasks that did not start running yet, - * and decrement all_prepared and num_unfinished accordingly. + * Opportunistic improvement: try to cancel task + * works for tasks that did not start running + * yet. We do not have a guarantee that it + * cancels any of the enqueued task works + * because task_work_run() might already have + * dequeued them. */ cancel_tsync_works(&works, &shared_ctx); /* - * The remaining task works have started running, so waiting for - * their completion will finish. + * Break the loop with error. The cleanup code + * after the loop unblocks the remaining + * task_works. */ - wait_for_completion(&shared_ctx.all_prepared); + break; } } } while (found_more_threads && !atomic_read(&shared_ctx.preparation_error)); /* - * We now have all sibling threads blocking and in "prepared" state in the - * task work. Ask all threads to commit. + * We now have either (a) all sibling threads blocking and in "prepared" + * state in the task work, or (b) the preparation error is set. Ask all + * threads to commit (or abort). */ complete_all(&shared_ctx.ready_to_commit); @@ -556,6 +610,6 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred, wait_for_completion(&shared_ctx.all_finished); tsync_works_release(&works); - + up_write(&current->signal->exec_update_lock); return atomic_read(&shared_ctx.preparation_error); }
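The new serialization in landlock_restrict_sibling_threads() uses the lock-or-restart idiom: take the lock opportunistically, and on contention return through restart_syscall() so the calling thread first runs any task_works queued against it. A minimal kernel-context sketch with hypothetical names (my_op_lock and do_restricted_op() are illustrative, not Landlock symbols):

	static DECLARE_RWSEM(my_op_lock);

	static int do_restricted_op(void)
	{
		int err;

		if (!down_write_trylock(&my_op_lock))
			/* -ERESTARTNOINTR under the hood: the syscall is
			 * retried after pending task_works are processed
			 */
			return restart_syscall();

		err = 0;	/* ... serialized work goes here ... */

		up_write(&my_op_lock);
		return err;
	}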
diff --git a/security/security.c b/security/security.c index 67af9228..a26c147 100644 --- a/security/security.c +++ b/security/security.c
@@ -61,6 +61,7 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX + 1] = { [LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM", [LOCKDOWN_DBG_WRITE_KERNEL] = "use of kgdb/kdb to write kernel RAM", [LOCKDOWN_RTAS_ERROR_INJECTION] = "RTAS error injection", + [LOCKDOWN_XEN_USER_ACTIONS] = "Xen guest user action", [LOCKDOWN_INTEGRITY_MAX] = "integrity", [LOCKDOWN_KCORE] = "/proc/kcore access", [LOCKDOWN_KPROBES] = "use of kprobes",
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 67cf6a0..5a64453 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c
@@ -2144,6 +2144,10 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream, for (;;) { long tout; struct snd_pcm_runtime *to_check; + unsigned int drain_rate; + snd_pcm_uframes_t drain_bufsz; + bool drain_no_period_wakeup; + if (signal_pending(current)) { result = -ERESTARTSYS; break; @@ -2163,16 +2167,25 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream, snd_pcm_group_unref(group, substream); if (!to_check) break; /* all drained */ + /* + * Cache the runtime fields needed after unlock. + * A concurrent close() on the linked stream may free + * its runtime via snd_pcm_detach_substream() once we + * release the stream lock below. + */ + drain_no_period_wakeup = to_check->no_period_wakeup; + drain_rate = to_check->rate; + drain_bufsz = to_check->buffer_size; init_waitqueue_entry(&wait, current); set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&to_check->sleep, &wait); snd_pcm_stream_unlock_irq(substream); - if (runtime->no_period_wakeup) + if (drain_no_period_wakeup) tout = MAX_SCHEDULE_TIMEOUT; else { tout = 100; - if (runtime->rate) { - long t = runtime->buffer_size * 1100 / runtime->rate; + if (drain_rate) { + long t = drain_bufsz * 1100 / drain_rate; tout = max(t, tout); } tout = msecs_to_jiffies(tout);
diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index e97721f..8e70da8 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c
@@ -1164,7 +1164,7 @@ static void process_rx_packets(struct fw_iso_context *context, u32 tstamp, size_ struct pkt_desc *desc = s->packet_descs_cursor; unsigned int pkt_header_length; unsigned int packets; - u32 curr_cycle_time; + u32 curr_cycle_time = 0; bool need_hw_irq; int i;
diff --git a/sound/firewire/dice/dice.c b/sound/firewire/dice/dice.c index 85d265c..f7a50ba 100644 --- a/sound/firewire/dice/dice.c +++ b/sound/firewire/dice/dice.c
@@ -122,7 +122,7 @@ static void dice_card_strings(struct snd_dice *dice) fw_csr_string(dev->config_rom + 5, CSR_VENDOR, vendor, sizeof(vendor)); strscpy(model, "?"); fw_csr_string(dice->unit->directory, CSR_MODEL, model, sizeof(model)); - snprintf(card->longname, sizeof(card->longname), + scnprintf(card->longname, sizeof(card->longname), "%s %s (serial %u) at %s, S%d", vendor, model, dev->config_rom[4] & 0x3fffff, dev_name(&dice->unit->device), 100 << dev->max_speed);
diff --git a/sound/hda/codecs/ca0132.c b/sound/hda/codecs/ca0132.c index bf342a7..a0677d7 100644 --- a/sound/hda/codecs/ca0132.c +++ b/sound/hda/codecs/ca0132.c
@@ -9816,6 +9816,15 @@ static void ca0132_config(struct hda_codec *codec) spec->dig_in = 0x09; break; } + + /* Enable HP/Speaker auto-detect by default when the headphone pin + * supports presence detect and its default config does not flag + * AC_DEFCFG_MISC_NO_PRESENCE. + */ + if (spec->unsol_tag_hp && + (snd_hda_query_pin_caps(codec, spec->unsol_tag_hp) & AC_PINCAP_PRES_DETECT) && + !(get_defcfg_misc(snd_hda_codec_get_pincfg(codec, spec->unsol_tag_hp)) & + AC_DEFCFG_MISC_NO_PRESENCE)) + spec->vnode_lswitch[VNID_HP_ASEL - VNODE_START_NID] = 1; } static int ca0132_prepare_verbs(struct hda_codec *codec)
diff --git a/sound/hda/codecs/hdmi/tegrahdmi.c b/sound/hda/codecs/hdmi/tegrahdmi.c index 5f6fe31..ebb6410 100644 --- a/sound/hda/codecs/hdmi/tegrahdmi.c +++ b/sound/hda/codecs/hdmi/tegrahdmi.c
@@ -299,6 +299,7 @@ static const struct hda_device_id snd_hda_id_tegrahdmi[] = { HDA_CODEC_ID_MODEL(0x10de002f, "Tegra194 HDMI/DP2", MODEL_TEGRA), HDA_CODEC_ID_MODEL(0x10de0030, "Tegra194 HDMI/DP3", MODEL_TEGRA), HDA_CODEC_ID_MODEL(0x10de0031, "Tegra234 HDMI/DP", MODEL_TEGRA234), + HDA_CODEC_ID_MODEL(0x10de0032, "Tegra238 HDMI/DP", MODEL_TEGRA234), HDA_CODEC_ID_MODEL(0x10de0033, "SoC 33 HDMI/DP", MODEL_TEGRA234), HDA_CODEC_ID_MODEL(0x10de0034, "Tegra264 HDMI/DP", MODEL_TEGRA234), HDA_CODEC_ID_MODEL(0x10de0035, "SoC 35 HDMI/DP", MODEL_TEGRA234),
diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 3605304..6db23ce 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c
@@ -1017,6 +1017,50 @@ static int alc269_resume(struct hda_codec *codec) return 0; } +#define ALC233_STARFIGHTER_SPK_PIN 0x1b +#define ALC233_STARFIGHTER_GPIO2 0x04 + +static void alc233_starfighter_update_amp(struct hda_codec *codec, bool on) +{ + snd_hda_codec_write(codec, ALC233_STARFIGHTER_SPK_PIN, 0, + AC_VERB_SET_EAPD_BTLENABLE, + on ? AC_EAPDBTL_EAPD : 0); + alc_update_gpio_data(codec, ALC233_STARFIGHTER_GPIO2, on); +} + +static void alc233_starfighter_pcm_hook(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + struct snd_pcm_substream *substream, + int action) +{ + switch (action) { + case HDA_GEN_PCM_ACT_PREPARE: + alc233_starfighter_update_amp(codec, true); + break; + case HDA_GEN_PCM_ACT_CLEANUP: + alc233_starfighter_update_amp(codec, false); + break; + } +} + +static void alc233_fixup_starlabs_starfighter(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + struct alc_spec *spec = codec->spec; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + spec->gpio_mask |= ALC233_STARFIGHTER_GPIO2; + spec->gpio_dir |= ALC233_STARFIGHTER_GPIO2; + spec->gpio_data &= ~ALC233_STARFIGHTER_GPIO2; + break; + case HDA_FIXUP_ACT_PROBE: + spec->gen.pcm_playback_hook = alc233_starfighter_pcm_hook; + break; + } +} + static void alc269_fixup_pincfg_no_hp_to_lineout(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -3681,22 +3725,42 @@ static void alc245_tas2781_spi_hp_fixup_muteled(struct hda_codec *codec, alc_fixup_hp_gpio_led(codec, action, 0x04, 0x0); alc285_fixup_hp_coef_micmute_led(codec, fix, action); } + +static void alc245_hp_spk_mute_led_update(void *private_data, int enabled) +{ + struct hda_codec *codec = private_data; + unsigned int val; + + val = enabled ? 0x08 : 0x04; /* 0x08 led on, 0x04 led off */ + alc_update_coef_idx(codec, 0x0b, 0x0c, val); +} + /* JD2: mute led GPIO3: micmute led */ static void alc245_tas2781_i2c_hp_fixup_muteled(struct hda_codec *codec, const struct hda_fixup *fix, int action) { struct alc_spec *spec = codec->spec; + hda_nid_t hp_pin = alc_get_hp_pin(spec); static const hda_nid_t conn[] = { 0x02 }; switch (action) { case HDA_FIXUP_ACT_PRE_PROBE: + if (!hp_pin) { + spec->gen.vmaster_mute.hook = alc245_hp_spk_mute_led_update; + spec->gen.vmaster_mute_led = 1; + } spec->gen.auto_mute_via_amp = 1; snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); break; + case HDA_FIXUP_ACT_INIT: + if (!hp_pin) + alc245_hp_spk_mute_led_update(codec, !spec->gen.master_mute); + break; } tas2781_fixup_txnw_i2c(codec, fix, action); - alc245_fixup_hp_mute_led_coefbit(codec, fix, action); + if (hp_pin) + alc245_fixup_hp_mute_led_coefbit(codec, fix, action); alc285_fixup_hp_coef_micmute_led(codec, fix, action); } /* @@ -4040,6 +4104,7 @@ enum { ALC245_FIXUP_CLEVO_NOISY_MIC, ALC269_FIXUP_VAIO_VJFH52_MIC_NO_PRESENCE, ALC233_FIXUP_MEDION_MTL_SPK, + ALC233_FIXUP_STARLABS_STARFIGHTER, ALC294_FIXUP_BASS_SPEAKER_15, ALC283_FIXUP_DELL_HP_RESUME, ALC294_FIXUP_ASUS_CS35L41_SPI_2, @@ -4056,6 +4121,9 @@ enum { ALC236_FIXUP_HP_MUTE_LED_MICMUTE_GPIO, ALC233_FIXUP_LENOVO_GPIO2_MIC_HOTKEY, ALC245_FIXUP_BASS_HP_DAC, + ALC245_FIXUP_ACER_MICMUTE_LED, + ALC245_FIXUP_CS35L41_I2C_2_MUTE_LED, + ALC236_FIXUP_HP_DMIC, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -6499,6 +6567,10 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, + [ALC233_FIXUP_STARLABS_STARFIGHTER] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc233_fixup_starlabs_starfighter, + }, [ALC294_FIXUP_BASS_SPEAKER_15] = { .type = HDA_FIXUP_FUNC, .v.func = 
alc294_fixup_bass_speaker_15, @@ -6576,6 +6648,25 @@ static const struct hda_fixup alc269_fixups[] = { /* Borrow the DAC routing selected for those Thinkpads */ .v.func = alc285_fixup_thinkpad_x1_gen7, }, + [ALC245_FIXUP_ACER_MICMUTE_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_hp_coef_micmute_led, + .chained = true, + .chain_id = ALC2XX_FIXUP_HEADSET_MIC, + }, + [ALC245_FIXUP_CS35L41_I2C_2_MUTE_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc245_fixup_hp_mute_led_coefbit, + .chained = true, + .chain_id = ALC287_FIXUP_CS35L41_I2C_2, + }, + [ALC236_FIXUP_HP_DMIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x12, 0x90a60160 }, /* use as internal mic */ + { } + }, + } }; static const struct hda_quirk alc269_fixup_tbl[] = { @@ -6591,6 +6682,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x0840, "Acer Aspire E1", ALC269VB_FIXUP_ASPIRE_E1_COEF), + SND_PCI_QUIRK(0x1025, 0x0943, "Acer Aspire V3-572G", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x100c, "Acer Aspire E5-574G", ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1025, 0x101c, "Acer Veriton N2510G", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), @@ -6627,6 +6719,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x159c, "Acer Nitro 5 AN515-58", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1597, "Acer Nitro 5 AN517-55", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x169a, "Acer Swift SFG16", ALC256_FIXUP_ACER_SFG16_MICMUTE_LED), + SND_PCI_QUIRK(0x1025, 0x171e, "Acer Nitro ANV15-51", ALC245_FIXUP_ACER_MICMUTE_LED), + SND_PCI_QUIRK(0x1025, 0x173a, "Acer Swift SFG14-73", ALC245_FIXUP_ACER_MICMUTE_LED), SND_PCI_QUIRK(0x1025, 0x1826, "Acer Helios ZPC", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2), SND_PCI_QUIRK(0x1025, 0x182c, "Acer Helios ZPD", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2), SND_PCI_QUIRK(0x1025, 0x1844, "Acer Helios ZPS", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2), @@ -6822,6 +6916,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8730, "HP ProBook 445 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8735, "HP ProBook 435 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_AMP_INIT), + SND_PCI_QUIRK(0x103c, 0x8756, "HP ENVY Laptop 13-ba0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS), SND_PCI_QUIRK(0x103c, 0x8760, "HP EliteBook 8{4,5}5 G7", ALC285_FIXUP_HP_BEEP_MICMUTE_LED), SND_PCI_QUIRK(0x103c, 0x876e, "HP ENVY x360 Convertible 13-ay0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS), SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED), @@ -6835,6 +6930,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x87cb, "HP Pavilion 15-eg0xxx", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87cc, "HP Pavilion 15-eg0xxx", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87d3, "HP Laptop 15-gw0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87df, "HP ProBook 430 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), @@ 
-6872,6 +6968,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x103c, 0x88b3, "HP ENVY x360 Convertible 15-es0xxx", ALC245_FIXUP_HP_ENVY_X360_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x88d1, "HP Pavilion 15-eh1xxx (mainboard 88D1)", ALC245_FIXUP_HP_MUTE_LED_V1_COEFBIT), SND_PCI_QUIRK(0x103c, 0x88dd, "HP Pavilion 15z-ec200", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x88eb, "HP Victus 16-e0xxx", ALC245_FIXUP_HP_MUTE_LED_V2_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8902, "HP OMEN 16", ALC285_FIXUP_HP_MUTE_LED), @@ -6907,6 +7004,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x89da, "HP Spectre x360 14t-ea100", ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX), SND_PCI_QUIRK(0x103c, 0x89e7, "HP Elite x2 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8a0f, "HP Pavilion 14-ec1xxx", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8a1f, "HP Laptop 14s-dr5xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8a20, "HP Laptop 15s-fq5xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8a25, "HP Victus 16-d1xxx (MB 8A25)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8a26, "HP Victus 16-d1xxx (MB 8A26)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), @@ -6920,6 +7018,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8a30, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8a31, "HP Envy 15", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8a34, "HP Pavilion x360 2-in-1 Laptop 14-ek0xxx", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), + SND_PCI_QUIRK(0x103c, 0x8a3d, "HP Victus 15-fb0xxx (MB 8A3D)", ALC245_FIXUP_HP_MUTE_LED_V2_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8a4f, "HP Victus 15-fa0xxx (MB 8A4F)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8a6e, "HP EDNA 360", ALC287_FIXUP_CS35L41_I2C_4), SND_PCI_QUIRK(0x103c, 0x8a74, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), @@ -7063,7 +7162,9 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8da1, "HP 16 Clipper OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8da7, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8da8, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8dc9, "HP Laptop 15-fc0xxx", ALC236_FIXUP_HP_DMIC), SND_PCI_QUIRK(0x103c, 0x8dd4, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), + SND_PCI_QUIRK(0x103c, 0x8dd7, "HP Laptop 15-fd0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8de8, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8de9, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8dec, "HP EliteBook 640 G12", ALC236_FIXUP_HP_GPIO_LED), @@ -7094,7 +7195,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e3a, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e3b, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2), - SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8e60, "HP OmniBook 7 Laptop 16-bh0xxx", ALC245_FIXUP_CS35L41_I2C_2_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e62, 
"HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e8a, "HP NexusX", ALC245_FIXUP_HP_TAS2781_I2C_MUTE_LED), @@ -7143,6 +7244,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x1194, "ASUS UM3406KA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), + HDA_CODEC_QUIRK(0x1043, 0x1204, "ASUS Strix G16 G615JMR", ALC287_FIXUP_TXNW2781_I2C_ASUS), SND_PCI_QUIRK(0x1043, 0x1204, "ASUS Strix G615JHR_JMR_JPR", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x1214, "ASUS Strix G615LH_LM_LP", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x125e, "ASUS Q524UQK", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), @@ -7172,10 +7274,12 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x14e3, "ASUS G513PI/PU/PV", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x14f2, "ASUS VivoBook X515JA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1503, "ASUS G733PY/PZ/PZV/PYV", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x1514, "ASUS ROG Flow Z13 GZ302EAC", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A), SND_PCI_QUIRK(0x1043, 0x1533, "ASUS GV302XA/XJ/XQ/XU/XV/XI", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1573, "ASUS GZ301VV/VQ/VU/VJ/VA/VC/VE/VVC/VQC/VUC/VJC/VEC/VCC", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1584, "ASUS UM3406GA ", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x1602, "ASUS ROG Strix SCAR 15", ALC285_FIXUP_ASUS_G533Z_PINS), SND_PCI_QUIRK(0x1043, 0x1652, "ASUS ROG Zephyrus Do 15 SE", ALC289_FIXUP_ASUS_ZEPHYRUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x1663, "ASUS GU603ZI/ZJ/ZQ/ZU/ZV", ALC285_FIXUP_ASUS_HEADSET_MIC), @@ -7240,6 +7344,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e93, "ASUS ExpertBook B9403CVAR", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1eb3, "ASUS Ally RCLA72", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x1ed3, "ASUS HN7306W", ALC287_FIXUP_CS35L41_I2C_2), + HDA_CODEC_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC285_FIXUP_ASUS_I2C_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1c52, "ASUS Zephyrus G15 2022", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401), @@ -7311,8 +7416,10 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP), - SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc188, "Samsung Galaxy Book Flex (NT950QCT-A38A)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Book Flex (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc1ac, "Samsung Galaxy Book2 Pro 360 (NP950QED)", ALC298_FIXUP_SAMSUNG_AMP_V2_2_AMPS), SND_PCI_QUIRK(0x144d, 0xc1a3, "Samsung Galaxy Book Pro (NP935XDB-KC1SE)", 
ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc1a4, "Samsung Galaxy Book Pro 360 (NT935QBD)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc1a6, "Samsung Galaxy Book Pro 360 (NP930QBD)", ALC298_FIXUP_SAMSUNG_AMP), @@ -7459,6 +7566,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x224c, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x224d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x225d, "Thinkpad T480", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x17aa, 0x2288, "Thinkpad X390", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x2292, "Thinkpad X1 Carbon 7th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x22be, "Thinkpad X1 Carbon 8th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x22c1, "Thinkpad P1 Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK), @@ -7507,6 +7615,10 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x383d, "Legion Y9000X 2019", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3843, "Lenovo Yoga 9i / Yoga Book 9i", ALC287_FIXUP_LENOVO_YOGA_BOOK_9I), + /* Yoga Pro 7 14IMH9 shares PCI SSID 17aa:3847 with Legion 7 16ACHG6; + * use codec SSID to distinguish them + */ + HDA_CODEC_QUIRK(0x17aa, 0x38cf, "Lenovo Yoga Pro 7 14IMH9", ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x3847, "Legion 7 16ACHG6", ALC287_FIXUP_LEGION_16ACHG6), SND_PCI_QUIRK(0x17aa, 0x384a, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3852, "Lenovo Yoga 7 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), @@ -7538,6 +7650,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x38b4, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2), HDA_CODEC_QUIRK(0x17aa, 0x391c, "Lenovo Yoga 7 2-in-1 14AKP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), + HDA_CODEC_QUIRK(0x17aa, 0x391d, "Lenovo Yoga 7 2-in-1 16AKP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x38b5, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x38b6, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x38b7, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2), @@ -7568,6 +7681,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x390d, "Lenovo Yoga Pro 7 14ASP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x3913, "Lenovo 145", ALC236_FIXUP_LENOVO_INV_DMIC), + SND_PCI_QUIRK(0x17aa, 0x391a, "Lenovo Yoga Slim 7 14AKP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x391f, "Yoga S990-16 pro Quad YC Quad", ALC287_FIXUP_TXNW2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3920, "Yoga S990-16 pro Quad VECO Quad", ALC287_FIXUP_TXNW2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3929, "Thinkbook 13x Gen 5", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), @@ -7651,6 +7765,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x2782, 0x1705, "MEDION E15433", ALC269VC_FIXUP_INFINIX_Y4_MAX), SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x2782, 0x4900, "MEDION E15443", ALC233_FIXUP_MEDION_MTL_SPK), + SND_PCI_QUIRK(0x7017, 0x2014, "Star Labs StarFighter", 
ALC233_FIXUP_STARLABS_STARFIGHTER), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), @@ -7660,6 +7775,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0xf111, 0x0009, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x000b, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x000c, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0xf111, 0x000f, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 /* Below is a quirk table taken from the old code. @@ -7747,6 +7863,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC298_FIXUP_TPT470_DOCK_FIX, .name = "tpt470-dock-fix"}, {.id = ALC298_FIXUP_TPT470_DOCK, .name = "tpt470-dock"}, {.id = ALC233_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"}, + {.id = ALC233_FIXUP_STARLABS_STARFIGHTER, .name = "starlabs-starfighter"}, {.id = ALC700_FIXUP_INTEL_REFERENCE, .name = "alc700-ref"}, {.id = ALC269_FIXUP_SONY_VAIO, .name = "vaio"}, {.id = ALC269_FIXUP_DELL_M101Z, .name = "dell-m101z"},
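For readers tracking the Yoga Pro 7 entry above: a minimal sketch (illustrative only; the fixup IDs here are hypothetical placeholders) of how a quirk table pairs a codec-SSID entry with a PCI-SSID entry so that two machines sharing PCI SSID 17aa:3847 can take different fixups. HDA_CODEC_QUIRK() matches against the codec's own subsystem ID and is consulted before the PCI-SSID fallback:

	static const struct hda_quirk example_fixup_tbl[] = {
		/* codec SSID wins when both entries could match the machine */
		HDA_CODEC_QUIRK(0x17aa, 0x38cf, "Lenovo Yoga Pro 7 14IMH9", FIXUP_YOGA),
		SND_PCI_QUIRK(0x17aa, 0x3847, "Legion 7 16ACHG6", FIXUP_LEGION),
		{} /* terminator */
	};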
diff --git a/sound/hda/codecs/realtek/alc662.c b/sound/hda/codecs/realtek/alc662.c index 5073165..3a943ad 100644 --- a/sound/hda/codecs/realtek/alc662.c +++ b/sound/hda/codecs/realtek/alc662.c
@@ -313,6 +313,7 @@ enum { ALC897_FIXUP_HEADSET_MIC_PIN2, ALC897_FIXUP_UNIS_H3C_X500S, ALC897_FIXUP_HEADSET_MIC_PIN3, + ALC897_FIXUP_H610M_HP_PIN, }; static const struct hda_fixup alc662_fixups[] = { @@ -766,6 +767,13 @@ static const struct hda_fixup alc662_fixups[] = { { } }, }, + [ALC897_FIXUP_H610M_HP_PIN] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x0321403f }, /* HP out */ + { } + }, + }, }; static const struct hda_quirk alc662_fixup_tbl[] = { @@ -815,6 +823,7 @@ static const struct hda_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT), SND_PCI_QUIRK(0x105b, 0x0cd6, "Foxconn", ALC662_FIXUP_ASUS_MODE2), SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), + SND_PCI_QUIRK(0x1458, 0xa194, "H610M H V2 DDR4", ALC897_FIXUP_H610M_HP_PIN), SND_PCI_QUIRK(0x14cd, 0x5003, "USI", ALC662_FIXUP_USI_HEADSET_MODE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC662_FIXUP_LENOVO_MULTI_CODECS), SND_PCI_QUIRK(0x17aa, 0x1057, "Lenovo P360", ALC897_FIXUP_HEADSET_MIC_PIN),
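The H610M fixup above rewrites pin 0x19's default configuration to 0x0321403f, which encodes the "HP out" role noted in the comment. As a self-contained aid, a decoder for the 32-bit pin default config layout (field boundaries per the HDA specification; compiles with any C compiler):

#include <stdio.h>

int main(void)
{
	unsigned int cfg = 0x0321403f;

	printf("connectivity: %u\n", cfg >> 30);            /* 0 = jack */
	printf("location:     0x%02x\n", (cfg >> 24) & 0x3f);
	printf("device:       0x%x\n", (cfg >> 20) & 0xf);  /* 0x2 = HP out */
	printf("conn type:    0x%x\n", (cfg >> 16) & 0xf);  /* 0x1 = 1/8" jack */
	printf("color:        0x%x\n", (cfg >> 12) & 0xf);  /* 0x4 = green */
	printf("misc:         0x%x\n", (cfg >> 8) & 0xf);
	printf("assoc/seq:    0x%x/0x%x\n", (cfg >> 4) & 0xf, cfg & 0xf);
	return 0;
}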
diff --git a/sound/hda/codecs/senarytech.c b/sound/hda/codecs/senarytech.c index 3a50d4b..6239a25 100644 --- a/sound/hda/codecs/senarytech.c +++ b/sound/hda/codecs/senarytech.c
@@ -19,15 +19,13 @@ #include "hda_jack.h" #include "generic.h" -/* GPIO node ID */ -#define SENARY_GPIO_NODE 0x01 - struct senary_spec { struct hda_gen_spec gen; /* extra EAPD pins */ unsigned int num_eapds; hda_nid_t eapds[4]; + bool dynamic_eapd; hda_nid_t mute_led_eapd; unsigned int parse_flags; /* flag for snd_hda_parse_pin_defcfg() */ @@ -123,19 +121,23 @@ static void senary_init_gpio_led(struct hda_codec *codec) unsigned int mask = spec->gpio_mute_led_mask | spec->gpio_mic_led_mask; if (mask) { - snd_hda_codec_write(codec, SENARY_GPIO_NODE, 0, AC_VERB_SET_GPIO_MASK, + snd_hda_codec_write(codec, codec->core.afg, 0, AC_VERB_SET_GPIO_MASK, mask); - snd_hda_codec_write(codec, SENARY_GPIO_NODE, 0, AC_VERB_SET_GPIO_DIRECTION, + snd_hda_codec_write(codec, codec->core.afg, 0, AC_VERB_SET_GPIO_DIRECTION, mask); - snd_hda_codec_write(codec, SENARY_GPIO_NODE, 0, AC_VERB_SET_GPIO_DATA, + snd_hda_codec_write(codec, codec->core.afg, 0, AC_VERB_SET_GPIO_DATA, spec->gpio_led); } } static int senary_init(struct hda_codec *codec) { + struct senary_spec *spec = codec->spec; + snd_hda_gen_init(codec); senary_init_gpio_led(codec); + if (!spec->dynamic_eapd) + senary_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, true); snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_INIT); return 0;
diff --git a/sound/hda/codecs/side-codecs/cs35l56_hda.c b/sound/hda/codecs/side-codecs/cs35l56_hda.c index cfc8de2..1ace4be 100644 --- a/sound/hda/codecs/side-codecs/cs35l56_hda.c +++ b/sound/hda/codecs/side-codecs/cs35l56_hda.c
@@ -249,7 +249,7 @@ static int cs35l56_hda_posture_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol); - unsigned long pos = ucontrol->value.integer.value[0]; + long pos = ucontrol->value.integer.value[0]; bool changed; int ret; @@ -403,10 +403,6 @@ static void cs35l56_hda_remove_controls(struct cs35l56_hda *cs35l56) snd_ctl_remove(cs35l56->codec->card, cs35l56->volume_ctl); } -static const struct cs_dsp_client_ops cs35l56_hda_client_ops = { - /* cs_dsp requires the client to provide this even if it is empty */ -}; - static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56, const struct firmware **firmware, char **filename, const char *base_name, const char *system_name, @@ -1149,7 +1145,6 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id) cs35l56->base.cal_index = cs35l56->index; cs35l56_init_cs_dsp(&cs35l56->base, &cs35l56->cs_dsp); - cs35l56->cs_dsp.client_ops = &cs35l56_hda_client_ops; if (cs35l56->base.reset_gpio) { dev_dbg(cs35l56->base.dev, "Hard reset\n");
diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c index 74c3cf1..67240ce 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c
@@ -60,7 +60,6 @@ struct tas2781_hda_i2c_priv { int (*save_calibration)(struct tas2781_hda *h); int hda_chip_id; - bool skip_calibration; }; static int tas2781_get_i2c_res(struct acpi_resource *ares, void *data) @@ -479,8 +478,7 @@ static void tasdevice_dspfw_init(void *context) /* If calibrated data occurs error, dsp will still works with default * calibrated data inside algo. */ - if (!hda_priv->skip_calibration) - hda_priv->save_calibration(tas_hda); + hda_priv->save_calibration(tas_hda); } static void tasdev_fw_ready(const struct firmware *fmw, void *context) @@ -535,7 +533,6 @@ static int tas2781_hda_bind(struct device *dev, struct device *master, void *master_data) { struct tas2781_hda *tas_hda = dev_get_drvdata(dev); - struct tas2781_hda_i2c_priv *hda_priv = tas_hda->hda_priv; struct hda_component_parent *parent = master_data; struct hda_component *comp; struct hda_codec *codec; @@ -564,14 +561,6 @@ static int tas2781_hda_bind(struct device *dev, struct device *master, break; } - /* - * Using ASUS ROG Xbox Ally X (RC73XA) UEFI calibration data - * causes audio dropouts during playback, use fallback data - * from DSP firmware as a workaround. - */ - if (codec->core.subsystem_id == 0x10431384) - hda_priv->skip_calibration = true; - guard(pm_runtime_active_auto)(dev); comp->dev = dev; @@ -643,6 +632,7 @@ static int tas2781_hda_i2c_probe(struct i2c_client *clt) */ device_name = "TIAS2781"; hda_priv->hda_chip_id = HDA_TAS2781; + tas_hda->priv->chip_id = TAS2781; hda_priv->save_calibration = tas2781_save_calibration; tas_hda->priv->global_addr = TAS2781_GLOBAL_ADDR; } else if (strstarts(dev_name(&clt->dev), "i2c-TXNW2770")) { @@ -656,6 +646,7 @@ static int tas2781_hda_i2c_probe(struct i2c_client *clt) "i2c-TXNW2781:00-tas2781-hda.0")) { device_name = "TXNW2781"; hda_priv->hda_chip_id = HDA_TAS2781; + tas_hda->priv->chip_id = TAS2781; hda_priv->save_calibration = tas2781_save_calibration; tas_hda->priv->global_addr = TAS2781_GLOBAL_ADDR; } else if (strstr(dev_name(&clt->dev), "INT8866")) {
diff --git a/sound/hda/controllers/intel.c b/sound/hda/controllers/intel.c index 6fddf40..8a7bd49 100644 --- a/sound/hda/controllers/intel.c +++ b/sound/hda/controllers/intel.c
@@ -1751,6 +1751,8 @@ static int default_bdl_pos_adj(struct azx *chip) return 1; case AZX_DRIVER_ZHAOXINHDMI: return 128; + case AZX_DRIVER_NVIDIA: + return 64; default: return 32; } @@ -2075,7 +2077,6 @@ static const struct pci_device_id driver_denylist[] = { { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1043, 0x874f) }, /* ASUS ROG Zenith II / Strix */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb59) }, /* MSI TRX40 Creator */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb60) }, /* MSI TRX40 */ - { PCI_DEVICE_SUB(0x1022, 0x15e3, 0x1462, 0xee59) }, /* MSI X870E Tomahawk WiFi */ {} }; @@ -2084,6 +2085,11 @@ static struct pci_device_id driver_denylist_ideapad_z570[] = { {} }; +static struct pci_device_id driver_denylist_msi_x870e[] = { + { PCI_DEVICE_SUB(0x1022, 0x15e3, 0x1462, 0xee59) }, /* MSI X870E Tomahawk WiFi */ + {} +}; + /* DMI-based denylist, to be used when: * - PCI subsystem IDs are zero, impossible to distinguish from valid sound cards. * - Different modifications of the same laptop use different GPU models. @@ -2097,6 +2103,14 @@ static const struct dmi_system_id driver_denylist_dmi[] = { }, .driver_data = &driver_denylist_ideapad_z570, }, + { + /* PCI device matching alone incorrectly matches some laptops */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_BOARD_NAME, "MAG X870E TOMAHAWK WIFI (MS-7E59)"), + }, + .driver_data = &driver_denylist_msi_x870e, + }, {} };
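The MAG X870E change above converts a flat PCI-subsystem denylist entry into a DMI-gated one because, as the new comment notes, the PCI SSID alone also matched unrelated laptops. A condensed sketch of the lookup pattern (a hand-written fragment, not the driver's actual helper; dmi_first_match() and pci_match_id() are the stock kernel APIs):

	static bool example_is_denylisted(struct pci_dev *pci)
	{
		const struct dmi_system_id *dmi = dmi_first_match(driver_denylist_dmi);

		/* each DMI entry carries its own pci_device_id sub-table */
		return dmi && pci_match_id(dmi->driver_data, pci);
	}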
diff --git a/sound/pci/asihpi/hpimsgx.c b/sound/pci/asihpi/hpimsgx.c index b68e6bf..ed1c7b7 100644 --- a/sound/pci/asihpi/hpimsgx.c +++ b/sound/pci/asihpi/hpimsgx.c
@@ -581,8 +581,10 @@ static u16 adapter_prepare(u16 adapter) HPI_ADAPTER_OPEN); hm.adapter_index = adapter; hw_entry_point(&hm, &hr); - memcpy(&rESP_HPI_ADAPTER_OPEN[adapter], &hr, - sizeof(rESP_HPI_ADAPTER_OPEN[0])); + memcpy(&rESP_HPI_ADAPTER_OPEN[adapter].h, &hr, + sizeof(rESP_HPI_ADAPTER_OPEN[adapter].h)); + memcpy(&rESP_HPI_ADAPTER_OPEN[adapter].a, &hr.u.ax.info, + sizeof(rESP_HPI_ADAPTER_OPEN[adapter].a)); if (hr.error) return hr.error;
diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c index f122e39..da2667c 100644 --- a/sound/pci/ctxfi/ctatc.c +++ b/sound/pci/ctxfi/ctatc.c
@@ -1427,10 +1427,14 @@ static int atc_get_resources(struct ct_atc *atc) daio_mgr = (struct daio_mgr *)atc->rsc_mgrs[DAIO]; da_desc.msr = atc->msr; for (i = 0; i < NUM_DAIOTYP; i++) { - if (((i == MIC) && !cap.dedicated_mic) || ((i == RCA) && !cap.dedicated_rca)) + if (((i == MIC) && !cap.dedicated_mic) || + ((i == RCA) && !cap.dedicated_rca) || + i == SPDIFI1) continue; - da_desc.type = (atc->model != CTSB073X) ? i : - ((i == SPDIFIO) ? SPDIFI1 : i); + if (atc->model == CTSB073X && i == SPDIFIO) + da_desc.type = SPDIFI1; + else + da_desc.type = i; da_desc.output = (i < LINEIM) || (i == RCA); err = daio_mgr->get_daio(daio_mgr, &da_desc, (struct daio **)&atc->daios[i]);
diff --git a/sound/pci/ctxfi/ctdaio.c b/sound/pci/ctxfi/ctdaio.c index b8bde27..4dbb1dd 100644 --- a/sound/pci/ctxfi/ctdaio.c +++ b/sound/pci/ctxfi/ctdaio.c
@@ -99,7 +99,7 @@ static const struct rsc_ops daio_in_rsc_ops_20k2 = { .output_slot = daio_index, }; -static unsigned int daio_device_index(enum DAIOTYP type, struct hw *hw) +static int daio_device_index(enum DAIOTYP type, struct hw *hw) { switch (hw->chip_type) { case ATC20K1: @@ -112,12 +112,15 @@ static unsigned int daio_device_index(enum DAIOTYP type, struct hw *hw) case LINEO3: return 5; case LINEO4: return 6; case LINEIM: return 7; - default: return -EINVAL; + default: + pr_err("ctxfi: Invalid type %d for hw20k1\n", type); + return -EINVAL; } case ATC20K2: switch (type) { case SPDIFOO: return 0; case SPDIFIO: return 0; + case SPDIFI1: return 1; case LINEO1: return 4; case LINEO2: return 7; case LINEO3: return 5; @@ -125,9 +128,12 @@ static unsigned int daio_device_index(enum DAIOTYP type, struct hw *hw) case LINEIM: return 4; case MIC: return 5; case RCA: return 3; - default: return -EINVAL; + default: + pr_err("ctxfi: Invalid type %d for hw20k2\n", type); + return -EINVAL; } default: + pr_err("ctxfi: Invalid chip type %d\n", hw->chip_type); return -EINVAL; } } @@ -148,8 +154,11 @@ static int dao_spdif_set_spos(struct dao *dao, unsigned int spos) static int dao_commit_write(struct dao *dao) { - dao->hw->dao_commit_write(dao->hw, - daio_device_index(dao->daio.type, dao->hw), dao->ctrl_blk); + int idx = daio_device_index(dao->daio.type, dao->hw); + + if (idx < 0) + return idx; + dao->hw->dao_commit_write(dao->hw, idx, dao->ctrl_blk); return 0; } @@ -287,8 +296,11 @@ static int dai_set_enb_srt(struct dai *dai, unsigned int enb) static int dai_commit_write(struct dai *dai) { - dai->hw->dai_commit_write(dai->hw, - daio_device_index(dai->daio.type, dai->hw), dai->ctrl_blk); + int idx = daio_device_index(dai->daio.type, dai->hw); + + if (idx < 0) + return idx; + dai->hw->dai_commit_write(dai->hw, idx, dai->ctrl_blk); return 0; } @@ -367,7 +379,7 @@ static int dao_rsc_init(struct dao *dao, { struct hw *hw = mgr->mgr.hw; unsigned int conf; - int err; + int idx, err; err = daio_rsc_init(&dao->daio, desc, mgr->mgr.hw); if (err) @@ -386,15 +398,18 @@ static int dao_rsc_init(struct dao *dao, if (err) goto error2; - hw->daio_mgr_dsb_dao(mgr->mgr.ctrl_blk, - daio_device_index(dao->daio.type, hw)); + idx = daio_device_index(dao->daio.type, hw); + if (idx < 0) { + err = idx; + goto error2; + } + + hw->daio_mgr_dsb_dao(mgr->mgr.ctrl_blk, idx); hw->daio_mgr_commit_write(hw, mgr->mgr.ctrl_blk); conf = (desc->msr & 0x7) | (desc->passthru << 3); - hw->daio_mgr_dao_init(hw, mgr->mgr.ctrl_blk, - daio_device_index(dao->daio.type, hw), conf); - hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk, - daio_device_index(dao->daio.type, hw)); + hw->daio_mgr_dao_init(hw, mgr->mgr.ctrl_blk, idx, conf); + hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk, idx); hw->daio_mgr_commit_write(hw, mgr->mgr.ctrl_blk); return 0; @@ -443,7 +458,7 @@ static int dai_rsc_init(struct dai *dai, const struct daio_desc *desc, struct daio_mgr *mgr) { - int err; + int idx, err; struct hw *hw = mgr->mgr.hw; unsigned int rsr, msr; @@ -457,6 +472,12 @@ static int dai_rsc_init(struct dai *dai, if (err) goto error1; + idx = daio_device_index(dai->daio.type, dai->hw); + if (idx < 0) { + err = idx; + goto error1; + } + for (rsr = 0, msr = desc->msr; msr > 1; msr >>= 1) rsr++; @@ -465,8 +486,7 @@ static int dai_rsc_init(struct dai *dai, /* default to disabling control of a SRC */ hw->dai_srt_set_ec(dai->ctrl_blk, 0); hw->dai_srt_set_et(dai->ctrl_blk, 0); /* default to disabling SRT */ - hw->dai_commit_write(hw, - daio_device_index(dai->daio.type, dai->hw), 
dai->ctrl_blk); + hw->dai_commit_write(hw, idx, dai->ctrl_blk); return 0; @@ -581,28 +601,28 @@ static int put_daio_rsc(struct daio_mgr *mgr, struct daio *daio) static int daio_mgr_enb_daio(struct daio_mgr *mgr, struct daio *daio) { struct hw *hw = mgr->mgr.hw; + int idx = daio_device_index(daio->type, hw); - if (daio->output) { - hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk, - daio_device_index(daio->type, hw)); - } else { - hw->daio_mgr_enb_dai(mgr->mgr.ctrl_blk, - daio_device_index(daio->type, hw)); - } + if (idx < 0) + return idx; + if (daio->output) + hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk, idx); + else + hw->daio_mgr_enb_dai(mgr->mgr.ctrl_blk, idx); return 0; } static int daio_mgr_dsb_daio(struct daio_mgr *mgr, struct daio *daio) { struct hw *hw = mgr->mgr.hw; + int idx = daio_device_index(daio->type, hw); - if (daio->output) { - hw->daio_mgr_dsb_dao(mgr->mgr.ctrl_blk, - daio_device_index(daio->type, hw)); - } else { - hw->daio_mgr_dsb_dai(mgr->mgr.ctrl_blk, - daio_device_index(daio->type, hw)); - } + if (idx < 0) + return idx; + if (daio->output) + hw->daio_mgr_dsb_dao(mgr->mgr.ctrl_blk, idx); + else + hw->daio_mgr_dsb_dai(mgr->mgr.ctrl_blk, idx); return 0; }
diff --git a/sound/soc/amd/acp-config.c b/sound/soc/amd/acp-config.c index 365209e..1604ed6 100644 --- a/sound/soc/amd/acp-config.c +++ b/sound/soc/amd/acp-config.c
@@ -23,6 +23,16 @@ static int acp_quirk_data; +static const struct dmi_system_id acp70_acpi_flag_override_table[] = { + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "HN7306EA"), + }, + }, + {} +}; + static const struct config_entry config_table[] = { { .flags = FLAG_AMD_SOF, @@ -186,8 +196,11 @@ int snd_amd_acp_find_config(struct pci_dev *pci) */ if (!pci->revision) return 0; - else if (pci->revision >= ACP_7_0_REV) + else if (pci->revision >= ACP_7_0_REV) { + if (dmi_check_system(acp70_acpi_flag_override_table)) + return 0; return snd_amd_acp_acpi_find_config(pci); + } for (i = 0; i < ARRAY_SIZE(config_table); i++, table++) { if (table->device != device)
diff --git a/sound/soc/amd/acp/acp-mach-common.c b/sound/soc/amd/acp/acp-mach-common.c index 4d99472..09f6c9a 100644 --- a/sound/soc/amd/acp/acp-mach-common.c +++ b/sound/soc/amd/acp/acp-mach-common.c
@@ -127,8 +127,13 @@ static int acp_card_rt5682_init(struct snd_soc_pcm_runtime *rtd) if (drvdata->hs_codec_id != RT5682) return -EINVAL; - drvdata->wclk = clk_get(component->dev, "rt5682-dai-wclk"); - drvdata->bclk = clk_get(component->dev, "rt5682-dai-bclk"); + drvdata->wclk = devm_clk_get(component->dev, "rt5682-dai-wclk"); + if (IS_ERR(drvdata->wclk)) + return PTR_ERR(drvdata->wclk); + + drvdata->bclk = devm_clk_get(component->dev, "rt5682-dai-bclk"); + if (IS_ERR(drvdata->bclk)) + return PTR_ERR(drvdata->bclk); ret = snd_soc_dapm_new_controls(dapm, rt5682_widgets, ARRAY_SIZE(rt5682_widgets)); @@ -370,8 +375,13 @@ static int acp_card_rt5682s_init(struct snd_soc_pcm_runtime *rtd) return -EINVAL; if (!drvdata->soc_mclk) { - drvdata->wclk = clk_get(component->dev, "rt5682-dai-wclk"); - drvdata->bclk = clk_get(component->dev, "rt5682-dai-bclk"); + drvdata->wclk = devm_clk_get(component->dev, "rt5682-dai-wclk"); + if (IS_ERR(drvdata->wclk)) + return PTR_ERR(drvdata->wclk); + + drvdata->bclk = devm_clk_get(component->dev, "rt5682-dai-bclk"); + if (IS_ERR(drvdata->bclk)) + return PTR_ERR(drvdata->bclk); } ret = snd_soc_dapm_new_controls(dapm, rt5682s_widgets,
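Both rt5682 init hunks above replace clk_get() with devm_clk_get() and stop ignoring lookup failures. The devm variant ties the clock reference to the device's lifetime, so the old leak (no clk_put() on any path) disappears, and the IS_ERR()/PTR_ERR() checks now propagate errors such as -EPROBE_DEFER. The pattern in isolation:

	struct clk *wclk = devm_clk_get(component->dev, "rt5682-dai-wclk");

	if (IS_ERR(wclk))
		return PTR_ERR(wclk);	/* reference dropped automatically on unbind */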
diff --git a/sound/soc/amd/acp/acp-sdw-legacy-mach.c b/sound/soc/amd/acp/acp-sdw-legacy-mach.c index c30ccf2..6388cd7 100644 --- a/sound/soc/amd/acp/acp-sdw-legacy-mach.c +++ b/sound/soc/amd/acp/acp-sdw-legacy-mach.c
@@ -111,6 +111,14 @@ static const struct dmi_system_id soc_sdw_quirk_table[] = { }, .driver_data = (void *)(ASOC_SDW_CODEC_SPKR), }, + { + .callback = soc_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "HN7306EA"), + }, + .driver_data = (void *)(ASOC_SDW_ACP_DMIC), + }, {} };
diff --git a/sound/soc/amd/acp/amd-acp63-acpi-match.c b/sound/soc/amd/acp/amd-acp63-acpi-match.c index 9b6a49c..1dbbaba 100644 --- a/sound/soc/amd/acp/amd-acp63-acpi-match.c +++ b/sound/soc/amd/acp/amd-acp63-acpi-match.c
@@ -30,6 +30,20 @@ static const struct snd_soc_acpi_endpoint spk_r_endpoint = { .group_id = 1 }; +static const struct snd_soc_acpi_endpoint spk_2_endpoint = { + .num = 0, + .aggregated = 1, + .group_position = 2, + .group_id = 1 +}; + +static const struct snd_soc_acpi_endpoint spk_3_endpoint = { + .num = 0, + .aggregated = 1, + .group_position = 3, + .group_id = 1 +}; + static const struct snd_soc_acpi_adr_device rt711_rt1316_group_adr[] = { { .adr = 0x000030025D071101ull, @@ -103,6 +117,345 @@ static const struct snd_soc_acpi_adr_device rt722_0_single_adr[] = { } }; +static const struct snd_soc_acpi_endpoint cs42l43_endpoints[] = { + { /* Jack Playback Endpoint */ + .num = 0, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + { /* DMIC Capture Endpoint */ + .num = 1, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + { /* Jack Capture Endpoint */ + .num = 2, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + { /* Speaker Playback Endpoint */ + .num = 3, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, +}; + +static const struct snd_soc_acpi_adr_device cs35l56x4_l1u3210_adr[] = { + { + .adr = 0x00013301FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00013201FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, + { + .adr = 0x00013101FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_2_endpoint, + .name_prefix = "AMP3" + }, + { + .adr = 0x00013001FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_3_endpoint, + .name_prefix = "AMP4" + }, +}; + +static const struct snd_soc_acpi_adr_device cs35l63x2_l0u01_adr[] = { + { + .adr = 0x00003001FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00003101FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, +}; + +static const struct snd_soc_acpi_adr_device cs35l63x2_l1u01_adr[] = { + { + .adr = 0x00013001FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00013101FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, +}; + +static const struct snd_soc_acpi_adr_device cs35l63x2_l1u13_adr[] = { + { + .adr = 0x00013101FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00013301FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, +}; + +static const struct snd_soc_acpi_adr_device cs35l63x4_l0u0246_adr[] = { + { + .adr = 0x00003001FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00003201FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, + { + .adr = 0x00003401FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_2_endpoint, + .name_prefix = "AMP3" + }, + { + .adr = 0x00003601FA356301ull, + .num_endpoints = 1, + .endpoints = &spk_3_endpoint, + .name_prefix = "AMP4" + }, +}; + +static const struct snd_soc_acpi_adr_device cs42l43_l0u0_adr[] = { + { + .adr = 0x00003001FA424301ull, + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l43" + } +}; + +static const struct snd_soc_acpi_adr_device cs42l43_l0u1_adr[] = { + { + .adr = 0x00003101FA424301ull, + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), + .endpoints = cs42l43_endpoints, + 
.name_prefix = "cs42l43" + } +}; + +static const struct snd_soc_acpi_adr_device cs42l43b_l0u1_adr[] = { + { + .adr = 0x00003101FA2A3B01ull, + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l43" + } +}; + +static const struct snd_soc_acpi_adr_device cs42l43_l1u0_cs35l56x4_l1u0123_adr[] = { + { + .adr = 0x00013001FA424301ull, + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l43" + }, + { + .adr = 0x00013001FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00013101FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, + { + .adr = 0x00013201FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_2_endpoint, + .name_prefix = "AMP3" + }, + { + .adr = 0x00013301FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_3_endpoint, + .name_prefix = "AMP4" + }, +}; + +static const struct snd_soc_acpi_adr_device cs42l45_l0u0_adr[] = { + { + .adr = 0x00003001FA424501ull, + /* Re-use endpoints, but cs42l45 has no speaker */ + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints) - 1, + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l45" + } +}; + +static const struct snd_soc_acpi_adr_device cs42l45_l1u0_adr[] = { + { + .adr = 0x00013001FA424501ull, + /* Re-use endpoints, but cs42l45 has no speaker */ + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints) - 1, + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l45" + } +}; + +static const struct snd_soc_acpi_link_adr acp63_cs35l56x4_l1u3210[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs35l56x4_l1u3210_adr), + .adr_d = cs35l56x4_l1u3210_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs35l63x4_l0u0246[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs35l63x4_l0u0246_adr), + .adr_d = cs35l63x4_l0u0246_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l43_l0u1[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs42l43_l0u1_adr), + .adr_d = cs42l43_l0u1_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l43b_l0u1[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs42l43b_l0u1_adr), + .adr_d = cs42l43b_l0u1_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l43_l0u0_cs35l56x4_l1u3210[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs42l43_l0u0_adr), + .adr_d = cs42l43_l0u0_adr, + }, + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs35l56x4_l1u3210_adr), + .adr_d = cs35l56x4_l1u3210_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l43_l1u0_cs35l56x4_l1u0123[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs42l43_l1u0_cs35l56x4_l1u0123_adr), + .adr_d = cs42l43_l1u0_cs35l56x4_l1u0123_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l45_l0u0[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs42l45_l0u0_adr), + .adr_d = cs42l45_l0u0_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l45_l0u0_cs35l63x2_l1u01[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs42l45_l0u0_adr), + .adr_d = cs42l45_l0u0_adr, + }, + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs35l63x2_l1u01_adr), + .adr_d = cs35l63x2_l1u01_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l45_l0u0_cs35l63x2_l1u13[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs42l45_l0u0_adr), + .adr_d = cs42l45_l0u0_adr, + }, + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs35l63x2_l1u13_adr), 
+ .adr_d = cs35l63x2_l1u13_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l45_l1u0[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs42l45_l1u0_adr), + .adr_d = cs42l45_l1u0_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l45_l1u0_cs35l63x2_l0u01[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs42l45_l1u0_adr), + .adr_d = cs42l45_l1u0_adr, + }, + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs35l63x2_l0u01_adr), + .adr_d = cs35l63x2_l0u01_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp63_cs42l45_l1u0_cs35l63x4_l0u0246[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs42l45_l1u0_adr), + .adr_d = cs42l45_l1u0_adr, + }, + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs35l63x4_l0u0246_adr), + .adr_d = cs35l63x4_l0u0246_adr, + }, + {} +}; + static const struct snd_soc_acpi_link_adr acp63_rt722_only[] = { { .mask = BIT(0), @@ -135,6 +488,66 @@ struct snd_soc_acpi_mach snd_soc_acpi_amd_acp63_sdw_machines[] = { .links = acp63_4_in_1_sdca, .drv_name = "amd_sdw", }, + { + .link_mask = BIT(0) | BIT(1), + .links = acp63_cs42l43_l0u0_cs35l56x4_l1u3210, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(0) | BIT(1), + .links = acp63_cs42l45_l1u0_cs35l63x4_l0u0246, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(0) | BIT(1), + .links = acp63_cs42l45_l0u0_cs35l63x2_l1u01, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(0) | BIT(1), + .links = acp63_cs42l45_l0u0_cs35l63x2_l1u13, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(0) | BIT(1), + .links = acp63_cs42l45_l1u0_cs35l63x2_l0u01, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(1), + .links = acp63_cs42l43_l1u0_cs35l56x4_l1u0123, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(1), + .links = acp63_cs35l56x4_l1u3210, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(0), + .links = acp63_cs35l63x4_l0u0246, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(0), + .links = acp63_cs42l43_l0u1, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(0), + .links = acp63_cs42l43b_l0u1, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(0), + .links = acp63_cs42l45_l0u0, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(1), + .links = acp63_cs42l45_l1u0, + .drv_name = "amd_sdw", + }, {}, }; EXPORT_SYMBOL(snd_soc_acpi_amd_acp63_sdw_machines);
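The 64-bit .adr values in these tables follow the SoundWire DisCo _ADR encoding, which is also where the l<link>u<unique> suffixes in the table names come from. A self-contained decoder, with field boundaries matching the SDW_* masks in include/linux/soundwire/sdw.h:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	/* AMP1 from cs35l56x4_l1u3210_adr: link 1, unique id 3 */
	uint64_t adr = 0x00013301FA355601ull;

	printf("link id:   %" PRIu64 "\n", (adr >> 48) & 0xf);
	printf("sdw ver:   %" PRIu64 "\n", (adr >> 44) & 0xf);
	printf("unique id: %" PRIu64 "\n", (adr >> 40) & 0xf);
	printf("mfg id:    0x%04" PRIx64 "\n", (adr >> 24) & 0xffff); /* 0x01fa = Cirrus */
	printf("part id:   0x%04" PRIx64 "\n", (adr >> 8) & 0xffff);  /* 0x3556 */
	printf("class id:  0x%02" PRIx64 "\n", adr & 0xff);
	return 0;
}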
diff --git a/sound/soc/amd/acp/amd-acp70-acpi-match.c b/sound/soc/amd/acp/amd-acp70-acpi-match.c index 7a567ba..1ae43df 100644 --- a/sound/soc/amd/acp/amd-acp70-acpi-match.c +++ b/sound/soc/amd/acp/amd-acp70-acpi-match.c
@@ -69,6 +69,28 @@ static const struct snd_soc_acpi_endpoint jack_amp_g1_dmic_endpoints[] = { }, }; +static const struct snd_soc_acpi_endpoint jack_dmic_endpoints[] = { + /* Jack Endpoint */ + { + .num = 0, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + /* DMIC Endpoint */ + { + /* + * rt721 endpoint #2 maps to AIF3 (internal DMIC capture). + * Endpoint #1 is AIF2 amp path and is handled by external amps + * on this platform. + */ + .num = 2, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, +}; + static const struct snd_soc_acpi_adr_device rt712_vb_1_group1_adr[] = { { .adr = 0x000130025D071201ull, @@ -563,6 +585,40 @@ static const struct snd_soc_acpi_link_adr acp70_rt1320_l0_rt722_l1[] = { {} }; +static const struct snd_soc_acpi_adr_device rt721_l1u0_tas2783x2_l1u8b_adr[] = { + { + .adr = 0x000130025D072101ull, + /* + * On this platform speakers are provided by two TAS2783 amps. + * Keep rt721 as UAJ + DMIC only. + */ + .num_endpoints = ARRAY_SIZE(jack_dmic_endpoints), + .endpoints = jack_dmic_endpoints, + .name_prefix = "rt721", + }, + { + .adr = 0x0001380102000001ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "tas2783-1", + }, + { + .adr = 0x00013B0102000001ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "tas2783-2", + }, +}; + +static const struct snd_soc_acpi_link_adr acp70_rt721_l1u0_tas2783x2_l1u8b[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(rt721_l1u0_tas2783x2_l1u8b_adr), + .adr_d = rt721_l1u0_tas2783x2_l1u8b_adr, + }, + {} +}; + struct snd_soc_acpi_mach snd_soc_acpi_amd_acp70_sdw_machines[] = { { .link_mask = BIT(0) | BIT(1), @@ -650,6 +706,11 @@ struct snd_soc_acpi_mach snd_soc_acpi_amd_acp70_sdw_machines[] = { .machine_check = snd_soc_acpi_amd_sdca_is_device_rt712_vb, .drv_name = "amd_sdw", }, + { + .link_mask = BIT(1), + .links = acp70_rt721_l1u0_tas2783x2_l1u8b, + .drv_name = "amd_sdw", + }, {}, }; EXPORT_SYMBOL(snd_soc_acpi_amd_acp70_sdw_machines);
diff --git a/sound/soc/amd/acp3x-rt5682-max9836.c b/sound/soc/amd/acp3x-rt5682-max9836.c index 4ca1978..d1eb6f1 100644 --- a/sound/soc/amd/acp3x-rt5682-max9836.c +++ b/sound/soc/amd/acp3x-rt5682-max9836.c
@@ -94,8 +94,13 @@ static int acp3x_5682_init(struct snd_soc_pcm_runtime *rtd) return ret; } - rt5682_dai_wclk = clk_get(component->dev, "rt5682-dai-wclk"); - rt5682_dai_bclk = clk_get(component->dev, "rt5682-dai-bclk"); + rt5682_dai_wclk = devm_clk_get(component->dev, "rt5682-dai-wclk"); + if (IS_ERR(rt5682_dai_wclk)) + return PTR_ERR(rt5682_dai_wclk); + + rt5682_dai_bclk = devm_clk_get(component->dev, "rt5682-dai-bclk"); + if (IS_ERR(rt5682_dai_bclk)) + return PTR_ERR(rt5682_dai_bclk); ret = snd_soc_card_jack_new_pins(card, "Headset Jack", SND_JACK_HEADSET |
diff --git a/sound/soc/amd/ps/pci-ps.c b/sound/soc/amd/ps/pci-ps.c index 3a20cc1..9751cf0 100644 --- a/sound/soc/amd/ps/pci-ps.c +++ b/sound/soc/amd/ps/pci-ps.c
@@ -339,7 +339,7 @@ static struct snd_soc_acpi_mach *acp63_sdw_machine_select(struct device *dev) mach->mach_params.subsystem_device = acp_data->subsystem_device; mach->mach_params.subsystem_id_set = true; - dev_dbg(dev, "SSID %x%x\n", mach->mach_params.subsystem_vendor, + dev_dbg(dev, "SSID %x%04x\n", mach->mach_params.subsystem_vendor, mach->mach_params.subsystem_device); return mach; }
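The format-string fix above matters whenever the subsystem device ID is below 0x1000: without a fixed width, the two hex values run together ambiguously. A two-line demonstration:

#include <stdio.h>

int main(void)
{
	printf("SSID %x%x\n", 0x17aa, 0x0386);   /* "SSID 17aa386"  - ambiguous   */
	printf("SSID %x%04x\n", 0x17aa, 0x0386); /* "SSID 17aa0386" - fixed width */
	return 0;
}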
diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index f1a6347..aa62009 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -48,6 +48,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Laptop 15-fc0xxx"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Dell G15 5525"), } @@ -703,6 +710,41 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Vivobook_ASUSLaptop M6501RR_M6501RR"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ASUS EXPERTBOOK BM1503CDA"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "PM1503CDA"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "BM1403CDA"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "Thin A15 B7VE"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "M7601RM"), + } + }, {} };
diff --git a/sound/soc/cirrus/ep93xx-i2s.c b/sound/soc/cirrus/ep93xx-i2s.c index cca01c0..5dba741 100644 --- a/sound/soc/cirrus/ep93xx-i2s.c +++ b/sound/soc/cirrus/ep93xx-i2s.c
@@ -91,16 +91,28 @@ static inline unsigned ep93xx_i2s_read_reg(struct ep93xx_i2s_info *info, return __raw_readl(info->regs + reg); } -static void ep93xx_i2s_enable(struct ep93xx_i2s_info *info, int stream) +static int ep93xx_i2s_enable(struct ep93xx_i2s_info *info, int stream) { unsigned base_reg; + int err; if ((ep93xx_i2s_read_reg(info, EP93XX_I2S_TX0EN) & 0x1) == 0 && (ep93xx_i2s_read_reg(info, EP93XX_I2S_RX0EN) & 0x1) == 0) { /* Enable clocks */ - clk_prepare_enable(info->mclk); - clk_prepare_enable(info->sclk); - clk_prepare_enable(info->lrclk); + err = clk_prepare_enable(info->mclk); + if (err) + return err; + err = clk_prepare_enable(info->sclk); + if (err) { + clk_disable_unprepare(info->mclk); + return err; + } + err = clk_prepare_enable(info->lrclk); + if (err) { + clk_disable_unprepare(info->sclk); + clk_disable_unprepare(info->mclk); + return err; + } /* Enable i2s */ ep93xx_i2s_write_reg(info, EP93XX_I2S_GLCTRL, 1); @@ -119,6 +131,8 @@ static void ep93xx_i2s_enable(struct ep93xx_i2s_info *info, int stream) ep93xx_i2s_write_reg(info, EP93XX_I2S_TXCTRL, EP93XX_I2S_TXCTRL_TXEMPTY_LVL | EP93XX_I2S_TXCTRL_TXUFIE); + + return 0; } static void ep93xx_i2s_disable(struct ep93xx_i2s_info *info, int stream) @@ -195,9 +209,7 @@ static int ep93xx_i2s_startup(struct snd_pcm_substream *substream, { struct ep93xx_i2s_info *info = snd_soc_dai_get_drvdata(dai); - ep93xx_i2s_enable(info, substream->stream); - - return 0; + return ep93xx_i2s_enable(info, substream->stream); } static void ep93xx_i2s_shutdown(struct snd_pcm_substream *substream, @@ -373,14 +385,16 @@ static int ep93xx_i2s_suspend(struct snd_soc_component *component) static int ep93xx_i2s_resume(struct snd_soc_component *component) { struct ep93xx_i2s_info *info = snd_soc_component_get_drvdata(component); + int err; if (!snd_soc_component_active(component)) return 0; - ep93xx_i2s_enable(info, SNDRV_PCM_STREAM_PLAYBACK); - ep93xx_i2s_enable(info, SNDRV_PCM_STREAM_CAPTURE); + err = ep93xx_i2s_enable(info, SNDRV_PCM_STREAM_PLAYBACK); + if (err) + return err; - return 0; + return ep93xx_i2s_enable(info, SNDRV_PCM_STREAM_CAPTURE); } #else #define ep93xx_i2s_suspend NULL
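The ep93xx change applies the usual unwind rule for clock chains: check each clk_prepare_enable() and, on failure, disable whatever was already enabled, in reverse order. The driver inlines the unwind in nested ifs; the same logic in the conventional goto-label shape (a generic fragment under assumed mclk/sclk/lrclk handles):

	err = clk_prepare_enable(mclk);
	if (err)
		return err;
	err = clk_prepare_enable(sclk);
	if (err)
		goto err_mclk;
	err = clk_prepare_enable(lrclk);
	if (err)
		goto err_sclk;
	return 0;

err_sclk:
	clk_disable_unprepare(sclk);
err_mclk:
	clk_disable_unprepare(mclk);
	return err;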
diff --git a/sound/soc/codecs/adau1372.c b/sound/soc/codecs/adau1372.c index fdee689..d7363f9 100644 --- a/sound/soc/codecs/adau1372.c +++ b/sound/soc/codecs/adau1372.c
@@ -762,7 +762,7 @@ static int adau1372_startup(struct snd_pcm_substream *substream, struct snd_soc_ return 0; } -static void adau1372_enable_pll(struct adau1372 *adau1372) +static int adau1372_enable_pll(struct adau1372 *adau1372) { unsigned int val, timeout = 0; int ret; @@ -778,19 +778,26 @@ static void adau1372_enable_pll(struct adau1372 *adau1372) timeout++; } while (!(val & 1) && timeout < 3); - if (ret < 0 || !(val & 1)) + if (ret < 0 || !(val & 1)) { dev_err(adau1372->dev, "Failed to lock PLL\n"); + return ret < 0 ? ret : -ETIMEDOUT; + } + + return 0; } -static void adau1372_set_power(struct adau1372 *adau1372, bool enable) +static int adau1372_set_power(struct adau1372 *adau1372, bool enable) { if (adau1372->enabled == enable) - return; + return 0; if (enable) { unsigned int clk_ctrl = ADAU1372_CLK_CTRL_MCLK_EN; + int ret; - clk_prepare_enable(adau1372->mclk); + ret = clk_prepare_enable(adau1372->mclk); + if (ret) + return ret; if (adau1372->pd_gpio) gpiod_set_value(adau1372->pd_gpio, 0); @@ -804,7 +811,14 @@ static void adau1372_set_power(struct adau1372 *adau1372, bool enable) * accessed. */ if (adau1372->use_pll) { - adau1372_enable_pll(adau1372); + ret = adau1372_enable_pll(adau1372); + if (ret) { + regcache_cache_only(adau1372->regmap, true); + if (adau1372->pd_gpio) + gpiod_set_value(adau1372->pd_gpio, 1); + clk_disable_unprepare(adau1372->mclk); + return ret; + } clk_ctrl |= ADAU1372_CLK_CTRL_CLKSRC; } @@ -829,6 +843,8 @@ static void adau1372_set_power(struct adau1372 *adau1372, bool enable) } adau1372->enabled = enable; + + return 0; } static int adau1372_set_bias_level(struct snd_soc_component *component, @@ -842,11 +858,9 @@ static int adau1372_set_bias_level(struct snd_soc_component *component, case SND_SOC_BIAS_PREPARE: break; case SND_SOC_BIAS_STANDBY: - adau1372_set_power(adau1372, true); - break; + return adau1372_set_power(adau1372, true); case SND_SOC_BIAS_OFF: - adau1372_set_power(adau1372, false); - break; + return adau1372_set_power(adau1372, false); } return 0;
diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 4707f28..af87eba 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c
@@ -26,7 +26,7 @@ #include "cs35l56.h" -static const struct reg_sequence cs35l56_patch[] = { +static const struct reg_sequence cs35l56_asp_patch[] = { /* * Firmware can change these to non-defaults to satisfy SDCA. * Ensure that they are at known defaults. @@ -43,6 +43,20 @@ static const struct reg_sequence cs35l56_patch[] = { { CS35L56_ASP1TX2_INPUT, 0x00000000 }, { CS35L56_ASP1TX3_INPUT, 0x00000000 }, { CS35L56_ASP1TX4_INPUT, 0x00000000 }, +}; + +int cs35l56_set_asp_patch(struct cs35l56_base *cs35l56_base) +{ + return regmap_register_patch(cs35l56_base->regmap, cs35l56_asp_patch, + ARRAY_SIZE(cs35l56_asp_patch)); +} +EXPORT_SYMBOL_NS_GPL(cs35l56_set_asp_patch, "SND_SOC_CS35L56_SHARED"); + +static const struct reg_sequence cs35l56_patch[] = { + /* + * Firmware can change these to non-defaults to satisfy SDCA. + * Ensure that they are at known defaults. + */ { CS35L56_SWIRE_DP3_CH1_INPUT, 0x00000018 }, { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 }, { CS35L56_SWIRE_DP3_CH3_INPUT, 0x00000029 },
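Splitting the ASP input defaults out of cs35l56_patch lets them be applied only when an ASP DAI is actually in use (see the new DAI probe hook in the cs35l56.c hunk below). regmap_register_patch() both writes the sequence immediately and replays it after every register cache sync, which is what makes it the right tool for forcing registers back to known defaults. Usage in miniature (a sketch with names abbreviated):

	static const struct reg_sequence example_patch[] = {
		{ CS35L56_ASP1TX1_INPUT, 0x00000000 },
	};

	/* written now, and re-applied automatically after regcache_sync() */
	ret = regmap_register_patch(regmap, example_patch, ARRAY_SIZE(example_patch));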
diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 2ff8b17..37909a3 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c
@@ -348,6 +348,13 @@ static int cs35l56_dsp_event(struct snd_soc_dapm_widget *w, return wm_adsp_event(w, kcontrol, event); } +static int cs35l56_asp_dai_probe(struct snd_soc_dai *codec_dai) +{ + struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(codec_dai->component); + + return cs35l56_set_asp_patch(&cs35l56->base); +} + static int cs35l56_asp_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) { struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(codec_dai->component); @@ -552,6 +559,7 @@ static int cs35l56_asp_dai_set_sysclk(struct snd_soc_dai *dai, } static const struct snd_soc_dai_ops cs35l56_ops = { + .probe = cs35l56_asp_dai_probe, .set_fmt = cs35l56_asp_dai_set_fmt, .set_tdm_slot = cs35l56_asp_dai_set_tdm_slot, .hw_params = cs35l56_asp_dai_hw_params, @@ -1617,9 +1625,9 @@ static int cs35l56_process_xu_onchip_speaker_id(struct cs35l56_private *cs35l56, if (num_pulls < 0) return num_pulls; - if (num_pulls != num_gpios) { + if (num_pulls && (num_pulls != num_gpios)) { dev_warn(cs35l56->base.dev, "%s count(%d) != %s count(%d)\n", - pull_name, num_pulls, gpio_name, num_gpios); + pull_name, num_pulls, gpio_name, num_gpios); } ret = cs35l56_check_and_save_onchip_spkid_gpios(&cs35l56->base,
diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c index b83bc4d..3e04e68 100644 --- a/sound/soc/codecs/cs42l43-jack.c +++ b/sound/soc/codecs/cs42l43-jack.c
@@ -699,6 +699,7 @@ static int cs42l43_run_type_detect(struct cs42l43_codec *priv) switch (type & CS42L43_HSDET_TYPE_STS_MASK) { case 0x0: // CTIA case 0x1: // OMTP + case 0x4: return cs42l43_run_load_detect(priv, true); case 0x2: // 3-pole return cs42l43_run_load_detect(priv, false);
diff --git a/sound/soc/codecs/rt1011.c b/sound/soc/codecs/rt1011.c index 9f34a6a..03f31d9 100644 --- a/sound/soc/codecs/rt1011.c +++ b/sound/soc/codecs/rt1011.c
@@ -1047,7 +1047,7 @@ static int rt1011_recv_spk_mode_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); - struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_to_dapm(kcontrol); + struct snd_soc_dapm_context *dapm = snd_soc_component_to_dapm(component); struct rt1011_priv *rt1011 = snd_soc_component_get_drvdata(component);
diff --git a/sound/soc/codecs/rt1320-sdw.c b/sound/soc/codecs/rt1320-sdw.c index 50f6566..8bb7e849 100644 --- a/sound/soc/codecs/rt1320-sdw.c +++ b/sound/soc/codecs/rt1320-sdw.c
@@ -2629,7 +2629,7 @@ static int rt1320_sdw_hw_params(struct snd_pcm_substream *substream, struct sdw_port_config port_config; struct sdw_port_config dmic_port_config[2]; struct sdw_stream_runtime *sdw_stream; - int retval; + int retval, num_channels; unsigned int sampling_rate; dev_dbg(dai->dev, "%s %s", __func__, dai->name); @@ -2661,7 +2661,8 @@ static int rt1320_sdw_hw_params(struct snd_pcm_substream *substream, dmic_port_config[1].num = 10; break; case RT1321_DEV_ID: - dmic_port_config[0].ch_mask = BIT(0) | BIT(1); + num_channels = params_channels(params); + dmic_port_config[0].ch_mask = GENMASK(num_channels - 1, 0); dmic_port_config[0].num = 8; break; default:
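The RT1321 hunk derives the DMIC port channel mask from the runtime channel count instead of hard-coding two channels; GENMASK(n - 1, 0) sets the low n bits. A standalone check (the local macro mirrors the kernel's 32-bit GENMASK from linux/bits.h):

#include <stdio.h>

#define GENMASK(h, l) ((~0u << (l)) & (~0u >> (31 - (h))))

int main(void)
{
	for (int ch = 1; ch <= 4; ch++)
		printf("%d channels -> ch_mask 0x%x\n", ch, GENMASK(ch - 1, 0));
	return 0;	/* 1 -> 0x1, 2 -> 0x3, 3 -> 0x7, 4 -> 0xf */
}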
diff --git a/sound/soc/codecs/sma1307.c b/sound/soc/codecs/sma1307.c index 4bb59e5..5850bf6 100644 --- a/sound/soc/codecs/sma1307.c +++ b/sound/soc/codecs/sma1307.c
@@ -1759,8 +1759,10 @@ static void sma1307_setting_loaded(struct sma1307_priv *sma1307, const char *fil sma1307->set.mode_size * 2 * sizeof(int), GFP_KERNEL); if (!sma1307->set.mode_set[i]) { - for (int j = 0; j < i; j++) - kfree(sma1307->set.mode_set[j]); + for (int j = 0; j < i; j++) { + devm_kfree(sma1307->dev, sma1307->set.mode_set[j]); + sma1307->set.mode_set[j] = NULL; + } sma1307->set.status = false; return; }
diff --git a/sound/soc/codecs/tas2781-fmwlib.c b/sound/soc/codecs/tas2781-fmwlib.c index c969eb3..a1d86bd 100644 --- a/sound/soc/codecs/tas2781-fmwlib.c +++ b/sound/soc/codecs/tas2781-fmwlib.c
@@ -32,6 +32,10 @@ #define TAS2781_YRAM1_PAGE 42 #define TAS2781_YRAM1_START_REG 88 +#define TAS2781_PG_REG TASDEVICE_REG(0x00, 0x00, 0x7c) +#define TAS2781_PG_1_0 0xA0 +#define TAS2781_PG_2_0 0xA8 + #define TAS2781_YRAM2_START_PAGE 43 #define TAS2781_YRAM2_END_PAGE 49 #define TAS2781_YRAM2_START_REG 8 @@ -98,6 +102,12 @@ struct blktyp_devidx_map { unsigned char dev_idx; }; +struct tas2781_cali_specific { + unsigned char sin_gni[4]; + int sin_gni_reg; + bool is_sin_gn_flush; +}; + static const char deviceNumber[TASDEVICE_DSP_TAS_MAX_DEVICE] = { 1, 2, 1, 2, 1, 1, 0, 2, 4, 3, 1, 2, 3, 4, 1, 2 }; @@ -2454,6 +2464,84 @@ static int tasdevice_load_data(struct tasdevice_priv *tas_priv, return ret; } +static int tas2781_cali_preproc(struct tasdevice_priv *priv, int i) +{ + struct tas2781_cali_specific *spec = priv->tasdevice[i].cali_specific; + struct calidata *cali_data = &priv->cali_data; + struct cali_reg *p = &cali_data->cali_reg_array; + unsigned char *data = cali_data->data; + int rc; + + /* + * On TAS2781, if the speaker's calibrated impedance is lower than the + * default value hard-coded inside the TAS2781, it will cause a volume + * lower than normal. To fix this issue, the SineGainI parameter + * needs updating. + */ + if (spec == NULL) { + int k = i * (cali_data->cali_dat_sz_per_dev + 1); + int re_org, re_cal, corrected_sin_gn, pg_id; + unsigned char r0_deflt[4]; + + spec = devm_kzalloc(priv->dev, sizeof(*spec), GFP_KERNEL); + if (spec == NULL) + return -ENOMEM; + priv->tasdevice[i].cali_specific = spec; + rc = tasdevice_dev_bulk_read(priv, i, p->r0_reg, r0_deflt, 4); + if (rc < 0) { + dev_err(priv->dev, "invalid RE from %d = %d\n", i, rc); + return rc; + } + /* + * SineGainI needs to be re-calculated; calculate the high 16 + * bits. + */ + re_org = r0_deflt[0] << 8 | r0_deflt[1]; + re_cal = data[k + 1] << 8 | data[k + 2]; + if (re_org > re_cal) { + rc = tasdevice_dev_read(priv, i, TAS2781_PG_REG, + &pg_id); + if (rc < 0) { + dev_err(priv->dev, "invalid PG id %d = %d\n", + i, rc); + return rc; + } + + spec->sin_gni_reg = (pg_id == TAS2781_PG_1_0) ?
+ TASDEVICE_REG(0, 0x1b, 0x34) : + TASDEVICE_REG(0, 0x18, 0x1c); + + rc = tasdevice_dev_bulk_read(priv, i, + spec->sin_gni_reg, + spec->sin_gni, 4); + if (rc < 0) { + dev_err(priv->dev, "wrong sinegaini %d = %d\n", + i, rc); + return rc; + } + corrected_sin_gn = re_org * ((spec->sin_gni[0] << 8) + + spec->sin_gni[1]); + corrected_sin_gn /= re_cal; + spec->sin_gni[0] = corrected_sin_gn >> 8; + spec->sin_gni[1] = corrected_sin_gn & 0xff; + + spec->is_sin_gn_flush = true; + } + } + + if (spec->is_sin_gn_flush) { + rc = tasdevice_dev_bulk_write(priv, i, spec->sin_gni_reg, + spec->sin_gni, 4); + if (rc < 0) { + dev_err(priv->dev, "update failed %d = %d\n", + i, rc); + return rc; + } + } + + return 0; +} + static void tasdev_load_calibrated_data(struct tasdevice_priv *priv, int i) { struct calidata *cali_data = &priv->cali_data; @@ -2462,6 +2550,9 @@ static void tasdev_load_calibrated_data(struct tasdevice_priv *priv, int i) int k = i * (cali_data->cali_dat_sz_per_dev + 1); int rc; + if (!data || !cali_data->total_sz) + return; + if (data[k] != i) { dev_err(priv->dev, "%s: no cal-data for dev %d from usr-spc\n", __func__, i); @@ -2469,6 +2560,12 @@ static void tasdev_load_calibrated_data(struct tasdevice_priv *priv, int i) } k++; + if (priv->chip_id == TAS2781) { + rc = tas2781_cali_preproc(priv, i); + if (rc < 0) + return; + } + rc = tasdevice_dev_bulk_write(priv, i, p->r0_reg, &(data[k]), 4); if (rc < 0) { dev_err(priv->dev, "chn %d r0_reg bulk_wr err = %d\n", i, rc);
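The correction in tas2781_cali_preproc() is a linear rescale: the high 16 bits of SineGainI are multiplied by the factory-default impedance and divided by the (lower) calibrated impedance, so the gain rises in proportion. Worked numerically with hypothetical register values:

#include <stdio.h>

int main(void)
{
	int re_org = 0x2000;	/* factory-default impedance, high 16 bits (hypothetical) */
	int re_cal = 0x1c00;	/* measured calibration value, lower than the default */
	int sin_gn = 0x4000;	/* current SineGainI, high 16 bits (hypothetical) */

	if (re_org > re_cal)
		printf("SineGainI 0x%04x -> 0x%04x\n", sin_gn, re_org * sin_gn / re_cal);
	return 0;	/* prints 0x4000 -> 0x4924 */
}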
diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index c8db33f..bc41a14 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c
@@ -2172,7 +2172,7 @@ static int wcd934x_init_dmic(struct snd_soc_component *comp) u32 def_dmic_rate, dmic_clk_drv; int ret; - ret = wcd_dt_parse_mbhc_data(comp->dev, &wcd->mbhc_cfg); + ret = wcd_dt_parse_micbias_info(&wcd->common); if (ret) return ret;
diff --git a/sound/soc/fsl/fsl_easrc.c b/sound/soc/fsl/fsl_easrc.c index e64a0d9..6c56134 100644 --- a/sound/soc/fsl/fsl_easrc.c +++ b/sound/soc/fsl/fsl_easrc.c
@@ -52,10 +52,13 @@ static int fsl_easrc_iec958_put_bits(struct snd_kcontrol *kcontrol, struct soc_mreg_control *mc = (struct soc_mreg_control *)kcontrol->private_value; unsigned int regval = ucontrol->value.integer.value[0]; + int ret; + + ret = (easrc_priv->bps_iec958[mc->regbase] != regval); easrc_priv->bps_iec958[mc->regbase] = regval; - return 0; + return ret; } static int fsl_easrc_iec958_get_bits(struct snd_kcontrol *kcontrol, @@ -93,14 +96,17 @@ static int fsl_easrc_set_reg(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); struct soc_mreg_control *mc = (struct soc_mreg_control *)kcontrol->private_value; + struct fsl_asrc *easrc = snd_soc_component_get_drvdata(component); unsigned int regval = ucontrol->value.integer.value[0]; + bool changed; int ret; - ret = snd_soc_component_write(component, mc->regbase, regval); - if (ret < 0) + ret = regmap_update_bits_check(easrc->regmap, mc->regbase, + GENMASK(31, 0), regval, &changed); + if (ret != 0) return ret; - return 0; + return changed; } #define SOC_SINGLE_REG_RW(xname, xreg) \
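Both fsl_easrc hunks exist to honor the ALSA kcontrol contract: a put() callback must return 1 when the value actually changed, 0 when it did not, and a negative errno on failure, so userspace receives change notifications only when warranted. The convention in sketch form (cached_val stands in for real driver state):

	static long cached_val;

	static int example_put(struct snd_kcontrol *kc,
			       struct snd_ctl_elem_value *uv)
	{
		long val = uv->value.integer.value[0];

		if (val == cached_val)
			return 0;	/* unchanged: no event to userspace */
		cached_val = val;
		return 1;		/* changed: triggers a value notification */
	}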
diff --git a/sound/soc/fsl/imx-card.c b/sound/soc/fsl/imx-card.c index 05b4e97..a4518fe 100644 --- a/sound/soc/fsl/imx-card.c +++ b/sound/soc/fsl/imx-card.c
@@ -710,6 +710,8 @@ static int imx_card_parse_of(struct imx_card_data *data) link->ops = &imx_aif_ops; } + playback_only = false; + capture_only = false; graph_util_parse_link_direction(np, &playback_only, &capture_only); link->playback_only = playback_only; link->capture_only = capture_only;
diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index bdc02e8..89d694c 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c
@@ -1038,11 +1038,15 @@ int graph_util_is_ports0(struct device_node *np) else port = np; - struct device_node *ports __free(device_node) = of_get_parent(port); - struct device_node *top __free(device_node) = of_get_parent(ports); - struct device_node *ports0 __free(device_node) = of_get_child_by_name(top, "ports"); + struct device_node *ports __free(device_node) = of_get_parent(port); + const char *at = strchr(kbasename(ports->full_name), '@'); - return ports0 == ports; + /* + * Since child iteration order may differ + * between a base DT and DT overlays, + * string match "ports" or "ports@0" in the node name instead. + */ + return !at || !strcmp(at, "@0"); } EXPORT_SYMBOL_GPL(graph_util_is_ports0); @@ -1179,9 +1183,9 @@ void graph_util_parse_link_direction(struct device_node *np, bool is_playback_only = of_property_read_bool(np, "playback-only"); bool is_capture_only = of_property_read_bool(np, "capture-only"); - if (np && playback_only) + if (playback_only && is_playback_only) *playback_only = is_playback_only; - if (np && capture_only) + if (capture_only && is_capture_only) *capture_only = is_capture_only; } EXPORT_SYMBOL_GPL(graph_util_parse_link_direction);
diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index c5942b5..d53af8f7 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig
@@ -530,8 +530,6 @@ select SND_SOC_CS42L43_SDW select MFD_CS42L43 select MFD_CS42L43_SDW - select PINCTRL_CS42L43 - select SPI_CS42L43 select SND_SOC_CS35L56_SPI select SND_SOC_CS35L56_SDW select SND_SOC_DMIC
diff --git a/sound/soc/intel/boards/ehl_rt5660.c b/sound/soc/intel/boards/ehl_rt5660.c index 5c7b218..c40cd9f 100644 --- a/sound/soc/intel/boards/ehl_rt5660.c +++ b/sound/soc/intel/boards/ehl_rt5660.c
@@ -127,7 +127,7 @@ static int rt5660_hw_params(struct snd_pcm_substream *substream, params_rate(params) * 50, params_rate(params) * 512); if (ret < 0) - dev_err(codec_dai->dev, "can't set codec pll: %d\n", ret); + dev_err(rtd->dev, "can't set codec pll: %d\n", ret); return ret; }
diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index f230991..c18ec60 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c
@@ -763,6 +763,14 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { }, .driver_data = (void *)(SOC_SDW_CODEC_SPKR), }, + { + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0CCD") + }, + .driver_data = (void *)(SOC_SDW_CODEC_SPKR), + }, /* Pantherlake devices*/ { .callback = sof_sdw_quirk_cb,
diff --git a/sound/soc/intel/catpt/device.c b/sound/soc/intel/catpt/device.c index 0638aec..0b3f20e 100644 --- a/sound/soc/intel/catpt/device.c +++ b/sound/soc/intel/catpt/device.c
@@ -281,7 +281,15 @@ static int catpt_acpi_probe(struct platform_device *pdev) if (IS_ERR(cdev->pci_ba)) return PTR_ERR(cdev->pci_ba); - /* alloc buffer for storing DRAM context during dx transitions */ + /* + * As per design HOST is responsible for preserving firmware's runtime + * context during D0 -> D3 -> D0 transitions. Addresses used for DMA + * to/from HOST memory shall be outside the reserved range of 0xFFFxxxxx. + */ + ret = dma_coerce_mask_and_coherent(cdev->dev, DMA_BIT_MASK(31)); + if (ret) + return ret; + cdev->dxbuf_vaddr = dmam_alloc_coherent(dev, catpt_dram_size(cdev), &cdev->dxbuf_paddr, GFP_KERNEL); if (!cdev->dxbuf_vaddr)
diff --git a/sound/soc/intel/catpt/dsp.c b/sound/soc/intel/catpt/dsp.c index 008a20a..677f348 100644 --- a/sound/soc/intel/catpt/dsp.c +++ b/sound/soc/intel/catpt/dsp.c
@@ -125,9 +125,6 @@ int catpt_dmac_probe(struct catpt_dev *cdev) dmac->dev = cdev->dev; dmac->irq = cdev->irq; - ret = dma_coerce_mask_and_coherent(cdev->dev, DMA_BIT_MASK(31)); - if (ret) - return ret; /* * Caller is responsible for putting device in D0 to allow * for I/O and memory access before probing DW.
diff --git a/sound/soc/qcom/qdsp6/q6apm-dai.c b/sound/soc/qcom/qdsp6/q6apm-dai.c index de3bdac..168c166 100644 --- a/sound/soc/qcom/qdsp6/q6apm-dai.c +++ b/sound/soc/qcom/qdsp6/q6apm-dai.c
@@ -838,6 +838,7 @@ static const struct snd_soc_component_driver q6apm_fe_dai_component = { .ack = q6apm_dai_ack, .compress_ops = &q6apm_dai_compress_ops, .use_dai_pcm_id = true, + .remove_order = SND_SOC_COMP_ORDER_EARLY, }; static int q6apm_dai_probe(struct platform_device *pdev)
diff --git a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c index 528756f..5be37ee 100644 --- a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c +++ b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c
@@ -278,6 +278,7 @@ static const struct snd_soc_component_driver q6apm_lpass_dai_component = { .of_xlate_dai_name = q6dsp_audio_ports_of_xlate_dai_name, .be_pcm_base = AUDIOREACH_BE_PCM_BASE, .use_dai_pcm_id = true, + .remove_order = SND_SOC_COMP_ORDER_FIRST, }; static int q6apm_lpass_dai_dev_probe(struct platform_device *pdev)
diff --git a/sound/soc/qcom/qdsp6/q6apm.c b/sound/soc/qcom/qdsp6/q6apm.c index 44841fd..970b08c 100644 --- a/sound/soc/qcom/qdsp6/q6apm.c +++ b/sound/soc/qcom/qdsp6/q6apm.c
@@ -715,6 +715,7 @@ static const struct snd_soc_component_driver q6apm_audio_component = { .name = APM_AUDIO_DRV_NAME, .probe = q6apm_audio_probe, .remove = q6apm_audio_remove, + .remove_order = SND_SOC_COMP_ORDER_LAST, }; static int apm_probe(gpr_device_t *gdev)
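Editor's note: these three hunks order teardown across the q6apm components. The ASoC core removes components one remove_order value at a time, so the LPASS DAI component (FIRST) goes before the FE DAI component (EARLY), and the core q6apm component (LAST) is torn down only after its users are gone. A sketch of that walk, assuming the usual ordering constants from include/sound/soc-component.h:

#include <stdio.h>

/* Ordering constants as in include/sound/soc-component.h (assumed). */
enum {
	SND_SOC_COMP_ORDER_FIRST  = -2,
	SND_SOC_COMP_ORDER_EARLY  = -1,
	SND_SOC_COMP_ORDER_NORMAL =  0,
	SND_SOC_COMP_ORDER_LATE   =  1,
	SND_SOC_COMP_ORDER_LAST   =  2,
};

int main(void)
{
	const struct { const char *name; int remove_order; } comps[] = {
		{ "q6apm-lpass-dais", SND_SOC_COMP_ORDER_FIRST },
		{ "q6apm-dai",        SND_SOC_COMP_ORDER_EARLY },
		{ "q6apm",            SND_SOC_COMP_ORDER_LAST },
	};
	int order, i;

	/* The core makes one removal pass per order value, low to high. */
	for (order = SND_SOC_COMP_ORDER_FIRST;
	     order <= SND_SOC_COMP_ORDER_LAST; order++)
		for (i = 0; i < 3; i++)
			if (comps[i].remove_order == order)
				printf("remove %s\n", comps[i].name);
	return 0;
}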
diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index e9964f0..140907a 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c
@@ -1360,10 +1360,10 @@ static int i2s_create_secondary_device(struct samsung_i2s_priv *priv) if (!pdev_sec) return -ENOMEM; - pdev_sec->driver_override = kstrdup("samsung-i2s", GFP_KERNEL); - if (!pdev_sec->driver_override) { + ret = device_set_driver_override(&pdev_sec->dev, "samsung-i2s"); + if (ret) { platform_device_put(pdev_sec); - return -ENOMEM; + return ret; } ret = platform_device_add(pdev_sec);
diff --git a/sound/soc/sdca/sdca_functions.c b/sound/soc/sdca/sdca_functions.c index 95b67bb..dca60ee 100644 --- a/sound/soc/sdca/sdca_functions.c +++ b/sound/soc/sdca/sdca_functions.c
@@ -216,9 +216,6 @@ static int find_sdca_init_table(struct device *dev, } else if (num_init_writes % sizeof(*raw) != 0) { dev_err(dev, "%pfwP: init table size invalid\n", function_node); return -EINVAL; - } else if ((num_init_writes / sizeof(*raw)) > SDCA_MAX_INIT_COUNT) { - dev_err(dev, "%pfwP: maximum init table size exceeded\n", function_node); - return -EINVAL; } raw = kzalloc(num_init_writes, GFP_KERNEL); @@ -1156,9 +1153,12 @@ static int find_sdca_entity_iot(struct device *dev, if (!terminal->is_dataport) { const char *type_name = sdca_find_terminal_name(terminal->type); - if (type_name) + if (type_name) { entity->label = devm_kasprintf(dev, GFP_KERNEL, "%s %s", entity->label, type_name); + if (!entity->label) + return -ENOMEM; + } } ret = fwnode_property_read_u32(entity_node, @@ -1601,10 +1601,19 @@ static int find_sdca_entities(struct device *dev, struct sdw_slave *sdw, static struct sdca_entity *find_sdca_entity_by_label(struct sdca_function_data *function, const char *entity_label) { + struct sdca_entity *entity = NULL; int i; for (i = 0; i < function->num_entities; i++) { - struct sdca_entity *entity = &function->entities[i]; + entity = &function->entities[i]; + + /* check whole string first */ + if (!strcmp(entity->label, entity_label)) + return entity; + } + + for (i = 0; i < function->num_entities; i++) { + entity = &function->entities[i]; if (!strncmp(entity->label, entity_label, strlen(entity_label))) return entity;
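Editor's note: the two-pass lookup in find_sdca_entity_by_label() exists so an exact label can never be shadowed by a longer label that merely shares its prefix. An illustrative userspace sketch (the labels are invented):

#include <stdio.h>
#include <string.h>

static const char *labels[] = { "IT 10", "IT 1", "OT 2" };
#define N_LABELS (sizeof(labels) / sizeof(labels[0]))

static const char *find_label(const char *wanted)
{
	size_t i;

	/* pass 1: whole-string match, so "IT 1" is found even though
	 * "IT 10" appears first and shares the prefix */
	for (i = 0; i < N_LABELS; i++)
		if (!strcmp(labels[i], wanted))
			return labels[i];

	/* pass 2: fall back to the old prefix match */
	for (i = 0; i < N_LABELS; i++)
		if (!strncmp(labels[i], wanted, strlen(wanted)))
			return labels[i];

	return NULL;
}

int main(void)
{
	printf("%s\n", find_label("IT 1")); /* "IT 1", not "IT 10" */
	return 0;
}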
diff --git a/sound/soc/sdca/sdca_interrupts.c b/sound/soc/sdca/sdca_interrupts.c index d9e22cf..95b1ab4 100644 --- a/sound/soc/sdca/sdca_interrupts.c +++ b/sound/soc/sdca/sdca_interrupts.c
@@ -265,9 +265,9 @@ static int sdca_irq_request_locked(struct device *dev, } /** - * sdca_request_irq - request an individual SDCA interrupt + * sdca_irq_request - request an individual SDCA interrupt * @dev: Pointer to the struct device against which things should be allocated. - * @interrupt_info: Pointer to the interrupt information structure. + * @info: Pointer to the interrupt information structure. * @sdca_irq: SDCA interrupt position. * @name: Name to be given to the IRQ. * @handler: A callback thread function to be called for the IRQ.
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index d0fffef..ff6eb6b 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c
@@ -462,8 +462,7 @@ static void soc_free_pcm_runtime(struct snd_soc_pcm_runtime *rtd) list_del(&rtd->list); - if (delayed_work_pending(&rtd->delayed_work)) - flush_delayed_work(&rtd->delayed_work); + flush_delayed_work(&rtd->delayed_work); snd_soc_pcm_component_free(rtd); /* @@ -1864,12 +1863,15 @@ static void cleanup_dmi_name(char *name) /* * Check if a DMI field is valid, i.e. not containing any string - * in the black list. + * in the black list and not the empty string. */ static int is_dmi_valid(const char *field) { int i = 0; + if (!field[0]) + return 0; + while (dmi_blacklist[i]) { if (strstr(field, dmi_blacklist[i])) return 0; @@ -2122,6 +2124,9 @@ static void soc_cleanup_card_resources(struct snd_soc_card *card) for_each_card_rtds(card, rtd) if (rtd->initialized) snd_soc_link_exit(rtd); + /* flush delayed work before removing DAIs and DAPM widgets */ + snd_soc_flush_all_delayed_work(card); + /* remove and free each DAI */ soc_remove_link_dais(card); soc_remove_link_components(card); @@ -2854,6 +2859,7 @@ int snd_soc_component_initialize(struct snd_soc_component *component, INIT_LIST_HEAD(&component->dobj_list); INIT_LIST_HEAD(&component->card_list); INIT_LIST_HEAD(&component->list); + INIT_LIST_HEAD(&component->card_aux_list); mutex_init(&component->io_mutex); if (!component->name) {
diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index db077e9..c12ffdc 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c
@@ -2950,7 +2950,7 @@ static int sof_ipc4_control_load_bytes(struct snd_sof_dev *sdev, struct snd_sof_ return -EINVAL; } - if (scontrol->priv_size < sizeof(struct sof_abi_hdr)) { + if (scontrol->priv_size && scontrol->priv_size < sizeof(struct sof_abi_hdr)) { dev_err(sdev->dev, "bytes control %s initial data size %zu is insufficient.\n", scontrol->name, scontrol->priv_size);
diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c index 18e2401..35200d80 100644 --- a/sound/soc/sof/topology.c +++ b/sound/soc/sof/topology.c
@@ -736,7 +736,7 @@ static int sof_parse_token_sets(struct snd_soc_component *scomp, asize = le32_to_cpu(array->size); /* validate asize */ - if (asize < 0) { /* FIXME: A zero-size array makes no sense */ + if (asize < sizeof(*array)) { dev_err(scomp->dev, "error: invalid array size 0x%x\n", asize); return -EINVAL;
diff --git a/sound/soc/tegra/tegra_audio_graph_card.c b/sound/soc/tegra/tegra_audio_graph_card.c index 94b5ab7..ea10e6e 100644 --- a/sound/soc/tegra/tegra_audio_graph_card.c +++ b/sound/soc/tegra/tegra_audio_graph_card.c
@@ -231,6 +231,15 @@ static const struct tegra_audio_cdata tegra186_data = { .plla_out0_rates[x11_RATE] = 45158400, }; +static const struct tegra_audio_cdata tegra238_data = { + /* PLLA */ + .plla_rates[x8_RATE] = 1277952000, + .plla_rates[x11_RATE] = 1264435200, + /* PLLA_OUT0 */ + .plla_out0_rates[x8_RATE] = 49152000, + .plla_out0_rates[x11_RATE] = 45158400, +}; + static const struct tegra_audio_cdata tegra264_data = { /* PLLA1 */ .plla_rates[x8_RATE] = 983040000, @@ -245,6 +254,8 @@ static const struct of_device_id graph_of_tegra_match[] = { .data = &tegra210_data }, { .compatible = "nvidia,tegra186-audio-graph-card", .data = &tegra186_data }, + { .compatible = "nvidia,tegra238-audio-graph-card", + .data = &tegra238_data }, { .compatible = "nvidia,tegra264-audio-graph-card", .data = &tegra264_data }, {},
diff --git a/sound/usb/Kconfig b/sound/usb/Kconfig index 9b890ab..b458891 100644 --- a/sound/usb/Kconfig +++ b/sound/usb/Kconfig
@@ -192,6 +192,7 @@ tristate "Qualcomm Audio Offload driver" depends on QCOM_QMI_HELPERS && SND_USB_AUDIO && SND_SOC_USB depends on USB_XHCI_HCD && USB_XHCI_SIDEBAND + select AUXILIARY_BUS help Say Y here to enable the Qualcomm USB audio offloading feature.
diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index dfd8204..3a71bab 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c
@@ -488,7 +488,7 @@ static int init_card(struct snd_usb_caiaqdev *cdev) memset(id, 0, sizeof(id)); for (c = card->shortname, len = 0; - *c && len < sizeof(card->id); c++) + *c && len < sizeof(card->id) - 1; c++) if (*c != ' ') id[len++] = *c;
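Editor's note: the one-character change reserves the final byte of card->id for the NUL terminator; with the old bound, a long shortname could fill the whole buffer and leave the id unterminated. The pattern in isolation (buffer size and source string are arbitrary, and the kernel loop additionally skips spaces):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char id[8];
	const char *src = "A-very-long-shortname";
	size_t len;

	memset(id, 0, sizeof(id));
	/* Stop at sizeof(id) - 1 so the last byte stays NUL even when the
	 * source fills the buffer; "len < sizeof(id)" could write all 8
	 * bytes and leave no terminator. */
	for (len = 0; src[len] && len < sizeof(id) - 1; len++)
		id[len] = src[len];

	printf("%s\n", id); /* "A-very-" */
	return 0;
}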
diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 73bce971..bf4401a 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c
@@ -160,8 +160,8 @@ int snd_usb_endpoint_implicit_feedback_sink(struct snd_usb_endpoint *ep) * This won't be used for implicit feedback which takes the packet size * returned from the sync source */ -static int slave_next_packet_size(struct snd_usb_endpoint *ep, - unsigned int avail) +static int synced_next_packet_size(struct snd_usb_endpoint *ep, + unsigned int avail) { unsigned int phase; int ret; @@ -221,13 +221,14 @@ int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep, packet = ctx->packet_size[idx]; if (packet) { + packet = min(packet, ep->maxframesize); if (avail && packet >= avail) return -EAGAIN; return packet; } if (ep->sync_source) - return slave_next_packet_size(ep, avail); + return synced_next_packet_size(ep, avail); else return next_packet_size(ep, avail); } @@ -1378,6 +1379,9 @@ int snd_usb_endpoint_set_params(struct snd_usb_audio *chip, return -EINVAL; } + ep->packsize[0] = min(ep->packsize[0], ep->maxframesize); + ep->packsize[1] = min(ep->packsize[1], ep->maxframesize); + /* calculate the frequency in 16.16 format */ ep->freqm = ep->freqn; ep->freqshift = INT_MIN;
diff --git a/sound/usb/format.c b/sound/usb/format.c index 64cfe4a..1207c50 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c
@@ -305,17 +305,48 @@ static bool s1810c_valid_sample_rate(struct audioformat *fp, } /* - * Many Focusrite devices supports a limited set of sampling rates per - * altsetting. Maximum rate is exposed in the last 4 bytes of Format Type - * descriptor which has a non-standard bLength = 10. + * Focusrite devices use rate pairs: 44100/48000, 88200/96000, and + * 176400/192000. Return true if rate is in the pair for max_rate. + */ +static bool focusrite_rate_pair(unsigned int rate, + unsigned int max_rate) +{ + switch (max_rate) { + case 48000: return rate == 44100 || rate == 48000; + case 96000: return rate == 88200 || rate == 96000; + case 192000: return rate == 176400 || rate == 192000; + default: return true; + } +} + +/* + * Focusrite devices report all supported rates in a single clock + * source but only a subset is valid per altsetting. + * + * Detection uses two descriptor features: + * + * 1. Format Type descriptor bLength == 10: non-standard extension + * with max sample rate in bytes 6..9. + * + * 2. bmControls VAL_ALT_SETTINGS readable bit: when set, the device + * only supports the highest rate pair for that altsetting, and when + * clear, all rates up to max_rate are valid. + * + * For devices without the bLength == 10 extension but with + * VAL_ALT_SETTINGS readable and multiple altsettings (only seen in + * Scarlett 18i8 3rd Gen playback), fall back to the Focusrite + * convention: alt 1 = 48kHz, alt 2 = 96kHz, alt 3 = 192kHz. */ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, struct audioformat *fp, unsigned int rate) { + struct usb_interface *iface; struct usb_host_interface *alts; + struct uac2_as_header_descriptor *as; unsigned char *fmt; unsigned int max_rate; + bool val_alt; alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting); if (!alts) @@ -326,9 +357,21 @@ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, if (!fmt) return true; + as = snd_usb_find_csint_desc(alts->extra, alts->extralen, + NULL, UAC_AS_GENERAL); + if (!as) + return true; + + val_alt = uac_v2v3_control_is_readable(as->bmControls, + UAC2_AS_VAL_ALT_SETTINGS); + if (fmt[0] == 10) { /* bLength */ max_rate = combine_quad(&fmt[6]); + if (val_alt) + return focusrite_rate_pair(rate, max_rate); + + /* No val_alt: rates fall through from higher */ switch (max_rate) { case 192000: if (rate == 176400 || rate == 192000) @@ -344,12 +387,29 @@ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, usb_audio_info(chip, "%u:%d : unexpected max rate: %u\n", fp->iface, fp->altsetting, max_rate); - return true; } } - return true; + if (!val_alt) + return true; + + /* Multi-altsetting device with val_alt but no max_rate + * in the format descriptor. Use Focusrite convention: + * alt 1 = 48kHz, alt 2 = 96kHz, alt 3 = 192kHz. + */ + iface = usb_ifnum_to_if(chip->dev, fp->iface); + if (!iface || iface->num_altsetting <= 2) + return true; + + switch (fp->altsetting) { + case 1: max_rate = 48000; break; + case 2: max_rate = 96000; break; + case 3: max_rate = 192000; break; + default: return true; + } + + return focusrite_rate_pair(rate, max_rate); } /*
diff --git a/sound/usb/mixer_s1810c.c b/sound/usb/mixer_s1810c.c index 473cb29..7eac7d1 100644 --- a/sound/usb/mixer_s1810c.c +++ b/sound/usb/mixer_s1810c.c
@@ -71,7 +71,7 @@ * * e I guess the same as with mixer * */ -/** struct s1810c_ctl_packet - basic vendor request +/* struct s1810c_ctl_packet - basic vendor request * @selector: device/mixer/output * @b: request-dependant field b * @tag: fixed value identifying type of request @@ -94,14 +94,14 @@ struct s1810c_ctl_packet { __le32 e; }; -/** selectors for CMD request +/* selectors for CMD request */ #define SC1810C_SEL_DEVICE 0 #define SC1810C_SEL_MIXER 0x64 #define SC1810C_SEL_OUTPUT 0x65 -/** control ids */ +/* control ids */ #define SC1810C_CTL_LINE_SW 0 #define SC1810C_CTL_MUTE_SW 1 #define SC1824C_CTL_MONO_SW 2 @@ -127,7 +127,7 @@ struct s1810c_ctl_packet { #define SC1810C_GET_STATE_TAG SC1810C_SET_STATE_TAG #define SC1810C_GET_STATE_LEN SC1810C_SET_STATE_LEN -/** Mixer levels normally range from 0 (off) to 0x0100 0000 (0 dB). +/* Mixer levels normally range from 0 (off) to 0x0100 0000 (0 dB). * raw_level = 2^24 * 10^(db_level / 20), thus * -3dB = 0xb53bf0 (technically, half-power -3.01...dB would be 0xb504f3) * -96dB = 0x109 @@ -145,7 +145,7 @@ struct s1810c_ctl_packet { #define MIXER_LEVEL_N3DB 0xb53bf0 #define MIXER_LEVEL_0DB 0x1000000 -/** +/* * This packet includes mixer volumes and * various other fields, it's an extended * version of ctl_packet, with a and b @@ -155,7 +155,7 @@ struct s1810c_state_packet { __le32 fields[63]; }; -/** indices into s1810c_state_packet.fields[] +/* indices into s1810c_state_packet.fields[] */ #define SC1810C_STATE_TAG_IDX 2 #define SC1810C_STATE_LEN_IDX 3
diff --git a/sound/usb/mixer_scarlett2.c b/sound/usb/mixer_scarlett2.c index 85a0316..fd1fb66 100644 --- a/sound/usb/mixer_scarlett2.c +++ b/sound/usb/mixer_scarlett2.c
@@ -1328,8 +1328,6 @@ struct scarlett2_data { struct snd_kcontrol *mux_ctls[SCARLETT2_MUX_MAX]; struct snd_kcontrol *mix_ctls[SCARLETT2_MIX_MAX]; struct snd_kcontrol *compressor_ctls[SCARLETT2_COMPRESSOR_CTLS_MAX]; - struct snd_kcontrol *precomp_flt_ctls[SCARLETT2_PRECOMP_FLT_CTLS_MAX]; - struct snd_kcontrol *peq_flt_ctls[SCARLETT2_PEQ_FLT_CTLS_MAX]; struct snd_kcontrol *precomp_flt_switch_ctls[SCARLETT2_DSP_SWITCH_MAX]; struct snd_kcontrol *peq_flt_switch_ctls[SCARLETT2_DSP_SWITCH_MAX]; struct snd_kcontrol *direct_monitor_ctl; @@ -3447,7 +3445,6 @@ static int scarlett2_update_autogain(struct usb_mixer_interface *mixer) private->autogain_status[i] = private->num_autogain_status_texts - 1; - for (i = 0; i < SCARLETT2_AG_TARGET_COUNT; i++) if (scarlett2_has_config_item(private, scarlett2_ag_target_configs[i])) { @@ -5372,8 +5369,7 @@ static int scarlett2_update_filter_values(struct usb_mixer_interface *mixer) err = scarlett2_usb_get_config( mixer, SCARLETT2_CONFIG_PEQ_FLT_SWITCH, - info->dsp_input_count * info->peq_flt_count, - private->peq_flt_switch); + info->dsp_input_count, private->peq_flt_switch); if (err < 0) return err; @@ -6546,7 +6542,7 @@ static int scarlett2_add_dsp_ctls(struct usb_mixer_interface *mixer, int i) err = scarlett2_add_new_ctl( mixer, &scarlett2_precomp_flt_ctl, i * info->precomp_flt_count + j, - 1, s, &private->precomp_flt_switch_ctls[j]); + 1, s, NULL); if (err < 0) return err; } @@ -6556,7 +6552,7 @@ static int scarlett2_add_dsp_ctls(struct usb_mixer_interface *mixer, int i) err = scarlett2_add_new_ctl( mixer, &scarlett2_peq_flt_ctl, i * info->peq_flt_count + j, - 1, s, &private->peq_flt_switch_ctls[j]); + 1, s, NULL); if (err < 0) return err; } @@ -8255,6 +8251,8 @@ static int scarlett2_find_fc_interface(struct usb_device *dev, if (desc->bInterfaceClass != 255) continue; + if (desc->bNumEndpoints < 1) + continue; epd = get_endpoint(intf->altsetting, 0); private->bInterfaceNumber = desc->bInterfaceNumber;
diff --git a/sound/usb/qcom/qc_audio_offload.c b/sound/usb/qcom/qc_audio_offload.c index 01e6063..2ac813d 100644 --- a/sound/usb/qcom/qc_audio_offload.c +++ b/sound/usb/qcom/qc_audio_offload.c
@@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2022-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include <linux/auxiliary_bus.h> @@ -699,6 +699,7 @@ static void uaudio_event_ring_cleanup_free(struct uaudio_dev *dev) uaudio_iommu_unmap(MEM_EVENT_RING, IOVA_BASE, PAGE_SIZE, PAGE_SIZE); xhci_sideband_remove_interrupter(uadev[dev->chip->card->number].sb); + usb_offload_put(dev->udev); } } @@ -1007,7 +1008,7 @@ static int enable_audio_stream(struct snd_usb_substream *subs, /** * uaudio_transfer_buffer_setup() - fetch and populate xfer buffer params * @subs: usb substream - * @xfer_buf: xfer buf to be allocated + * @xfer_buf_cpu: xfer buf to be allocated * @xfer_buf_len: size of allocation * @mem_info: QMI response info * @@ -1182,12 +1183,16 @@ static int uaudio_event_ring_setup(struct snd_usb_substream *subs, dma_coherent = dev_is_dma_coherent(subs->dev->bus->sysdev); er_pa = 0; + ret = usb_offload_get(subs->dev); + if (ret < 0) + goto exit; + /* event ring */ ret = xhci_sideband_create_interrupter(uadev[card_num].sb, 1, false, 0, uaudio_qdev->data->intr_num); if (ret < 0) { dev_err(&subs->dev->dev, "failed to fetch interrupter\n"); - goto exit; + goto put_offload; } sgt = xhci_sideband_get_event_buffer(uadev[card_num].sb); @@ -1219,6 +1224,8 @@ static int uaudio_event_ring_setup(struct snd_usb_substream *subs, mem_info->dma = 0; remove_interrupter: xhci_sideband_remove_interrupter(uadev[card_num].sb); +put_offload: + usb_offload_put(subs->dev); exit: return ret; } @@ -1482,6 +1489,7 @@ static int prepare_qmi_response(struct snd_usb_substream *subs, uaudio_iommu_unmap(MEM_EVENT_RING, IOVA_BASE, PAGE_SIZE, PAGE_SIZE); free_sec_ring: xhci_sideband_remove_interrupter(uadev[card_num].sb); + usb_offload_put(subs->dev); drop_sync_ep: if (subs->sync_endpoint) { uaudio_iommu_unmap(MEM_XFER_RING,
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 4cac0df..4cfa24c 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c
@@ -2148,6 +2148,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { /* Device matches */ DEVICE_FLG(0x001f, 0x0b21, /* AB13X USB Audio */ QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), + DEVICE_FLG(0x001f, 0x0b23, /* AB17X USB Audio */ + QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), DEVICE_FLG(0x0020, 0x0b21, /* GHW-123P */ QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), DEVICE_FLG(0x03f0, 0x654a, /* HP 320 FHD Webcam */ @@ -2219,6 +2221,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x05e1, 0x0480, /* Hauppauge Woodbury */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x0624, 0x3d3f, /* AB13X USB Audio */ + QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), DEVICE_FLG(0x0644, 0x8043, /* TEAC UD-501/UD-501V2/UD-503/NT-503 */ QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY | QUIRK_FLAG_IFACE_DELAY), @@ -2241,6 +2245,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IFACE_DELAY | QUIRK_FLAG_FORCE_IFACE_RESET), DEVICE_FLG(0x0661, 0x0883, /* iBasso DC04 Ultra */ QUIRK_FLAG_DSD_RAW), + DEVICE_FLG(0x0666, 0x0880, /* SPACETOUCH USB Audio */ + QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), DEVICE_FLG(0x06f8, 0xb000, /* Hercules DJ Console (Windows Edition) */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x06f8, 0xd002, /* Hercules DJ Console (Macintosh Edition) */ @@ -2299,6 +2305,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_PLAYBACK_FIRST | QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x13e5, 0x0001, /* Serato Phono */ QUIRK_FLAG_IGNORE_CTL_ERROR), + DEVICE_FLG(0x152a, 0x880a, /* NeuralDSP Quad Cortex */ + 0), /* Doesn't have the vendor quirk which would otherwise apply */ DEVICE_FLG(0x154e, 0x1002, /* Denon DCD-1500RE */ QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY), DEVICE_FLG(0x154e, 0x1003, /* Denon DA-300USB */ @@ -2365,6 +2373,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x2040, 0x7281, /* Hauppauge HVR-950Q-MXL */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x20b1, 0x2009, /* XMOS Ltd DIYINHK USB Audio 2.0 */ + QUIRK_FLAG_SKIP_IMPLICIT_FB | QUIRK_FLAG_DSD_RAW), DEVICE_FLG(0x2040, 0x8200, /* Hauppauge Woodbury */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x21b4, 0x0081, /* AudioQuest DragonFly */ @@ -2423,8 +2433,13 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY | QUIRK_FLAG_IFACE_DELAY), VENDOR_FLG(0x07fd, /* MOTU */ QUIRK_FLAG_VALIDATE_RATES), + DEVICE_FLG(0x1235, 0x8006, 0), /* Focusrite Scarlett 2i2 1st Gen */ + DEVICE_FLG(0x1235, 0x800a, 0), /* Focusrite Scarlett 2i4 1st Gen */ + DEVICE_FLG(0x1235, 0x8016, 0), /* Focusrite Scarlett 2i2 1st Gen */ + DEVICE_FLG(0x1235, 0x801c, 0), /* Focusrite Scarlett Solo 1st Gen */ VENDOR_FLG(0x1235, /* Focusrite Novation */ - QUIRK_FLAG_VALIDATE_RATES), + QUIRK_FLAG_SKIP_CLOCK_SELECTOR | + QUIRK_FLAG_SKIP_IFACE_SETUP), VENDOR_FLG(0x1511, /* AURALiC */ QUIRK_FLAG_DSD_RAW), VENDOR_FLG(0x152a, /* Thesycon devices */ @@ -2506,6 +2521,7 @@ static const char *const snd_usb_audio_quirk_flag_names[] = { QUIRK_STRING_ENTRY(MIC_RES_384), QUIRK_STRING_ENTRY(MIXER_PLAYBACK_MIN_MUTE), QUIRK_STRING_ENTRY(MIXER_CAPTURE_MIN_MUTE), + QUIRK_STRING_ENTRY(SKIP_IFACE_SETUP), NULL };
diff --git a/sound/usb/stream.c b/sound/usb/stream.c index ac4d920..d38c39e 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c
@@ -1259,6 +1259,9 @@ static int __snd_usb_parse_audio_interface(struct snd_usb_audio *chip, set_iface_first = true; /* try to set the interface... */ + if (chip->quirk_flags & QUIRK_FLAG_SKIP_IFACE_SETUP) + continue; + usb_set_interface(chip->dev, iface_no, 0); if (set_iface_first) usb_set_interface(chip->dev, iface_no, altno);
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index 79978ca..085530c 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h
@@ -224,6 +224,10 @@ extern bool snd_usb_skip_validation; * playback value represents muted state instead of minimum audible volume * QUIRK_FLAG_MIXER_CAPTURE_MIN_MUTE * Similar to QUIRK_FLAG_MIXER_PLAYBACK_MIN_MUTE, but for capture streams + * QUIRK_FLAG_SKIP_IFACE_SETUP + * Skip the probe-time interface setup (usb_set_interface, + * init_pitch, init_sample_rate); redundant with + * snd_usb_endpoint_prepare() at stream-open time */ enum { @@ -253,6 +257,7 @@ enum { QUIRK_TYPE_MIC_RES_384 = 23, QUIRK_TYPE_MIXER_PLAYBACK_MIN_MUTE = 24, QUIRK_TYPE_MIXER_CAPTURE_MIN_MUTE = 25, + QUIRK_TYPE_SKIP_IFACE_SETUP = 26, /* Please also edit snd_usb_audio_quirk_flag_names */ }; @@ -284,5 +289,6 @@ enum { #define QUIRK_FLAG_MIC_RES_384 QUIRK_FLAG(MIC_RES_384) #define QUIRK_FLAG_MIXER_PLAYBACK_MIN_MUTE QUIRK_FLAG(MIXER_PLAYBACK_MIN_MUTE) #define QUIRK_FLAG_MIXER_CAPTURE_MIN_MUTE QUIRK_FLAG(MIXER_CAPTURE_MIN_MUTE) +#define QUIRK_FLAG_SKIP_IFACE_SETUP QUIRK_FLAG(SKIP_IFACE_SETUP) #endif /* __USBAUDIO_H */
diff --git a/sound/usb/usx2y/us122l.c b/sound/usb/usx2y/us122l.c index 011ea96..f00b533 100644 --- a/sound/usb/usx2y/us122l.c +++ b/sound/usb/usx2y/us122l.c
@@ -520,8 +520,6 @@ static int us122l_usb_probe(struct usb_interface *intf, return err; } - usb_get_intf(usb_ifnum_to_if(device, 0)); - usb_get_dev(device); *cardp = card; return 0; } @@ -542,11 +540,9 @@ static int snd_us122l_probe(struct usb_interface *intf, if (intf->cur_altsetting->desc.bInterfaceNumber != 1) return 0; - err = us122l_usb_probe(usb_get_intf(intf), id, &card); - if (err < 0) { - usb_put_intf(intf); + err = us122l_usb_probe(intf, id, &card); + if (err < 0) return err; - } usb_set_intfdata(intf, card); return 0; @@ -574,10 +570,6 @@ static void snd_us122l_disconnect(struct usb_interface *intf) snd_usbmidi_disconnect(p); } - usb_put_intf(usb_ifnum_to_if(us122l->dev, 0)); - usb_put_intf(usb_ifnum_to_if(us122l->dev, 1)); - usb_put_dev(us122l->dev); - snd_card_free_when_closed(card); }
diff --git a/sound/usb/usx2y/us144mkii.c b/sound/usb/usx2y/us144mkii.c index bc71968..0cf4fa7 100644 --- a/sound/usb/usx2y/us144mkii.c +++ b/sound/usb/usx2y/us144mkii.c
@@ -10,8 +10,8 @@ MODULE_AUTHOR("Šerif Rami <ramiserifpersia@gmail.com>"); MODULE_DESCRIPTION("ALSA Driver for TASCAM US-144MKII"); MODULE_LICENSE("GPL"); -/** - * @brief Module parameters for ALSA card instantiation. +/* + * Module parameters for ALSA card instantiation. * * These parameters allow users to configure how the ALSA sound card * for the TASCAM US-144MKII is instantiated. @@ -269,7 +269,7 @@ void tascam_stop_work_handler(struct work_struct *work) atomic_set(&tascam->active_urbs, 0); } -/** +/* * tascam_card_private_free() - Frees private data associated with the sound * card. * @card: Pointer to the ALSA sound card instance. @@ -291,7 +291,7 @@ static void tascam_card_private_free(struct snd_card *card) } } -/** +/* * tascam_suspend() - Handles device suspension. * @intf: The USB interface being suspended. * @message: Power management message. @@ -332,7 +332,7 @@ static int tascam_suspend(struct usb_interface *intf, pm_message_t message) return 0; } -/** +/* * tascam_resume() - Handles device resumption from suspend. * @intf: The USB interface being resumed. * @@ -390,7 +390,7 @@ static void tascam_error_timer(struct timer_list *t) schedule_work(&tascam->midi_out_work); } -/** +/* * tascam_probe() - Probes for the TASCAM US-144MKII device. * @intf: The USB interface being probed. * @usb_id: The USB device ID. @@ -565,7 +565,7 @@ static int tascam_probe(struct usb_interface *intf, return err; } -/** +/* * tascam_disconnect() - Disconnects the TASCAM US-144MKII device. * @intf: The USB interface being disconnected. *
diff --git a/sound/usb/usx2y/us144mkii_capture.c b/sound/usb/usx2y/us144mkii_capture.c index 00188ff..af120bf 100644 --- a/sound/usb/usx2y/us144mkii_capture.c +++ b/sound/usb/usx2y/us144mkii_capture.c
@@ -3,7 +3,7 @@ #include "us144mkii.h" -/** +/* * tascam_capture_open() - Opens the PCM capture substream. * @substream: The ALSA PCM substream to open. * @@ -23,7 +23,7 @@ static int tascam_capture_open(struct snd_pcm_substream *substream) return 0; } -/** +/* * tascam_capture_close() - Closes the PCM capture substream. * @substream: The ALSA PCM substream to close. * @@ -41,7 +41,7 @@ static int tascam_capture_close(struct snd_pcm_substream *substream) return 0; } -/** +/* * tascam_capture_prepare() - Prepares the PCM capture substream for use. * @substream: The ALSA PCM substream to prepare. * @@ -62,7 +62,7 @@ static int tascam_capture_prepare(struct snd_pcm_substream *substream) return 0; } -/** +/* * tascam_capture_pointer() - Returns the current capture pointer position. * @substream: The ALSA PCM substream. * @@ -91,7 +91,7 @@ tascam_capture_pointer(struct snd_pcm_substream *substream) return do_div(pos, runtime->buffer_size); } -/** +/* * tascam_capture_ops - ALSA PCM operations for capture. * * This structure defines the callback functions for capture stream operations, @@ -109,7 +109,7 @@ const struct snd_pcm_ops tascam_capture_ops = { .pointer = tascam_capture_pointer, }; -/** +/* * decode_tascam_capture_block() - Decodes a raw 512-byte block from the device. * @src_block: Pointer to the 512-byte raw source block. * @dst_block: Pointer to the destination buffer for decoded audio frames.
diff --git a/sound/usb/usx2y/us144mkii_controls.c b/sound/usb/usx2y/us144mkii_controls.c index 62055fb..81ded11 100644 --- a/sound/usb/usx2y/us144mkii_controls.c +++ b/sound/usb/usx2y/us144mkii_controls.c
@@ -3,8 +3,8 @@ #include "us144mkii.h" -/** - * @brief Text descriptions for playback output source options. +/* + * Text descriptions for playback output source options. * * Used by ALSA kcontrol elements to provide user-friendly names for * the playback routing options (e.g., "Playback 1-2", "Playback 3-4"). @@ -12,15 +12,15 @@ static const char *const playback_source_texts[] = { "Playback 1-2", "Playback 3-4" }; -/** - * @brief Text descriptions for capture input source options. +/* + * Text descriptions for capture input source options. * * Used by ALSA kcontrol elements to provide user-friendly names for * the capture routing options (e.g., "Analog In", "Digital In"). */ static const char *const capture_source_texts[] = { "Analog In", "Digital In" }; -/** +/* * tascam_playback_source_info() - ALSA control info callback for playback * source. * @kcontrol: The ALSA kcontrol instance. @@ -38,7 +38,7 @@ static int tascam_playback_source_info(struct snd_kcontrol *kcontrol, return snd_ctl_enum_info(uinfo, 1, 2, playback_source_texts); } -/** +/* * tascam_line_out_get() - ALSA control get callback for Line Outputs Source. * @kcontrol: The ALSA kcontrol instance. * @ucontrol: The ALSA control element value structure to fill. @@ -60,7 +60,7 @@ static int tascam_line_out_get(struct snd_kcontrol *kcontrol, return 0; } -/** +/* * tascam_line_out_put() - ALSA control put callback for Line Outputs Source. * @kcontrol: The ALSA kcontrol instance. * @ucontrol: The ALSA control element value structure containing the new value. @@ -89,7 +89,7 @@ static int tascam_line_out_put(struct snd_kcontrol *kcontrol, return changed; } -/** +/* * tascam_line_out_control - ALSA kcontrol definition for Line Outputs Source. * * This defines a new ALSA mixer control named "Line OUTPUTS Source" that allows @@ -106,7 +106,7 @@ static const struct snd_kcontrol_new tascam_line_out_control = { .put = tascam_line_out_put, }; -/** +/* * tascam_digital_out_get() - ALSA control get callback for Digital Outputs * Source. * @kcontrol: The ALSA kcontrol instance. @@ -129,7 +129,7 @@ static int tascam_digital_out_get(struct snd_kcontrol *kcontrol, return 0; } -/** +/* * tascam_digital_out_put() - ALSA control put callback for Digital Outputs * Source. * @kcontrol: The ALSA kcontrol instance. @@ -159,7 +159,7 @@ static int tascam_digital_out_put(struct snd_kcontrol *kcontrol, return changed; } -/** +/* * tascam_digital_out_control - ALSA kcontrol definition for Digital Outputs * Source. * @@ -177,7 +177,7 @@ static const struct snd_kcontrol_new tascam_digital_out_control = { .put = tascam_digital_out_put, }; -/** +/* * tascam_capture_source_info() - ALSA control info callback for capture source. * @kcontrol: The ALSA kcontrol instance. * @uinfo: The ALSA control element info structure to fill. @@ -194,7 +194,7 @@ static int tascam_capture_source_info(struct snd_kcontrol *kcontrol, return snd_ctl_enum_info(uinfo, 1, 2, capture_source_texts); } -/** +/* * tascam_capture_12_get() - ALSA control get callback for Capture channels 1 * and 2 Source. * @kcontrol: The ALSA kcontrol instance. @@ -217,7 +217,7 @@ static int tascam_capture_12_get(struct snd_kcontrol *kcontrol, return 0; } -/** +/* * tascam_capture_12_put() - ALSA control put callback for Capture channels 1 * and 2 Source. * @kcontrol: The ALSA kcontrol instance. @@ -247,7 +247,7 @@ static int tascam_capture_12_put(struct snd_kcontrol *kcontrol, return changed; } -/** +/* * tascam_capture_12_control - ALSA kcontrol definition for Capture channels 1 * and 2 Source. 
* @@ -265,7 +265,7 @@ static const struct snd_kcontrol_new tascam_capture_12_control = { .put = tascam_capture_12_put, }; -/** +/* * tascam_capture_34_get() - ALSA control get callback for Capture channels 3 * and 4 Source. * @kcontrol: The ALSA kcontrol instance. @@ -288,7 +288,7 @@ static int tascam_capture_34_get(struct snd_kcontrol *kcontrol, return 0; } -/** +/* * tascam_capture_34_put() - ALSA control put callback for Capture channels 3 * and 4 Source. * @kcontrol: The ALSA kcontrol instance. @@ -318,7 +318,7 @@ static int tascam_capture_34_put(struct snd_kcontrol *kcontrol, return changed; } -/** +/* * tascam_capture_34_control - ALSA kcontrol definition for Capture channels 3 * and 4 Source. * @@ -336,7 +336,7 @@ static const struct snd_kcontrol_new tascam_capture_34_control = { .put = tascam_capture_34_put, }; -/** +/* * tascam_samplerate_info() - ALSA control info callback for Sample Rate. * @kcontrol: The ALSA kcontrol instance. * @uinfo: The ALSA control element info structure to fill. @@ -356,7 +356,7 @@ static int tascam_samplerate_info(struct snd_kcontrol *kcontrol, return 0; } -/** +/* * tascam_samplerate_get() - ALSA control get callback for Sample Rate. * @kcontrol: The ALSA kcontrol instance. * @ucontrol: The ALSA control element value structure to fill. @@ -400,7 +400,7 @@ static int tascam_samplerate_get(struct snd_kcontrol *kcontrol, return 0; } -/** +/* * tascam_samplerate_control - ALSA kcontrol definition for Sample Rate. * * This defines a new ALSA mixer control named "Sample Rate" that displays
diff --git a/sound/usb/usx2y/us144mkii_midi.c b/sound/usb/usx2y/us144mkii_midi.c index ed2afec..4871797 100644 --- a/sound/usb/usx2y/us144mkii_midi.c +++ b/sound/usb/usx2y/us144mkii_midi.c
@@ -3,7 +3,7 @@ #include "us144mkii.h" -/** +/* * tascam_midi_in_work_handler() - Deferred work for processing MIDI input. * @work: The work_struct instance. * @@ -75,7 +75,7 @@ void tascam_midi_in_urb_complete(struct urb *urb) usb_put_urb(urb); } -/** +/* * tascam_midi_in_open() - Opens the MIDI input substream. * @substream: The ALSA rawmidi substream to open. * @@ -92,7 +92,7 @@ static int tascam_midi_in_open(struct snd_rawmidi_substream *substream) return 0; } -/** +/* * tascam_midi_in_close() - Closes the MIDI input substream. * @substream: The ALSA rawmidi substream to close. * @@ -103,7 +103,7 @@ static int tascam_midi_in_close(struct snd_rawmidi_substream *substream) return 0; } -/** +/* * tascam_midi_in_trigger() - Triggers MIDI input stream activity. * @substream: The ALSA rawmidi substream. * @up: Boolean indicating whether to start (1) or stop (0) the stream. @@ -150,7 +150,7 @@ static void tascam_midi_in_trigger(struct snd_rawmidi_substream *substream, } } -/** +/* * tascam_midi_in_ops - ALSA rawmidi operations for MIDI input. * * This structure defines the callback functions for MIDI input stream @@ -205,7 +205,7 @@ void tascam_midi_out_urb_complete(struct urb *urb) usb_put_urb(urb); } -/** +/* * tascam_midi_out_work_handler() - Deferred work for sending MIDI data * @work: The work_struct instance. * @@ -282,7 +282,7 @@ static void tascam_midi_out_work_handler(struct work_struct *work) } } -/** +/* * tascam_midi_out_open() - Opens the MIDI output substream. * @substream: The ALSA rawmidi substream to open. * @@ -301,7 +301,7 @@ static int tascam_midi_out_open(struct snd_rawmidi_substream *substream) return 0; } -/** +/* * tascam_midi_out_close() - Closes the MIDI output substream. * @substream: The ALSA rawmidi substream to close. * @@ -312,7 +312,7 @@ static int tascam_midi_out_close(struct snd_rawmidi_substream *substream) return 0; } -/** +/* * tascam_midi_out_drain() - Drains the MIDI output stream. * @substream: The ALSA rawmidi substream. * @@ -340,7 +340,7 @@ static void tascam_midi_out_drain(struct snd_rawmidi_substream *substream) usb_kill_anchored_urbs(&tascam->midi_out_anchor); } -/** +/* * tascam_midi_out_trigger() - Triggers MIDI output stream activity. * @substream: The ALSA rawmidi substream. * @up: Boolean indicating whether to start (1) or stop (0) the stream. @@ -361,7 +361,7 @@ static void tascam_midi_out_trigger(struct snd_rawmidi_substream *substream, } } -/** +/* * tascam_midi_out_ops - ALSA rawmidi operations for MIDI output. * * This structure defines the callback functions for MIDI output stream
diff --git a/sound/usb/usx2y/us144mkii_playback.c b/sound/usb/usx2y/us144mkii_playback.c index 0cb9699..7efaca0a 100644 --- a/sound/usb/usx2y/us144mkii_playback.c +++ b/sound/usb/usx2y/us144mkii_playback.c
@@ -3,7 +3,7 @@ #include "us144mkii.h" -/** +/* * tascam_playback_open() - Opens the PCM playback substream. * @substream: The ALSA PCM substream to open. * @@ -23,7 +23,7 @@ static int tascam_playback_open(struct snd_pcm_substream *substream) return 0; } -/** +/* * tascam_playback_close() - Closes the PCM playback substream. * @substream: The ALSA PCM substream to close. * @@ -41,7 +41,7 @@ static int tascam_playback_close(struct snd_pcm_substream *substream) return 0; } -/** +/* * tascam_playback_prepare() - Prepares the PCM playback substream for use. * @substream: The ALSA PCM substream to prepare. * @@ -108,7 +108,7 @@ static int tascam_playback_prepare(struct snd_pcm_substream *substream) return 0; } -/** +/* * tascam_playback_pointer() - Returns the current playback pointer position. * @substream: The ALSA PCM substream. * @@ -137,7 +137,7 @@ tascam_playback_pointer(struct snd_pcm_substream *substream) return do_div(pos, runtime->buffer_size); } -/** +/* * tascam_playback_ops - ALSA PCM operations for playback. * * This structure defines the callback functions for playback stream operations,
diff --git a/sound/usb/validate.c b/sound/usb/validate.c index 4bb4893..f62b7cc 100644 --- a/sound/usb/validate.c +++ b/sound/usb/validate.c
@@ -281,7 +281,7 @@ static const struct usb_desc_validator audio_validators[] = { /* UAC_VERSION_2, UAC2_SAMPLE_RATE_CONVERTER: not implemented yet */ /* UAC3 */ - FIXED(UAC_VERSION_2, UAC_HEADER, struct uac3_ac_header_descriptor), + FIXED(UAC_VERSION_3, UAC_HEADER, struct uac3_ac_header_descriptor), FIXED(UAC_VERSION_3, UAC_INPUT_TERMINAL, struct uac3_input_terminal_descriptor), FIXED(UAC_VERSION_3, UAC_OUTPUT_TERMINAL,
diff --git a/tools/arch/x86/include/asm/amd/ibs.h b/tools/arch/x86/include/asm/amd/ibs.h index cbce54f..41e8abd7 100644 --- a/tools/arch/x86/include/asm/amd/ibs.h +++ b/tools/arch/x86/include/asm/amd/ibs.h
@@ -110,7 +110,7 @@ union ibs_op_data3 { __u64 ld_op:1, /* 0: load op */ st_op:1, /* 1: store op */ dc_l1tlb_miss:1, /* 2: data cache L1TLB miss */ - dc_l2tlb_miss:1, /* 3: data cache L2TLB hit in 2M page */ + dc_l2tlb_miss:1, /* 3: data cache L2TLB miss in 2M page */ dc_l1tlb_hit_2m:1, /* 4: data cache L1TLB hit in 2M page */ dc_l1tlb_hit_1g:1, /* 5: data cache L1TLB hit in 1G page */ dc_l2tlb_hit_2m:1, /* 6: data cache L2TLB hit in 2M page */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index c3b53be..dbe104d 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -84,7 +84,7 @@ #define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */ #define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */ +#define X86_FEATURE_SYSFAST32 ( 3*32+15) /* sysenter/syscall in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* "rep_good" REP microcode works well */ #define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */ #define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */ @@ -326,6 +326,7 @@ #define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */ #define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */ #define X86_FEATURE_LAM (12*32+26) /* "lam" Linear Address Masking */ +#define X86_FEATURE_MOVRS (12*32+31) /* MOVRS instructions */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ @@ -472,6 +473,7 @@ #define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */ #define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */ +#define X86_FEATURE_ERAPS (20*32+24) /* Enhanced Return Address Predictor Security */ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 43adc38..6673601 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h
@@ -263,6 +263,11 @@ #define MSR_SNOOP_RSP_0 0x00001328 #define MSR_SNOOP_RSP_1 0x00001329 +#define MSR_OMR_0 0x000003e0 +#define MSR_OMR_1 0x000003e1 +#define MSR_OMR_2 0x000003e2 +#define MSR_OMR_3 0x000003e3 + #define MSR_LBR_SELECT 0x000001c8 #define MSR_LBR_TOS 0x000001c9 @@ -735,7 +740,10 @@ #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) #define MSR_AMD64_SNP_SECURE_AVIC_BIT 18 #define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) -#define MSR_AMD64_SNP_RESV_BIT 19 +#define MSR_AMD64_SNP_RESERVED_BITS19_22 GENMASK_ULL(22, 19) +#define MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT 23 +#define MSR_AMD64_SNP_IBPB_ON_ENTRY BIT_ULL(MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT) +#define MSR_AMD64_SNP_RESV_BIT 24 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) #define MSR_AMD64_SAVIC_CONTROL 0xc0010138 #define MSR_AMD64_SAVIC_EN_BIT 0 @@ -1219,6 +1227,7 @@ #define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e #define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f #define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 +#define MSR_CORE_PERF_GLOBAL_STATUS_SET 0x00000391 #define MSR_PERF_METRICS 0x00000329
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 7ceff65..0d4538f 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -476,6 +476,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) #define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8) #define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9) +#define KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM (1 << 10) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 @@ -503,6 +504,7 @@ struct kvm_sync_regs { #define KVM_X86_GRP_SEV 1 # define KVM_X86_SEV_VMSA_FEATURES 0 # define KVM_X86_SNP_POLICY_BITS 1 +# define KVM_X86_SEV_SNP_REQ_CERTS 2 struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; @@ -743,6 +745,7 @@ enum sev_cmd_id { KVM_SEV_SNP_LAUNCH_START = 100, KVM_SEV_SNP_LAUNCH_UPDATE, KVM_SEV_SNP_LAUNCH_FINISH, + KVM_SEV_SNP_ENABLE_REQ_CERTS, KVM_SEV_NR_MAX, }; @@ -914,8 +917,10 @@ struct kvm_sev_snp_launch_finish { __u64 pad1[4]; }; -#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) -#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) +#define KVM_X2APIC_API_USE_32BIT_IDS _BITULL(0) +#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK _BITULL(1) +#define KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST _BITULL(2) +#define KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST _BITULL(3) struct kvm_hyperv_eventfd { __u32 conn_id;
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 55d59ed..643f707 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c
@@ -162,8 +162,11 @@ static int load_xbc_file(const char *path, char **buf) if (fd < 0) return -errno; ret = fstat(fd, &stat); - if (ret < 0) - return -errno; + if (ret < 0) { + ret = -errno; + close(fd); + return ret; + } ret = load_xbc_fd(fd, buf, stat.st_size);
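Editor's note: two things matter in this error path: the descriptor opened a few lines earlier must not leak, and errno has to be captured before close(), which may itself overwrite it. The same shape in a standalone sketch (file_size() and the path are made up):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

/* Capture errno before close() and never leak fd on the fstat() path. */
static int file_size(const char *path, off_t *size)
{
	struct stat st;
	int fd, ret;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -errno;

	if (fstat(fd, &st) < 0) {
		ret = -errno;	/* save before close() can clobber it */
		close(fd);
		return ret;
	}

	*size = st.st_size;
	close(fd);
	return 0;
}

int main(void)
{
	off_t sz;

	if (!file_size("/etc/hostname", &sz))
		printf("%lld bytes\n", (long long)sz);
	return 0;
}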
diff --git a/tools/bootconfig/samples/bad-non-closed-brace.bconf b/tools/bootconfig/samples/bad-non-closed-brace.bconf new file mode 100644 index 0000000..6ed9f33 --- /dev/null +++ b/tools/bootconfig/samples/bad-non-closed-brace.bconf
@@ -0,0 +1,4 @@ +foo { + bar { + buz + }
diff --git a/tools/bootconfig/samples/bad-over-max-brace.bconf b/tools/bootconfig/samples/bad-over-max-brace.bconf new file mode 100644 index 0000000..74b5dc9 --- /dev/null +++ b/tools/bootconfig/samples/bad-over-max-brace.bconf
@@ -0,0 +1,19 @@ +key1 { +key2 { +key3 { +key4 { +key5 { +key6 { +key7 { +key8 { +key9 { +key10 { +key11 { +key12 { +key13 { +key14 { +key15 { +key16 { +key17 { +}}}}}}}}}}}}}}}}} +
diff --git a/tools/bootconfig/samples/exp-good-nested-brace.bconf b/tools/bootconfig/samples/exp-good-nested-brace.bconf new file mode 100644 index 0000000..19e0f51 --- /dev/null +++ b/tools/bootconfig/samples/exp-good-nested-brace.bconf
@@ -0,0 +1 @@ +key1.key2.key3.key4.key5.key6.key7.key8.key9.key10.key11.key12.key13.key14.key15.key16;
diff --git a/tools/bootconfig/samples/good-nested-brace.bconf b/tools/bootconfig/samples/good-nested-brace.bconf new file mode 100644 index 0000000..980d094 --- /dev/null +++ b/tools/bootconfig/samples/good-nested-brace.bconf
@@ -0,0 +1,18 @@ +key1 { +key2 { +key3 { +key4 { +key5 { +key6 { +key7 { +key8 { +key9 { +key10 { +key11 { +key12 { +key13 { +key14 { +key15 { +key16 { +}}}}}}}}}}}}}}}} +
diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index be9bd18..fc69f81 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh
@@ -171,6 +171,15 @@ xfail grep -q 'val[[:space:]]' $OUTFILE xpass grep -q 'val2[[:space:]]' $OUTFILE +echo "Showing correct line:column of no closing brace" +cat > $TEMPCONF << EOF +foo { +bar { +} +EOF +$BOOTCONF -a $TEMPCONF $INITRD 2> $OUTFILE +xpass grep -q "1:1" $OUTFILE + echo "=== expected failure cases ===" for i in samples/bad-* ; do xfail $BOOTCONF -a $i $INITRD
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 1733a6e..7672208 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile
@@ -23,6 +23,7 @@ HOSTCC ?= gcc HOSTLD ?= ld HOSTAR ?= ar +HOSTPKG_CONFIG ?= pkg-config CROSS_COMPILE = OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/ @@ -63,7 +64,14 @@ $(abspath $@) install_headers LIBELF_FLAGS := $(shell $(HOSTPKG_CONFIG) libelf --cflags 2>/dev/null) + +ifneq ($(filter -static,$(EXTRA_LDFLAGS)),) +LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs --static 2>/dev/null || echo -lelf -lzstd) +else LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf) +endif + +ZLIB_LIBS := $(shell $(HOSTPKG_CONFIG) zlib --libs 2>/dev/null || echo -lz) HOSTCFLAGS_resolve_btfids += -g \ -I$(srctree)/tools/include \ @@ -73,7 +81,7 @@ $(LIBELF_FLAGS) \ -Wall -Werror -LIBS = $(LIBELF_LIBS) -lz +LIBS = $(LIBELF_LIBS) $(ZLIB_LIBS) export srctree OUTPUT HOSTCFLAGS_resolve_btfids Q HOSTCC HOSTLD HOSTAR include $(srctree)/tools/build/Makefile.include @@ -83,7 +91,7 @@ $(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN) $(call msg,LINK,$@) - $(Q)$(HOSTCC) $(BINARY_IN) $(KBUILD_HOSTLDFLAGS) -o $@ $(BPFOBJ) $(SUBCMDOBJ) $(LIBS) + $(Q)$(HOSTCC) $(BINARY_IN) $(KBUILD_HOSTLDFLAGS) $(EXTRA_LDFLAGS) -o $@ $(BPFOBJ) $(SUBCMDOBJ) $(LIBS) clean_objects := $(wildcard $(OUTPUT)/*.o \ $(OUTPUT)/.*.o.cmd \
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index ca7fcd0..5208f65 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c
@@ -226,7 +226,7 @@ static struct btf_id *btf_id__find(struct rb_root *root, const char *name) } static struct btf_id *__btf_id__add(struct rb_root *root, - char *name, + const char *name, enum btf_id_kind kind, bool unique) { @@ -250,7 +250,11 @@ static struct btf_id *__btf_id__add(struct rb_root *root, id = zalloc(sizeof(*id)); if (id) { pr_debug("adding symbol %s\n", name); - id->name = name; + id->name = strdup(name); + if (!id->name) { + free(id); + return NULL; + } id->kind = kind; rb_link_node(&id->rb_node, parent, p); rb_insert_color(&id->rb_node, root); @@ -258,17 +262,21 @@ static struct btf_id *__btf_id__add(struct rb_root *root, return id; } -static inline struct btf_id *btf_id__add(struct rb_root *root, char *name, enum btf_id_kind kind) +static inline struct btf_id *btf_id__add(struct rb_root *root, + const char *name, + enum btf_id_kind kind) { return __btf_id__add(root, name, kind, false); } -static inline struct btf_id *btf_id__add_unique(struct rb_root *root, char *name, enum btf_id_kind kind) +static inline struct btf_id *btf_id__add_unique(struct rb_root *root, + const char *name, + enum btf_id_kind kind) { return __btf_id__add(root, name, kind, true); } -static char *get_id(const char *prefix_end) +static int get_id(const char *prefix_end, char *buf, size_t buf_sz) { /* * __BTF_ID__func__vfs_truncate__0 @@ -277,28 +285,28 @@ static char *get_id(const char *prefix_end) */ int len = strlen(prefix_end); int pos = sizeof("__") - 1; - char *p, *id; + char *p; if (pos >= len) - return NULL; + return -1; - id = strdup(prefix_end + pos); - if (id) { - /* - * __BTF_ID__func__vfs_truncate__0 - * id = ^ - * - * cut the unique id part - */ - p = strrchr(id, '_'); - p--; - if (*p != '_') { - free(id); - return NULL; - } - *p = '\0'; - } - return id; + if (len - pos >= buf_sz) + return -1; + + strcpy(buf, prefix_end + pos); + /* + * __BTF_ID__func__vfs_truncate__0 + * buf = ^ + * + * cut the unique id part + */ + p = strrchr(buf, '_'); + p--; + if (*p != '_') + return -1; + *p = '\0'; + + return 0; } static struct btf_id *add_set(struct object *obj, char *name, enum btf_id_kind kind) @@ -335,10 +343,9 @@ static struct btf_id *add_set(struct object *obj, char *name, enum btf_id_kind k static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size) { - char *id; + char id[KSYM_NAME_LEN]; - id = get_id(name + size); - if (!id) { + if (get_id(name + size, id, sizeof(id))) { pr_err("FAILED to parse symbol name: %s\n", name); return NULL; } @@ -346,6 +353,21 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size) return btf_id__add(root, id, BTF_ID_KIND_SYM); } +static void btf_id__free_all(struct rb_root *root) +{ + struct rb_node *next; + struct btf_id *id; + + next = rb_first(root); + while (next) { + id = rb_entry(next, struct btf_id, rb_node); + next = rb_next(&id->rb_node); + rb_erase(&id->rb_node, root); + free(id->name); + free(id); + } +} + static void bswap_32_data(void *data, u32 nr_bytes) { u32 cnt, i; @@ -1547,6 +1569,11 @@ int main(int argc, const char **argv) out: btf__free(obj.base_btf); btf__free(obj.btf); + btf_id__free_all(&obj.structs); + btf_id__free_all(&obj.unions); + btf_id__free_all(&obj.typedefs); + btf_id__free_all(&obj.funcs); + btf_id__free_all(&obj.sets); if (obj.efile.elf) { elf_end(obj.efile.elf); close(obj.efile.fd);
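Editor's note: get_id() now parses into a caller-provided KSYM_NAME_LEN buffer instead of strdup()ing, removing one allocation per symbol and a leak-prone free path. A userspace re-creation of just the parsing step (the symbol tail is the example from the code's own comment; the NULL/start-of-buffer guards are added for standalone safety):

#include <stdio.h>
#include <string.h>

/* Copy the symbol tail into buf and strip the trailing "__<N>" unique
 * suffix, e.g. "__vfs_truncate__0" -> "vfs_truncate". */
static int get_id(const char *prefix_end, char *buf, size_t buf_sz)
{
	size_t pos = sizeof("__") - 1;
	size_t len = strlen(prefix_end);
	char *p;

	if (pos >= len || len - pos >= buf_sz)
		return -1;

	strcpy(buf, prefix_end + pos);
	p = strrchr(buf, '_');
	if (!p || p == buf || *(p - 1) != '_')
		return -1;
	*(p - 1) = '\0';
	return 0;
}

int main(void)
{
	char id[64];

	if (!get_id("__vfs_truncate__0", id, sizeof(id)))
		printf("%s\n", id); /* vfs_truncate */
	return 0;
}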
diff --git a/tools/build/Build.include b/tools/build/Build.include index e45b2eb..cd0baa7 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include
@@ -99,6 +99,15 @@ cxx_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj)) ### +# Rust flags to be used on rule definition, includes: +# - global $(RUST_FLAGS) +# - per target Rust flags +# - per object Rust flags +rust_flags_1 = $(RUST_FLAGS) $(RUST_FLAGS_$(basetarget).o) $(RUST_FLAGS_$(obj)) +rust_flags_2 = $(filter-out $(RUST_FLAGS_REMOVE_$(basetarget).o), $(rust_flags_1)) +rust_flags = $(filter-out $(RUST_FLAGS_REMOVE_$(obj)), $(rust_flags_2)) + +### ## HOSTCC C flags host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj))
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 60e6587..ad69efd 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build
@@ -70,11 +70,13 @@ # If there's nothing to link, create empty $@ object. quiet_cmd_ld_multi = LD $@ cmd_ld_multi = $(if $(strip $(obj-y)),\ - $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@) + printf "$(objprefix)%s " $(patsubst $(objprefix)%,%,$(filter $(obj-y),$^)) | \ + xargs $(LD) -r -o $@,rm -f $@; $(AR) rcs $@) quiet_cmd_host_ld_multi = HOSTLD $@ cmd_host_ld_multi = $(if $(strip $(obj-y)),\ - $(HOSTLD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(HOSTAR) rcs $@) + printf "$(objprefix)%s " $(patsubst $(objprefix)%,%,$(filter $(obj-y),$^)) | \ + xargs $(HOSTLD) -r -o $@,rm -f $@; $(HOSTAR) rcs $@) rust_common_cmd = \ $(RUSTC) $(rust_flags) \
diff --git a/tools/include/linux/args.h b/tools/include/linux/args.h index 2e8e65d..14b268f 100644 --- a/tools/include/linux/args.h +++ b/tools/include/linux/args.h
@@ -22,7 +22,11 @@ #define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) /* Concatenate two parameters, but allow them to be expanded beforehand. */ +#ifndef __CONCAT #define __CONCAT(a, b) a ## b +#endif +#ifndef CONCATENATE #define CONCATENATE(a, b) __CONCAT(a, b) +#endif #endif /* _LINUX_ARGS_H */
diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index ab2aa97..406923b 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h
@@ -32,7 +32,8 @@ /** * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied * error message. - * @condition: the condition which the compiler should know is false. + * @cond: the condition which the compiler should know is false. + * @msg: build-time error message * * See BUILD_BUG_ON for description. */ @@ -60,6 +61,7 @@ /** * static_assert - check integer constant expression at build time + * @expr: expression to be checked * * static_assert() is a wrapper for the C11 _Static_assert, with a * little macro magic to make the message optional (defaulting to the
diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index 89b0ac0..2e179ab 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h
@@ -22,30 +22,6 @@ #define CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) (0x10 + (cpu * 2)) /* - * Below are the definition of bit offsets for perf option, and works as - * arbitrary values for all ETM versions. - * - * Most of them are orignally from ETMv3.5/PTM's ETMCR config, therefore, - * ETMv3.5/PTM doesn't define ETMCR config bits with prefix "ETM3_" and - * directly use below macros as config bits. - */ -#define ETM_OPT_BRANCH_BROADCAST 8 -#define ETM_OPT_CYCACC 12 -#define ETM_OPT_CTXTID 14 -#define ETM_OPT_CTXTID2 15 -#define ETM_OPT_TS 28 -#define ETM_OPT_RETSTK 29 - -/* ETMv4 CONFIGR programming bits for the ETM OPTs */ -#define ETM4_CFG_BIT_BB 3 -#define ETM4_CFG_BIT_CYCACC 4 -#define ETM4_CFG_BIT_CTXTID 6 -#define ETM4_CFG_BIT_VMID 7 -#define ETM4_CFG_BIT_TS 11 -#define ETM4_CFG_BIT_RETSTK 12 -#define ETM4_CFG_BIT_VMID_OPT 15 - -/* * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload. * Used to associate a CPU with the CoreSight Trace ID. * [07:00] - Trace ID - uses 8 bits to make value easy to read in file.
diff --git a/tools/include/linux/gfp.h b/tools/include/linux/gfp.h index 6a10ff5..9e957b5 100644 --- a/tools/include/linux/gfp.h +++ b/tools/include/linux/gfp.h
@@ -5,6 +5,10 @@ #include <linux/types.h> #include <linux/gfp_types.h> +/* Helper macro to avoid gfp flags if they are the default one */ +#define __default_gfp(a,...) a +#define default_gfp(...) __default_gfp(__VA_ARGS__ __VA_OPT__(,) GFP_KERNEL) + static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) { return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
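Editor's note: default_gfp() leans on __VA_OPT__. With no argument, GFP_KERNEL becomes the first (and only) parameter of __default_gfp(); with an argument, the caller's flag lands first and GFP_KERNEL is swallowed by the variadic tail. A compilable demonstration (needs a compiler with __VA_OPT__, e.g. recent GCC or Clang; the flag values are placeholders):

#include <stdio.h>

#define GFP_KERNEL 1
#define GFP_ATOMIC 2

/* Same trick as the new helper: __default_gfp() keeps only its first
 * parameter, and __VA_OPT__(,) emits the comma only when arguments
 * were actually passed. */
#define __default_gfp(a, ...) a
#define default_gfp(...) __default_gfp(__VA_ARGS__ __VA_OPT__(,) GFP_KERNEL)

int main(void)
{
	printf("%d\n", default_gfp());           /* 1 -> GFP_KERNEL */
	printf("%d\n", default_gfp(GFP_ATOMIC)); /* 2 -> caller's flag */
	return 0;
}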
diff --git a/tools/include/linux/gfp_types.h b/tools/include/linux/gfp_types.h index 3de43b1..6c75df3 100644 --- a/tools/include/linux/gfp_types.h +++ b/tools/include/linux/gfp_types.h
@@ -139,6 +139,8 @@ enum { * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. * * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension. + * mark_obj_codetag_empty() should be called upon freeing for objects allocated + * with this flag to indicate that their NULL tags are expected and normal. */ #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) @@ -309,8 +311,10 @@ enum { * * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower * watermark is applied to allow access to "atomic reserves". - * The current implementation doesn't support NMI and few other strict - * non-preemptive contexts (e.g. raw_spin_lock). The same applies to %GFP_NOWAIT. + * The current implementation doesn't support NMI, nor contexts that disable + * preemption under PREEMPT_RT. This includes raw_spin_lock() and plain + * preempt_disable() - see "Memory allocation" in + * Documentation/core-api/real-time/differences.rst for more info. * * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim. @@ -321,6 +325,7 @@ enum { * %GFP_NOWAIT is for kernel allocations that should not stall for direct * reclaim, start physical IO or use any filesystem callback. It is very * likely to fail to allocate memory, even for very small allocations. + * The same restrictions on calling contexts apply as for %GFP_ATOMIC. * * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages * that do not require the starting of any physical IO.
diff --git a/tools/include/linux/overflow.h b/tools/include/linux/overflow.h index dcb0c1b..3427d78 100644 --- a/tools/include/linux/overflow.h +++ b/tools/include/linux/overflow.h
@@ -69,6 +69,25 @@ }) /** + * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX + * @factor1: first factor + * @factor2: second factor + * + * Returns: calculate @factor1 * @factor2, both promoted to size_t, + * with any overflow causing the return value to be SIZE_MAX. The + * lvalue must be size_t to avoid implicit type conversion. + */ +static inline size_t __must_check size_mul(size_t factor1, size_t factor2) +{ + size_t bytes; + + if (check_mul_overflow(factor1, factor2, &bytes)) + return SIZE_MAX; + + return bytes; +} + +/** * array_size() - Calculate size of 2-dimensional array. * * @a: dimension one
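A quick illustration of the saturating behaviour, as a sketch; alloc_array(), nmemb and esize are hypothetical stand-ins for untrusted inputs:

	static void *alloc_array(size_t nmemb, size_t esize)
	{
		/* 2048 * 1024 fits: size_mul() returns 2097152.
		 * (SIZE_MAX / 2) * 4 overflows: size_mul() returns SIZE_MAX,
		 * so the allocation below fails cleanly instead of succeeding
		 * with a wrapped-around, too-small size. */
		return malloc(size_mul(nmemb, esize));
	}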
diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h index 94937a6..6d8e941 100644 --- a/tools/include/linux/slab.h +++ b/tools/include/linux/slab.h
@@ -202,4 +202,13 @@ static inline unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf) return sheaf->size; } +#define __alloc_objs(KMALLOC, GFP, TYPE, COUNT) \ +({ \ + const size_t __obj_size = size_mul(sizeof(TYPE), COUNT); \ + (TYPE *)KMALLOC(__obj_size, GFP); \ +}) + +#define kzalloc_obj(P, ...) \ + __alloc_objs(kzalloc, default_gfp(__VA_ARGS__), typeof(P), 1) + #endif /* _TOOLS_SLAB_H */
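Reading the two macros together: typeof(P) supplies the object type, so the caller passes the pointee expression and the cast yields the matching pointer type. The calls below are inferred usage, not quoted from this patch, and struct widget is a hypothetical type:

	struct widget *w;

	w = kzalloc_obj(*w);			/* kzalloc(sizeof(*w), GFP_KERNEL) */
	w = kzalloc_obj(*w, GFP_ATOMIC);	/* explicit flags via default_gfp() */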
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 942370b..a627acc 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h
@@ -860,8 +860,11 @@ __SYSCALL(__NR_file_setattr, sys_file_setattr) #define __NR_listns 470 __SYSCALL(__NR_listns, sys_listns) +#define __NR_rseq_slice_yield 471 +__SYSCALL(__NR_rseq_slice_yield, sys_rseq_slice_yield) + #undef __NR_syscalls -#define __NR_syscalls 471 +#define __NR_syscalls 472 /* * 32 bit systems traditionally used different
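The syscall tables below assign the new number 471 on every architecture; until a libc wrapper exists, userspace would invoke it through syscall(2). A hedged sketch follows; the no-argument prototype is an assumption based on the yield-style name, not something this hunk specifies:

	#include <unistd.h>
	#include <sys/syscall.h>

	#ifndef __NR_rseq_slice_yield
	#define __NR_rseq_slice_yield 471	/* from the table above */
	#endif

	static inline long rseq_slice_yield(void)
	{
		return syscall(__NR_rseq_slice_yield);
	}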
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index dddb781..80364d4 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h
@@ -14,6 +14,10 @@ #include <linux/ioctl.h> #include <asm/kvm.h> +#ifdef __KERNEL__ +#include <linux/kvm_types.h> +#endif + #define KVM_API_VERSION 12 /* @@ -135,6 +139,12 @@ struct kvm_xen_exit { } u; }; +struct kvm_exit_snp_req_certs { + __u64 gpa; + __u64 npages; + __u64 ret; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -180,6 +190,8 @@ struct kvm_xen_exit { #define KVM_EXIT_MEMORY_FAULT 39 #define KVM_EXIT_TDX 40 #define KVM_EXIT_ARM_SEA 41 +#define KVM_EXIT_ARM_LDST64B 42 +#define KVM_EXIT_SNP_REQ_CERTS 43 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -402,7 +414,7 @@ struct kvm_run { } eoi; /* KVM_EXIT_HYPERV */ struct kvm_hyperv_exit hyperv; - /* KVM_EXIT_ARM_NISV */ + /* KVM_EXIT_ARM_NISV / KVM_EXIT_ARM_LDST64B */ struct { __u64 esr_iss; __u64 fault_ipa; @@ -482,6 +494,8 @@ struct kvm_run { __u64 gva; __u64 gpa; } arm_sea; + /* KVM_EXIT_SNP_REQ_CERTS */ + struct kvm_exit_snp_req_certs snp_req_certs; /* Fix the size of the union. */ char padding[256]; }; @@ -974,6 +988,7 @@ struct kvm_enable_cap { #define KVM_CAP_GUEST_MEMFD_FLAGS 244 #define KVM_CAP_ARM_SEA_TO_USER 245 #define KVM_CAP_S390_USER_OPEREXEC 246 +#define KVM_CAP_S390_KEYOP 247 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1219,6 +1234,16 @@ struct kvm_vfio_spapr_tce { __s32 tablefd; }; +#define KVM_S390_KEYOP_ISKE 0x01 +#define KVM_S390_KEYOP_RRBE 0x02 +#define KVM_S390_KEYOP_SSKE 0x03 +struct kvm_s390_keyop { + __u64 guest_addr; + __u8 key; + __u8 operation; + __u8 pad[6]; +}; + /* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. @@ -1238,6 +1263,7 @@ struct kvm_vfio_spapr_tce { #define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) #define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) #define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) +#define KVM_S390_KEYOP _IOWR(KVMIO, 0x53, struct kvm_s390_keyop) /* Device model IOC */ #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) @@ -1579,7 +1605,11 @@ struct kvm_stats_desc { __u16 size; __u32 offset; __u32 bucket_size; +#ifdef __KERNEL__ + char name[KVM_STATS_NAME_SIZE]; +#else char name[]; +#endif }; #define KVM_GET_STATS_FD _IO(KVMIO, 0xce)
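A hedged userspace sketch of the new s390 storage-key ioctl. The struct layout and the ISKE/RRBE/SSKE opcodes are quoted from the hunk above, but which fd the ioctl targets (VM vs. vCPU) and the exact in/out semantics of the key field are assumptions here; the _IOWR direction suggests the kernel may write results back, e.g. the key read by ISKE:

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int guest_set_storage_key(int fd, uint64_t gaddr, uint8_t key)
	{
		struct kvm_s390_keyop op;

		memset(&op, 0, sizeof(op));		/* keeps op.pad[] zeroed */
		op.guest_addr = gaddr;
		op.key = key;
		op.operation = KVM_S390_KEYOP_SSKE;	/* Set Storage Key Extended */
		return ioctl(fd, KVM_S390_KEYOP, &op);
	}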
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 76e9d06..fd10aa8 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h
@@ -1396,7 +1396,7 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */ #define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */ #define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */ -#define PERF_MEM_LVLNUM_L0 0x0007 /* L0 */ +#define PERF_MEM_LVLNUM_L0 0x0007 /* L0 */ #define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */ #define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x000a /* I/O */
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 6964175..b71d188 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile
@@ -13,7 +13,7 @@ ifeq ($(ARCH_HAS_KLP),y) HAVE_XXHASH = $(shell printf "$(pound)include <xxhash.h>\nXXH3_state_t *state;int main() {}" | \ - $(HOSTCC) -xc - -o /dev/null -lxxhash 2> /dev/null && echo y || echo n) + $(HOSTCC) $(HOSTCFLAGS) -xc - -o /dev/null -lxxhash 2> /dev/null && echo y || echo n) ifeq ($(HAVE_XXHASH),y) BUILD_KLP := y LIBXXHASH_CFLAGS := $(shell $(HOSTPKG_CONFIG) libxxhash --cflags 2>/dev/null) \ @@ -142,13 +142,15 @@ $(Q)$(RM) -r -- $(LIBSUBCMD_OUTPUT) clean: $(LIBSUBCMD)-clean - $(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL) - $(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)find $(OUTPUT) \( -name '*.o' -o -name '\.*.cmd' -o -name '\.*.d' \) -type f -print | xargs $(RM) $(Q)$(RM) $(OUTPUT)arch/x86/lib/cpu-feature-names.c $(OUTPUT)fixdep $(Q)$(RM) $(OUTPUT)arch/x86/lib/inat-tables.c $(OUTPUT)fixdep $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.objtool $(Q)$(RM) -r -- $(OUTPUT)feature +mrproper: clean + $(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL) + FORCE: -.PHONY: clean FORCE +.PHONY: clean mrproper FORCE
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 73bfea2..c581782 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c
@@ -395,52 +395,36 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec if (!rex_w) break; - if (modrm_reg == CFI_SP) { - - if (mod_is_reg()) { - /* mov %rsp, reg */ - ADD_OP(op) { - op->src.type = OP_SRC_REG; - op->src.reg = CFI_SP; - op->dest.type = OP_DEST_REG; - op->dest.reg = modrm_rm; - } - break; - - } else { - /* skip RIP relative displacement */ - if (is_RIP()) - break; - - /* skip nontrivial SIB */ - if (have_SIB()) { - modrm_rm = sib_base; - if (sib_index != CFI_SP) - break; - } - - /* mov %rsp, disp(%reg) */ - ADD_OP(op) { - op->src.type = OP_SRC_REG; - op->src.reg = CFI_SP; - op->dest.type = OP_DEST_REG_INDIRECT; - op->dest.reg = modrm_rm; - op->dest.offset = ins.displacement.value; - } - break; - } - - break; - } - - if (rm_is_reg(CFI_SP)) { - - /* mov reg, %rsp */ + if (mod_is_reg()) { + /* mov reg, reg */ ADD_OP(op) { op->src.type = OP_SRC_REG; op->src.reg = modrm_reg; op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; + op->dest.reg = modrm_rm; + } + break; + } + + /* skip RIP relative displacement */ + if (is_RIP()) + break; + + /* skip nontrivial SIB */ + if (have_SIB()) { + modrm_rm = sib_base; + if (sib_index != CFI_SP) + break; + } + + /* mov %rsp, disp(%reg) */ + if (modrm_reg == CFI_SP) { + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = CFI_SP; + op->dest.type = OP_DEST_REG_INDIRECT; + op->dest.reg = modrm_rm; + op->dest.offset = ins.displacement.value; } break; }
diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a30379e..b6765e8 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c
@@ -2184,12 +2184,11 @@ static void mark_func_jump_tables(struct objtool_file *file, last = insn; /* - * Store back-pointers for unconditional forward jumps such + * Store back-pointers for forward jumps such * that find_jump_table() can back-track using those and * avoid some potentially confusing code. */ - if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && - insn->offset > last->offset && + if (insn->jump_dest && insn->jump_dest->offset > insn->offset && !insn->jump_dest->first_jump_src) { @@ -3000,6 +2999,20 @@ static int update_cfi_state(struct instruction *insn, cfi->stack_size += 8; } + else if (cfi->vals[op->src.reg].base == CFI_CFA) { + /* + * Clang RSP musical chairs: + * + * mov %rsp, %rdx [handled above] + * ... + * mov %rdx, %rbx [handled here] + * ... + * mov %rbx, %rsp [handled above] + */ + cfi->vals[op->dest.reg].base = CFI_CFA; + cfi->vals[op->dest.reg].offset = cfi->vals[op->src.reg].offset; + } + break; @@ -3734,7 +3747,7 @@ static void checksum_update_insn(struct objtool_file *file, struct symbol *func, static int validate_branch(struct objtool_file *file, struct symbol *func, struct instruction *insn, struct insn_state state); static int do_validate_branch(struct objtool_file *file, struct symbol *func, - struct instruction *insn, struct insn_state state); + struct instruction *insn, struct insn_state *state); static int validate_insn(struct objtool_file *file, struct symbol *func, struct instruction *insn, struct insn_state *statep, @@ -3999,7 +4012,7 @@ static int validate_insn(struct objtool_file *file, struct symbol *func, * tools/objtool/Documentation/objtool.txt. */ static int do_validate_branch(struct objtool_file *file, struct symbol *func, - struct instruction *insn, struct insn_state state) + struct instruction *insn, struct insn_state *state) { struct instruction *next_insn, *prev_insn = NULL; bool dead_end; @@ -4030,7 +4043,7 @@ static int do_validate_branch(struct objtool_file *file, struct symbol *func, return 1; } - ret = validate_insn(file, func, insn, &state, prev_insn, next_insn, + ret = validate_insn(file, func, insn, state, prev_insn, next_insn, &dead_end); if (!insn->trace) { @@ -4041,7 +4054,7 @@ static int do_validate_branch(struct objtool_file *file, struct symbol *func, } if (!dead_end && !next_insn) { - if (state.cfi.cfa.base == CFI_UNDEFINED) + if (state->cfi.cfa.base == CFI_UNDEFINED) return 0; if (file->ignore_unreachables) return 0; @@ -4066,7 +4079,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, int ret; trace_depth_inc(); - ret = do_validate_branch(file, func, insn, state); + ret = do_validate_branch(file, func, insn, &state); trace_depth_dec(); return ret;
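The do_validate_branch() signature change swaps a by-value struct insn_state for a pointer: straight-line validation no longer copies the state on every call, while validate_branch() still takes its state by value so each branch fork keeps an independent copy. A generic sketch of the trade-off, with hypothetical names:

	struct big_state {
		char cfi[512];			/* stand-in for a large CFI state */
	};

	/* Before: one full struct copy per call frame. */
	static int walk_by_value(struct big_state state, int depth)
	{
		return depth ? walk_by_value(state, depth - 1) : 0;
	}

	/* After: the caller's copy is shared; no per-frame duplication. */
	static int walk_by_ref(struct big_state *state, int depth)
	{
		return depth ? walk_by_ref(state, depth - 1) : 0;
	}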
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 2c02c7b..2ffe3eb 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c
@@ -16,7 +16,6 @@ #include <string.h> #include <unistd.h> #include <errno.h> -#include <libgen.h> #include <ctype.h> #include <linux/align.h> #include <linux/kernel.h> @@ -1189,7 +1188,7 @@ struct elf *elf_open_read(const char *name, int flags) struct elf *elf_create_file(GElf_Ehdr *ehdr, const char *name) { struct section *null, *symtab, *strtab, *shstrtab; - char *dir, *base, *tmp_name; + char *tmp_name; struct symbol *sym; struct elf *elf; @@ -1203,29 +1202,13 @@ struct elf *elf_create_file(GElf_Ehdr *ehdr, const char *name) INIT_LIST_HEAD(&elf->sections); - dir = strdup(name); - if (!dir) { - ERROR_GLIBC("strdup"); - return NULL; - } - - dir = dirname(dir); - - base = strdup(name); - if (!base) { - ERROR_GLIBC("strdup"); - return NULL; - } - - base = basename(base); - - tmp_name = malloc(256); + tmp_name = malloc(strlen(name) + 8); if (!tmp_name) { ERROR_GLIBC("malloc"); return NULL; } - snprintf(tmp_name, 256, "%s/%s.XXXXXX", dir, base); + sprintf(tmp_name, "%s.XXXXXX", name); elf->fd = mkstemp(tmp_name); if (elf->fd == -1) { @@ -1375,7 +1358,7 @@ void *elf_add_data(struct elf *elf, struct section *sec, const void *data, size_ memcpy(sec->data->d_buf, data, size); sec->data->d_size = size; - sec->data->d_align = 1; + sec->data->d_align = sec->sh.sh_addralign; offset = ALIGN(sec->sh.sh_size, sec->sh.sh_addralign); sec->sh.sh_size = offset + size;
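The rewritten sizing is worth a note: strlen(name) + 8 is exactly the 7 bytes of ".XXXXXX" plus the terminating NUL, and building the template directly from name (instead of reassembling dirname/basename into a fixed 256-byte buffer) drops the leaked strdup() copies and the silent-truncation risk of the old snprintf(). A minimal standalone sketch of the same pattern:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	/* Create and open "<name>.XXXXXX"; 8 = strlen(".XXXXXX") + 1 for the NUL. */
	static int make_tmp_for(const char *name)
	{
		char *tmp = malloc(strlen(name) + 8);
		int fd;

		if (!tmp)
			return -1;
		sprintf(tmp, "%s.XXXXXX", name);
		fd = mkstemp(tmp);	/* fills in XXXXXX, creates the file */
		if (fd >= 0)
			unlink(tmp);	/* demo only: clean up immediately */
		free(tmp);
		return fd;
	}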
diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h index 2b27b54..fa8b7d2 100644 --- a/tools/objtool/include/objtool/warn.h +++ b/tools/objtool/include/objtool/warn.h
@@ -107,7 +107,7 @@ static inline char *offstr(struct section *sec, unsigned long offset) #define ERROR_ELF(format, ...) __WARN_ELF(ERROR_STR, format, ##__VA_ARGS__) #define ERROR_GLIBC(format, ...) __WARN_GLIBC(ERROR_STR, format, ##__VA_ARGS__) #define ERROR_FUNC(sec, offset, format, ...) __WARN_FUNC(ERROR_STR, sec, offset, format, ##__VA_ARGS__) -#define ERROR_INSN(insn, format, ...) WARN_FUNC(insn->sec, insn->offset, format, ##__VA_ARGS__) +#define ERROR_INSN(insn, format, ...) ERROR_FUNC(insn->sec, insn->offset, format, ##__VA_ARGS__) extern bool debug; extern int indent;
diff --git a/tools/objtool/klp-diff.c b/tools/objtool/klp-diff.c index 9f1f401..c2c4e49 100644 --- a/tools/objtool/klp-diff.c +++ b/tools/objtool/klp-diff.c
@@ -14,6 +14,7 @@ #include <objtool/util.h> #include <arch/special.h> +#include <linux/align.h> #include <linux/objtool_types.h> #include <linux/livepatch_external.h> #include <linux/stringify.h> @@ -560,7 +561,7 @@ static struct symbol *__clone_symbol(struct elf *elf, struct symbol *patched_sym } if (!is_sec_sym(patched_sym)) - offset = sec_size(out_sec); + offset = ALIGN(sec_size(out_sec), out_sec->sh.sh_addralign); if (patched_sym->len || is_sec_sym(patched_sym)) { void *data = NULL; @@ -1334,25 +1335,25 @@ static bool should_keep_special_sym(struct elf *elf, struct symbol *sym) * be applied after static branch/call init, resulting in code corruption. * * Validate a special section entry to avoid that. Note that an inert - * tracepoint is harmless enough, in that case just skip the entry and print a - * warning. Otherwise, return an error. + * tracepoint or pr_debug() is harmless enough, in that case just skip the + * entry and print a warning. Otherwise, return an error. * - * This is only a temporary limitation which will be fixed when livepatch adds - * support for submodules: fully self-contained modules which are embedded in - * the top-level livepatch module's data and which can be loaded on demand when - * their corresponding to-be-patched module gets loaded. Then klp relocs can - * be retired. + * TODO: This is only a temporary limitation which will be fixed when livepatch + * adds support for submodules: fully self-contained modules which are embedded + * in the top-level livepatch module's data and which can be loaded on demand + * when their corresponding to-be-patched module gets loaded. Then klp relocs + * can be retired. * * Return: * -1: error: validation failed - * 1: warning: tracepoint skipped + * 1: warning: disabled tracepoint or pr_debug() * 0: success */ static int validate_special_section_klp_reloc(struct elfs *e, struct symbol *sym) { bool static_branch = !strcmp(sym->sec->name, "__jump_table"); bool static_call = !strcmp(sym->sec->name, ".static_call_sites"); - struct symbol *code_sym = NULL; + const char *code_sym = NULL; unsigned long code_offset = 0; struct reloc *reloc; int ret = 0; @@ -1364,12 +1365,15 @@ static int validate_special_section_klp_reloc(struct elfs *e, struct symbol *sym const char *sym_modname; struct export *export; + if (convert_reloc_sym(e->patched, reloc)) + continue; + /* Static branch/call keys are always STT_OBJECT */ if (reloc->sym->type != STT_OBJECT) { /* Save code location which can be printed below */ if (reloc->sym->type == STT_FUNC && !code_sym) { - code_sym = reloc->sym; + code_sym = reloc->sym->name; code_offset = reloc_addend(reloc); } @@ -1392,16 +1396,26 @@ static int validate_special_section_klp_reloc(struct elfs *e, struct symbol *sym if (!strcmp(sym_modname, "vmlinux")) continue; + if (!code_sym) + code_sym = "<unknown>"; + if (static_branch) { if (strstarts(reloc->sym->name, "__tracepoint_")) { WARN("%s: disabling unsupported tracepoint %s", - code_sym->name, reloc->sym->name + 13); + code_sym, reloc->sym->name + 13); + ret = 1; + continue; + } + + if (strstr(reloc->sym->name, "__UNIQUE_ID_ddebug_")) { + WARN("%s: disabling unsupported pr_debug()", + code_sym); ret = 1; continue; } ERROR("%s+0x%lx: unsupported static branch key %s. 
Use static_key_enabled() instead", - code_sym->name, code_offset, reloc->sym->name); + code_sym, code_offset, reloc->sym->name); return -1; } @@ -1412,7 +1426,7 @@ static int validate_special_section_klp_reloc(struct elfs *e, struct symbol *sym } ERROR("%s()+0x%lx: unsupported static call key %s. Use KLP_STATIC_CALL() instead", - code_sym->name, code_offset, reloc->sym->name); + code_sym, code_offset, reloc->sym->name); return -1; }
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a8dc72c..15fbba9 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config
@@ -1163,6 +1163,24 @@ CFLAGS += -DHAVE_RUST_SUPPORT $(call detected,CONFIG_RUST_SUPPORT) endif + + ifneq ($(CROSS_COMPILE),) + RUST_TARGET_FLAGS_arm := arm-unknown-linux-gnueabi + RUST_TARGET_FLAGS_arm64 := aarch64-unknown-linux-gnu + RUST_TARGET_FLAGS_m68k := m68k-unknown-linux-gnu + RUST_TARGET_FLAGS_mips := mipsel-unknown-linux-gnu + RUST_TARGET_FLAGS_powerpc := powerpc64le-unknown-linux-gnu + RUST_TARGET_FLAGS_riscv := riscv64gc-unknown-linux-gnu + RUST_TARGET_FLAGS_s390 := s390x-unknown-linux-gnu + RUST_TARGET_FLAGS_x86 := x86_64-unknown-linux-gnu + RUST_TARGET_FLAGS_x86_64 := x86_64-unknown-linux-gnu + + ifeq ($(RUST_TARGET_FLAGS_$(ARCH)),) + $(error Unknown rust cross compilation architecture $(ARCH)) + endif + + RUST_FLAGS += --target=$(RUST_TARGET_FLAGS_$(ARCH)) + endif endif # Among the variables below, these:
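Assuming the usual kernel cross-build invocation, a build such as `make -C tools/perf ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu-` would now add `--target=aarch64-unknown-linux-gnu` to RUST_FLAGS (exported to the sub-makes by the Makefile.perf change below), and an architecture missing from the table fails early with an explicit error rather than silently producing host-target Rust objects.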
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 11b63ba..f7b936d 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf
@@ -274,7 +274,7 @@ PYLINT := $(shell which pylint 2> /dev/null) endif -export srctree OUTPUT RM CC CXX RUSTC LD AR CFLAGS CXXFLAGS V BISON FLEX AWK +export srctree OUTPUT RM CC CXX RUSTC LD AR CFLAGS CXXFLAGS RUST_FLAGS V BISON FLEX AWK export HOSTCC HOSTLD HOSTAR HOSTCFLAGS SHELLCHECK MYPY PYLINT include $(srctree)/tools/build/Makefile.include
diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl index fd09afa..94351e2 100644 --- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl
@@ -485,3 +485,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common listns sys_listns +471 common rseq_slice_yield sys_rseq_slice_yield
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index dc3f4e8..4418d21 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -68,20 +68,6 @@ static const char * const metadata_ete_ro[] = { enum cs_etm_version { CS_NOT_PRESENT, CS_ETMV3, CS_ETMV4, CS_ETE }; -/* ETMv4 CONFIGR register bits */ -#define TRCCONFIGR_BB BIT(3) -#define TRCCONFIGR_CCI BIT(4) -#define TRCCONFIGR_CID BIT(6) -#define TRCCONFIGR_VMID BIT(7) -#define TRCCONFIGR_TS BIT(11) -#define TRCCONFIGR_RS BIT(12) -#define TRCCONFIGR_VMIDOPT BIT(15) - -/* ETMv3 ETMCR register bits */ -#define ETMCR_CYC_ACC BIT(12) -#define ETMCR_TIMESTAMP_EN BIT(28) -#define ETMCR_RETURN_STACK BIT(29) - static bool cs_etm_is_ete(struct perf_pmu *cs_etm_pmu, struct perf_cpu cpu); static int cs_etm_get_ro(struct perf_pmu *pmu, struct perf_cpu cpu, const char *path, __u64 *val); static bool cs_etm_pmu_path_exists(struct perf_pmu *pmu, struct perf_cpu cpu, const char *path);
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 9b92bdd..630aab9 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -385,3 +385,4 @@ 468 n64 file_getattr sys_file_getattr 469 n64 file_setattr sys_file_setattr 470 n64 listns sys_listns +471 n64 rseq_slice_yield sys_rseq_slice_yield
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index ec4458c..4fcc7c5 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -561,3 +561,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common listns sys_listns +471 nospu rseq_slice_yield sys_rseq_slice_yield
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 5863787..09a7ef0 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -3,473 +3,398 @@ # System call table for s390 # # Format: +# <nr> <abi> <syscall> <entry> # -# <nr> <abi> <syscall> <entry-64bit> <compat-entry> -# -# where <abi> can be common, 64, or 32 +# <abi> is always common. -1 common exit sys_exit sys_exit -2 common fork sys_fork sys_fork -3 common read sys_read compat_sys_s390_read -4 common write sys_write compat_sys_s390_write -5 common open sys_open compat_sys_open -6 common close sys_close sys_close -7 common restart_syscall sys_restart_syscall sys_restart_syscall -8 common creat sys_creat sys_creat -9 common link sys_link sys_link -10 common unlink sys_unlink sys_unlink -11 common execve sys_execve compat_sys_execve -12 common chdir sys_chdir sys_chdir -13 32 time - sys_time32 -14 common mknod sys_mknod sys_mknod -15 common chmod sys_chmod sys_chmod -16 32 lchown - sys_lchown16 -19 common lseek sys_lseek compat_sys_lseek -20 common getpid sys_getpid sys_getpid -21 common mount sys_mount sys_mount -22 common umount sys_oldumount sys_oldumount -23 32 setuid - sys_setuid16 -24 32 getuid - sys_getuid16 -25 32 stime - sys_stime32 -26 common ptrace sys_ptrace compat_sys_ptrace -27 common alarm sys_alarm sys_alarm -29 common pause sys_pause sys_pause -30 common utime sys_utime sys_utime32 -33 common access sys_access sys_access -34 common nice sys_nice sys_nice -36 common sync sys_sync sys_sync -37 common kill sys_kill sys_kill -38 common rename sys_rename sys_rename -39 common mkdir sys_mkdir sys_mkdir -40 common rmdir sys_rmdir sys_rmdir -41 common dup sys_dup sys_dup -42 common pipe sys_pipe sys_pipe -43 common times sys_times compat_sys_times -45 common brk sys_brk sys_brk -46 32 setgid - sys_setgid16 -47 32 getgid - sys_getgid16 -48 common signal sys_signal sys_signal -49 32 geteuid - sys_geteuid16 -50 32 getegid - sys_getegid16 -51 common acct sys_acct sys_acct -52 common umount2 sys_umount sys_umount -54 common ioctl sys_ioctl compat_sys_ioctl -55 common fcntl sys_fcntl compat_sys_fcntl -57 common setpgid sys_setpgid sys_setpgid -60 common umask sys_umask sys_umask -61 common chroot sys_chroot sys_chroot -62 common ustat sys_ustat compat_sys_ustat -63 common dup2 sys_dup2 sys_dup2 -64 common getppid sys_getppid sys_getppid -65 common getpgrp sys_getpgrp sys_getpgrp -66 common setsid sys_setsid sys_setsid -67 common sigaction sys_sigaction compat_sys_sigaction -70 32 setreuid - sys_setreuid16 -71 32 setregid - sys_setregid16 -72 common sigsuspend sys_sigsuspend sys_sigsuspend -73 common sigpending sys_sigpending compat_sys_sigpending -74 common sethostname sys_sethostname sys_sethostname -75 common setrlimit sys_setrlimit compat_sys_setrlimit -76 32 getrlimit - compat_sys_old_getrlimit -77 common getrusage sys_getrusage compat_sys_getrusage -78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday -79 common settimeofday sys_settimeofday compat_sys_settimeofday -80 32 getgroups - sys_getgroups16 -81 32 setgroups - sys_setgroups16 -83 common symlink sys_symlink sys_symlink -85 common readlink sys_readlink sys_readlink -86 common uselib sys_uselib sys_uselib -87 common swapon sys_swapon sys_swapon -88 common reboot sys_reboot sys_reboot -89 common readdir - compat_sys_old_readdir -90 common mmap sys_old_mmap compat_sys_s390_old_mmap -91 common munmap sys_munmap sys_munmap -92 common truncate sys_truncate compat_sys_truncate -93 common ftruncate sys_ftruncate compat_sys_ftruncate -94 common fchmod sys_fchmod sys_fchmod -95 32 fchown - sys_fchown16 -96 common getpriority sys_getpriority sys_getpriority -97 common setpriority 
sys_setpriority sys_setpriority -99 common statfs sys_statfs compat_sys_statfs -100 common fstatfs sys_fstatfs compat_sys_fstatfs -101 32 ioperm - - -102 common socketcall sys_socketcall compat_sys_socketcall -103 common syslog sys_syslog sys_syslog -104 common setitimer sys_setitimer compat_sys_setitimer -105 common getitimer sys_getitimer compat_sys_getitimer -106 common stat sys_newstat compat_sys_newstat -107 common lstat sys_newlstat compat_sys_newlstat -108 common fstat sys_newfstat compat_sys_newfstat -110 common lookup_dcookie - - -111 common vhangup sys_vhangup sys_vhangup -112 common idle - - -114 common wait4 sys_wait4 compat_sys_wait4 -115 common swapoff sys_swapoff sys_swapoff -116 common sysinfo sys_sysinfo compat_sys_sysinfo -117 common ipc sys_s390_ipc compat_sys_s390_ipc -118 common fsync sys_fsync sys_fsync -119 common sigreturn sys_sigreturn compat_sys_sigreturn -120 common clone sys_clone sys_clone -121 common setdomainname sys_setdomainname sys_setdomainname -122 common uname sys_newuname sys_newuname -124 common adjtimex sys_adjtimex sys_adjtimex_time32 -125 common mprotect sys_mprotect sys_mprotect -126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask -127 common create_module - - -128 common init_module sys_init_module sys_init_module -129 common delete_module sys_delete_module sys_delete_module -130 common get_kernel_syms - - -131 common quotactl sys_quotactl sys_quotactl -132 common getpgid sys_getpgid sys_getpgid -133 common fchdir sys_fchdir sys_fchdir -134 common bdflush sys_ni_syscall sys_ni_syscall -135 common sysfs sys_sysfs sys_sysfs -136 common personality sys_s390_personality sys_s390_personality -137 common afs_syscall - - -138 32 setfsuid - sys_setfsuid16 -139 32 setfsgid - sys_setfsgid16 -140 32 _llseek - sys_llseek -141 common getdents sys_getdents compat_sys_getdents -142 32 _newselect - compat_sys_select -142 64 select sys_select - -143 common flock sys_flock sys_flock -144 common msync sys_msync sys_msync -145 common readv sys_readv sys_readv -146 common writev sys_writev sys_writev -147 common getsid sys_getsid sys_getsid -148 common fdatasync sys_fdatasync sys_fdatasync -149 common _sysctl - - -150 common mlock sys_mlock sys_mlock -151 common munlock sys_munlock sys_munlock -152 common mlockall sys_mlockall sys_mlockall -153 common munlockall sys_munlockall sys_munlockall -154 common sched_setparam sys_sched_setparam sys_sched_setparam -155 common sched_getparam sys_sched_getparam sys_sched_getparam -156 common sched_setscheduler sys_sched_setscheduler sys_sched_setscheduler -157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler -158 common sched_yield sys_sched_yield sys_sched_yield -159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max -160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min -161 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 -162 common nanosleep sys_nanosleep sys_nanosleep_time32 -163 common mremap sys_mremap sys_mremap -164 32 setresuid - sys_setresuid16 -165 32 getresuid - sys_getresuid16 -167 common query_module - - -168 common poll sys_poll sys_poll -169 common nfsservctl - - -170 32 setresgid - sys_setresgid16 -171 32 getresgid - sys_getresgid16 -172 common prctl sys_prctl sys_prctl -173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn -174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction -175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask 
-176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending -177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 -178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo -179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend -180 common pread64 sys_pread64 compat_sys_s390_pread64 -181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64 -182 32 chown - sys_chown16 -183 common getcwd sys_getcwd sys_getcwd -184 common capget sys_capget sys_capget -185 common capset sys_capset sys_capset -186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack -187 common sendfile sys_sendfile64 compat_sys_sendfile -188 common getpmsg - - -189 common putpmsg - - -190 common vfork sys_vfork sys_vfork -191 32 ugetrlimit - compat_sys_getrlimit -191 64 getrlimit sys_getrlimit - -192 32 mmap2 - compat_sys_s390_mmap2 -193 32 truncate64 - compat_sys_s390_truncate64 -194 32 ftruncate64 - compat_sys_s390_ftruncate64 -195 32 stat64 - compat_sys_s390_stat64 -196 32 lstat64 - compat_sys_s390_lstat64 -197 32 fstat64 - compat_sys_s390_fstat64 -198 32 lchown32 - sys_lchown -198 64 lchown sys_lchown - -199 32 getuid32 - sys_getuid -199 64 getuid sys_getuid - -200 32 getgid32 - sys_getgid -200 64 getgid sys_getgid - -201 32 geteuid32 - sys_geteuid -201 64 geteuid sys_geteuid - -202 32 getegid32 - sys_getegid -202 64 getegid sys_getegid - -203 32 setreuid32 - sys_setreuid -203 64 setreuid sys_setreuid - -204 32 setregid32 - sys_setregid -204 64 setregid sys_setregid - -205 32 getgroups32 - sys_getgroups -205 64 getgroups sys_getgroups - -206 32 setgroups32 - sys_setgroups -206 64 setgroups sys_setgroups - -207 32 fchown32 - sys_fchown -207 64 fchown sys_fchown - -208 32 setresuid32 - sys_setresuid -208 64 setresuid sys_setresuid - -209 32 getresuid32 - sys_getresuid -209 64 getresuid sys_getresuid - -210 32 setresgid32 - sys_setresgid -210 64 setresgid sys_setresgid - -211 32 getresgid32 - sys_getresgid -211 64 getresgid sys_getresgid - -212 32 chown32 - sys_chown -212 64 chown sys_chown - -213 32 setuid32 - sys_setuid -213 64 setuid sys_setuid - -214 32 setgid32 - sys_setgid -214 64 setgid sys_setgid - -215 32 setfsuid32 - sys_setfsuid -215 64 setfsuid sys_setfsuid - -216 32 setfsgid32 - sys_setfsgid -216 64 setfsgid sys_setfsgid - -217 common pivot_root sys_pivot_root sys_pivot_root -218 common mincore sys_mincore sys_mincore -219 common madvise sys_madvise sys_madvise -220 common getdents64 sys_getdents64 sys_getdents64 -221 32 fcntl64 - compat_sys_fcntl64 -222 common readahead sys_readahead compat_sys_s390_readahead -223 32 sendfile64 - compat_sys_sendfile64 -224 common setxattr sys_setxattr sys_setxattr -225 common lsetxattr sys_lsetxattr sys_lsetxattr -226 common fsetxattr sys_fsetxattr sys_fsetxattr -227 common getxattr sys_getxattr sys_getxattr -228 common lgetxattr sys_lgetxattr sys_lgetxattr -229 common fgetxattr sys_fgetxattr sys_fgetxattr -230 common listxattr sys_listxattr sys_listxattr -231 common llistxattr sys_llistxattr sys_llistxattr -232 common flistxattr sys_flistxattr sys_flistxattr -233 common removexattr sys_removexattr sys_removexattr -234 common lremovexattr sys_lremovexattr sys_lremovexattr -235 common fremovexattr sys_fremovexattr sys_fremovexattr -236 common gettid sys_gettid sys_gettid -237 common tkill sys_tkill sys_tkill -238 common futex sys_futex sys_futex_time32 -239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity -240 common sched_getaffinity sys_sched_getaffinity 
compat_sys_sched_getaffinity -241 common tgkill sys_tgkill sys_tgkill -243 common io_setup sys_io_setup compat_sys_io_setup -244 common io_destroy sys_io_destroy sys_io_destroy -245 common io_getevents sys_io_getevents sys_io_getevents_time32 -246 common io_submit sys_io_submit compat_sys_io_submit -247 common io_cancel sys_io_cancel sys_io_cancel -248 common exit_group sys_exit_group sys_exit_group -249 common epoll_create sys_epoll_create sys_epoll_create -250 common epoll_ctl sys_epoll_ctl sys_epoll_ctl -251 common epoll_wait sys_epoll_wait sys_epoll_wait -252 common set_tid_address sys_set_tid_address sys_set_tid_address -253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64 -254 common timer_create sys_timer_create compat_sys_timer_create -255 common timer_settime sys_timer_settime sys_timer_settime32 -256 common timer_gettime sys_timer_gettime sys_timer_gettime32 -257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun -258 common timer_delete sys_timer_delete sys_timer_delete -259 common clock_settime sys_clock_settime sys_clock_settime32 -260 common clock_gettime sys_clock_gettime sys_clock_gettime32 -261 common clock_getres sys_clock_getres sys_clock_getres_time32 -262 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 -264 32 fadvise64_64 - compat_sys_s390_fadvise64_64 -265 common statfs64 sys_statfs64 compat_sys_statfs64 -266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 -267 common remap_file_pages sys_remap_file_pages sys_remap_file_pages -268 common mbind sys_mbind sys_mbind -269 common get_mempolicy sys_get_mempolicy sys_get_mempolicy -270 common set_mempolicy sys_set_mempolicy sys_set_mempolicy -271 common mq_open sys_mq_open compat_sys_mq_open -272 common mq_unlink sys_mq_unlink sys_mq_unlink -273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 -274 common mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 -275 common mq_notify sys_mq_notify compat_sys_mq_notify -276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr -277 common kexec_load sys_kexec_load compat_sys_kexec_load -278 common add_key sys_add_key sys_add_key -279 common request_key sys_request_key sys_request_key -280 common keyctl sys_keyctl compat_sys_keyctl -281 common waitid sys_waitid compat_sys_waitid -282 common ioprio_set sys_ioprio_set sys_ioprio_set -283 common ioprio_get sys_ioprio_get sys_ioprio_get -284 common inotify_init sys_inotify_init sys_inotify_init -285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch -286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch -287 common migrate_pages sys_migrate_pages sys_migrate_pages -288 common openat sys_openat compat_sys_openat -289 common mkdirat sys_mkdirat sys_mkdirat -290 common mknodat sys_mknodat sys_mknodat -291 common fchownat sys_fchownat sys_fchownat -292 common futimesat sys_futimesat sys_futimesat_time32 -293 32 fstatat64 - compat_sys_s390_fstatat64 -293 64 newfstatat sys_newfstatat - -294 common unlinkat sys_unlinkat sys_unlinkat -295 common renameat sys_renameat sys_renameat -296 common linkat sys_linkat sys_linkat -297 common symlinkat sys_symlinkat sys_symlinkat -298 common readlinkat sys_readlinkat sys_readlinkat -299 common fchmodat sys_fchmodat sys_fchmodat -300 common faccessat sys_faccessat sys_faccessat -301 common pselect6 sys_pselect6 compat_sys_pselect6_time32 -302 common ppoll sys_ppoll compat_sys_ppoll_time32 -303 common unshare sys_unshare sys_unshare -304 common set_robust_list sys_set_robust_list 
compat_sys_set_robust_list -305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list -306 common splice sys_splice sys_splice -307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range -308 common tee sys_tee sys_tee -309 common vmsplice sys_vmsplice sys_vmsplice -310 common move_pages sys_move_pages sys_move_pages -311 common getcpu sys_getcpu sys_getcpu -312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait -313 common utimes sys_utimes sys_utimes_time32 -314 common fallocate sys_fallocate compat_sys_s390_fallocate -315 common utimensat sys_utimensat sys_utimensat_time32 -316 common signalfd sys_signalfd compat_sys_signalfd -317 common timerfd - - -318 common eventfd sys_eventfd sys_eventfd -319 common timerfd_create sys_timerfd_create sys_timerfd_create -320 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32 -321 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 -322 common signalfd4 sys_signalfd4 compat_sys_signalfd4 -323 common eventfd2 sys_eventfd2 sys_eventfd2 -324 common inotify_init1 sys_inotify_init1 sys_inotify_init1 -325 common pipe2 sys_pipe2 sys_pipe2 -326 common dup3 sys_dup3 sys_dup3 -327 common epoll_create1 sys_epoll_create1 sys_epoll_create1 -328 common preadv sys_preadv compat_sys_preadv -329 common pwritev sys_pwritev compat_sys_pwritev -330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo -331 common perf_event_open sys_perf_event_open sys_perf_event_open -332 common fanotify_init sys_fanotify_init sys_fanotify_init -333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark -334 common prlimit64 sys_prlimit64 sys_prlimit64 -335 common name_to_handle_at sys_name_to_handle_at sys_name_to_handle_at -336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at -337 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32 -338 common syncfs sys_syncfs sys_syncfs -339 common setns sys_setns sys_setns -340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv -341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev -342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr -343 common kcmp sys_kcmp sys_kcmp -344 common finit_module sys_finit_module sys_finit_module -345 common sched_setattr sys_sched_setattr sys_sched_setattr -346 common sched_getattr sys_sched_getattr sys_sched_getattr -347 common renameat2 sys_renameat2 sys_renameat2 -348 common seccomp sys_seccomp sys_seccomp -349 common getrandom sys_getrandom sys_getrandom -350 common memfd_create sys_memfd_create sys_memfd_create -351 common bpf sys_bpf sys_bpf -352 common s390_pci_mmio_write sys_s390_pci_mmio_write sys_s390_pci_mmio_write -353 common s390_pci_mmio_read sys_s390_pci_mmio_read sys_s390_pci_mmio_read -354 common execveat sys_execveat compat_sys_execveat -355 common userfaultfd sys_userfaultfd sys_userfaultfd -356 common membarrier sys_membarrier sys_membarrier -357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 -358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg -359 common socket sys_socket sys_socket -360 common socketpair sys_socketpair sys_socketpair -361 common bind sys_bind sys_bind -362 common connect sys_connect sys_connect -363 common listen sys_listen sys_listen -364 common accept4 sys_accept4 sys_accept4 -365 common getsockopt sys_getsockopt sys_getsockopt -366 common setsockopt sys_setsockopt sys_setsockopt -367 common getsockname sys_getsockname sys_getsockname -368 common getpeername sys_getpeername 
sys_getpeername -369 common sendto sys_sendto sys_sendto -370 common sendmsg sys_sendmsg compat_sys_sendmsg -371 common recvfrom sys_recvfrom compat_sys_recvfrom -372 common recvmsg sys_recvmsg compat_sys_recvmsg -373 common shutdown sys_shutdown sys_shutdown -374 common mlock2 sys_mlock2 sys_mlock2 -375 common copy_file_range sys_copy_file_range sys_copy_file_range -376 common preadv2 sys_preadv2 compat_sys_preadv2 -377 common pwritev2 sys_pwritev2 compat_sys_pwritev2 -378 common s390_guarded_storage sys_s390_guarded_storage sys_s390_guarded_storage -379 common statx sys_statx sys_statx -380 common s390_sthyi sys_s390_sthyi sys_s390_sthyi -381 common kexec_file_load sys_kexec_file_load sys_kexec_file_load -382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents -383 common rseq sys_rseq sys_rseq -384 common pkey_mprotect sys_pkey_mprotect sys_pkey_mprotect -385 common pkey_alloc sys_pkey_alloc sys_pkey_alloc -386 common pkey_free sys_pkey_free sys_pkey_free +1 common exit sys_exit +2 common fork sys_fork +3 common read sys_read +4 common write sys_write +5 common open sys_open +6 common close sys_close +7 common restart_syscall sys_restart_syscall +8 common creat sys_creat +9 common link sys_link +10 common unlink sys_unlink +11 common execve sys_execve +12 common chdir sys_chdir +14 common mknod sys_mknod +15 common chmod sys_chmod +19 common lseek sys_lseek +20 common getpid sys_getpid +21 common mount sys_mount +22 common umount sys_oldumount +26 common ptrace sys_ptrace +27 common alarm sys_alarm +29 common pause sys_pause +30 common utime sys_utime +33 common access sys_access +34 common nice sys_nice +36 common sync sys_sync +37 common kill sys_kill +38 common rename sys_rename +39 common mkdir sys_mkdir +40 common rmdir sys_rmdir +41 common dup sys_dup +42 common pipe sys_pipe +43 common times sys_times +45 common brk sys_brk +48 common signal sys_signal +51 common acct sys_acct +52 common umount2 sys_umount +54 common ioctl sys_ioctl +55 common fcntl sys_fcntl +57 common setpgid sys_setpgid +60 common umask sys_umask +61 common chroot sys_chroot +62 common ustat sys_ustat +63 common dup2 sys_dup2 +64 common getppid sys_getppid +65 common getpgrp sys_getpgrp +66 common setsid sys_setsid +67 common sigaction sys_sigaction +72 common sigsuspend sys_sigsuspend +73 common sigpending sys_sigpending +74 common sethostname sys_sethostname +75 common setrlimit sys_setrlimit +77 common getrusage sys_getrusage +78 common gettimeofday sys_gettimeofday +79 common settimeofday sys_settimeofday +83 common symlink sys_symlink +85 common readlink sys_readlink +86 common uselib sys_uselib +87 common swapon sys_swapon +88 common reboot sys_reboot +89 common readdir sys_ni_syscall +90 common mmap sys_old_mmap +91 common munmap sys_munmap +92 common truncate sys_truncate +93 common ftruncate sys_ftruncate +94 common fchmod sys_fchmod +96 common getpriority sys_getpriority +97 common setpriority sys_setpriority +99 common statfs sys_statfs +100 common fstatfs sys_fstatfs +102 common socketcall sys_socketcall +103 common syslog sys_syslog +104 common setitimer sys_setitimer +105 common getitimer sys_getitimer +106 common stat sys_newstat +107 common lstat sys_newlstat +108 common fstat sys_newfstat +110 common lookup_dcookie sys_ni_syscall +111 common vhangup sys_vhangup +112 common idle sys_ni_syscall +114 common wait4 sys_wait4 +115 common swapoff sys_swapoff +116 common sysinfo sys_sysinfo +117 common ipc sys_s390_ipc +118 common fsync sys_fsync +119 common sigreturn sys_sigreturn +120 
common clone sys_clone +121 common setdomainname sys_setdomainname +122 common uname sys_newuname +124 common adjtimex sys_adjtimex +125 common mprotect sys_mprotect +126 common sigprocmask sys_sigprocmask +127 common create_module sys_ni_syscall +128 common init_module sys_init_module +129 common delete_module sys_delete_module +130 common get_kernel_syms sys_ni_syscall +131 common quotactl sys_quotactl +132 common getpgid sys_getpgid +133 common fchdir sys_fchdir +134 common bdflush sys_ni_syscall +135 common sysfs sys_sysfs +136 common personality sys_s390_personality +137 common afs_syscall sys_ni_syscall +141 common getdents sys_getdents +142 common select sys_select +143 common flock sys_flock +144 common msync sys_msync +145 common readv sys_readv +146 common writev sys_writev +147 common getsid sys_getsid +148 common fdatasync sys_fdatasync +149 common _sysctl sys_ni_syscall +150 common mlock sys_mlock +151 common munlock sys_munlock +152 common mlockall sys_mlockall +153 common munlockall sys_munlockall +154 common sched_setparam sys_sched_setparam +155 common sched_getparam sys_sched_getparam +156 common sched_setscheduler sys_sched_setscheduler +157 common sched_getscheduler sys_sched_getscheduler +158 common sched_yield sys_sched_yield +159 common sched_get_priority_max sys_sched_get_priority_max +160 common sched_get_priority_min sys_sched_get_priority_min +161 common sched_rr_get_interval sys_sched_rr_get_interval +162 common nanosleep sys_nanosleep +163 common mremap sys_mremap +167 common query_module sys_ni_syscall +168 common poll sys_poll +169 common nfsservctl sys_ni_syscall +172 common prctl sys_prctl +173 common rt_sigreturn sys_rt_sigreturn +174 common rt_sigaction sys_rt_sigaction +175 common rt_sigprocmask sys_rt_sigprocmask +176 common rt_sigpending sys_rt_sigpending +177 common rt_sigtimedwait sys_rt_sigtimedwait +178 common rt_sigqueueinfo sys_rt_sigqueueinfo +179 common rt_sigsuspend sys_rt_sigsuspend +180 common pread64 sys_pread64 +181 common pwrite64 sys_pwrite64 +183 common getcwd sys_getcwd +184 common capget sys_capget +185 common capset sys_capset +186 common sigaltstack sys_sigaltstack +187 common sendfile sys_sendfile64 +188 common getpmsg sys_ni_syscall +189 common putpmsg sys_ni_syscall +190 common vfork sys_vfork +191 common getrlimit sys_getrlimit +198 common lchown sys_lchown +199 common getuid sys_getuid +200 common getgid sys_getgid +201 common geteuid sys_geteuid +202 common getegid sys_getegid +203 common setreuid sys_setreuid +204 common setregid sys_setregid +205 common getgroups sys_getgroups +206 common setgroups sys_setgroups +207 common fchown sys_fchown +208 common setresuid sys_setresuid +209 common getresuid sys_getresuid +210 common setresgid sys_setresgid +211 common getresgid sys_getresgid +212 common chown sys_chown +213 common setuid sys_setuid +214 common setgid sys_setgid +215 common setfsuid sys_setfsuid +216 common setfsgid sys_setfsgid +217 common pivot_root sys_pivot_root +218 common mincore sys_mincore +219 common madvise sys_madvise +220 common getdents64 sys_getdents64 +222 common readahead sys_readahead +224 common setxattr sys_setxattr +225 common lsetxattr sys_lsetxattr +226 common fsetxattr sys_fsetxattr +227 common getxattr sys_getxattr +228 common lgetxattr sys_lgetxattr +229 common fgetxattr sys_fgetxattr +230 common listxattr sys_listxattr +231 common llistxattr sys_llistxattr +232 common flistxattr sys_flistxattr +233 common removexattr sys_removexattr +234 common lremovexattr sys_lremovexattr +235 common 
fremovexattr sys_fremovexattr +236 common gettid sys_gettid +237 common tkill sys_tkill +238 common futex sys_futex +239 common sched_setaffinity sys_sched_setaffinity +240 common sched_getaffinity sys_sched_getaffinity +241 common tgkill sys_tgkill +243 common io_setup sys_io_setup +244 common io_destroy sys_io_destroy +245 common io_getevents sys_io_getevents +246 common io_submit sys_io_submit +247 common io_cancel sys_io_cancel +248 common exit_group sys_exit_group +249 common epoll_create sys_epoll_create +250 common epoll_ctl sys_epoll_ctl +251 common epoll_wait sys_epoll_wait +252 common set_tid_address sys_set_tid_address +253 common fadvise64 sys_fadvise64_64 +254 common timer_create sys_timer_create +255 common timer_settime sys_timer_settime +256 common timer_gettime sys_timer_gettime +257 common timer_getoverrun sys_timer_getoverrun +258 common timer_delete sys_timer_delete +259 common clock_settime sys_clock_settime +260 common clock_gettime sys_clock_gettime +261 common clock_getres sys_clock_getres +262 common clock_nanosleep sys_clock_nanosleep +265 common statfs64 sys_statfs64 +266 common fstatfs64 sys_fstatfs64 +267 common remap_file_pages sys_remap_file_pages +268 common mbind sys_mbind +269 common get_mempolicy sys_get_mempolicy +270 common set_mempolicy sys_set_mempolicy +271 common mq_open sys_mq_open +272 common mq_unlink sys_mq_unlink +273 common mq_timedsend sys_mq_timedsend +274 common mq_timedreceive sys_mq_timedreceive +275 common mq_notify sys_mq_notify +276 common mq_getsetattr sys_mq_getsetattr +277 common kexec_load sys_kexec_load +278 common add_key sys_add_key +279 common request_key sys_request_key +280 common keyctl sys_keyctl +281 common waitid sys_waitid +282 common ioprio_set sys_ioprio_set +283 common ioprio_get sys_ioprio_get +284 common inotify_init sys_inotify_init +285 common inotify_add_watch sys_inotify_add_watch +286 common inotify_rm_watch sys_inotify_rm_watch +287 common migrate_pages sys_migrate_pages +288 common openat sys_openat +289 common mkdirat sys_mkdirat +290 common mknodat sys_mknodat +291 common fchownat sys_fchownat +292 common futimesat sys_futimesat +293 common newfstatat sys_newfstatat +294 common unlinkat sys_unlinkat +295 common renameat sys_renameat +296 common linkat sys_linkat +297 common symlinkat sys_symlinkat +298 common readlinkat sys_readlinkat +299 common fchmodat sys_fchmodat +300 common faccessat sys_faccessat +301 common pselect6 sys_pselect6 +302 common ppoll sys_ppoll +303 common unshare sys_unshare +304 common set_robust_list sys_set_robust_list +305 common get_robust_list sys_get_robust_list +306 common splice sys_splice +307 common sync_file_range sys_sync_file_range +308 common tee sys_tee +309 common vmsplice sys_vmsplice +310 common move_pages sys_move_pages +311 common getcpu sys_getcpu +312 common epoll_pwait sys_epoll_pwait +313 common utimes sys_utimes +314 common fallocate sys_fallocate +315 common utimensat sys_utimensat +316 common signalfd sys_signalfd +317 common timerfd sys_ni_syscall +318 common eventfd sys_eventfd +319 common timerfd_create sys_timerfd_create +320 common timerfd_settime sys_timerfd_settime +321 common timerfd_gettime sys_timerfd_gettime +322 common signalfd4 sys_signalfd4 +323 common eventfd2 sys_eventfd2 +324 common inotify_init1 sys_inotify_init1 +325 common pipe2 sys_pipe2 +326 common dup3 sys_dup3 +327 common epoll_create1 sys_epoll_create1 +328 common preadv sys_preadv +329 common pwritev sys_pwritev +330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo +331 common 
perf_event_open sys_perf_event_open +332 common fanotify_init sys_fanotify_init +333 common fanotify_mark sys_fanotify_mark +334 common prlimit64 sys_prlimit64 +335 common name_to_handle_at sys_name_to_handle_at +336 common open_by_handle_at sys_open_by_handle_at +337 common clock_adjtime sys_clock_adjtime +338 common syncfs sys_syncfs +339 common setns sys_setns +340 common process_vm_readv sys_process_vm_readv +341 common process_vm_writev sys_process_vm_writev +342 common s390_runtime_instr sys_s390_runtime_instr +343 common kcmp sys_kcmp +344 common finit_module sys_finit_module +345 common sched_setattr sys_sched_setattr +346 common sched_getattr sys_sched_getattr +347 common renameat2 sys_renameat2 +348 common seccomp sys_seccomp +349 common getrandom sys_getrandom +350 common memfd_create sys_memfd_create +351 common bpf sys_bpf +352 common s390_pci_mmio_write sys_s390_pci_mmio_write +353 common s390_pci_mmio_read sys_s390_pci_mmio_read +354 common execveat sys_execveat +355 common userfaultfd sys_userfaultfd +356 common membarrier sys_membarrier +357 common recvmmsg sys_recvmmsg +358 common sendmmsg sys_sendmmsg +359 common socket sys_socket +360 common socketpair sys_socketpair +361 common bind sys_bind +362 common connect sys_connect +363 common listen sys_listen +364 common accept4 sys_accept4 +365 common getsockopt sys_getsockopt +366 common setsockopt sys_setsockopt +367 common getsockname sys_getsockname +368 common getpeername sys_getpeername +369 common sendto sys_sendto +370 common sendmsg sys_sendmsg +371 common recvfrom sys_recvfrom +372 common recvmsg sys_recvmsg +373 common shutdown sys_shutdown +374 common mlock2 sys_mlock2 +375 common copy_file_range sys_copy_file_range +376 common preadv2 sys_preadv2 +377 common pwritev2 sys_pwritev2 +378 common s390_guarded_storage sys_s390_guarded_storage +379 common statx sys_statx +380 common s390_sthyi sys_s390_sthyi +381 common kexec_file_load sys_kexec_file_load +382 common io_pgetevents sys_io_pgetevents +383 common rseq sys_rseq +384 common pkey_mprotect sys_pkey_mprotect +385 common pkey_alloc sys_pkey_alloc +386 common pkey_free sys_pkey_free # room for arch specific syscalls -392 64 semtimedop sys_semtimedop - -393 common semget sys_semget sys_semget -394 common semctl sys_semctl compat_sys_semctl -395 common shmget sys_shmget sys_shmget -396 common shmctl sys_shmctl compat_sys_shmctl -397 common shmat sys_shmat compat_sys_shmat -398 common shmdt sys_shmdt sys_shmdt -399 common msgget sys_msgget sys_msgget -400 common msgsnd sys_msgsnd compat_sys_msgsnd -401 common msgrcv sys_msgrcv compat_sys_msgrcv -402 common msgctl sys_msgctl compat_sys_msgctl -403 32 clock_gettime64 - sys_clock_gettime -404 32 clock_settime64 - sys_clock_settime -405 32 clock_adjtime64 - sys_clock_adjtime -406 32 clock_getres_time64 - sys_clock_getres -407 32 clock_nanosleep_time64 - sys_clock_nanosleep -408 32 timer_gettime64 - sys_timer_gettime -409 32 timer_settime64 - sys_timer_settime -410 32 timerfd_gettime64 - sys_timerfd_gettime -411 32 timerfd_settime64 - sys_timerfd_settime -412 32 utimensat_time64 - sys_utimensat -413 32 pselect6_time64 - compat_sys_pselect6_time64 -414 32 ppoll_time64 - compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64 -417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 -418 32 mq_timedsend_time64 - sys_mq_timedsend -419 32 mq_timedreceive_time64 - sys_mq_timedreceive -420 32 semtimedop_time64 - sys_semtimedop -421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64 -422 32 
futex_time64 - sys_futex -423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval -424 common pidfd_send_signal sys_pidfd_send_signal sys_pidfd_send_signal -425 common io_uring_setup sys_io_uring_setup sys_io_uring_setup -426 common io_uring_enter sys_io_uring_enter sys_io_uring_enter -427 common io_uring_register sys_io_uring_register sys_io_uring_register -428 common open_tree sys_open_tree sys_open_tree -429 common move_mount sys_move_mount sys_move_mount -430 common fsopen sys_fsopen sys_fsopen -431 common fsconfig sys_fsconfig sys_fsconfig -432 common fsmount sys_fsmount sys_fsmount -433 common fspick sys_fspick sys_fspick -434 common pidfd_open sys_pidfd_open sys_pidfd_open -435 common clone3 sys_clone3 sys_clone3 -436 common close_range sys_close_range sys_close_range -437 common openat2 sys_openat2 sys_openat2 -438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd -439 common faccessat2 sys_faccessat2 sys_faccessat2 -440 common process_madvise sys_process_madvise sys_process_madvise -441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 -442 common mount_setattr sys_mount_setattr sys_mount_setattr -443 common quotactl_fd sys_quotactl_fd sys_quotactl_fd -444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset -445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule -446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self -447 common memfd_secret sys_memfd_secret sys_memfd_secret -448 common process_mrelease sys_process_mrelease sys_process_mrelease -449 common futex_waitv sys_futex_waitv sys_futex_waitv -450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node -451 common cachestat sys_cachestat sys_cachestat -452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 -453 common map_shadow_stack sys_map_shadow_stack sys_map_shadow_stack -454 common futex_wake sys_futex_wake sys_futex_wake -455 common futex_wait sys_futex_wait sys_futex_wait -456 common futex_requeue sys_futex_requeue sys_futex_requeue -457 common statmount sys_statmount sys_statmount -458 common listmount sys_listmount sys_listmount -459 common lsm_get_self_attr sys_lsm_get_self_attr sys_lsm_get_self_attr -460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr -461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules -462 common mseal sys_mseal sys_mseal -463 common setxattrat sys_setxattrat sys_setxattrat -464 common getxattrat sys_getxattrat sys_getxattrat -465 common listxattrat sys_listxattrat sys_listxattrat -466 common removexattrat sys_removexattrat sys_removexattrat -467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr -468 common file_getattr sys_file_getattr sys_file_getattr -469 common file_setattr sys_file_setattr sys_file_setattr -470 common listns sys_listns sys_listns +392 common semtimedop sys_semtimedop +393 common semget sys_semget +394 common semctl sys_semctl +395 common shmget sys_shmget +396 common shmctl sys_shmctl +397 common shmat sys_shmat +398 common shmdt sys_shmdt +399 common msgget sys_msgget +400 common msgsnd sys_msgsnd +401 common msgrcv sys_msgrcv +402 common msgctl sys_msgctl +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register +428 common open_tree sys_open_tree +429 common move_mount sys_move_mount +430 common fsopen sys_fsopen +431 common fsconfig sys_fsconfig +432 
common fsmount sys_fsmount +433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open +435 common clone3 sys_clone3 +436 common close_range sys_close_range +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr +443 common quotactl_fd sys_quotactl_fd +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self +447 common memfd_secret sys_memfd_secret +448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv +450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 +453 common map_shadow_stack sys_map_shadow_stack +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue +457 common statmount sys_statmount +458 common listmount sys_listmount +459 common lsm_get_self_attr sys_lsm_get_self_attr +460 common lsm_set_self_attr sys_lsm_set_self_attr +461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal +463 common setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr +470 common listns sys_listns +471 common rseq_slice_yield sys_rseq_slice_yield
diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl index 969c113..70b315c 100644 --- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl
@@ -474,3 +474,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common listns sys_listns +471 common rseq_slice_yield sys_rseq_slice_yield
diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl index 39aa26b..7e71bf7 100644 --- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
@@ -480,7 +480,7 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -# 435 reserved for clone3 +435 common clone3 __sys_clone3 436 common close_range sys_close_range 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd @@ -516,3 +516,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common listns sys_listns +471 common rseq_slice_yield sys_rseq_slice_yield
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl index e979a3e..f832ebd 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
@@ -476,3 +476,4 @@ 468 i386 file_getattr sys_file_getattr 469 i386 file_setattr sys_file_setattr 470 i386 listns sys_listns +471 i386 rseq_slice_yield sys_rseq_slice_yield
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 8a4ac48..524155d 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -395,6 +395,7 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common listns sys_listns +471 common rseq_slice_yield sys_rseq_slice_yield # # Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl index 438a3b1..a9bca4e 100644 --- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
@@ -441,3 +441,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common listns sys_listns +471 common rseq_slice_yield sys_rseq_slice_yield
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 6b6eec6..4cc3345 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c
@@ -18,6 +18,7 @@ #include <poll.h> #include <ctype.h> #include <linux/capability.h> +#include <linux/err.h> #include <linux/string.h> #include <sys/stat.h> @@ -1209,8 +1210,12 @@ static int prepare_func_profile(struct perf_ftrace *ftrace) ftrace->graph_verbose = 0; ftrace->profile_hash = hashmap__new(profile_hash, profile_equal, NULL); - if (ftrace->profile_hash == NULL) - return -ENOMEM; + if (IS_ERR(ftrace->profile_hash)) { + int err = PTR_ERR(ftrace->profile_hash); + + ftrace->profile_hash = NULL; + return err; + } return 0; }
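Background for the fix above: hashmap__new() in tools/lib/bpf reports allocation failure as ERR_PTR(-ENOMEM) rather than NULL, so the old NULL check could never trigger. A minimal caller sketch of that convention (a hypothetical helper, not the perf code itself):

#include <linux/err.h>
#include "hashmap.h"	/* tools/lib/bpf/hashmap.h */

static int create_map(struct hashmap **mapp, hashmap_hash_fn hash,
		      hashmap_equal_fn equal)
{
	struct hashmap *map = hashmap__new(hash, equal, /*ctx=*/NULL);

	if (IS_ERR(map)) {
		*mapp = NULL;		/* don't hand an ERR_PTR to callers */
		return PTR_ERR(map);	/* e.g. -ENOMEM */
	}
	*mapp = map;
	return 0;
}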
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index da3aca8..3182662 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh
@@ -187,7 +187,6 @@ check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include <linux/cfi_types.h>"' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' check arch/x86/include/asm/amd/ibs.h '-I "^#include .*/msr-index.h"' -check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"' check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index 63c6578..dc5f948 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build
@@ -214,7 +214,8 @@ quiet_cmd_rm = RM $^ prune_orphans: $(ORPHAN_FILES) - $(Q)$(call echo-cmd,rm)rm -f $^ + # The list of files can be long. Use xargs so the command line cannot exceed the system's argument-length limit. + $(Q)$(call echo-cmd,rm)echo "$^" | xargs rm -f JEVENTS_DEPS += prune_orphans endif
diff --git a/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h b/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h index 6e1d5b9..85253fc 100644 --- a/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h +++ b/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h
@@ -77,6 +77,7 @@ */ #define IRQ_WORK_VECTOR 0xf6 +/* IRQ vector for PMIs when running a guest with a mediated PMU. */ #define PERF_GUEST_MEDIATED_PMI_VECTOR 0xf5 #define DEFERRED_ERROR_VECTOR 0xf4
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 66ca526..70b2b66 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h
@@ -253,6 +253,7 @@ struct file_attr { #define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ #define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */ #define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */ +#define FS_XFLAG_VERITY 0x00020000 /* fs-verity enabled */ #define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* the read-only stuff doesn't really belong here, but any other place is
diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h index 5d3f8c9..d9d8659 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/mount.h +++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h
@@ -61,7 +61,8 @@ /* * open_tree() flags. */ -#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ +#define OPEN_TREE_CLONE (1 << 0) /* Clone the target tree and attach the clone */ +#define OPEN_TREE_NAMESPACE (1 << 1) /* Clone the target tree into a new mount namespace */ #define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ /* @@ -197,7 +198,10 @@ struct statmount { */ struct mnt_id_req { __u32 size; - __u32 mnt_ns_fd; + union { + __u32 mnt_ns_fd; + __u32 mnt_fd; + }; __u64 mnt_id; __u64 param; __u64 mnt_ns_id; @@ -232,4 +236,9 @@ struct mnt_id_req { #define LSMT_ROOT 0xffffffffffffffff /* root mount */ #define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ +/* + * @flag bits for statmount(2) + */ +#define STATMOUNT_BY_FD 0x00000001U /* want mountinfo for given fd */ + #endif /* _UAPI_LINUX_MOUNT_H */
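To make the statmount(2) extension above concrete, here is a speculative usage sketch derived only from this header diff: it assumes STATMOUNT_BY_FD makes the kernel read the new mnt_fd union member, mirrors the struct locally so it builds against older headers, and uses syscall number 457 as listed in the syscall tables earlier in this series:

#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#define STATMOUNT_BY_FD 0x00000001U	/* from the header diff above */

struct mnt_id_req_sketch {		/* local mirror of struct mnt_id_req */
	uint32_t size;
	union {
		uint32_t mnt_ns_fd;
		uint32_t mnt_fd;	/* new union member */
	};
	uint64_t mnt_id;
	uint64_t param;
	uint64_t mnt_ns_id;
};

/* Assumed semantics: return mount info for the mount backing 'fd'. */
static long statmount_by_fd(int fd, void *buf, size_t bufsize, uint64_t mask)
{
	struct mnt_id_req_sketch req;

	memset(&req, 0, sizeof(req));
	req.size = sizeof(req);
	req.mnt_fd = fd;
	req.param = mask;		/* STATMOUNT_* items requested */
	return syscall(457 /* __NR_statmount */, &req, buf, bufsize,
		       STATMOUNT_BY_FD);
}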
diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 51c4e8c..55b0446 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
@@ -386,4 +386,41 @@ struct prctl_mm_map { # define PR_FUTEX_HASH_SET_SLOTS 1 # define PR_FUTEX_HASH_GET_SLOTS 2 +/* RSEQ time slice extensions */ +#define PR_RSEQ_SLICE_EXTENSION 79 +# define PR_RSEQ_SLICE_EXTENSION_GET 1 +# define PR_RSEQ_SLICE_EXTENSION_SET 2 +/* + * Bits for RSEQ_SLICE_EXTENSION_GET/SET + * PR_RSEQ_SLICE_EXT_ENABLE: Enable + */ +# define PR_RSEQ_SLICE_EXT_ENABLE 0x01 + +/* + * Get the current indirect branch tracking configuration for the current + * thread; this will be the value configured via PR_SET_INDIR_BR_LP_STATUS. + */ +#define PR_GET_INDIR_BR_LP_STATUS 80 + +/* + * Set the indirect branch tracking configuration. PR_INDIR_BR_LP_ENABLE will + * enable the CPU feature for the user thread, tracking all indirect branches + * and ensuring they land on an arch-defined landing pad instruction. + * x86 - If enabled, an indirect branch must land on an ENDBRANCH instruction. + * arm64 - If enabled, an indirect branch must land on a BTI instruction. + * riscv - If enabled, an indirect branch must land on an lpad instruction. + * PR_INDIR_BR_LP_DISABLE will disable the feature for the user thread, and + * indirect branches will no longer be tracked by the CPU to land on an + * arch-defined landing pad instruction. + */ +#define PR_SET_INDIR_BR_LP_STATUS 81 +# define PR_INDIR_BR_LP_ENABLE (1UL << 0) + +/* + * Prevent further changes to the specified indirect branch tracking + * configuration. All bits may be locked via this call, including + * undefined bits. + */ +#define PR_LOCK_INDIR_BR_LP_STATUS 82 + #endif /* _LINUX_PRCTL_H */
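A short userspace sketch of the new rseq slice-extension prctl, assuming the argument layout implied by the GET/SET sub-commands above (the fallback defines exist only to build against older headers):

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_RSEQ_SLICE_EXTENSION
#define PR_RSEQ_SLICE_EXTENSION		79
#define PR_RSEQ_SLICE_EXTENSION_GET	1
#define PR_RSEQ_SLICE_EXTENSION_SET	2
#define PR_RSEQ_SLICE_EXT_ENABLE	0x01
#endif

int main(void)
{
	/* Opt the calling thread in to rseq time slice extensions. */
	if (prctl(PR_RSEQ_SLICE_EXTENSION, PR_RSEQ_SLICE_EXTENSION_SET,
		  PR_RSEQ_SLICE_EXT_ENABLE, 0, 0))
		perror("PR_RSEQ_SLICE_EXTENSION_SET");

	/* Read the setting back; assumed to return the enable bits. */
	printf("slice extension bits: %d\n",
	       prctl(PR_RSEQ_SLICE_EXTENSION, PR_RSEQ_SLICE_EXTENSION_GET,
		     0, 0, 0));
	return 0;
}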
diff --git a/tools/perf/util/annotate-arch/annotate-loongarch.c b/tools/perf/util/annotate-arch/annotate-loongarch.c index 3aeab45..950f34e 100644 --- a/tools/perf/util/annotate-arch/annotate-loongarch.c +++ b/tools/perf/util/annotate-arch/annotate-loongarch.c
@@ -93,7 +93,7 @@ static int loongarch_jump__parse(const struct arch *arch, struct ins_operands *o start = map__unmap_ip(map, sym->start); end = map__unmap_ip(map, sym->end); - ops->target.outside = target.addr < start || target.addr > end; + ops->target.outside = target.addr < start || target.addr >= end; if (maps__find_ams(thread__maps(ms->thread), &target) == 0 && map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 2e35229..63f0ee9 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c
@@ -44,6 +44,7 @@ #include "strbuf.h" #include <regex.h> #include <linux/bitops.h> +#include <linux/err.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/zalloc.h> @@ -137,8 +138,10 @@ static int annotated_source__alloc_histograms(struct annotated_source *src, return -1; src->samples = hashmap__new(sym_hist_hash, sym_hist_equal, NULL); - if (src->samples == NULL) + if (IS_ERR(src->samples)) { zfree(&src->histograms); + src->samples = NULL; + } return src->histograms ? 0 : -1; }
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 3050fe2..212f17a 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -549,7 +549,7 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, /* * Process the PE_CONTEXT packets if we have a valid contextID or VMID. * If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2 - * as VMID, Bit ETM_OPT_CTXTID2 is set in this case. + * as VMID, Format attribute 'contextid2' is set in this case. */ switch (cs_etm__get_pid_fmt(etmq)) { case CS_ETM_PIDFMT_CTXTID:
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 95f439c..8a639d2 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c
@@ -194,7 +194,7 @@ int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu) * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced. * CS_ETM_PIDFMT_NONE: No context IDs * - * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 + * It's possible that the two format attributes 'contextid1' and 'contextid2' * are enabled at the same time when the session runs on an EL2 kernel. * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be * recorded in the trace data, the tool will selectively use @@ -210,15 +210,15 @@ static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata) if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { val = metadata[CS_ETM_ETMCR]; /* CONTEXTIDR is traced */ - if (val & BIT(ETM_OPT_CTXTID)) + if (val & ETMCR_CTXTID) return CS_ETM_PIDFMT_CTXTID; } else { val = metadata[CS_ETMV4_TRCCONFIGR]; /* CONTEXTIDR_EL2 is traced */ - if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) + if (val & (TRCCONFIGR_VMID | TRCCONFIGR_VMIDOPT)) return CS_ETM_PIDFMT_CTXTID2; /* CONTEXTIDR_EL1 is traced */ - else if (val & BIT(ETM4_CFG_BIT_CTXTID)) + else if (val & TRCCONFIGR_CID) return CS_ETM_PIDFMT_CTXTID; } @@ -2914,29 +2914,21 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, return 0; } -static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm) +static void cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm) { - struct evsel *evsel; - struct evlist *evlist = etm->session->evlist; + /* Take first ETM as all options will be the same for all ETMs */ + u64 *metadata = etm->metadata[0]; /* Override timeless mode with user input from --itrace=Z */ if (etm->synth_opts.timeless_decoding) { etm->timeless_decoding = true; - return 0; + return; } - /* - * Find the cs_etm evsel and look at what its timestamp setting was - */ - evlist__for_each_entry(evlist, evsel) - if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) { - etm->timeless_decoding = - !(evsel->core.attr.config & BIT(ETM_OPT_TS)); - return 0; - } - - pr_err("CS ETM: Couldn't find ETM evsel\n"); - return -EINVAL; + if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) + etm->timeless_decoding = !(metadata[CS_ETM_ETMCR] & ETMCR_TIMESTAMP_EN); + else + etm->timeless_decoding = !(metadata[CS_ETMV4_TRCCONFIGR] & TRCCONFIGR_TS); } /* @@ -3499,9 +3491,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace; session->auxtrace = &etm->auxtrace; - err = cs_etm__setup_timeless_decoding(etm); - if (err) - return err; + cs_etm__setup_timeless_decoding(etm); etm->tc.time_shift = tc->time_shift; etm->tc.time_mult = tc->time_mult;
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index a8caeea..aa9bb4a 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h
@@ -230,6 +230,21 @@ struct cs_etm_packet_queue { /* CoreSight trace ID is currently the bottom 7 bits of the value */ #define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0) +/* ETMv4 CONFIGR register bits */ +#define TRCCONFIGR_BB BIT(3) +#define TRCCONFIGR_CCI BIT(4) +#define TRCCONFIGR_CID BIT(6) +#define TRCCONFIGR_VMID BIT(7) +#define TRCCONFIGR_TS BIT(11) +#define TRCCONFIGR_RS BIT(12) +#define TRCCONFIGR_VMIDOPT BIT(15) + +/* ETMv3 ETMCR register bits */ +#define ETMCR_CYC_ACC BIT(12) +#define ETMCR_CTXTID BIT(14) +#define ETMCR_TIMESTAMP_EN BIT(28) +#define ETMCR_RETURN_STACK BIT(29) + int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); void cs_etm_get_default_config(const struct perf_pmu *pmu, struct perf_event_attr *attr);
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index ddcc488f..9e0420e 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c
@@ -384,7 +384,7 @@ static int jump__parse(const struct arch *arch, struct ins_operands *ops, struct start = map__unmap_ip(map, sym->start); end = map__unmap_ip(map, sym->end); - ops->target.outside = target.addr < start || target.addr > end; + ops->target.outside = target.addr < start || target.addr >= end; /* * FIXME: things like this in _cpp_lex_token (gcc's cc1 program):
diff --git a/tools/perf/util/kvm-stat-arch/kvm-stat-x86.c b/tools/perf/util/kvm-stat-arch/kvm-stat-x86.c index 43275d2..0f626db 100644 --- a/tools/perf/util/kvm-stat-arch/kvm-stat-x86.c +++ b/tools/perf/util/kvm-stat-arch/kvm-stat-x86.c
@@ -4,9 +4,9 @@ #include "../kvm-stat.h" #include "../evsel.h" #include "../env.h" -#include "../../arch/x86/include/uapi/asm/svm.h" -#include "../../arch/x86/include/uapi/asm/vmx.h" -#include "../../arch/x86/include/uapi/asm/kvm.h" +#include "../../../arch/x86/include/uapi/asm/svm.h" +#include "../../../arch/x86/include/uapi/asm/vmx.h" +#include "../../../arch/x86/include/uapi/asm/kvm.h" #include <subcmd/parse-options.h> define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 46bf4df..7e39d46 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c
@@ -1605,9 +1605,9 @@ bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_gr .metric_or_groups = metric_or_groups, }; - return pmu_metrics_table__for_each_metric(table, - metricgroup__has_metric_or_groups_callback, - &data) + return metricgroup__for_each_metric(table, + metricgroup__has_metric_or_groups_callback, + &data) ? true : false; }
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b9efb29..7b46296 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c
@@ -1117,7 +1117,7 @@ static int config_attr(struct perf_event_attr *attr, static struct evsel_config_term *add_config_term(enum evsel_term_type type, struct list_head *head_terms, - bool weak) + bool weak, char *str, u64 val) { struct evsel_config_term *t; @@ -1128,8 +1128,62 @@ static struct evsel_config_term *add_config_term(enum evsel_term_type type, INIT_LIST_HEAD(&t->list); t->type = type; t->weak = weak; - list_add_tail(&t->list, head_terms); + switch (type) { + case EVSEL__CONFIG_TERM_PERIOD: + case EVSEL__CONFIG_TERM_FREQ: + case EVSEL__CONFIG_TERM_STACK_USER: + case EVSEL__CONFIG_TERM_USR_CHG_CONFIG: + case EVSEL__CONFIG_TERM_USR_CHG_CONFIG1: + case EVSEL__CONFIG_TERM_USR_CHG_CONFIG2: + case EVSEL__CONFIG_TERM_USR_CHG_CONFIG3: + case EVSEL__CONFIG_TERM_USR_CHG_CONFIG4: + t->val.val = val; + break; + case EVSEL__CONFIG_TERM_TIME: + t->val.time = val; + break; + case EVSEL__CONFIG_TERM_INHERIT: + t->val.inherit = val; + break; + case EVSEL__CONFIG_TERM_OVERWRITE: + t->val.overwrite = val; + break; + case EVSEL__CONFIG_TERM_MAX_STACK: + t->val.max_stack = val; + break; + case EVSEL__CONFIG_TERM_MAX_EVENTS: + t->val.max_events = val; + break; + case EVSEL__CONFIG_TERM_PERCORE: + t->val.percore = val; + break; + case EVSEL__CONFIG_TERM_AUX_OUTPUT: + t->val.aux_output = val; + break; + case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE: + t->val.aux_sample_size = val; + break; + case EVSEL__CONFIG_TERM_CALLGRAPH: + case EVSEL__CONFIG_TERM_BRANCH: + case EVSEL__CONFIG_TERM_DRV_CFG: + case EVSEL__CONFIG_TERM_RATIO_TO_PREV: + case EVSEL__CONFIG_TERM_AUX_ACTION: + if (str) { + t->val.str = strdup(str); + if (!t->val.str) { + zfree(&t); + return NULL; + } + t->free_str = true; + } + break; + default: + t->val.val = val; + break; + } + + list_add_tail(&t->list, head_terms); return t; } @@ -1142,7 +1196,7 @@ static int get_config_terms(const struct parse_events_terms *head_config, struct evsel_config_term *new_term; enum evsel_term_type new_type; bool str_type = false; - u64 val; + u64 val = 0; switch (term->type_term) { case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: @@ -1234,20 +1288,15 @@ static int get_config_terms(const struct parse_events_terms *head_config, continue; } - new_term = add_config_term(new_type, head_terms, term->weak); + /* + * Note: Members evsel_config_term::val and + * parse_events_term::val are unions and endianness needs + * to be taken into account when changing such union members. + */ + new_term = add_config_term(new_type, head_terms, term->weak, + str_type ? term->val.str : NULL, val); if (!new_term) return -ENOMEM; - - if (str_type) { - new_term->val.str = strdup(term->val.str); - if (!new_term->val.str) { - zfree(&new_term); - return -ENOMEM; - } - new_term->free_str = true; - } else { - new_term->val.val = val; - } } return 0; } @@ -1277,10 +1326,9 @@ static int add_cfg_chg(const struct perf_pmu *pmu, if (bits) { struct evsel_config_term *new_term; - new_term = add_config_term(new_term_type, head_terms, false); + new_term = add_config_term(new_term_type, head_terms, false, NULL, bits); if (!new_term) return -ENOMEM; - new_term->val.cfg_chg = bits; } return 0;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index ef79433..ddf1cbd 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c
@@ -703,6 +703,11 @@ static int perf_event__synthesize_modules_maps_cb(struct map *map, void *data) memcpy(event->mmap2.filename, dso__long_name(dso), dso__long_name_len(dso) + 1); + /* Clear stale build ID from previous module iteration */ + event->mmap2.header.misc &= ~PERF_RECORD_MISC_MMAP_BUILD_ID; + memset(event->mmap2.build_id, 0, sizeof(event->mmap2.build_id)); + event->mmap2.build_id_size = 0; + perf_record_mmap2__read_build_id(&event->mmap2, args->machine, false); } else { size = PERF_ALIGN(dso__long_name_len(dso) + 1, sizeof(u64));
diff --git a/tools/power/cpupower/cpupower-service.conf b/tools/power/cpupower/cpupower-service.conf index 02eabe8e..abbb469 100644 --- a/tools/power/cpupower/cpupower-service.conf +++ b/tools/power/cpupower/cpupower-service.conf
@@ -30,3 +30,8 @@ # its policy for the relative importance of performance versus energy savings to # the processor. See man CPUPOWER-SET(1) for additional details #PERF_BIAS= + +# Set the Energy Performance Preference +# Available options can be read from +# /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences +#EPP=
diff --git a/tools/power/cpupower/cpupower.sh b/tools/power/cpupower/cpupower.sh index a37dd4c..6283e8b 100644 --- a/tools/power/cpupower/cpupower.sh +++ b/tools/power/cpupower/cpupower.sh
@@ -23,4 +23,10 @@ cpupower set -b "$PERF_BIAS" > /dev/null || ESTATUS=1 fi +# apply Energy Performance Preference +if test -n "$EPP" +then + cpupower set -e "$EPP" > /dev/null || ESTATUS=1 +fi + exit $ESTATUS
diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index c2117e5..550a942 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c
@@ -124,7 +124,11 @@ int cmd_set(int argc, char **argv) } if (params.turbo_boost) { - ret = cpupower_set_turbo_boost(turbo_boost); + if (cpupower_cpu_info.vendor == X86_VENDOR_INTEL) + ret = cpupower_set_intel_turbo_boost(turbo_boost); + else + ret = cpupower_set_generic_turbo_boost(turbo_boost); + if (ret) fprintf(stderr, "Error setting turbo-boost\n"); }
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 82ea62b..a3ad80b 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -104,7 +104,7 @@ extern struct cpupower_cpu_info cpupower_cpu_info; /* cpuid and cpuinfo helpers **************************/ int cpufreq_has_generic_boost_support(bool *active); -int cpupower_set_turbo_boost(int turbo_boost); +int cpupower_set_generic_turbo_boost(int turbo_boost); /* X86 ONLY ****************************************/ #if defined(__i386__) || defined(__x86_64__) @@ -143,6 +143,7 @@ extern int decode_pstates(unsigned int cpu, int boost_states, int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, int *active, int *states); +int cpupower_set_intel_turbo_boost(int turbo_boost); /* AMD P-State stuff **************************/ bool cpupower_amd_pstate_enabled(void); @@ -189,6 +190,8 @@ static inline int cpupower_set_amd_pstate_mode(char *mode) static inline int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, int *active, int *states) { return -1; } +static inline int cpupower_set_intel_turbo_boost(int turbo_boost) +{ return -1; } static inline bool cpupower_amd_pstate_enabled(void) { return false; }
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index 166dc1e..eebfc79 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -19,6 +19,9 @@ int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, int *active, { int ret; unsigned long long val; + char linebuf[MAX_LINE_LEN]; + char path[SYSFS_PATH_MAX]; + char *endp; *support = *active = *states = 0; @@ -42,8 +45,42 @@ int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, int *active, } } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) { amd_pstate_boost_init(cpu, support, active); - } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_INTEL_IDA) + } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_INTEL_IDA) { *support = *active = 1; + + snprintf(path, sizeof(path), PATH_TO_CPU "intel_pstate/no_turbo"); + + if (!is_valid_path(path)) + return 0; + + if (cpupower_read_sysfs(path, linebuf, MAX_LINE_LEN) == 0) + return -1; + + val = strtol(linebuf, &endp, 0); + if (endp == linebuf || errno == ERANGE) + return -1; + + *active = !val; + } + return 0; +} + +int cpupower_set_intel_turbo_boost(int turbo_boost) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[2] = {}; + + snprintf(path, sizeof(path), PATH_TO_CPU "intel_pstate/no_turbo"); + + /* Fallback to generic solution when intel_pstate driver not running */ + if (!is_valid_path(path)) + return cpupower_set_generic_turbo_boost(turbo_boost); + + snprintf(linebuf, sizeof(linebuf), "%d", !turbo_boost); + + if (cpupower_write_sysfs(path, linebuf, 2) <= 0) + return -1; + return 0; } @@ -274,7 +311,7 @@ void print_speed(unsigned long speed, int no_rounding) } } -int cpupower_set_turbo_boost(int turbo_boost) +int cpupower_set_generic_turbo_boost(int turbo_boost) { char path[SYSFS_PATH_MAX]; char linebuf[2] = {};
diff --git a/tools/power/cpupower/utils/powercap-info.c b/tools/power/cpupower/utils/powercap-info.c index 3ea4486..e5303348 100644 --- a/tools/power/cpupower/utils/powercap-info.c +++ b/tools/power/cpupower/utils/powercap-info.c
@@ -38,11 +38,11 @@ static int powercap_print_one_zone(struct powercap_zone *zone) printf(" (%s)\n", mode ? "enabled" : "disabled"); if (zone->has_power_uw) - printf(_("%sPower can be monitored in micro Jules\n"), + printf(_("%sPower can be monitored in micro Watts\n"), pr_prefix); if (zone->has_energy_uj) - printf(_("%sPower can be monitored in micro Watts\n"), + printf(_("%sEnergy can be monitored in micro Joules\n"), pr_prefix); printf("\n");
diff --git a/tools/sched_ext/Kconfig b/tools/sched_ext/Kconfig new file mode 100644 index 0000000..275bd97 --- /dev/null +++ b/tools/sched_ext/Kconfig
@@ -0,0 +1,61 @@ +# sched-ext mandatory options +# +CONFIG_BPF=y +CONFIG_BPF_SYSCALL=y +CONFIG_BPF_JIT=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_JIT_DEFAULT_ON=y +CONFIG_SCHED_CLASS_EXT=y + +# Required by some rust schedulers (e.g. scx_p2dq) +# +CONFIG_KALLSYMS_ALL=y + +# Required on arm64 +# +# CONFIG_DEBUG_INFO_REDUCED is not set + +# LAVD tracks futex to give an additional time slice for futex holder +# (i.e., avoiding lock holder preemption) for better system-wide progress. +# LAVD first tries to use ftrace to trace futex function calls. +# If that is not available, it tries to use a tracepoint. +CONFIG_FUNCTION_TRACER=y + +# Enable scheduling debugging +# +CONFIG_SCHED_DEBUG=y + +# Enable extra scheduling features (for a better code coverage while testing +# the schedulers) +# +CONFIG_SCHED_AUTOGROUP=y +CONFIG_SCHED_CORE=y +CONFIG_SCHED_MC=y + +# Enable fully preemptible kernel for a better test coverage of the schedulers +# +# CONFIG_PREEMPT_NONE is not set +# CONFIG_PREEMPT_VOLUNTARY is not set +CONFIG_PREEMPT=y +CONFIG_PREEMPT_DYNAMIC=y + +# Additional debugging information (useful to catch potential locking issues) +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_PROVE_LOCKING=y + +# Bpftrace headers (for additional debug info) +CONFIG_BPF_EVENTS=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_DYNAMIC_FTRACE=y +CONFIG_KPROBES=y +CONFIG_KPROBE_EVENTS=y +CONFIG_UPROBES=y +CONFIG_UPROBE_EVENTS=y +CONFIG_DEBUG_FS=y + +# Enable access to kernel configuration and headers at runtime +CONFIG_IKHEADERS=y +CONFIG_IKCONFIG_PROC=y +CONFIG_IKCONFIG=y
diff --git a/tools/sched_ext/Makefile b/tools/sched_ext/Makefile index 47ad744..21554f0 100644 --- a/tools/sched_ext/Makefile +++ b/tools/sched_ext/Makefile
@@ -122,6 +122,8 @@ -I../../include \ $(call get_sys_includes,$(CLANG)) \ -Wall -Wno-compare-distinct-pointer-types \ + -Wno-microsoft-anon-tag \ + -fms-extensions \ -O2 -mcpu=v3 # sort removes libbpf duplicates when not cross-building
diff --git a/tools/sched_ext/README.md b/tools/sched_ext/README.md index 56a9d15..6e282bc 100644 --- a/tools/sched_ext/README.md +++ b/tools/sched_ext/README.md
@@ -58,14 +58,8 @@ CONFIG_BPF_SYSCALL=y CONFIG_BPF_JIT=y CONFIG_DEBUG_INFO_BTF=y -``` - -It's also recommended that you also include the following Kconfig options: - -``` CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_JIT_DEFAULT_ON=y -CONFIG_PAHOLE_HAS_BTF_TAG=y ``` There is a `Kconfig` file in this directory whose contents you can append to
diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h index 8b4897f..edccc99 100644 --- a/tools/sched_ext/include/scx/compat.h +++ b/tools/sched_ext/include/scx/compat.h
@@ -125,6 +125,7 @@ static inline long scx_hotplug_seq(void) { int fd; char buf[32]; + char *endptr; ssize_t len; long val; @@ -137,8 +138,10 @@ static inline long scx_hotplug_seq(void) buf[len] = 0; close(fd); - val = strtoul(buf, NULL, 10); - SCX_BUG_ON(val < 0, "invalid num hotplug events: %lu", val); + errno = 0; + val = strtoul(buf, &endptr, 10); + SCX_BUG_ON(errno == ERANGE || endptr == buf || + (*endptr != '\n' && *endptr != '\0'), "invalid num hotplug events: %ld", val); return val; }
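The validation idiom adopted above, pulled out as a standalone helper for reference (a sketch, not part of the patch): it rejects overflow, an empty buffer, and trailing bytes other than a newline or NUL, none of which a bare strtoul(buf, NULL, 10) call reports.

#include <errno.h>
#include <stdlib.h>

static int parse_ulong(const char *buf, unsigned long *out)
{
	char *end;

	errno = 0;			/* strtoul() only sets errno on error */
	*out = strtoul(buf, &end, 10);
	if (errno == ERANGE || end == buf ||
	    (*end != '\n' && *end != '\0'))
		return -1;		/* overflow, empty, or trailing junk */
	return 0;
}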
diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c index 2a805f1..710fa03 100644 --- a/tools/sched_ext/scx_central.c +++ b/tools/sched_ext/scx_central.c
@@ -66,7 +66,7 @@ int main(int argc, char **argv) assert(skel->rodata->nr_cpu_ids > 0); assert(skel->rodata->nr_cpu_ids <= INT32_MAX); - while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) { + while ((opt = getopt(argc, argv, "s:c:vh")) != -1) { switch (opt) { case 's': skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
diff --git a/tools/sched_ext/scx_sdt.c b/tools/sched_ext/scx_sdt.c index d8ca9aa..a36405d 100644 --- a/tools/sched_ext/scx_sdt.c +++ b/tools/sched_ext/scx_sdt.c
@@ -54,7 +54,7 @@ int main(int argc, char **argv) optind = 1; skel = SCX_OPS_OPEN(sdt_ops, scx_sdt); - while ((opt = getopt(argc, argv, "fvh")) != -1) { + while ((opt = getopt(argc, argv, "vh")) != -1) { switch (opt) { case 'v': verbose = true;
diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl index e74868b..7a42b32b 100644 --- a/tools/scripts/syscall.tbl +++ b/tools/scripts/syscall.tbl
@@ -411,3 +411,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common listns sys_listns +471 common rseq_slice_yield sys_rseq_slice_yield
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 260d8d9..2998e1b 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py
@@ -346,8 +346,10 @@ return self.validate_config(build_dir) def run_kernel(self, args: Optional[List[str]]=None, build_dir: str='', filter_glob: str='', filter: str='', filter_action: Optional[str]=None, timeout: Optional[int]=None) -> Iterator[str]: - if not args: - args = [] + # Copy to avoid mutating the caller-supplied list. exec_tests() reuses + # the same args across repeated run_kernel() calls (e.g. --run_isolated), + # so appending to the original would accumulate stale flags on each call. + args = list(args) if args else [] if filter_glob: args.append('kunit.filter_glob=' + filter_glob) if filter:
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index b674081..f638388 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py
@@ -503,6 +503,32 @@ with open(kunit_kernel.get_outfile_path(build_dir), 'rt') as outfile: self.assertEqual(outfile.read(), 'hi\nbye\n', msg='Missing some output') + def test_run_kernel_args_not_mutated(self): + """Verify run_kernel() copies args so callers can reuse them.""" + start_calls = [] + + def fake_start(start_args, unused_build_dir): + start_calls.append(list(start_args)) + return subprocess.Popen(['printf', 'KTAP version 1\n'], + text=True, stdout=subprocess.PIPE) + + with tempfile.TemporaryDirectory('') as build_dir: + tree = kunit_kernel.LinuxSourceTree(build_dir, + kunitconfig_paths=[os.devnull]) + with mock.patch.object(tree._ops, 'start', side_effect=fake_start), \ + mock.patch.object(kunit_kernel.subprocess, 'call'): + kernel_args = ['mem=1G'] + for _ in tree.run_kernel(args=kernel_args, build_dir=build_dir, + filter_glob='suite.test1'): + pass + for _ in tree.run_kernel(args=kernel_args, build_dir=build_dir, + filter_glob='suite.test2'): + pass + self.assertEqual(kernel_args, ['mem=1G'], + 'run_kernel() should not modify caller args') + self.assertIn('kunit.filter_glob=suite.test1', start_calls[0]) + self.assertIn('kunit.filter_glob=suite.test2', start_calls[1]) + def test_build_reconfig_no_config(self): with tempfile.TemporaryDirectory('') as build_dir: with open(kunit_kernel.get_kunitconfig_path(build_dir), 'w') as f:
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 9d2df1f..c2661a3 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -475,8 +475,8 @@ static void sve2_sigill(void) static void sve2p1_sigill(void) { - /* BFADD Z0.H, Z0.H, Z0.H */ - asm volatile(".inst 0x65000000" : : : "z0"); + /* LD1Q {Z0.Q}, P0/Z, [Z0.D, X0] */ + asm volatile(".inst 0xC400A000" : : : "z0"); } static void sve2p2_sigill(void)
diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_prot_none_fault.c b/tools/testing/selftests/arm64/signal/testcases/gcs_prot_none_fault.c new file mode 100644 index 0000000..2259f45 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/gcs_prot_none_fault.c
@@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2026 ARM Limited + */ + +#include <errno.h> +#include <signal.h> +#include <unistd.h> + +#include <sys/mman.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +static uint64_t *gcs_page; +static bool post_mprotect; + +#ifndef __NR_map_shadow_stack +#define __NR_map_shadow_stack 453 +#endif + +static bool alloc_gcs(struct tdescr *td) +{ + long page_size = sysconf(_SC_PAGE_SIZE); + + gcs_page = (void *)syscall(__NR_map_shadow_stack, 0, + page_size, 0); + if (gcs_page == MAP_FAILED) { + fprintf(stderr, "Failed to map %ld byte GCS: %d\n", + page_size, errno); + return false; + } + + return true; +} + +static int gcs_prot_none_fault_trigger(struct tdescr *td) +{ + /* Verify that the page is readable (ie, not completely unmapped) */ + fprintf(stderr, "Read value 0x%lx\n", gcs_page[0]); + + if (mprotect(gcs_page, sysconf(_SC_PAGE_SIZE), PROT_NONE) != 0) { + fprintf(stderr, "mprotect(PROT_NONE) failed: %d\n", errno); + return 0; + } + post_mprotect = true; + + /* This should trigger a fault if PROT_NONE is honoured for the GCS page */ + fprintf(stderr, "Read value after mprotect(PROT_NONE) 0x%lx\n", gcs_page[0]); + return 0; +} + +static int gcs_prot_none_fault_signal(struct tdescr *td, siginfo_t *si, + ucontext_t *uc) +{ + ASSERT_GOOD_CONTEXT(uc); + + /* A fault before mprotect(PROT_NONE) is unexpected. */ + if (!post_mprotect) + return 0; + + return 1; +} + +struct tdescr tde = { + .name = "GCS PROT_NONE fault", + .descr = "Read from GCS after mprotect(PROT_NONE) segfaults", + .feats_required = FEAT_GCS, + .timeout = 3, + .sig_ok = SIGSEGV, + .sanity_disabled = true, + .init = alloc_gcs, + .trigger = gcs_prot_none_fault_trigger, + .run = gcs_prot_none_fault_signal, +};
diff --git a/tools/testing/selftests/bpf/DENYLIST.asan b/tools/testing/selftests/bpf/DENYLIST.asan new file mode 100644 index 0000000..d7fe372 --- /dev/null +++ b/tools/testing/selftests/bpf/DENYLIST.asan
@@ -0,0 +1,3 @@ +*arena* +task_local_data +uprobe_multi_test
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6776158..6548596 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile
@@ -27,7 +27,11 @@ endif BPF_GCC ?= $(shell command -v bpf-gcc;) +ifdef ASAN +SAN_CFLAGS ?= -fsanitize=address -fno-omit-frame-pointer +else SAN_CFLAGS ?= +endif SAN_LDFLAGS ?= $(SAN_CFLAGS) RELEASE ?= OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0) @@ -326,8 +330,8 @@ $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" \ - EXTRA_CFLAGS='-g $(OPT_FLAGS) $(EXTRA_CFLAGS)' \ - EXTRA_LDFLAGS='$(EXTRA_LDFLAGS)' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \ + EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' \ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \ @@ -338,8 +342,8 @@ $(BPFOBJ) | $(BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) \ - EXTRA_CFLAGS='-g $(OPT_FLAGS) $(EXTRA_CFLAGS)' \ - EXTRA_LDFLAGS='$(EXTRA_LDFLAGS)' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \ + EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' \ OUTPUT=$(BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(BUILD_DIR)/libbpf/ \ LIBBPF_DESTDIR=$(SCRATCH_DIR)/ \ @@ -404,6 +408,8 @@ $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \ CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)" \ LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \ + EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' \ + HOSTPKG_CONFIG='$(PKG_CONFIG)' \ OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ) # Get Clang's default includes on this system, as opposed to those seen by
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index aeec9ed..f74b313 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -230,8 +230,8 @@ static void trigger_fentry_setup(void) static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); - char **syms = NULL; - size_t cnt = 0; + struct bpf_link *link = NULL; + struct ksyms *ksyms = NULL; /* Some recursive functions will be skipped in * bpf_get_ksyms -> skip_entry, as they can introduce sufficient @@ -241,16 +241,18 @@ static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe) * So, don't run the kprobe-multi-all and kretprobe-multi-all on * a debug kernel. */ - if (bpf_get_ksyms(&syms, &cnt, true)) { + if (bpf_get_ksyms(&ksyms, true)) { fprintf(stderr, "failed to get ksyms\n"); exit(1); } - opts.syms = (const char **) syms; - opts.cnt = cnt; + opts.syms = (const char **)ksyms->filtered_syms; + opts.cnt = ksyms->filtered_cnt; opts.retprobe = kretprobe; /* attach empty to all the kernel functions except bpf_get_numa_node_id. */ - if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) { + link = bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts); + free_kallsyms_local(ksyms); + if (!link) { fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n"); exit(1); }
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 4bc2d25..6cb5650 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -8,6 +8,7 @@ #include <errno.h> #include <syscall.h> #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ +#include <linux/args.h> static inline unsigned int bpf_num_possible_cpus(void) { @@ -21,25 +22,43 @@ static inline unsigned int bpf_num_possible_cpus(void) return possible_cpus; } -/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst - * is zero-terminated string no matter what (unless sz == 0, in which case - * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs - * in what is returned. Given this is internal helper, it's trivial to extend - * this, when necessary. Use this instead of strncpy inside libbpf source code. +/* + * Simplified strscpy() implementation. The kernel one is in lib/string.c */ -static inline void bpf_strlcpy(char *dst, const char *src, size_t sz) +static inline ssize_t sized_strscpy(char *dest, const char *src, size_t count) { - size_t i; + long res = 0; - if (sz == 0) - return; + if (count == 0) + return -E2BIG; - sz--; - for (i = 0; i < sz && src[i]; i++) - dst[i] = src[i]; - dst[i] = '\0'; + while (count > 1) { + char c; + + c = src[res]; + dest[res] = c; + if (!c) + return res; + res++; + count--; + } + + /* Force NUL-termination. */ + dest[res] = '\0'; + + /* Return E2BIG if the source didn't stop */ + return src[res] ? -E2BIG : res; } +#define __strscpy0(dst, src, ...) \ + sized_strscpy(dst, src, sizeof(dst)) +#define __strscpy1(dst, src, size) \ + sized_strscpy(dst, src, size) + +#undef strscpy /* Redefine the placeholder from tools/include/linux/string.h */ +#define strscpy(dst, src, ...) \ + CONCATENATE(__strscpy, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__) + #define __bpf_percpu_val_align __attribute__((__aligned__(8))) #define BPF_DECLARE_PERCPU(type, name) \
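A usage sketch for the two macro arities defined above (illustrative only, assuming bpf_util.h is included): with two arguments the destination must be a true array so that sizeof(dst) yields its capacity; pointer destinations need the explicit three-argument form.

#include <stdio.h>
#include "bpf_util.h"	/* for the strscpy()/sized_strscpy() above */

int main(void)
{
	char name[8];
	char *p = name;
	ssize_t n;

	n = strscpy(name, "veth0");		/* fits: returns 5 */
	printf("%zd '%s'\n", n, name);

	n = strscpy(name, "much-too-long");	/* truncated to 7 chars + NUL,
						 * returns -E2BIG */
	printf("%zd '%s'\n", n, name);

	n = strscpy(p, "eth0", sizeof(name));	/* pointer dst: pass the size */
	printf("%zd '%s'\n", n, name);
	return 0;
}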
diff --git a/tools/testing/selftests/bpf/bpftool_helpers.c b/tools/testing/selftests/bpf/bpftool_helpers.c index a582494..929fc25 100644 --- a/tools/testing/selftests/bpf/bpftool_helpers.c +++ b/tools/testing/selftests/bpf/bpftool_helpers.c
@@ -1,24 +1,37 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "bpftool_helpers.h" #include <unistd.h> #include <string.h> #include <stdbool.h> +#include "bpf_util.h" +#include "bpftool_helpers.h" + #define BPFTOOL_PATH_MAX_LEN 64 #define BPFTOOL_FULL_CMD_MAX_LEN 512 #define BPFTOOL_DEFAULT_PATH "tools/sbin/bpftool" -static int detect_bpftool_path(char *buffer) +static int detect_bpftool_path(char *buffer, size_t size) { char tmp[BPFTOOL_PATH_MAX_LEN]; + const char *env_path; + + /* First, check if BPFTOOL environment variable is set */ + env_path = getenv("BPFTOOL"); + if (env_path && access(env_path, X_OK) == 0) { + strscpy(buffer, env_path, size); + return 0; + } else if (env_path) { + fprintf(stderr, "bpftool '%s' doesn't exist or is not executable\n", env_path); + return 1; + } /* Check default bpftool location (will work if we are running the * default flavor of test_progs) */ snprintf(tmp, BPFTOOL_PATH_MAX_LEN, "./%s", BPFTOOL_DEFAULT_PATH); if (access(tmp, X_OK) == 0) { - strncpy(buffer, tmp, BPFTOOL_PATH_MAX_LEN); + strscpy(buffer, tmp, size); return 0; } @@ -27,11 +40,11 @@ static int detect_bpftool_path(char *buffer) */ snprintf(tmp, BPFTOOL_PATH_MAX_LEN, "../%s", BPFTOOL_DEFAULT_PATH); if (access(tmp, X_OK) == 0) { - strncpy(buffer, tmp, BPFTOOL_PATH_MAX_LEN); + strscpy(buffer, tmp, size); return 0; } - /* Failed to find bpftool binary */ + fprintf(stderr, "Failed to detect bpftool path, use BPFTOOL env var to override\n"); return 1; } @@ -44,7 +57,7 @@ static int run_command(char *args, char *output_buf, size_t output_max_len) int ret; /* Detect and cache bpftool binary location */ - if (bpftool_path[0] == 0 && detect_bpftool_path(bpftool_path)) + if (bpftool_path[0] == 0 && detect_bpftool_path(bpftool_path, sizeof(bpftool_path))) return 1; ret = snprintf(command, BPFTOOL_FULL_CMD_MAX_LEN, "%s %s%s",
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 20cede4..45cd0b4 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -86,7 +86,7 @@ static int __enable_controllers(const char *cgroup_path, const char *controllers enable[len] = 0; close(fd); } else { - bpf_strlcpy(enable, controllers, sizeof(enable)); + strscpy(enable, controllers); } snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
diff --git a/tools/testing/selftests/bpf/jit_disasm_helpers.c b/tools/testing/selftests/bpf/jit_disasm_helpers.c index febd6b1..364c557 100644 --- a/tools/testing/selftests/bpf/jit_disasm_helpers.c +++ b/tools/testing/selftests/bpf/jit_disasm_helpers.c
@@ -122,15 +122,15 @@ static int disasm_one_func(FILE *text_out, uint8_t *image, __u32 len) pc += cnt; } qsort(labels.pcs, labels.cnt, sizeof(*labels.pcs), cmp_u32); - for (i = 0; i < labels.cnt; ++i) - /* gcc is unable to infer upper bound for labels.cnt and assumes - * it to be U32_MAX. U32_MAX takes 10 decimal digits. - * snprintf below prints into labels.names[*], - * which has space only for two digits and a letter. - * To avoid truncation warning use (i % MAX_LOCAL_LABELS), - * which informs gcc about printed value upper bound. - */ - snprintf(labels.names[i], sizeof(labels.names[i]), "L%d", i % MAX_LOCAL_LABELS); + /* gcc is unable to infer upper bound for labels.cnt and + * assumes it to be U32_MAX. U32_MAX takes 10 decimal digits. + * snprintf below prints into labels.names[*], which has space + * only for two digits and a letter. To avoid truncation + * warning use (i < MAX_LOCAL_LABELS), which informs gcc about + * printed value upper bound. + */ + for (i = 0; i < labels.cnt && i < MAX_LOCAL_LABELS; ++i) + snprintf(labels.names[i], sizeof(labels.names[i]), "L%d", i); /* now print with labels */ labels.print_phase = true;
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 0a6a556..b82f572 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -432,7 +432,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port, memset(addr, 0, sizeof(*sun)); sun->sun_family = family; sun->sun_path[0] = 0; - strcpy(sun->sun_path + 1, addr_str); + strscpy(sun->sun_path + 1, addr_str, sizeof(sun->sun_path) - 1); if (len) *len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(addr_str); return 0; @@ -581,8 +581,7 @@ int open_tuntap(const char *dev_name, bool need_mac) return -1; ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN); - strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1); - ifr.ifr_name[IFNAMSIZ - 1] = '\0'; + strscpy(ifr.ifr_name, dev_name); err = ioctl(fd, TUNSETIFF, &ifr); if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 5225d69..c69080c 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -346,8 +346,7 @@ static void test_task_sleepable(void) close(finish_pipe[1]); test_data = malloc(sizeof(char) * 10); - strncpy(test_data, "test_data", 10); - test_data[9] = '\0'; + strscpy(test_data, "test_data", 10); test_data_long = malloc(sizeof(char) * 5000); for (int i = 0; i < 5000; ++i) {
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index b7d1b52..f829b6f 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -281,7 +281,7 @@ static void test_dctcp_fallback(void) dctcp_skel = bpf_dctcp__open(); if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel")) return; - strcpy(dctcp_skel->rodata->fallback_cc, "cubic"); + strscpy(dctcp_skel->rodata->fallback_cc, "cubic"); if (!ASSERT_OK(bpf_dctcp__load(dctcp_skel), "bpf_dctcp__load")) goto done;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c index 9015e2c..478a77c 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
@@ -202,7 +202,7 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id) iter_fd = bpf_iter_create(bpf_link__fd(link)); if (!ASSERT_GE(iter_fd, 0, "iter_create")) - goto out; + goto out_link; /* trigger the program run */ (void)read(iter_fd, buf, sizeof(buf)); @@ -210,6 +210,8 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id) ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id"); close(iter_fd); +out_link: + bpf_link__destroy(link); out: cgrp_ls_sleepable__destroy(skel); }
diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c index dd75ccb..469e928 100644 --- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
@@ -308,8 +308,10 @@ static int find_field_offset(struct btf *btf, char *pattern, regmatch_t *matches return -1; } - strncpy(type_str, type, type_sz); - strncpy(field_str, field, field_sz); + memcpy(type_str, type, type_sz); + type_str[type_sz] = '\0'; + memcpy(field_str, field, field_sz); + field_str[field_sz] = '\0'; btf_id = btf__find_by_name(btf, type_str); if (btf_id < 0) { PRINT_FAIL("No BTF info for type %s\n", type_str);
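For reference, the pitfall behind this fix: strncpy() does not NUL-terminate the destination when the source is at least as long as the copy length, which is exactly the situation when copying regmatch substrings out of a longer line. A generic sketch of the safe pattern (hypothetical helper):

#include <regex.h>
#include <string.h>

/* Copy the substring matched by 'm' out of 'line', always NUL-terminated. */
static void match_to_str(const char *line, const regmatch_t *m,
			 char *out, size_t out_sz)
{
	size_t len = m->rm_eo - m->rm_so;

	if (len >= out_sz)
		len = out_sz - 1;	/* clamp, leave room for the NUL */
	memcpy(out, line + m->rm_so, len);
	out[len] = '\0';
}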
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index b9f86cb..5fda115 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -137,11 +137,14 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ ); link = bpf_program__attach(prog); - if (!ASSERT_OK_PTR(link, "bpf_program__attach")) + if (!ASSERT_OK_PTR(link, "bpf_program__attach")) { + bpf_object__close(obj); goto cleanup; + } err = bpf_prog_test_run_opts(aux_prog_fd, &topts); bpf_link__destroy(link); + bpf_object__close(obj); if (!ASSERT_OK(err, "test_run")) goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c index c534b4d..3078d82 100644 --- a/tools/testing/selftests/bpf/prog_tests/fd_array.c +++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c
@@ -412,8 +412,8 @@ static void check_fd_array_cnt__fd_array_too_big(void) ASSERT_EQ(prog_fd, -E2BIG, "prog should have been rejected with -E2BIG"); cleanup_fds: - while (i > 0) - Close(extra_fds[--i]); + while (i-- > 0) + Close(extra_fds[i]); } void test_fd_array_cnt(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 08bae13..fb48926 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -570,7 +570,7 @@ static int create_tap(const char *ifname) }; int fd, ret; - strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + strscpy(ifr.ifr_name, ifname); fd = open("/dev/net/tun", O_RDWR); if (fd < 0) @@ -599,7 +599,7 @@ static int ifup(const char *ifname) struct ifreq ifr = {}; int sk, ret; - strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + strscpy(ifr.ifr_name, ifname); sk = socket(PF_INET, SOCK_DGRAM, 0); if (sk < 0)
diff --git a/tools/testing/selftests/bpf/prog_tests/htab_update.c b/tools/testing/selftests/bpf/prog_tests/htab_update.c index d0b405e..ea1a676 100644 --- a/tools/testing/selftests/bpf/prog_tests/htab_update.c +++ b/tools/testing/selftests/bpf/prog_tests/htab_update.c
@@ -61,6 +61,7 @@ static void test_reenter_update(void) ASSERT_EQ(skel->bss->update_err, -EDEADLK, "no reentrancy"); out: + free(value); htab_update__destroy(skel); }
diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c index 6e35e13..399fe91 100644 --- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
@@ -104,11 +104,8 @@ void test_kmem_cache_iter(void) if (!ASSERT_GE(iter_fd, 0, "iter_create")) goto destroy; - memset(buf, 0, sizeof(buf)); - while (read(iter_fd, buf, sizeof(buf)) > 0) { - /* Read out all contents */ - printf("%s", buf); - } + while (read(iter_fd, buf, sizeof(buf)) > 0) + ; /* Read out all contents */ /* Next reads should return 0 */ ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read");
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 9caef22..f81dcd6 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -456,25 +456,23 @@ static void test_kprobe_multi_bench_attach(bool kernel) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); struct kprobe_multi_empty *skel = NULL; - char **syms = NULL; - size_t cnt = 0; + struct ksyms *ksyms = NULL; - if (!ASSERT_OK(bpf_get_ksyms(&syms, &cnt, kernel), "bpf_get_ksyms")) + if (!ASSERT_OK(bpf_get_ksyms(&ksyms, kernel), "bpf_get_ksyms")) return; skel = kprobe_multi_empty__open_and_load(); if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load")) goto cleanup; - opts.syms = (const char **) syms; - opts.cnt = cnt; + opts.syms = (const char **)ksyms->filtered_syms; + opts.cnt = ksyms->filtered_cnt; do_bench_test(skel, &opts); cleanup: kprobe_multi_empty__destroy(skel); - if (syms) - free(syms); + free_kallsyms_local(ksyms); } static void test_kprobe_multi_bench_attach_addr(bool kernel)
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c index 3bc730b..1b25d5c 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c +++ b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c
@@ -117,7 +117,7 @@ void test_lwt_seg6local(void) const char *ns1 = NETNS_BASE "1"; const char *ns6 = NETNS_BASE "6"; struct nstoken *nstoken = NULL; - const char *foobar = "foobar"; + const char foobar[] = "foobar"; ssize_t bytes; int sfd, cfd; char buf[7];
diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr_race.c b/tools/testing/selftests/bpf/prog_tests/map_kptr_race.c new file mode 100644 index 0000000..506ed55 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr_race.c
@@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <network_helpers.h> + +#include "map_kptr_race.skel.h" + +static int get_map_id(int map_fd) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + + if (!ASSERT_OK(bpf_map_get_info_by_fd(map_fd, &info, &len), "get_map_info")) + return -1; + return info.id; +} + +static int read_refs(struct map_kptr_race *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + int ret; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.count_ref), &opts); + if (!ASSERT_OK(ret, "count_ref run")) + return -1; + if (!ASSERT_OK(opts.retval, "count_ref retval")) + return -1; + return skel->bss->num_of_refs; +} + +static void test_htab_leak(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct map_kptr_race *skel, *watcher; + int ret, map_id; + + skel = map_kptr_race__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_htab_leak), &opts); + if (!ASSERT_OK(ret, "test_htab_leak run")) + goto out_skel; + if (!ASSERT_OK(opts.retval, "test_htab_leak retval")) + goto out_skel; + + map_id = get_map_id(bpf_map__fd(skel->maps.race_hash_map)); + if (!ASSERT_GE(map_id, 0, "map_id")) + goto out_skel; + + watcher = map_kptr_race__open_and_load(); + if (!ASSERT_OK_PTR(watcher, "watcher open_and_load")) + goto out_skel; + + watcher->bss->target_map_id = map_id; + watcher->links.map_put = bpf_program__attach(watcher->progs.map_put); + if (!ASSERT_OK_PTR(watcher->links.map_put, "attach fentry")) + goto out_watcher; + watcher->links.htab_map_free = bpf_program__attach(watcher->progs.htab_map_free); + if (!ASSERT_OK_PTR(watcher->links.htab_map_free, "attach fexit")) + goto out_watcher; + + map_kptr_race__destroy(skel); + skel = NULL; + + kern_sync_rcu(); + + while (!READ_ONCE(watcher->bss->map_freed)) + sched_yield(); + + ASSERT_EQ(watcher->bss->map_freed, 1, "map_freed"); + ASSERT_EQ(read_refs(watcher), 2, "htab refcount"); + +out_watcher: + map_kptr_race__destroy(watcher); +out_skel: + map_kptr_race__destroy(skel); +} + +static void test_percpu_htab_leak(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct map_kptr_race *skel, *watcher; + int ret, map_id; + + skel = map_kptr_race__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + skel->rodata->nr_cpus = libbpf_num_possible_cpus(); + if (skel->rodata->nr_cpus > 16) + skel->rodata->nr_cpus = 16; + + ret = map_kptr_race__load(skel); + if (!ASSERT_OK(ret, "load")) + goto out_skel; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_percpu_htab_leak), &opts); + if (!ASSERT_OK(ret, "test_percpu_htab_leak run")) + goto out_skel; + if (!ASSERT_OK(opts.retval, "test_percpu_htab_leak retval")) + goto out_skel; + + map_id = get_map_id(bpf_map__fd(skel->maps.race_percpu_hash_map)); + if (!ASSERT_GE(map_id, 0, "map_id")) + goto out_skel; + + watcher = map_kptr_race__open_and_load(); + if (!ASSERT_OK_PTR(watcher, "watcher open_and_load")) + goto out_skel; + + watcher->bss->target_map_id = map_id; + watcher->links.map_put = bpf_program__attach(watcher->progs.map_put); + if (!ASSERT_OK_PTR(watcher->links.map_put, "attach fentry")) + goto out_watcher; + watcher->links.htab_map_free = bpf_program__attach(watcher->progs.htab_map_free); + if 
(!ASSERT_OK_PTR(watcher->links.htab_map_free, "attach fexit")) + goto out_watcher; + + map_kptr_race__destroy(skel); + skel = NULL; + + kern_sync_rcu(); + + while (!READ_ONCE(watcher->bss->map_freed)) + sched_yield(); + + ASSERT_EQ(watcher->bss->map_freed, 1, "map_freed"); + ASSERT_EQ(read_refs(watcher), 2, "percpu_htab refcount"); + +out_watcher: + map_kptr_race__destroy(watcher); +out_skel: + map_kptr_race__destroy(skel); +} + +static void test_sk_ls_leak(void) +{ + struct map_kptr_race *skel, *watcher; + int listen_fd = -1, client_fd = -1, map_id; + + skel = map_kptr_race__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + if (!ASSERT_OK(map_kptr_race__attach(skel), "attach")) + goto out_skel; + + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (!ASSERT_GE(listen_fd, 0, "start_server")) + goto out_skel; + + client_fd = connect_to_fd(listen_fd, 0); + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto out_skel; + + if (!ASSERT_EQ(skel->bss->sk_ls_leak_done, 1, "sk_ls_leak_done")) + goto out_skel; + + close(client_fd); + client_fd = -1; + close(listen_fd); + listen_fd = -1; + + map_id = get_map_id(bpf_map__fd(skel->maps.race_sk_ls_map)); + if (!ASSERT_GE(map_id, 0, "map_id")) + goto out_skel; + + watcher = map_kptr_race__open_and_load(); + if (!ASSERT_OK_PTR(watcher, "watcher open_and_load")) + goto out_skel; + + watcher->bss->target_map_id = map_id; + watcher->links.map_put = bpf_program__attach(watcher->progs.map_put); + if (!ASSERT_OK_PTR(watcher->links.map_put, "attach fentry")) + goto out_watcher; + watcher->links.sk_map_free = bpf_program__attach(watcher->progs.sk_map_free); + if (!ASSERT_OK_PTR(watcher->links.sk_map_free, "attach fexit")) + goto out_watcher; + + map_kptr_race__destroy(skel); + skel = NULL; + + kern_sync_rcu(); + + while (!READ_ONCE(watcher->bss->map_freed)) + sched_yield(); + + ASSERT_EQ(watcher->bss->map_freed, 1, "map_freed"); + ASSERT_EQ(read_refs(watcher), 2, "sk_ls refcount"); + +out_watcher: + map_kptr_race__destroy(watcher); +out_skel: + if (client_fd >= 0) + close(client_fd); + if (listen_fd >= 0) + close(listen_fd); + map_kptr_race__destroy(skel); +} + +void serial_test_map_kptr_race(void) +{ + if (test__start_subtest("htab_leak")) + test_htab_leak(); + if (test__start_subtest("percpu_htab_leak")) + test_percpu_htab_leak(); + if (test__start_subtest("sk_ls_leak")) + test_sk_ls_leak(); +}
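The ownership rule the new test leans on: bpf_kptr_xchg() moves a referenced kptr into the map value and returns whatever was stored there, and the program must release that returned reference. A minimal sketch of the correct discipline, reusing the kfuncs and map_value layout from the selftest above (a hedged illustration, not part of the patch):

static int store_ref(struct map_value *v)
{
	struct prog_test_ref_kfunc *p, *old;

	/* acquire a reference; we own it until it is stored or released */
	p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
	if (!p)
		return -1;
	/* ownership of p moves into the map value; the previous occupant,
	 * if any, comes back to us and must be released explicitly
	 */
	old = bpf_kptr_xchg(&v->ref_ptr, p);
	if (old)
		bpf_kfunc_call_test_release(old);
	return 0;
}

The leak the test provokes is exactly this sequence run against a map_value pointer whose element was already deleted: the stored kptr is never walked by the map-free path, which the watcher skeleton detects by counting references after htab_map_free() returns.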
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c index a043af9..4144132 100644 --- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c +++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -28,9 +28,9 @@ static void test_queue_stack_map_by_type(int type) vals[i] = rand(); if (type == QUEUE) - strncpy(file, "./test_queue_map.bpf.o", sizeof(file)); + strscpy(file, "./test_queue_map.bpf.o"); else if (type == STACK) - strncpy(file, "./test_stack_map.bpf.o", sizeof(file)); + strscpy(file, "./test_stack_map.bpf.o"); else return;
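This strncpy-to-strscpy conversion repeats across several hunks below; the selftests' two-argument form sizes the destination from the array type and always NUL-terminates. As a rough illustration of the shape of such a macro (hypothetical sketch only, not the selftests' actual definition; it assumes dst is a true array, never a plain pointer):

#include <stdio.h>

/* hypothetical sketch: truncation-safe, always NUL-terminated copy;
 * sizeof(dst) is the array size, so dst must not decay to a pointer
 */
#define strscpy_sketch(dst, src) \
	snprintf((dst), sizeof(dst), "%s", (src))

Unlike strncpy(), which does not guarantee NUL termination when the source fills the buffer, this shape always terminates; the kernel's real strscpy() additionally reports truncation by returning -E2BIG.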
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index d93a0c7..cb8dd2f 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -422,15 +422,69 @@ static bool is_valid_range(enum num_t t, struct range x) } } -static struct range range_improve(enum num_t t, struct range old, struct range new) +static struct range range_intersection(enum num_t t, struct range old, struct range new) { return range(t, max_t(t, old.a, new.a), min_t(t, old.b, new.b)); } +/* + * Result is precise when 'x' and 'y' overlap or form a continuous range; + * the result is an over-approximation if 'x' and 'y' do not overlap. + */ +static struct range range_union(enum num_t t, struct range x, struct range y) +{ + if (!is_valid_range(t, x)) + return y; + if (!is_valid_range(t, y)) + return x; + return range(t, min_t(t, x.a, y.a), max_t(t, x.b, y.b)); +} + +/* + * This function attempts to improve the x range by intersecting it with y. + * range_cast(... to_t ...) loses precision for ranges that pass to_t + * min/max boundaries. To avoid such precision losses, this function + * splits both x and y into halves corresponding to non-overflowing + * sub-ranges: [0, smax] and [smin, -1]. + * Final result is computed as follows: + * + * ((x ∩ [0, smax]) ∩ (y ∩ [0, smax])) ∪ + * ((x ∩ [smin,-1]) ∩ (y ∩ [smin,-1])) + * + * Precision might still be lost if the final union is not a continuous range. + */ +static struct range range_refine_in_halves(enum num_t x_t, struct range x, + enum num_t y_t, struct range y) +{ + struct range x_pos, x_neg, y_pos, y_neg, r_pos, r_neg; + u64 smax, smin, neg_one; + + if (t_is_32(x_t)) { + smax = (u64)(u32)S32_MAX; + smin = (u64)(u32)S32_MIN; + neg_one = (u64)(u32)(s32)(-1); + } else { + smax = (u64)S64_MAX; + smin = (u64)S64_MIN; + neg_one = U64_MAX; + } + x_pos = range_intersection(x_t, x, range(x_t, 0, smax)); + x_neg = range_intersection(x_t, x, range(x_t, smin, neg_one)); + y_pos = range_intersection(y_t, y, range(y_t, 0, smax)); + y_neg = range_intersection(y_t, y, range(y_t, smin, neg_one)); + r_pos = range_intersection(x_t, x_pos, range_cast(y_t, x_t, y_pos)); + r_neg = range_intersection(x_t, x_neg, range_cast(y_t, x_t, y_neg)); + return range_union(x_t, r_pos, r_neg); + +} + static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, struct range y) { struct range y_cast; + if (t_is_32(x_t) == t_is_32(y_t)) + x = range_refine_in_halves(x_t, x, y_t, y); + y_cast = range_cast(y_t, x_t, y); /* If we know that @@ -444,7 +498,7 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, */ if (x_t == S64 && y_t == S32 && y_cast.a <= S32_MAX && y_cast.b <= S32_MAX && (s64)x.a >= S32_MIN && (s64)x.b <= S32_MAX) - return range_improve(x_t, x, y_cast); + return range_intersection(x_t, x, y_cast); /* the case when new range knowledge, *y*, is a 32-bit subregister * range, while previous range knowledge, *x*, is a full register @@ -462,25 +516,11 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, x_swap = range(x_t, swap_low32(x.a, y_cast.a), swap_low32(x.b, y_cast.b)); if (!is_valid_range(x_t, x_swap)) return x; - return range_improve(x_t, x, x_swap); - } - - if (!t_is_32(x_t) && !t_is_32(y_t) && x_t != y_t) { - if (x_t == S64 && x.a > x.b) { - if (x.b < y.a && x.a <= y.b) - return range(x_t, x.a, y.b); - if (x.a > y.b && x.b >= y.a) - return range(x_t, y.a, x.b); - } else if (x_t == U64 && y.a > y.b) { - if (y.b < x.a && y.a <= x.b) - return range(x_t, y.a, x.b); - if (y.a > x.b && y.b >= x.a) - return range(x_t, x.a, y.b); - } + return range_intersection(x_t, x, x_swap); } /* otherwise, plain range cast and intersection works */ - return range_improve(x_t, x,
y_cast); + return range_intersection(x_t, x, y_cast); } /* ======================= @@ -2091,7 +2131,7 @@ static struct subtest_case crafted_cases[] = { {U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 0x7fffffff}}, {U64, U32, {0, 0x100000000}, {0, 0}}, - {U64, U32, {0xfffffffe, 0x100000000}, {0x80000000, 0x80000000}}, + {U64, U32, {0xfffffffe, 0x300000000}, {0x80000000, 0x80000000}}, {U64, S32, {0, 0xffffffff00000000ULL}, {0, 0}}, /* these are tricky cases where lower 32 bits allow to tighten 64
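A worked example may help with the half-splitting comment above (a standalone sketch under simplified assumptions; names are local to the sketch, not taken from reg_bounds.c). Take x_t = S64 with x = [0, 100] and y_t = U64 with y = [16, U64_MAX]. A plain range_cast of y to S64 crosses S64_MAX and collapses to [S64_MIN, S64_MAX], refining nothing; splitting first keeps y's positive half [16, S64_MAX], whose intersection with x is [16, 100], and y's negative half misses x entirely, so the union is the tighter [16, 100]:

#include <stdint.h>
#include <stdio.h>

/* minimal model: only the positive half [0, S64_MAX] is needed here;
 * an empty range is represented by lo > hi
 */
typedef struct { int64_t lo, hi; } srange;

static srange isect(srange a, srange b)
{
	srange r = { a.lo > b.lo ? a.lo : b.lo,
		     a.hi < b.hi ? a.hi : b.hi };
	return r;
}

int main(void)
{
	srange x = { 0, 100 };             /* s64 knowledge about the register */
	srange y_pos = { 16, INT64_MAX };  /* y ∩ [0, S64_MAX] */
	srange r = isect(x, y_pos);        /* y's negative half ∩ x is empty */

	printf("refined: [%lld, %lld]\n", (long long)r.lo, (long long)r.hi);
	/* prints: refined: [16, 100] */
	return 0;
}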
diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c index e4dac52..77fe1bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
@@ -212,7 +212,7 @@ void test_setget_sockopt(void) if (!ASSERT_OK_PTR(skel, "open skel")) goto done; - strcpy(skel->rodata->veth, "binddevtest1"); + strscpy(skel->rodata->veth, "binddevtest1"); skel->rodata->veth_ifindex = if_nametoindex("binddevtest1"); if (!ASSERT_GT(skel->rodata->veth_ifindex, 0, "if_nametoindex")) goto done;
diff --git a/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c index 3eefdfe..657d897 100644 --- a/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c +++ b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c
@@ -34,7 +34,7 @@ void test_skc_to_unix_sock(void) memset(&sockaddr, 0, sizeof(sockaddr)); sockaddr.sun_family = AF_UNIX; - strncpy(sockaddr.sun_path, sock_path, strlen(sock_path)); + strscpy(sockaddr.sun_path, sock_path); sockaddr.sun_path[0] = '\0'; err = bind(sockfd, (struct sockaddr *)&sockaddr, sizeof(sockaddr));
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 256707e..dd3c757 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -204,7 +204,7 @@ static void test_skmsg_helpers_with_link(enum bpf_map_type map_type) /* Fail since bpf_link for the same prog type has been created. */ link2 = bpf_program__attach_sockmap(prog_clone, map); if (!ASSERT_ERR_PTR(link2, "bpf_program__attach_sockmap")) { - bpf_link__detach(link2); + bpf_link__destroy(link2); goto out; } @@ -230,7 +230,7 @@ static void test_skmsg_helpers_with_link(enum bpf_map_type map_type) if (!ASSERT_OK(err, "bpf_link_update")) goto out; out: - bpf_link__detach(link); + bpf_link__destroy(link); test_skmsg_load_helpers__destroy(skel); } @@ -417,7 +417,7 @@ static void test_sockmap_skb_verdict_attach_with_link(void) if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap")) goto out; - bpf_link__detach(link); + bpf_link__destroy(link); err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0); if (!ASSERT_OK(err, "bpf_prog_attach")) @@ -426,7 +426,7 @@ static void test_sockmap_skb_verdict_attach_with_link(void) /* Fail since attaching with the same prog/map has been done. */ link = bpf_program__attach_sockmap(prog, map); if (!ASSERT_ERR_PTR(link, "bpf_program__attach_sockmap")) - bpf_link__detach(link); + bpf_link__destroy(link); err = bpf_prog_detach2(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT); if (!ASSERT_OK(err, "bpf_prog_detach2")) @@ -747,13 +747,13 @@ static void test_sockmap_skb_verdict_peek_with_link(void) test_sockmap_skb_verdict_peek_helper(map); ASSERT_EQ(pass->bss->clone_called, 1, "clone_called"); out: - bpf_link__detach(link); + bpf_link__destroy(link); test_sockmap_pass_prog__destroy(pass); } static void test_sockmap_unconnected_unix(void) { - int err, map, stream = 0, dgram = 0, zero = 0; + int err, map, stream = -1, dgram = -1, zero = 0; struct test_sockmap_pass_prog *skel; skel = test_sockmap_pass_prog__open_and_load(); @@ -764,22 +764,22 @@ static void test_sockmap_unconnected_unix(void) stream = xsocket(AF_UNIX, SOCK_STREAM, 0); if (stream < 0) - return; + goto out; dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); - if (dgram < 0) { - close(stream); - return; - } + if (dgram < 0) + goto out; err = bpf_map_update_elem(map, &zero, &stream, BPF_ANY); - ASSERT_ERR(err, "bpf_map_update_elem(stream)"); + if (!ASSERT_ERR(err, "bpf_map_update_elem(stream)")) + goto out; err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY); ASSERT_OK(err, "bpf_map_update_elem(dgram)"); - +out: close(stream); close(dgram); + test_sockmap_pass_prog__destroy(skel); } static void test_sockmap_many_socket(void) @@ -1027,7 +1027,7 @@ static void test_sockmap_skb_verdict_vsock_poll(void) if (xrecv_nonblock(conn, &buf, 1, 0) != 1) FAIL("xrecv_nonblock"); detach: - bpf_link__detach(link); + bpf_link__destroy(link); close: xclose(conn); xclose(peer);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index f1bdccc..cc0c68b 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -899,7 +899,7 @@ static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen *sk redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS); - bpf_link__detach(link); + bpf_link__destroy(link); } static void redir_partial(int family, int sotype, int sock_map, int parser_map)
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index ba6b3ec..5363743 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -142,7 +142,7 @@ static int getsetsockopt(void) /* TCP_CONGESTION can extend the string */ - strcpy(buf.cc, "nv"); + strscpy(buf.cc, "nv"); err = setsockopt(fd, SOL_TCP, TCP_CONGESTION, &buf, strlen("nv")); if (err) { log_err("Failed to call setsockopt(TCP_CONGESTION)");
diff --git a/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c index 4006879..d42123a 100644 --- a/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c +++ b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c
@@ -54,9 +54,7 @@ static void test_private_stack_fail(void) } err = struct_ops_private_stack_fail__load(skel); - if (!ASSERT_ERR(err, "struct_ops_private_stack_fail__load")) - goto cleanup; - return; + ASSERT_ERR(err, "struct_ops_private_stack_fail__load"); cleanup: struct_ops_private_stack_fail__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_data.h b/tools/testing/selftests/bpf/prog_tests/task_local_data.h index 0f86b92..8342e2f 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_data.h +++ b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
@@ -262,7 +262,7 @@ static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data) if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1)) goto retry; - strncpy(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN); + strscpy(tld_meta_p->metadata[i].name, name); atomic_store(&tld_meta_p->metadata[i].size, size); return (tld_key_t){(__s16)off}; }
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index dd7a138..2955750 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
@@ -1360,10 +1360,8 @@ static void test_tc_opts_dev_cleanup_target(int target) assert_mprog_count_ifindex(ifindex, target, 4); - ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth"); - ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed"); - ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); - return; + goto cleanup; + cleanup3: err = bpf_prog_detach_opts(fd3, loopback, target, &optd); ASSERT_OK(err, "prog_detach");
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 76d72a5..64fbda0 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -1095,7 +1095,7 @@ static int tun_open(char *name) ifr.ifr_flags = IFF_TUN | IFF_NO_PI; if (*name) - strncpy(ifr.ifr_name, name, IFNAMSIZ); + strscpy(ifr.ifr_name, name); err = ioctl(fd, TUNSETIFF, &ifr); if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
diff --git a/tools/testing/selftests/bpf/prog_tests/test_sysctl.c b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c index 273dd41..2a1ff82 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_sysctl.c +++ b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c
@@ -27,6 +27,7 @@ struct sysctl_test { OP_EPERM, SUCCESS, } result; + struct bpf_object *obj; }; static struct sysctl_test tests[] = { @@ -1471,6 +1472,7 @@ static int load_sysctl_prog_file(struct sysctl_test *test) return -1; } + test->obj = obj; return prog_fd; } @@ -1573,6 +1575,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test) /* Detaching w/o checking return code: best effort attempt. */ if (progfd != -1) bpf_prog_detach(cgfd, atype); + bpf_object__close(test->obj); close(progfd); printf("[%s]\n", err ? "FAIL" : "PASS"); return err;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c index 0fe0a8f..7fc4d7d 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
@@ -699,7 +699,7 @@ void test_tc_tunnel(void) return; if (!ASSERT_OK(setup(), "global setup")) - return; + goto out; for (i = 0; i < ARRAY_SIZE(subtests_cfg); i++) { cfg = &subtests_cfg[i]; @@ -711,4 +711,7 @@ void test_tc_tunnel(void) subtest_cleanup(cfg); } cleanup(); + +out: + test_tc_tunnel__destroy(skel); }
diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c index b38c16b..9aff08a 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c +++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
@@ -24,9 +24,9 @@ static struct fixture *init_fixture(void) /* for no_alu32 and cpuv4 veristat is in parent folder */ if (access("./veristat", F_OK) == 0) - strcpy(fix->veristat, "./veristat"); + strscpy(fix->veristat, "./veristat"); else if (access("../veristat", F_OK) == 0) - strcpy(fix->veristat, "../veristat"); + strscpy(fix->veristat, "../veristat"); else PRINT_FAIL("Can't find veristat binary");
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c index bab4a31..7e38ec6 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_xsk.c +++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
@@ -2003,9 +2003,17 @@ int testapp_stats_tx_invalid_descs(struct test_spec *test) int testapp_stats_rx_full(struct test_spec *test) { - if (pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE)) + struct pkt_stream *tmp; + + tmp = pkt_stream_generate(DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); + if (!tmp) return TEST_FAILURE; - test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + test->ifobj_tx->xsk->pkt_stream = tmp; + + tmp = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + if (!tmp) + return TEST_FAILURE; + test->ifobj_rx->xsk->pkt_stream = tmp; test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; test->ifobj_rx->release_rx = false; @@ -2015,9 +2023,17 @@ int testapp_stats_rx_full(struct test_spec *test) int testapp_stats_fill_empty(struct test_spec *test) { - if (pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE)) + struct pkt_stream *tmp; + + tmp = pkt_stream_generate(DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); + if (!tmp) return TEST_FAILURE; - test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + test->ifobj_tx->xsk->pkt_stream = tmp; + + tmp = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + if (!tmp) + return TEST_FAILURE; + test->ifobj_rx->xsk->pkt_stream = tmp; test->ifobj_rx->use_fill_ring = false; test->ifobj_rx->validation_func = validate_fill_empty;
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index 2ee17ef..56cbea2 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
@@ -62,8 +62,10 @@ static void release_child(struct child *child) return; close(child->go[1]); close(child->go[0]); - if (child->thread) + if (child->thread) { pthread_join(child->thread, NULL); + child->thread = 0; + } close(child->c2p[0]); close(child->c2p[1]); if (child->pid > 0) @@ -331,6 +333,8 @@ test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi { static struct child child; + memset(&child, 0, sizeof(child)); + /* no pid filter */ __test_attach_api(binary, pattern, opts, NULL);
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_log.c b/tools/testing/selftests/bpf/prog_tests/verifier_log.c index 8337c6b..aaa2854 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier_log.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier_log.c
@@ -47,7 +47,7 @@ static int load_prog(struct bpf_prog_load_opts *opts, bool expect_load_error) static void verif_log_subtest(const char *name, bool expect_load_error, int log_level) { LIBBPF_OPTS(bpf_prog_load_opts, opts); - char *exp_log, prog_name[16], op_name[32]; + char *exp_log, prog_name[24], op_name[32]; struct test_log_buf *skel; struct bpf_program *prog; size_t fixed_log_sz;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index fb95270..e8ea264 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -610,6 +610,61 @@ static void test_xdp_bonding_features(struct skeletons *skeletons) system("ip link del bond"); } +/* + * Test that changing xmit_hash_policy to vlan+srcmac is rejected when a + * native XDP program is loaded on a bond in 802.3ad or balance-xor mode. + * These modes support XDP only when xmit_hash_policy != vlan+srcmac; freely + * changing the policy creates an inconsistency that triggers a WARNING in + * dev_xdp_uninstall() during device teardown. + */ +static void test_xdp_bonding_xmit_policy_compat(struct skeletons *skeletons) +{ + struct nstoken *nstoken = NULL; + int bond_ifindex = -1; + int xdp_fd, err; + + SYS(out, "ip netns add ns_xmit_policy"); + nstoken = open_netns("ns_xmit_policy"); + if (!ASSERT_OK_PTR(nstoken, "open ns_xmit_policy")) + goto out; + + /* 802.3ad with layer2+3 policy: native XDP is supported */ + SYS(out, "ip link add bond0 type bond mode 802.3ad xmit_hash_policy layer2+3"); + SYS(out, "ip link add veth0 type veth peer name veth0p"); + SYS(out, "ip link set veth0 master bond0"); + SYS(out, "ip link set bond0 up"); + + bond_ifindex = if_nametoindex("bond0"); + if (!ASSERT_GT(bond_ifindex, 0, "bond0 ifindex")) + goto out; + + xdp_fd = bpf_program__fd(skeletons->xdp_dummy->progs.xdp_dummy_prog); + if (!ASSERT_GE(xdp_fd, 0, "xdp_dummy fd")) + goto out; + + err = bpf_xdp_attach(bond_ifindex, xdp_fd, XDP_FLAGS_DRV_MODE, NULL); + if (!ASSERT_OK(err, "attach XDP to bond0")) + goto out; + + /* With XDP loaded, switching to vlan+srcmac must be rejected */ + err = system("ip link set bond0 type bond xmit_hash_policy vlan+srcmac 2>/dev/null"); + ASSERT_NEQ(err, 0, "vlan+srcmac change with XDP loaded should fail"); + + /* Detach XDP first, then the same change must succeed */ + ASSERT_OK(bpf_xdp_detach(bond_ifindex, XDP_FLAGS_DRV_MODE, NULL), + "detach XDP from bond0"); + + bond_ifindex = -1; + err = system("ip link set bond0 type bond xmit_hash_policy vlan+srcmac 2>/dev/null"); + ASSERT_OK(err, "vlan+srcmac change without XDP should succeed"); + +out: + if (bond_ifindex > 0) + bpf_xdp_detach(bond_ifindex, XDP_FLAGS_DRV_MODE, NULL); + close_netns(nstoken); + SYS_NOFAIL("ip netns del ns_xmit_policy"); +} + static int libbpf_debug_print(enum libbpf_print_level level, const char *format, va_list args) { @@ -677,6 +732,9 @@ void serial_test_xdp_bonding(void) test_case->xmit_policy); } + if (test__start_subtest("xdp_bonding_xmit_policy_compat")) + test_xdp_bonding_xmit_policy_compat(&skeletons); + if (test__start_subtest("xdp_bonding_redirect_multi")) test_xdp_bonding_redirect_multi(&skeletons);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c index 3f9146d..325e0b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c
@@ -67,7 +67,7 @@ void test_xdp_flowtable(void) struct nstoken *tok = NULL; int iifindex, stats_fd; __u32 value, key = 0; - struct bpf_link *link; + struct bpf_link *link = NULL; if (SYS_NOFAIL("nft -v")) { fprintf(stdout, "Missing required nft tool\n"); @@ -160,6 +160,7 @@ void test_xdp_flowtable(void) ASSERT_GE(value, N_PACKETS - 2, "bpf_xdp_flow_lookup failed"); out: + bpf_link__destroy(link); xdp_flowtable__destroy(skel); if (tok) close_netns(tok);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 19f92af..5c31054 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -126,10 +126,10 @@ static int open_xsk(int ifindex, struct xsk *xsk) static void close_xsk(struct xsk *xsk) { - if (xsk->umem) - xsk_umem__delete(xsk->umem); if (xsk->socket) xsk_socket__delete(xsk->socket); + if (xsk->umem) + xsk_umem__delete(xsk->umem); munmap(xsk->umem_area, UMEM_SIZE); }
diff --git a/tools/testing/selftests/bpf/progs/dmabuf_iter.c b/tools/testing/selftests/bpf/progs/dmabuf_iter.c index 13cdb11..9cbb744 100644 --- a/tools/testing/selftests/bpf/progs/dmabuf_iter.c +++ b/tools/testing/selftests/bpf/progs/dmabuf_iter.c
@@ -48,7 +48,7 @@ int dmabuf_collector(struct bpf_iter__dmabuf *ctx) /* Buffers are not required to be named */ if (pname) { - if (bpf_probe_read_kernel(name, sizeof(name), pname)) + if (bpf_probe_read_kernel_str(name, sizeof(name), pname) < 0) return 1; /* Name strings can be provided by userspace */
diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index a01c273..858af59 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c
@@ -18,43 +18,43 @@ return *(u64 *)num; \ } -__msg(": R0=0xffffffff80000000") +__msg("R{{.}}=0xffffffff80000000") check_assert(s64, ==, eq_int_min, INT_MIN); -__msg(": R0=0x7fffffff") +__msg("R{{.}}=0x7fffffff") check_assert(s64, ==, eq_int_max, INT_MAX); -__msg(": R0=0") +__msg("R{{.}}=0") check_assert(s64, ==, eq_zero, 0); -__msg(": R0=0x8000000000000000 R1=0x8000000000000000") +__msg("R{{.}}=0x8000000000000000") check_assert(s64, ==, eq_llong_min, LLONG_MIN); -__msg(": R0=0x7fffffffffffffff R1=0x7fffffffffffffff") +__msg("R{{.}}=0x7fffffffffffffff") check_assert(s64, ==, eq_llong_max, LLONG_MAX); -__msg(": R0=scalar(id=1,smax=0x7ffffffe)") +__msg("R{{.}}=scalar(id=1,smax=0x7ffffffe)") check_assert(s64, <, lt_pos, INT_MAX); -__msg(": R0=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg("R{{.}}=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, <, lt_zero, 0); -__msg(": R0=scalar(id=1,smax=0xffffffff7fffffff") +__msg("R{{.}}=scalar(id=1,smax=0xffffffff7fffffff") check_assert(s64, <, lt_neg, INT_MIN); -__msg(": R0=scalar(id=1,smax=0x7fffffff)") +__msg("R{{.}}=scalar(id=1,smax=0x7fffffff)") check_assert(s64, <=, le_pos, INT_MAX); -__msg(": R0=scalar(id=1,smax=0)") +__msg("R{{.}}=scalar(id=1,smax=0)") check_assert(s64, <=, le_zero, 0); -__msg(": R0=scalar(id=1,smax=0xffffffff80000000") +__msg("R{{.}}=scalar(id=1,smax=0xffffffff80000000") check_assert(s64, <=, le_neg, INT_MIN); -__msg(": R0=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +__msg("R{{.}}=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, >, gt_pos, INT_MAX); -__msg(": R0=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +__msg("R{{.}}=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, >, gt_zero, 0); -__msg(": R0=scalar(id=1,smin=0xffffffff80000001") +__msg("R{{.}}=scalar(id=1,smin=0xffffffff80000001") check_assert(s64, >, gt_neg, INT_MIN); -__msg(": R0=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +__msg("R{{.}}=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, >=, ge_pos, INT_MAX); -__msg(": R0=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +__msg("R{{.}}=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, >=, ge_zero, 0); -__msg(": R0=scalar(id=1,smin=0xffffffff80000000") +__msg("R{{.}}=scalar(id=1,smin=0xffffffff80000000") check_assert(s64, >=, ge_neg, INT_MIN); SEC("?tc")
diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 8a0fdff..9ea1353 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c
@@ -8,6 +8,11 @@ #include "bpf_experimental.h" extern void bpf_rcu_read_lock(void) __ksym; +extern void bpf_rcu_read_unlock(void) __ksym; +extern void bpf_preempt_disable(void) __ksym; +extern void bpf_preempt_enable(void) __ksym; +extern void bpf_local_irq_save(unsigned long *) __ksym; +extern void bpf_local_irq_restore(unsigned long *) __ksym; #define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8))) @@ -131,7 +136,7 @@ int reject_subprog_with_lock(void *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_rcu_read_lock-ed region") +__failure __msg("bpf_throw cannot be used inside bpf_rcu_read_lock-ed region") int reject_with_rcu_read_lock(void *ctx) { bpf_rcu_read_lock(); @@ -147,11 +152,13 @@ __noinline static int throwing_subprog(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_rcu_read_lock-ed region") +__failure __msg("bpf_throw cannot be used inside bpf_rcu_read_lock-ed region") int reject_subprog_with_rcu_read_lock(void *ctx) { bpf_rcu_read_lock(); - return throwing_subprog(ctx); + throwing_subprog(ctx); + bpf_rcu_read_unlock(); + return 0; } static bool rbless(struct bpf_rb_node *n1, const struct bpf_rb_node *n2) @@ -346,4 +353,47 @@ int reject_exception_throw_cb_diff(struct __sk_buff *ctx) return 0; } +__noinline static int always_throws(void) +{ + bpf_throw(0); + return 0; +} + +__noinline static int rcu_lock_then_throw(void) +{ + bpf_rcu_read_lock(); + bpf_throw(0); + return 0; +} + +SEC("?tc") +__failure __msg("bpf_throw cannot be used inside bpf_rcu_read_lock-ed region") +int reject_subprog_rcu_lock_throw(void *ctx) +{ + rcu_lock_then_throw(); + return 0; +} + +SEC("?tc") +__failure __msg("bpf_throw cannot be used inside bpf_preempt_disable-ed region") +int reject_subprog_throw_preempt_lock(void *ctx) +{ + bpf_preempt_disable(); + always_throws(); + bpf_preempt_enable(); + return 0; +} + +SEC("?tc") +__failure __msg("bpf_throw cannot be used inside bpf_local_irq_save-ed region") +int reject_subprog_throw_irq_lock(void *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + always_throws(); + bpf_local_irq_restore(&flags); + return 0; +} + char _license[] SEC("license") = "GPL";
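The message updates above track a verifier change: the diagnostic now names bpf_throw directly when an exception could unwind out of an RCU read-side, preemption-disabled, or IRQ-saved region, and the new subprog tests pin down that the check also follows throws into callees. The rule itself is simple: close every such region before throwing. A conforming shape, as a hedged sketch assuming the same kfunc declarations and exception-callback setup this file already uses:

SEC("?tc")
int ok_unlock_before_throw(void *ctx)
{
	bpf_rcu_read_lock();
	/* ... read-side work ... */
	bpf_rcu_read_unlock();	/* close the region first */
	bpf_throw(0);		/* unwinding is legal here */
	return 0;
}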
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_race.c b/tools/testing/selftests/bpf/progs/map_kptr_race.c new file mode 100644 index 0000000..f6f136c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_kptr_race.c
@@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "../test_kmods/bpf_testmod_kfunc.h" + +struct map_value { + struct prog_test_ref_kfunc __kptr *ref_ptr; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} race_hash_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} race_percpu_hash_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} race_sk_ls_map SEC(".maps"); + +int num_of_refs; +int sk_ls_leak_done; +int target_map_id; +int map_freed; +const volatile int nr_cpus; + +SEC("tc") +int test_htab_leak(struct __sk_buff *skb) +{ + struct prog_test_ref_kfunc *p, *old; + struct map_value val = {}; + struct map_value *v; + int key = 0; + + if (bpf_map_update_elem(&race_hash_map, &key, &val, BPF_ANY)) + return 1; + + v = bpf_map_lookup_elem(&race_hash_map, &key); + if (!v) + return 2; + + p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!p) + return 3; + old = bpf_kptr_xchg(&v->ref_ptr, p); + if (old) + bpf_kfunc_call_test_release(old); + + bpf_map_delete_elem(&race_hash_map, &key); + + p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!p) + return 4; + old = bpf_kptr_xchg(&v->ref_ptr, p); + if (old) + bpf_kfunc_call_test_release(old); + + return 0; +} + +static int fill_percpu_kptr(struct map_value *v) +{ + struct prog_test_ref_kfunc *p, *old; + + p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!p) + return 1; + old = bpf_kptr_xchg(&v->ref_ptr, p); + if (old) + bpf_kfunc_call_test_release(old); + return 0; +} + +SEC("tc") +int test_percpu_htab_leak(struct __sk_buff *skb) +{ + struct map_value *v, *arr[16] = {}; + struct map_value val = {}; + int key = 0; + int err = 0; + + if (bpf_map_update_elem(&race_percpu_hash_map, &key, &val, BPF_ANY)) + return 1; + + for (int i = 0; i < nr_cpus; i++) { + v = bpf_map_lookup_percpu_elem(&race_percpu_hash_map, &key, i); + if (!v) + return 2; + arr[i] = v; + } + + bpf_map_delete_elem(&race_percpu_hash_map, &key); + + for (int i = 0; i < nr_cpus; i++) { + v = arr[i]; + err = fill_percpu_kptr(v); + if (err) + return 3; + } + + return 0; +} + +SEC("tp_btf/inet_sock_set_state") +int BPF_PROG(test_sk_ls_leak, struct sock *sk, int oldstate, int newstate) +{ + struct prog_test_ref_kfunc *p, *old; + struct map_value *v; + + if (newstate != BPF_TCP_SYN_SENT) + return 0; + + if (sk_ls_leak_done) + return 0; + + v = bpf_sk_storage_get(&race_sk_ls_map, sk, NULL, + BPF_SK_STORAGE_GET_F_CREATE); + if (!v) + return 0; + + p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!p) + return 0; + old = bpf_kptr_xchg(&v->ref_ptr, p); + if (old) + bpf_kfunc_call_test_release(old); + + bpf_sk_storage_delete(&race_sk_ls_map, sk); + + p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!p) + return 0; + old = bpf_kptr_xchg(&v->ref_ptr, p); + if (old) + bpf_kfunc_call_test_release(old); + + sk_ls_leak_done = 1; + return 0; +} + +long target_map_ptr; + +SEC("fentry/bpf_map_put") +int BPF_PROG(map_put, struct bpf_map *map) +{ + if (target_map_id && map->id == (u32)target_map_id) + target_map_ptr = 
(long)map; + return 0; +} + +SEC("fexit/htab_map_free") +int BPF_PROG(htab_map_free, struct bpf_map *map) +{ + if (target_map_ptr && (long)map == target_map_ptr) + map_freed = 1; + return 0; +} + +SEC("fexit/bpf_sk_storage_map_free") +int BPF_PROG(sk_map_free, struct bpf_map *map) +{ + if (target_map_ptr && (long)map == target_map_ptr) + map_freed = 1; + return 0; +} + +SEC("syscall") +int count_ref(void *ctx) +{ + struct prog_test_ref_kfunc *p; + unsigned long arg = 0; + + p = bpf_kfunc_call_test_acquire(&arg); + if (!p) + return 1; + + num_of_refs = p->cnt.refs.counter; + + bpf_kfunc_call_test_release(p); + return 0; +} + +char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 5605314..79a3282 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -1148,7 +1148,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP32_JSLT for crossing 32-bit signed boundary") __success __retval(0) -__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ +__flag(BPF_F_TEST_REG_INVARIANTS) __naked void crossing_32_bit_signed_boundary_2(void) { asm volatile (" \ @@ -1863,4 +1863,272 @@ l1_%=: r0 = 1; \ : __clobber_all); } +/* This test covers the bounds deduction when the u64 range and the tnum + * overlap only at umax. After instruction 3, the ranges look as follows: + * + * 0 umin=0xe1 umax=0xf0 U64_MAX + * | [xxxxxxxxxxxxxx] | + * |----------------------------|------------------------------| + * | x x | tnum values + * + * The verifier can therefore deduce that R0=0xf0=240. + */ +SEC("socket") +__description("bounds refinement with single-value tnum on umax") +__msg("3: (15) if r0 == 0xe0 {{.*}} R0=240") +__success __log_level(2) +__flag(BPF_F_TEST_REG_INVARIANTS) +__naked void bounds_refinement_tnum_umax(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + r0 |= 0xe0; \ + r0 &= 0xf0; \ + if r0 == 0xe0 goto +2; \ + if r0 == 0xf0 goto +1; \ + r10 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* This test covers the bounds deduction when the u64 range and the tnum + * overlap only at umin. After instruction 3, the ranges look as follows: + * + * 0 umin=0xe0 umax=0xef U64_MAX + * | [xxxxxxxxxxxxxx] | + * |----------------------------|------------------------------| + * | x x | tnum values + * + * The verifier can therefore deduce that R0=0xe0=224. + */ +SEC("socket") +__description("bounds refinement with single-value tnum on umin") +__msg("3: (15) if r0 == 0xf0 {{.*}} R0=224") +__success __log_level(2) +__flag(BPF_F_TEST_REG_INVARIANTS) +__naked void bounds_refinement_tnum_umin(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + r0 |= 0xe0; \ + r0 &= 0xf0; \ + if r0 == 0xf0 goto +2; \ + if r0 == 0xe0 goto +1; \ + r10 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* This test covers the bounds deduction when the only possible tnum value is + * in the middle of the u64 range. After instruction 3, the ranges look as + * follows: + * + * 0 umin=0x7cf umax=0x7df U64_MAX + * | [xxxxxxxxxxxx] | + * |----------------------------|------------------------------| + * | x x x x x | tnum values + * | +--- 0x7e0 + * +--- 0x7d0 + * + * Since the lower four bits are zero, the tnum and the u64 range only overlap + * in R0=0x7d0=2000. Instruction 5 is therefore dead code. + */ +SEC("socket") +__description("bounds refinement with single-value tnum in middle of range") +__msg("3: (a5) if r0 < 0x7cf {{.*}} R0=2000") +__success __log_level(2) +__naked void bounds_refinement_tnum_middle(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + if r0 & 0x0f goto +4; \ + if r0 > 0x7df goto +3; \ + if r0 < 0x7cf goto +2; \ + if r0 == 0x7d0 goto +1; \ + r10 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* This test covers the negative case for the tnum/u64 overlap. Since + * they contain the same two values (i.e., {0, 1}), we can't deduce + * anything more.
+ */ +SEC("socket") +__description("bounds refinement: several overlaps between tnum and u64") +__msg("2: (25) if r0 > 0x1 {{.*}} R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=1,var_off=(0x0; 0x1))") +__failure __log_level(2) +__naked void bounds_refinement_several_overlaps(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + if r0 < 0 goto +3; \ + if r0 > 1 goto +2; \ + if r0 == 1 goto +1; \ + r10 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* This test covers the negative case for the tnum/u64 overlap. Since + * they overlap in the two values contained by the u64 range (i.e., + * {0xf, 0x10}), we can't deduce anything more. + */ +SEC("socket") +__description("bounds refinement: multiple overlaps between tnum and u64") +__msg("2: (25) if r0 > 0x10 {{.*}} R0=scalar(smin=umin=smin32=umin32=15,smax=umax=smax32=umax32=16,var_off=(0x0; 0x1f))") +__failure __log_level(2) +__naked void bounds_refinement_multiple_overlaps(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + if r0 < 0xf goto +3; \ + if r0 > 0x10 goto +2; \ + if r0 == 0x10 goto +1; \ + r10 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__success +__flag(BPF_F_TEST_REG_INVARIANTS) +__naked void signed_unsigned_intersection32_case1(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + w0 &= 0xffffffff; \ + if w0 < 0x3 goto 1f; /* on fall-through u32 range [3..U32_MAX] */ \ + if w0 s> 0x1 goto 1f; /* on fall-through s32 range [S32_MIN..1] */ \ + if w0 s< 0x0 goto 1f; /* range can be narrowed to [S32_MIN..-1] */ \ + r10 = 0; /* thus predicting the jump. */ \ +1: exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__success +__flag(BPF_F_TEST_REG_INVARIANTS) +__naked void signed_unsigned_intersection32_case2(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + w0 &= 0xffffffff; \ + if w0 > 0x80000003 goto 1f; /* on fall-through u32 range [0..S32_MIN+3] */ \ + if w0 s< -3 goto 1f; /* on fall-through s32 range [-3..S32_MAX] */ \ + if w0 s> 5 goto 1f; /* on fall-through s32 range [-3..5] */ \ + if w0 <= 5 goto 1f; /* range can be narrowed to [0..5] */ \ + r10 = 0; /* thus predicting the jump */ \ +1: exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("maybe_fork_scalars: OR with constant rejects OOB") +__failure __msg("invalid access to map value") +__naked void or_scalar_fork_rejects_oob(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r9 = r0; \ + r6 = *(u64*)(r9 + 0); \ + r6 s>>= 63; \ + r6 |= 8; \ + /* r6 is -1 (current) or 8 (pushed) */ \ + if r6 s< 0 goto l0_%=; \ + /* pushed path: r6 = 8, OOB for value_size=8 */ \ + r9 += r6; \ + r0 = *(u8*)(r9 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("maybe_fork_scalars: AND with constant still works") +__success __retval(0) +__naked void and_scalar_fork_still_works(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r9 = r0; \ + r6 = *(u64*)(r9 + 0); \ + r6 s>>= 63; \ + r6 &= 4; \ + /* \ + * r6 is 0 (pushed, 0&4==0) or 4 (current) \ + * both within value_size=8 \ + */ \ + if r6 s< 0 goto l0_%=; \ + r9
+= r6; \ + r0 = *(u8*)(r9 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("maybe_fork_scalars: OR with constant allows in-bounds") +__success __retval(0) +__naked void or_scalar_fork_allows_inbounds(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r9 = r0; \ + r6 = *(u64*)(r9 + 0); \ + r6 s>>= 63; \ + r6 |= 4; \ + /* \ + * r6 is -1 (current) or 4 (pushed) \ + * pushed path: r6 = 4, within value_size=8 \ + */ \ + if r6 s< 0 goto l0_%=; \ + r9 += r6; \ + r0 = *(u8*)(r9 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + char _license[] SEC("license") = "GPL";
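The single-value deductions in the tests above can be cross-checked by brute force: v is a member of tnum (value, mask) iff (v & ~mask) == value, so enumerating the subsets of mask and filtering by [umin, umax] shows when exactly one candidate survives. A host-side sketch mirroring the first test (after r0 |= 0xe0; r0 &= 0xf0 the tnum is (0xe0, 0x10), and the fall-through of "if r0 == 0xe0" leaves the range [0xe1, 0xf0]; names are local to the sketch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t value = 0xe0, mask = 0x10;	/* tnum candidates: 0xe0, 0xf0 */
	uint64_t umin = 0xe1, umax = 0xf0;	/* u64 range after r0 != 0xe0 */
	uint64_t sub = 0, v, hits = 0, last = 0;

	for (;;) {
		v = value | sub;
		if (v >= umin && v <= umax) {
			hits++;
			last = v;
		}
		sub = (sub - mask) & mask;	/* next subset of mask */
		if (!sub)
			break;
	}

	if (hits == 1)			/* prints: deduced constant: 240 */
		printf("deduced constant: %llu\n", (unsigned long long)last);
	return 0;
}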
diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c index 4b779de..cffaf36 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bswap.c +++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c
@@ -91,6 +91,28 @@ BSWAP_RANGE_TEST(le32_range, "le32", 0x3f00, 0x3f0000) BSWAP_RANGE_TEST(le64_range, "le64", 0x3f00, 0x3f000000000000) #endif +SEC("socket") +__description("BSWAP, reset reg id") +__failure __msg("math between fp pointer and register with unbounded min value is not allowed") +__naked void bswap_reset_reg_id(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + r1 = r0; \ + r0 = be16 r0; \ + if r0 != 1 goto l0_%=; \ + r2 = r10; \ + r2 += -512; \ + r2 += r1; \ + *(u8 *)(r2 + 0) = 0; \ +l0_%=: \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + #else SEC("socket")
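The new bswap test encodes a subtle rule: "r1 = r0" links the two registers under one id, but "r0 = be16 r0" changes r0's value, so the id must be reset or the later bound r0 == 1 would be wrongly propagated to the untouched r1, permitting unbounded stack-pointer arithmetic. On a little-endian host the divergence is easy to see (host-side sketch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t r1 = 0x0100;				/* value before the swap */
	uint64_t r0 = __builtin_bswap16((uint16_t)r1);	/* be16 on LE: r0 = 1 */

	/* r0 == 1, yet r1 == 256: a bound proven on r0 after the swap
	 * says nothing about r1, hence the verifier must drop the link
	 */
	printf("r0=%llu r1=%llu\n",
	       (unsigned long long)r0, (unsigned long long)r1);
	return 0;
}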
diff --git a/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c index 2ef346c8..f4f8a05 100644 --- a/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c +++ b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c
@@ -348,6 +348,114 @@ l0_%=: \ : __clobber_all); } +/* + * Test that sync_linked_regs() checks reg->id (the linked target register) + * for BPF_ADD_CONST32 rather than known_reg->id (the branch register). + */ +SEC("socket") +__success +__naked void scalars_alu32_zext_linked_reg(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + w6 = w0; /* r6 in [0, 0xFFFFFFFF] */ \ + r7 = r6; /* linked: same id as r6 */ \ + w7 += 1; /* alu32: r7.id |= BPF_ADD_CONST32 */ \ + r8 = 0xFFFFffff ll; \ + if r6 < r8 goto l0_%=; \ + /* r6 in [0xFFFFFFFF, 0xFFFFFFFF] */ \ + /* sync_linked_regs: known_reg=r6, reg=r7 */ \ + /* CPU: w7 = (u32)(0xFFFFFFFF + 1) = 0, zext -> r7 = 0 */ \ + /* With fix: r7 64-bit = [0, 0] (zext applied) */ \ + /* Without fix: r7 64-bit = [0x100000000] (no zext) */ \ + r7 >>= 32; \ + if r7 == 0 goto l0_%=; \ + r0 /= 0; /* unreachable with fix */ \ +l0_%=: \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* + * Test that sync_linked_regs() skips propagation when one register used + * alu32 (BPF_ADD_CONST32) and the other used alu64 (BPF_ADD_CONST64). + * The delta relationship doesn't hold across different ALU widths. + */ +SEC("socket") +__failure __msg("div by zero") +__naked void scalars_alu32_alu64_cross_type(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + w6 = w0; /* r6 in [0, 0xFFFFFFFF] */ \ + r7 = r6; /* linked: same id as r6 */ \ + w7 += 1; /* alu32: BPF_ADD_CONST32, delta = 1 */ \ + r8 = r6; /* linked: same id as r6 */ \ + r8 += 2; /* alu64: BPF_ADD_CONST64, delta = 2 */ \ + r9 = 0xFFFFffff ll; \ + if r7 < r9 goto l0_%=; \ + /* r7 = 0xFFFFFFFF */ \ + /* sync: known_reg=r7 (ADD_CONST32), reg=r8 (ADD_CONST64) */ \ + /* Without fix: r8 = zext(0xFFFFFFFF + 1) = 0 */ \ + /* With fix: r8 stays [2, 0x100000001] (r8 >= 2) */ \ + if r8 > 0 goto l1_%=; \ + goto l0_%=; \ +l1_%=: \ + r0 /= 0; /* div by zero */ \ +l0_%=: \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* + * Test that regsafe() prevents pruning when two paths reach the same program + * point with linked registers carrying different ADD_CONST flags (one + * BPF_ADD_CONST32 from alu32, another BPF_ADD_CONST64 from alu64). + */ +SEC("socket") +__failure __msg("div by zero") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void scalars_alu32_alu64_regsafe_pruning(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + w6 = w0; /* r6 in [0, 0xFFFFFFFF] */ \ + r7 = r6; /* linked: same id as r6 */ \ + /* Get another random value for the path branch */ \ + call %[bpf_get_prandom_u32]; \ + if r0 > 0 goto l_pathb_%=; \ + /* Path A: alu32 */ \ + w7 += 1; /* BPF_ADD_CONST32, delta = 1 */\ + goto l_merge_%=; \ +l_pathb_%=: \ + /* Path B: alu64 */ \ + r7 += 1; /* BPF_ADD_CONST64, delta = 1 */\ +l_merge_%=: \ + /* Merge point: regsafe() compares path B against cached path A. 
*/ \ + /* Narrow r6 to trigger sync_linked_regs for r7 */ \ + r9 = 0xFFFFffff ll; \ + if r6 < r9 goto l0_%=; \ + /* r6 = 0xFFFFFFFF */ \ + /* sync: r7 = 0xFFFFFFFF + 1 = 0x100000000 */ \ + /* Path A: zext -> r7 = 0 */ \ + /* Path B: no zext -> r7 = 0x100000000 */ \ + r7 >>= 32; \ + if r7 == 0 goto l0_%=; \ + r0 /= 0; /* div by zero on path B */ \ +l0_%=: \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + SEC("socket") __success void alu32_negative_offset(void) @@ -363,4 +471,68 @@ void alu32_negative_offset(void) __sink(path[0]); } +void dummy_calls(void) +{ + bpf_iter_num_new(0, 0, 0); + bpf_iter_num_next(0); + bpf_iter_num_destroy(0); +} + +SEC("socket") +__success +__flag(BPF_F_TEST_STATE_FREQ) +int spurious_precision_marks(void *ctx) +{ + struct bpf_iter_num iter; + + asm volatile( + "r1 = %[iter];" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "1:" + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto 4f;" + "r7 = *(u32 *)(r0 + 0);" + "r8 = *(u32 *)(r0 + 0);" + /* This jump can't be predicted and does not change r7 or r8 state. */ + "if r7 > r8 goto 2f;" + /* Branch explored first ties r2 and r7 as having the same id. */ + "r2 = r7;" + "goto 3f;" + "2:" + /* Branch explored second does not tie r2 and r7 but has a function call. */ + "call %[bpf_get_prandom_u32];" + "3:" + /* + * A checkpoint. + * When first branch is explored, this would inject linked registers + * r2 and r7 into the jump history. + * When second branch is explored, this would be a cache hit point, + * triggering propagate_precision(). + */ + "if r7 <= 42 goto +0;" + /* + * Mark r7 as precise using an if condition that is always true. + * When reached via the second branch, this triggered a bug in the backtrack_insn() + * because r2 (tied to r7) was propagated as precise to a call. + */ + "if r7 <= 0xffffFFFF goto +0;" + "goto 1b;" + "4:" + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_common, "r7", "r8" + ); + + return 0; +} + char _license[] SEC("license") = "GPL";
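The width-mixing cases above come down to one arithmetic fact: a 32-bit BPF ALU op wraps at 2^32 and zero-extends its result, while the 64-bit op carries into bit 32, so a shared delta between linked registers stops being meaningful the moment the widths differ. In host C terms (sketch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t r6 = 0xFFFFFFFFull;

	/* w7 += 1: 32-bit add wraps, result is zero-extended to 64 bits */
	uint64_t alu32 = (uint64_t)(uint32_t)((uint32_t)r6 + 1);	/* 0 */
	/* r7 += 1: 64-bit add carries into bit 32 */
	uint64_t alu64 = r6 + 1;				/* 0x100000000 */

	printf("alu32=%#llx alu64=%#llx\n",
	       (unsigned long long)alu32, (unsigned long long)alu64);
	return 0;
}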
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c index 1fe090c..4794903 100644 --- a/tools/testing/selftests/bpf/progs/verifier_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -5,6 +5,13 @@ #include "../../../include/linux/filter.h" #include "bpf_misc.h" +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} precision_map SEC(".maps"); + SEC("?raw_tp") __success __log_level(2) __msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10") @@ -301,4 +308,338 @@ __naked int bpf_neg_5(void) ::: __clobber_all); } +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r2 stack= before 4: (bf) r3 = r10") +__msg("mark_precise: frame0: regs=r2 stack= before 3: (db) r2 = atomic64_fetch_add((u64 *)(r10 -8), r2)") +__msg("mark_precise: frame0: regs= stack=-8 before 2: (b7) r2 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1") +__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8") +__naked int bpf_atomic_fetch_add_precision(void) +{ + asm volatile ( + "r1 = 8;" + "*(u64 *)(r10 - 8) = r1;" + "r2 = 0;" + ".8byte %[fetch_add_insn];" /* r2 = atomic_fetch_add(*(u64 *)(r10 - 8), r2) */ + "r3 = r10;" + "r3 += r2;" /* mark_precise */ + "r0 = 0;" + "exit;" + : + : __imm_insn(fetch_add_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_2, -8)) + : __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r2 stack= before 4: (bf) r3 = r10") +__msg("mark_precise: frame0: regs=r2 stack= before 3: (db) r2 = atomic64_xchg((u64 *)(r10 -8), r2)") +__msg("mark_precise: frame0: regs= stack=-8 before 2: (b7) r2 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1") +__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8") +__naked int bpf_atomic_xchg_precision(void) +{ + asm volatile ( + "r1 = 8;" + "*(u64 *)(r10 - 8) = r1;" + "r2 = 0;" + ".8byte %[xchg_insn];" /* r2 = atomic_xchg(*(u64 *)(r10 - 8), r2) */ + "r3 = r10;" + "r3 += r2;" /* mark_precise */ + "r0 = 0;" + "exit;" + : + : __imm_insn(xchg_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_10, BPF_REG_2, -8)) + : __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r2 stack= before 4: (bf) r3 = r10") +__msg("mark_precise: frame0: regs=r2 stack= before 3: (db) r2 = atomic64_fetch_or((u64 *)(r10 -8), r2)") +__msg("mark_precise: frame0: regs= stack=-8 before 2: (b7) r2 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1") +__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8") +__naked int bpf_atomic_fetch_or_precision(void) +{ + asm volatile ( + "r1 = 8;" + "*(u64 *)(r10 - 8) = r1;" + "r2 = 0;" + ".8byte %[fetch_or_insn];" /* r2 = atomic_fetch_or(*(u64 *)(r10 - 8), r2) */ + "r3 = r10;" + "r3 += r2;" /* mark_precise */ + "r0 = 0;" + "exit;" + : + : __imm_insn(fetch_or_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_OR | BPF_FETCH, BPF_REG_10, BPF_REG_2, -8)) + : __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r2 stack= before 4: (bf) r3 = r10") +__msg("mark_precise: frame0: regs=r2 stack= before 3: (db) r2 = atomic64_fetch_and((u64 *)(r10 -8), r2)") +__msg("mark_precise: frame0: regs= stack=-8 before 2: (b7) r2 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1") +__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8") +__naked int bpf_atomic_fetch_and_precision(void) +{ + asm volatile ( + "r1 = 8;" + "*(u64 *)(r10 - 8) = r1;" + "r2 = 0;" + ".8byte %[fetch_and_insn];" /* r2 = atomic_fetch_and(*(u64 *)(r10 - 8), 
r2) */ + "r3 = r10;" + "r3 += r2;" /* mark_precise */ + "r0 = 0;" + "exit;" + : + : __imm_insn(fetch_and_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_10, BPF_REG_2, -8)) + : __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r2 stack= before 4: (bf) r3 = r10") +__msg("mark_precise: frame0: regs=r2 stack= before 3: (db) r2 = atomic64_fetch_xor((u64 *)(r10 -8), r2)") +__msg("mark_precise: frame0: regs= stack=-8 before 2: (b7) r2 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1") +__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8") +__naked int bpf_atomic_fetch_xor_precision(void) +{ + asm volatile ( + "r1 = 8;" + "*(u64 *)(r10 - 8) = r1;" + "r2 = 0;" + ".8byte %[fetch_xor_insn];" /* r2 = atomic_fetch_xor(*(u64 *)(r10 - 8), r2) */ + "r3 = r10;" + "r3 += r2;" /* mark_precise */ + "r0 = 0;" + "exit;" + : + : __imm_insn(fetch_xor_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_XOR | BPF_FETCH, BPF_REG_10, BPF_REG_2, -8)) + : __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r3 = r10") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (db) r0 = atomic64_cmpxchg((u64 *)(r10 -8), r0, r2)") +__msg("mark_precise: frame0: regs= stack=-8 before 3: (b7) r2 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 2: (b7) r0 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1") +__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8") +__naked int bpf_atomic_cmpxchg_precision(void) +{ + asm volatile ( + "r1 = 8;" + "*(u64 *)(r10 - 8) = r1;" + "r0 = 0;" + "r2 = 0;" + ".8byte %[cmpxchg_insn];" /* r0 = atomic_cmpxchg(*(u64 *)(r10 - 8), r0, r2) */ + "r3 = r10;" + "r3 += r0;" /* mark_precise */ + "r0 = 0;" + "exit;" + : + : __imm_insn(cmpxchg_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_2, -8)) + : __clobber_all); +} + +/* Regression test for dual precision: Both the fetched value (r2) and + * a reread of the same stack slot (r3) are tracked for precision. After + * the atomic operation, the stack slot is STACK_MISC. Thus, the ldx at + * insn 4 does NOT set INSN_F_STACK_ACCESS. Precision for the stack slot + * propagates solely through the atomic fetch's load side (insn 3). 
+ */ +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r2,r3 stack= before 4: (79) r3 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs=r2 stack= before 3: (db) r2 = atomic64_fetch_add((u64 *)(r10 -8), r2)") +__msg("mark_precise: frame0: regs= stack=-8 before 2: (b7) r2 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1") +__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8") +__naked int bpf_atomic_fetch_add_dual_precision(void) +{ + asm volatile ( + "r1 = 8;" + "*(u64 *)(r10 - 8) = r1;" + "r2 = 0;" + ".8byte %[fetch_add_insn];" /* r2 = atomic_fetch_add(*(u64 *)(r10 - 8), r2) */ + "r3 = *(u64 *)(r10 - 8);" + "r4 = r2;" + "r4 += r3;" + "r4 &= 7;" + "r5 = r10;" + "r5 += r4;" /* mark_precise */ + "r0 = 0;" + "exit;" + : + : __imm_insn(fetch_add_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_2, -8)) + : __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r0,r3 stack= before 5: (79) r3 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (db) r0 = atomic64_cmpxchg((u64 *)(r10 -8), r0, r2)") +__msg("mark_precise: frame0: regs= stack=-8 before 3: (b7) r2 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 2: (b7) r0 = 8") +__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1") +__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8") +__naked int bpf_atomic_cmpxchg_dual_precision(void) +{ + asm volatile ( + "r1 = 8;" + "*(u64 *)(r10 - 8) = r1;" + "r0 = 8;" + "r2 = 0;" + ".8byte %[cmpxchg_insn];" /* r0 = atomic_cmpxchg(*(u64 *)(r10 - 8), r0, r2) */ + "r3 = *(u64 *)(r10 - 8);" + "r4 = r0;" + "r4 += r3;" + "r4 &= 7;" + "r5 = r10;" + "r5 += r4;" /* mark_precise */ + "r0 = 0;" + "exit;" + : + : __imm_insn(cmpxchg_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_2, -8)) + : __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r1 stack= before 10: (57) r1 &= 7") +__msg("mark_precise: frame0: regs=r1 stack= before 9: (db) r1 = atomic64_fetch_add((u64 *)(r0 +0), r1)") +__not_msg("falling back to forcing all scalars precise") +__naked int bpf_atomic_fetch_add_map_precision(void) +{ + asm volatile ( + "r1 = 0;" + "*(u64 *)(r10 - 8) = r1;" + "r2 = r10;" + "r2 += -8;" + "r1 = %[precision_map] ll;" + "call %[bpf_map_lookup_elem];" + "if r0 == 0 goto 1f;" + "r1 = 0;" + ".8byte %[fetch_add_insn];" /* r1 = atomic_fetch_add(*(u64 *)(r0 + 0), r1) */ + "r1 &= 7;" + "r2 = r10;" + "r2 += r1;" /* mark_precise */ + "1: r0 = 0;" + "exit;" + : + : __imm_addr(precision_map), + __imm(bpf_map_lookup_elem), + __imm_insn(fetch_add_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_0, BPF_REG_1, 0)) + : __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r0 stack= before 12: (57) r0 &= 7") +__msg("mark_precise: frame0: regs=r0 stack= before 11: (db) r0 = atomic64_cmpxchg((u64 *)(r6 +0), r0, r1)") +__not_msg("falling back to forcing all scalars precise") +__naked int bpf_atomic_cmpxchg_map_precision(void) +{ + asm volatile ( + "r1 = 0;" + "*(u64 *)(r10 - 8) = r1;" + "r2 = r10;" + "r2 += -8;" + "r1 = %[precision_map] ll;" + "call %[bpf_map_lookup_elem];" + "if r0 == 0 goto 1f;" + "r6 = r0;" + "r0 = 0;" + "r1 = 0;" + ".8byte %[cmpxchg_insn];" /* r0 = atomic_cmpxchg(*(u64 *)(r6 + 0), r0, r1) */ + "r0 &= 7;" + "r2 = r10;" + "r2 += r0;" /* mark_precise */ + "1: r0 = 0;" + "exit;" + : + : 
__imm_addr(precision_map), + __imm(bpf_map_lookup_elem), + __imm_insn(cmpxchg_insn, + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_6, BPF_REG_1, 0)) + : __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r1 stack= before 10: (57) r1 &= 7") +__msg("mark_precise: frame0: regs=r1 stack= before 9: (c3) r1 = atomic_fetch_add((u32 *)(r0 +0), r1)") +__not_msg("falling back to forcing all scalars precise") +__naked int bpf_atomic_fetch_add_32bit_precision(void) +{ + asm volatile ( + "r1 = 0;" + "*(u64 *)(r10 - 8) = r1;" + "r2 = r10;" + "r2 += -8;" + "r1 = %[precision_map] ll;" + "call %[bpf_map_lookup_elem];" + "if r0 == 0 goto 1f;" + "r1 = 0;" + ".8byte %[fetch_add_insn];" /* r1 = atomic_fetch_add(*(u32 *)(r0 + 0), r1) */ + "r1 &= 7;" + "r2 = r10;" + "r2 += r1;" /* mark_precise */ + "1: r0 = 0;" + "exit;" + : + : __imm_addr(precision_map), + __imm(bpf_map_lookup_elem), + __imm_insn(fetch_add_insn, + BPF_ATOMIC_OP(BPF_W, BPF_ADD | BPF_FETCH, BPF_REG_0, BPF_REG_1, 0)) + : __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r0 stack= before 12: (57) r0 &= 7") +__msg("mark_precise: frame0: regs=r0 stack= before 11: (c3) r0 = atomic_cmpxchg((u32 *)(r6 +0), r0, r1)") +__not_msg("falling back to forcing all scalars precise") +__naked int bpf_atomic_cmpxchg_32bit_precision(void) +{ + asm volatile ( + "r1 = 0;" + "*(u64 *)(r10 - 8) = r1;" + "r2 = r10;" + "r2 += -8;" + "r1 = %[precision_map] ll;" + "call %[bpf_map_lookup_elem];" + "if r0 == 0 goto 1f;" + "r6 = r0;" + "r0 = 0;" + "r1 = 0;" + ".8byte %[cmpxchg_insn];" /* r0 = atomic_cmpxchg(*(u32 *)(r6 + 0), r0, r1) */ + "r0 &= 7;" + "r2 = r10;" + "r2 += r0;" /* mark_precise */ + "1: r0 = 0;" + "exit;" + : + : __imm_addr(precision_map), + __imm(bpf_map_lookup_elem), + __imm_insn(cmpxchg_insn, + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_6, BPF_REG_1, 0)) + : __clobber_all); +} + char _license[] SEC("license") = "GPL";
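For reference, a minimal sketch (not part of the patch) of what the ".8byte %[insn]" immediates above expand to. The tests embed raw instructions because BPF inline assembly has no mnemonic for every atomic fetch variant; field names follow the UAPI struct bpf_insn, and the BPF_ATOMIC_OP() layout is assumed to match tools/include/linux/filter.h.

#include <linux/bpf.h>

/* BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_2, -8),
 * written out long-hand: r2 = atomic_fetch_add((u64 *)(r10 - 8), r2)
 */
static const struct bpf_insn fetch_add_fp_minus_8 = {
	.code    = BPF_STX | BPF_SIZE(BPF_DW) | BPF_ATOMIC,
	.dst_reg = BPF_REG_10,          /* memory operand: stack slot fp[-8] */
	.src_reg = BPF_REG_2,           /* addend, and destination of the fetch */
	.off     = -8,
	.imm     = BPF_ADD | BPF_FETCH, /* the atomic opcode lives in imm */
};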
diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c index 3072fee..58c7704 100644 --- a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c +++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
@@ -40,6 +40,9 @@ __naked void linked_regs_bpf_k(void) */ "r3 = r10;" "r3 += r0;" + /* Mark r1 and r2 as alive. */ + "r1 = r1;" + "r2 = r2;" "r0 = 0;" "exit;" : @@ -73,6 +76,9 @@ __naked void linked_regs_bpf_x_src(void) */ "r4 = r10;" "r4 += r0;" + /* Mark r1 and r2 as alive. */ + "r1 = r1;" + "r2 = r2;" "r0 = 0;" "exit;" : @@ -106,6 +112,10 @@ __naked void linked_regs_bpf_x_dst(void) */ "r4 = r10;" "r4 += r3;" + /* Mark r1 and r2 as alive. */ + "r0 = r0;" + "r1 = r1;" + "r2 = r2;" "r0 = 0;" "exit;" : @@ -143,6 +153,9 @@ __naked void linked_regs_broken_link(void) */ "r3 = r10;" "r3 += r0;" + /* Mark r1 and r2 as alive. */ + "r1 = r1;" + "r2 = r2;" "r0 = 0;" "exit;" : @@ -156,16 +169,16 @@ __naked void linked_regs_broken_link(void) */ SEC("socket") __success __log_level(2) -__msg("12: (0f) r2 += r1") +__msg("17: (0f) r2 += r1") /* Current state */ -__msg("frame2: last_idx 12 first_idx 11 subseq_idx -1 ") -__msg("frame2: regs=r1 stack= before 11: (bf) r2 = r10") +__msg("frame2: last_idx 17 first_idx 14 subseq_idx -1 ") +__msg("frame2: regs=r1 stack= before 16: (bf) r2 = r10") __msg("frame2: parent state regs=r1 stack=") __msg("frame1: parent state regs= stack=") __msg("frame0: parent state regs= stack=") /* Parent state */ -__msg("frame2: last_idx 10 first_idx 10 subseq_idx 11 ") -__msg("frame2: regs=r1 stack= before 10: (25) if r1 > 0x7 goto pc+0") +__msg("frame2: last_idx 13 first_idx 13 subseq_idx 14 ") +__msg("frame2: regs=r1 stack= before 13: (25) if r1 > 0x7 goto pc+0") __msg("frame2: parent state regs=r1 stack=") /* frame1.r{6,7} are marked because mark_precise_scalar_ids() * looks for all registers with frame2.r1.id in the current state @@ -173,20 +186,20 @@ __msg("frame2: parent state regs=r1 stack=") __msg("frame1: parent state regs=r6,r7 stack=") __msg("frame0: parent state regs=r6 stack=") /* Parent state */ -__msg("frame2: last_idx 8 first_idx 8 subseq_idx 10") -__msg("frame2: regs=r1 stack= before 8: (85) call pc+1") +__msg("frame2: last_idx 9 first_idx 9 subseq_idx 13") +__msg("frame2: regs=r1 stack= before 9: (85) call pc+3") /* frame1.r1 is marked because of backtracking of call instruction */ __msg("frame1: parent state regs=r1,r6,r7 stack=") __msg("frame0: parent state regs=r6 stack=") /* Parent state */ -__msg("frame1: last_idx 7 first_idx 6 subseq_idx 8") -__msg("frame1: regs=r1,r6,r7 stack= before 7: (bf) r7 = r1") -__msg("frame1: regs=r1,r6 stack= before 6: (bf) r6 = r1") +__msg("frame1: last_idx 8 first_idx 7 subseq_idx 9") +__msg("frame1: regs=r1,r6,r7 stack= before 8: (bf) r7 = r1") +__msg("frame1: regs=r1,r6 stack= before 7: (bf) r6 = r1") __msg("frame1: parent state regs=r1 stack=") __msg("frame0: parent state regs=r6 stack=") /* Parent state */ -__msg("frame1: last_idx 4 first_idx 4 subseq_idx 6") -__msg("frame1: regs=r1 stack= before 4: (85) call pc+1") +__msg("frame1: last_idx 4 first_idx 4 subseq_idx 7") +__msg("frame1: regs=r1 stack= before 4: (85) call pc+2") __msg("frame0: parent state regs=r1,r6 stack=") /* Parent state */ __msg("frame0: last_idx 3 first_idx 1 subseq_idx 4") @@ -204,6 +217,7 @@ __naked void precision_many_frames(void) "r1 = r0;" "r6 = r0;" "call precision_many_frames__foo;" + "r6 = r6;" /* mark r6 as live */ "exit;" : : __imm(bpf_ktime_get_ns) @@ -220,6 +234,8 @@ void precision_many_frames__foo(void) "r6 = r1;" "r7 = r1;" "call precision_many_frames__bar;" + "r6 = r6;" /* mark r6 as live */ + "r7 = r7;" /* mark r7 as live */ "exit" ::: __clobber_all); } @@ -229,6 +245,8 @@ void precision_many_frames__bar(void) { asm volatile ( "if r1 > 7 
goto +0;" + "r6 = 0;" /* mark r6 as live */ + "r7 = 0;" /* mark r7 as live */ /* force r1 to be precise, this eventually marks: * - bar frame r1 * - foo frame r{1,6,7} @@ -340,6 +358,8 @@ __naked void precision_two_ids(void) "r3 += r7;" /* force r9 to be precise, this also marks r8 */ "r3 += r9;" + "r6 = r6;" /* mark r6 as live */ + "r8 = r8;" /* mark r8 as live */ "exit;" : : __imm(bpf_ktime_get_ns) @@ -353,7 +373,7 @@ __flag(BPF_F_TEST_STATE_FREQ) * collect_linked_regs() can't tie more than 6 registers for a single insn. */ __msg("8: (25) if r0 > 0x7 goto pc+0 ; R0=scalar(id=1") -__msg("9: (bf) r6 = r6 ; R6=scalar(id=2") +__msg("14: (bf) r6 = r6 ; R6=scalar(id=2") /* check that r{0-5} are marked precise after 'if' */ __msg("frame0: regs=r0 stack= before 8: (25) if r0 > 0x7 goto pc+0") __msg("frame0: parent state regs=r0,r1,r2,r3,r4,r5 stack=:") @@ -372,6 +392,12 @@ __naked void linked_regs_too_many_regs(void) "r6 = r0;" /* propagate range for r{0-6} */ "if r0 > 7 goto +0;" + /* keep r{1-5} live */ + "r1 = r1;" + "r2 = r2;" + "r3 = r3;" + "r4 = r4;" + "r5 = r5;" /* make r6 appear in the log */ "r6 = r6;" /* force r0 to be precise, @@ -517,7 +543,7 @@ __naked void check_ids_in_regsafe_2(void) "*(u64*)(r10 - 8) = r1;" /* r9 = pointer to stack */ "r9 = r10;" - "r9 += -8;" + "r9 += -16;" /* r8 = ktime_get_ns() */ "call %[bpf_ktime_get_ns];" "r8 = r0;" @@ -538,6 +564,8 @@ __naked void check_ids_in_regsafe_2(void) "if r7 > 4 goto l2_%=;" /* Access memory at r9[r6] */ "r9 += r6;" + "r9 += r7;" + "r9 += r8;" "r0 = *(u8*)(r9 + 0);" "l2_%=:" "r0 = 0;"
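The hunks above pepper the tests with "rN = rN" self-moves. The apparent intent: a register that is never read again can be pruned by the verifier's liveness tracking, which would change the precision log these tests match on, and a self-move is the cheapest instruction that reads a register without changing its value. A minimal sketch in the same idiom (SEC/__naked/__imm/__clobber_all helpers from the selftests' bpf_misc.h are assumed):

SEC("?raw_tp")
__naked int keep_r6_live(void)
{
	asm volatile (
		"call %[bpf_ktime_get_ns];"
		"r6 = r0;"
		/* Self-move: reads r6, so liveness keeps the register
		 * (and its precision marks) visible to later states.
		 */
		"r6 = r6;"
		"r0 = 0;"
		"exit;"
		:
		: __imm(bpf_ktime_get_ns)
		: __clobber_all);
}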
diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c index 148d229..fd59d57 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sdiv.c +++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
@@ -1209,6 +1209,64 @@ __naked void smod32_ri_divisor_neg_1(void) : __clobber_all); } +SEC("socket") +__description("SDIV32, INT_MIN divided by 2, imm") +__success __success_unpriv __retval(-1073741824) +__naked void sdiv32_int_min_div_2_imm(void) +{ + asm volatile (" \ + w0 = %[int_min]; \ + w0 s/= 2; \ + exit; \ +" : + : __imm_const(int_min, INT_MIN) + : __clobber_all); +} + +SEC("socket") +__description("SDIV32, INT_MIN divided by 2, reg") +__success __success_unpriv __retval(-1073741824) +__naked void sdiv32_int_min_div_2_reg(void) +{ + asm volatile (" \ + w0 = %[int_min]; \ + w1 = 2; \ + w0 s/= w1; \ + exit; \ +" : + : __imm_const(int_min, INT_MIN) + : __clobber_all); +} + +SEC("socket") +__description("SMOD32, INT_MIN modulo 2, imm") +__success __success_unpriv __retval(0) +__naked void smod32_int_min_mod_2_imm(void) +{ + asm volatile (" \ + w0 = %[int_min]; \ + w0 s%%= 2; \ + exit; \ +" : + : __imm_const(int_min, INT_MIN) + : __clobber_all); +} + +SEC("socket") +__description("SMOD32, INT_MIN modulo -2, imm") +__success __success_unpriv __retval(0) +__naked void smod32_int_min_mod_neg2_imm(void) +{ + asm volatile (" \ + w0 = %[int_min]; \ + w0 s%%= -2; \ + exit; \ +" : + : __imm_const(int_min, INT_MIN) + : __clobber_all); +} + + #else SEC("socket")
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 02a85dd..0929f4a 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1261,14 +1261,8 @@ int get_bpf_max_tramp_links(void) return ret; } -#define MAX_BACKTRACE_SZ 128 -void crash_handler(int signum) +static void dump_crash_log(void) { - void *bt[MAX_BACKTRACE_SZ]; - size_t sz; - - sz = backtrace(bt, ARRAY_SIZE(bt)); - fflush(stdout); stdout = env.stdout_saved; stderr = env.stderr_saved; @@ -1277,12 +1271,32 @@ void crash_handler(int signum) env.test_state->error_cnt++; dump_test_log(env.test, env.test_state, true, false, NULL); } +} + +#define MAX_BACKTRACE_SZ 128 + +void crash_handler(int signum) +{ + void *bt[MAX_BACKTRACE_SZ]; + size_t sz; + + sz = backtrace(bt, ARRAY_SIZE(bt)); + + dump_crash_log(); + if (env.worker_id != -1) fprintf(stderr, "[%d]: ", env.worker_id); fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum); backtrace_symbols_fd(bt, sz, STDERR_FILENO); } +#ifdef __SANITIZE_ADDRESS__ +void __asan_on_error(void) +{ + dump_crash_log(); +} +#endif + void hexdump(const char *prefix, const void *buf, size_t len) { for (int i = 0; i < len; i++) { @@ -1799,7 +1813,7 @@ static int worker_main_send_subtests(int sock, struct test_state *state) msg.subtest_done.num = i; - strncpy(msg.subtest_done.name, subtest_state->name, MAX_SUBTEST_NAME); + strscpy(msg.subtest_done.name, subtest_state->name, MAX_SUBTEST_NAME); msg.subtest_done.error_cnt = subtest_state->error_cnt; msg.subtest_done.skipped = subtest_state->skipped; @@ -1944,13 +1958,15 @@ int main(int argc, char **argv) .parser = parse_arg, .doc = argp_program_doc, }; + int err, i; + +#ifndef __SANITIZE_ADDRESS__ struct sigaction sigact = { .sa_handler = crash_handler, .sa_flags = SA_RESETHAND, - }; - int err, i; - + }; sigaction(SIGSEGV, &sigact, NULL); +#endif env.stdout_saved = stdout; env.stderr_saved = stderr;
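The __asan_on_error() definition above hooks a callback that the AddressSanitizer runtime invokes when it detects an error, just before printing its report, which is why the SIGSEGV handler can be skipped under ASan. A standalone sketch of the mechanism (assuming a build with -fsanitize=address; GCC defines __SANITIZE_ADDRESS__ for such builds):

#include <stdio.h>

#ifdef __SANITIZE_ADDRESS__
/* Invoked by the ASan runtime on the first reported error, before the
 * report is printed and the process aborts - a natural place to flush
 * any buffered test output so it is not lost.
 */
void __asan_on_error(void)
{
	fputs("dumping buffered logs before the ASan report\n", stderr);
	fflush(NULL);
}
#endif

int main(void)
{
	volatile int i = 8;
	char buf[8];

	buf[i] = 0;	/* out-of-bounds write: triggers the hook under ASan */
	return buf[0];
}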
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 27db34e..a8ae03c 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1320,7 +1320,7 @@ static bool cmp_str_seq(const char *log, const char *exp) printf("FAIL\nTestcase bug\n"); return false; } - strncpy(needle, exp, len); + memcpy(needle, exp, len); needle[len] = 0; q = strstr(log, needle); if (!q) {
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 16eb37e..66af0d1 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -212,6 +212,7 @@ int parse_test_list_file(const char *path, break; } + free(buf); fclose(f); return err; }
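The leak fixed here is the classic getline() contract: getline() allocates (and reallocates) the line buffer, and the caller owns it even after EOF or an error. A minimal sketch of the pattern, independent of the selftest:

#include <stdio.h>
#include <stdlib.h>

static int count_lines(const char *path)
{
	char *buf = NULL;
	size_t cap = 0;
	int cnt = 0;
	FILE *f = fopen(path, "r");

	if (!f)
		return -1;
	while (getline(&buf, &cap, f) != -1)
		cnt++;
	free(buf);	/* buf was malloc'd/realloc'd by getline() */
	fclose(f);
	return cnt;
}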
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index eeaab70..0e63daf 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -24,12 +24,6 @@ #define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" #define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" -struct ksyms { - struct ksym *syms; - size_t sym_cap; - size_t sym_cnt; -}; - static struct ksyms *ksyms; static pthread_mutex_t ksyms_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -54,6 +48,8 @@ void free_kallsyms_local(struct ksyms *ksyms) if (!ksyms) return; + free(ksyms->filtered_syms); + if (!ksyms->syms) { free(ksyms); return; @@ -610,7 +606,7 @@ static int search_kallsyms_compare(const void *p1, const struct ksym *p2) return compare_name(p1, p2->name); } -int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel) +int bpf_get_ksyms(struct ksyms **ksymsp, bool kernel) { size_t cap = 0, cnt = 0; char *name = NULL, *ksym_name, **syms = NULL; @@ -637,8 +633,10 @@ int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel) else f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); - if (!f) + if (!f) { + free_kallsyms_local(ksyms); return -EINVAL; + } map = hashmap__new(symbol_hash, symbol_equal, NULL); if (IS_ERR(map)) { @@ -679,15 +677,18 @@ int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel) syms[cnt++] = ksym_name; } - *symsp = syms; - *cntp = cnt; + ksyms->filtered_syms = syms; + ksyms->filtered_cnt = cnt; + *ksymsp = ksyms; error: free(name); fclose(f); hashmap__free(map); - if (err) + if (err) { free(syms); + free_kallsyms_local(ksyms); + } return err; }
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index a5576b2..d5bf143 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -23,7 +23,14 @@ struct ksym { long addr; char *name; }; -struct ksyms; + +struct ksyms { + struct ksym *syms; + size_t sym_cap; + size_t sym_cnt; + char **filtered_syms; + size_t filtered_cnt; +}; typedef int (*ksym_cmp_t)(const void *p1, const void *p2); typedef int (*ksym_search_cmp_t)(const void *p1, const struct ksym *p2); @@ -53,7 +60,7 @@ ssize_t get_rel_offset(uintptr_t addr); int read_build_id(const char *path, char *build_id, size_t size); -int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel); +int bpf_get_ksyms(struct ksyms **ksymsp, bool kernel); int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel); #endif
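With struct ksyms now public and bpf_get_ksyms() returning it, a caller reads the filtered list and releases everything through free_kallsyms_local(), which (per the trace_helpers.c hunk above) also frees filtered_syms. A hypothetical caller, sketched under that assumption:

#include <stdbool.h>
#include <stdio.h>
#include "trace_helpers.h"

static int dump_filtered_ksyms(void)
{
	struct ksyms *ksyms = NULL;
	int err;

	err = bpf_get_ksyms(&ksyms, true /* kernel symbols only */);
	if (err)
		return err;

	for (size_t i = 0; i < ksyms->filtered_cnt; i++)
		puts(ksyms->filtered_syms[i]);

	free_kallsyms_local(ksyms);	/* frees syms, filtered_syms and ksyms */
	return 0;
}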
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 061d98f..a924210 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -44,9 +44,9 @@ mark_precise: frame0: regs=r2 stack= before 23\ mark_precise: frame0: regs=r2 stack= before 22\ mark_precise: frame0: regs=r2 stack= before 20\ - mark_precise: frame0: parent state regs=r2,r9 stack=:\ + mark_precise: frame0: parent state regs=r2 stack=:\ mark_precise: frame0: last_idx 19 first_idx 10\ - mark_precise: frame0: regs=r2,r9 stack= before 19\ + mark_precise: frame0: regs=r2 stack= before 19\ mark_precise: frame0: regs=r9 stack= before 18\ mark_precise: frame0: regs=r8,r9 stack= before 17\ mark_precise: frame0: regs=r0,r9 stack= before 15\ @@ -107,9 +107,9 @@ mark_precise: frame0: parent state regs=r2 stack=:\ mark_precise: frame0: last_idx 20 first_idx 20\ mark_precise: frame0: regs=r2 stack= before 20\ - mark_precise: frame0: parent state regs=r2,r9 stack=:\ + mark_precise: frame0: parent state regs=r2 stack=:\ mark_precise: frame0: last_idx 19 first_idx 17\ - mark_precise: frame0: regs=r2,r9 stack= before 19\ + mark_precise: frame0: regs=r2 stack= before 19\ mark_precise: frame0: regs=r9 stack= before 18\ mark_precise: frame0: regs=r8,r9 stack= before 17\ mark_precise: frame0: parent state regs= stack=:",
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 1be1e35..75f85e0 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c
@@ -3378,6 +3378,8 @@ int main(int argc, char **argv) } } free(env.presets[i].atoms); + if (env.presets[i].value.type == ENUMERATOR) + free(env.presets[i].value.svalue); } free(env.presets); return -err;
diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c index 595c791..a27ed663 100644 --- a/tools/testing/selftests/bpf/xdp_features.c +++ b/tools/testing/selftests/bpf/xdp_features.c
@@ -16,6 +16,7 @@ #include <network_helpers.h> +#include "bpf_util.h" #include "xdp_features.skel.h" #include "xdp_features.h" @@ -212,7 +213,7 @@ static void set_env_default(void) env.feature.drv_feature = NETDEV_XDP_ACT_NDO_XMIT; env.feature.action = -EINVAL; env.ifindex = -ENODEV; - strcpy(env.ifname, "unknown"); + strscpy(env.ifname, "unknown"); make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_CTRL_PORT, &env.dut_ctrl_addr, NULL); make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_ECHO_PORT,
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 3d8de0d..6db3b55 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -550,7 +550,7 @@ static int rxq_num(const char *ifname) struct ifreq ifr = { .ifr_data = (void *)&ch, }; - strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1); + strscpy(ifr.ifr_name, ifname); int fd, ret; fd = socket(AF_UNIX, SOCK_DGRAM, 0); @@ -571,7 +571,7 @@ static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *c struct ifreq ifr = { .ifr_data = (void *)cfg, }; - strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1); + strscpy(ifr.ifr_name, ifname); int fd, ret; fd = socket(AF_UNIX, SOCK_DGRAM, 0);
diff --git a/tools/testing/selftests/cgroup/lib/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c index ce6c264..6a72953 100644 --- a/tools/testing/selftests/cgroup/lib/cgroup_util.c +++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c
@@ -123,6 +123,21 @@ int cg_read_strcmp(const char *cgroup, const char *control, return ret; } +int cg_read_strcmp_wait(const char *cgroup, const char *control, + const char *expected) +{ + int i, ret; + + for (i = 0; i < 100; i++) { + ret = cg_read_strcmp(cgroup, control, expected); + if (!ret) + return ret; + usleep(10000); + } + + return ret; +} + int cg_read_strstr(const char *cgroup, const char *control, const char *needle) { char buf[PAGE_SIZE];
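cg_read_strcmp_wait() retries the comparison up to 100 times with a 10 ms sleep, i.e. it waits up to roughly one second for asynchronously updated control files to converge. A hypothetical call site, in the style of the tests below:

#include "cgroup_util.h"

/* "populated 0" only appears some time after the last task exits, so
 * poll for it instead of reading cgroup.events once.
 */
static int wait_until_empty(const char *cgroup)
{
	return cg_read_strcmp_wait(cgroup, "cgroup.events", "populated 0\n");
}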
diff --git a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h index 77f386d..567b108 100644 --- a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h +++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
@@ -61,6 +61,8 @@ extern int cg_read(const char *cgroup, const char *control, char *buf, size_t len); extern int cg_read_strcmp(const char *cgroup, const char *control, const char *expected); +extern int cg_read_strcmp_wait(const char *cgroup, const char *control, + const char *expected); extern int cg_read_strstr(const char *cgroup, const char *control, const char *needle); extern long cg_read_long(const char *cgroup, const char *control);
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 1022625..7b83c7e 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c
@@ -233,7 +233,8 @@ static int test_cgcore_populated(const char *root) if (err) goto cleanup; - if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n")) + if (cg_read_strcmp_wait(cg_test_d, "cgroup.events", + "populated 0\n")) goto cleanup; /* Remove cgroup. */
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 5dff3ad..a56f415 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -196,7 +196,6 @@ # P<v> = set cpus.partition (0:member, 1:root, 2:isolated) # C<l> = add cpu-list to cpuset.cpus # X<l> = add cpu-list to cpuset.cpus.exclusive -# S<p> = use prefix in subtree_control # T = put a task into cgroup # CX<l> = add cpu-list to both cpuset.cpus and cpuset.cpus.exclusive # O<c>=<v> = Write <v> to CPU online file of <c> @@ -209,44 +208,46 @@ # sched-debug matching which includes offline CPUs and single-CPU partitions # while the second one is for matching cpuset.cpus.isolated. # -SETUP_A123_PARTITIONS="C1-3:P1:S+ C2-3:P1:S+ C3:P1" +SETUP_A123_PARTITIONS="C1-3:P1 C2-3:P1 C3:P1" TEST_MATRIX=( # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- - " C0-1 . . C2-3 S+ C4-5 . . 0 A2:0-1" + " C0-1 . . C2-3 . C4-5 . . 0 A2:0-1" " C0-1 . . C2-3 P1 . . . 0 " - " C0-1 . . C2-3 P1:S+ C0-1:P1 . . 0 " - " C0-1 . . C2-3 P1:S+ C1:P1 . . 0 " - " C0-1:S+ . . C2-3 . . . P1 0 " - " C0-1:P1 . . C2-3 S+ C1 . . 0 " - " C0-1:P1 . . C2-3 S+ C1:P1 . . 0 " - " C0-1:P1 . . C2-3 S+ C1:P1 . P1 0 " + " C0-1 . . C2-3 P1 C0-1:P1 . . 0 " + " C0-1 . . C2-3 P1 C1:P1 . . 0 " + " C0-1 . . C2-3 . . . P1 0 " + " C0-1:P1 . . C2-3 . C1 . . 0 " + " C0-1:P1 . . C2-3 . C1:P1 . . 0 " + " C0-1:P1 . . C2-3 . C1:P1 . P1 0 " " C0-1:P1 . . C2-3 C4-5 . . . 0 A1:4-5" - " C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5" " C0-1 . . C2-3:P1 . . . C2 0 " " C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5" - "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3|XA2:2-3" - "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3|XA2:2-3" - "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:|A2:3|XA2:3 A1:P1|A2:P1" - "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0" - "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2" - "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1" + " C0-3:P1 C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3|XA2:2-3" + " C0-3:P1 C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3|XA2:2-3" + " C2-3:P1 C3:P1 . . C3 . . . 0 A1:|A2:3|XA2:3 A1:P1|A2:P1" + " C2-3:P1 C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0" + " C2-3:P1 C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2" + " C2-3:P1 C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1" "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" # CPU offlining cases: - " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1|B1:3" - "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1|A2:3" - "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1|A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0|A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1|A2:2-3" - "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" - "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" - "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" - "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" - "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:|A2:3 A1:P1|A2:P1" - "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1" - "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1" - "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1" + " C0-1 . . C2-3 . C4-5 . O2=0 0 A1:0-1|B1:3" + " C0-3:P1 C2-3:P1 . . O2=0 . . . 0 A1:0-1|A2:3" + " C0-3:P1 C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1|A2:2-3" + " C0-3:P1 C2-3:P1 . . O1=0 . . . 0 A1:0|A2:2-3" + " C0-3:P1 C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1|A2:2-3" + " C2-3:P1 C3:P1 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + " C2-3:P1 C3:P2 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" + " C2-3:P1 C3:P1 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + " C2-3:P1 C3:P2 . . O2=0 O2=1 . . 
0 A1:2|A2:3 A1:P1|A2:P2" + " C2-3:P1 C3:P1 . . O2=0 . . . 0 A1:|A2:3 A1:P1|A2:P1" + " C2-3:P1 C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1" + " C2-3:P1 C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1" + " C2-3:P1 C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1" + " C2-3:P1 C3:P2 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-2" + " C1-3:P1 C3:P2 . . . T:O3=0 . . 0 A1:1-2|A2:1-2 A1:P1|A2:P-2 3|" + " C1-3:P1 C3:P2 . . . T:O3=0 O3=1 . 0 A1:1-2|A2:3 A1:P1|A2:P2 3" "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1" "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1|A2:2|A3: A1:P1|A2:P1|A3:P1" @@ -264,88 +265,87 @@ # # Remote partition and cpuset.cpus.exclusive tests # - " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1|A2:2-3|A3:2-3 A1:P0|A2:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-1|A2:1|A3:1|B1:2-3 A1:P0|A3:P0|B1:P2" - " C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" - " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1|A3:2-3 A2:P2|A3:P2 1-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3|B1:4-5 A3:P2|B1:P2 2-5" - " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4|A2:1-3|A3:1-3 A2:P2 1-3" - " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4|A2:4|A3:2-3 A3:P2 2-3" + " C0-3 C1-3 C2-3 . X2-3 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:2-3" + " C0-3 C1-3 C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1|A2:2-3|A3:2-3 A1:P0|A2:P2 2-3" + " C0-3 C1-3 C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3" + " C0-3 C1-3 C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3 C1-3 C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3 C1-3 C2-3 C2-3 . . . P2 0 A1:0-1|A2:1|A3:1|B1:2-3 A1:P0|A3:P0|B1:P2" + " C0-3 C1-3 C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5" + " C0-3 C1-3 C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" + " C0-3 C1-3 C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" + " C0-3 C1-3 C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1|A3:2-3 A2:P2|A3:P2 1-3" + " C0-3 C1-3 C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3|B1:4-5 A3:P2|B1:P2 2-5" + " C4:X0-3 X1-3 X2-3 . . P2 . . 0 A1:4|A2:1-3|A3:1-3 A2:P2 1-3" + " C4:X0-3 X1-3 X2-3 . . . P2 . 0 A1:4|A2:4|A3:2-3 A3:P2 2-3" # Nested remote/local partition tests - " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4-5 \ + " C0-3 C1-3 C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4-5 \ A1:P0|A2:P1|A3:P2|B1:P1 2-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4 \ + " C0-3 C1-3 C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4 \ A1:P0|A2:P1|A3:P2|B1:P1 2-4|2-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1|A2:2-3|A3:2-3|B1:4 \ + " C0-3 C1-3 C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1|A2:2-3|A3:2-3|B1:4 \ A1:P0|A2:P1|A3:P0|B1:P1" - " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:2|A3:3|B1:4 \ + " C0-3 C1-3 C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:2|A3:3|B1:4 \ A1:P0|A2:P1|A3:P2|B1:P1 2-4|3" - " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1|A2:2-3|A3:4 \ + " C0-4 C1-4 C2-4 . X2-4 X2-4:P2 X4:P1 . 
0 A1:0-1|A2:2-3|A3:4 \ A1:P0|A2:P2|A3:P1 2-4|2-3" - " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1|A2:2|A3:3-4 \ + " C0-4 C1-4 C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1|A2:2|A3:3-4 \ A1:P0|A2:P2|A3:P1 2" - " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ + " C0-4:X2-4 C1-4:X2-4:P2 C2-4:X4:P1 \ . . X5 . . 0 A1:0-4|A2:1-4|A3:2-4 \ A1:P0|A2:P-2|A3:P-1 ." - " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ + " C0-4:X2-4 C1-4:X2-4:P2 C2-4:X4:P1 \ . . . X1 . 0 A1:0-1|A2:2-4|A3:2-4 \ A1:P0|A2:P2|A3:P-1 2-4" # Remote partition offline tests - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1|A2:1|A3:3 A1:P0|A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2|A2:1-2|A3: A1:P0|A3:P2 3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|" + " C0-3 C1-3 C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1|A2:1|A3:3 A1:P0|A3:P2 2-3" + " C0-3 C1-3 C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3 C1-3 C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2|A2:1-2|A3: A1:P0|A3:P2 3" + " C0-3 C1-3 C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|" # An invalidated remote partition cannot self-recover from hotplug - " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2 ." + " C0-3 C1-3 C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2 ." # cpus.exclusive.effective clearing test - " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:" + " C0-3 C1-3 C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:" # Invalid to valid remote partition transition test - " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2 ." - " C0-3:S+ C1-3:X3:P2 - . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3" + " C0-3 C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2 ." + " C0-3 C1-3:X3:P2 . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3" # Invalid to valid local partition direct transition tests - " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3" - " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3" - " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:5-6 A1:P2|B1:P0" - " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3" + " C1-3:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3" + " C1-3:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3" + " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:5-6 A1:P2|B1:P0" + " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3" # Local partition invalidation tests - " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ + " C0-3:X1-3:P2 C1-3:X2-3:P2 C2-3:X3:P2 \ . . . . . 0 A1:1|A2:2|A3:3 A1:P2|A2:P2|A3:P2 1-3" - " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ + " C0-3:X1-3:P2 C1-3:X2-3:P2 C2-3:X3:P2 \ . . X4 . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3" - " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ + " C0-3:X1-3:P2 C1-3:X2-3:P2 C2-3:X3:P2 \ . . C4:X . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3" # Local partition CPU change tests - " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2|A2:3-5 A1:P2|A2:P1 0-2" - " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3|A2:4-5 A1:P2|A2:P1 1-3" + " C0-5:P2 C4-5:P1 . . . C3-5 . . 0 A1:0-2|A2:3-5 A1:P2|A2:P1 0-2" + " C0-5:P2 C4-5:P1 . . C1-5 . . . 0 A1:1-3|A2:4-5 A1:P2|A2:P1 1-3" # cpus_allowed/exclusive_cpus update tests - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ + " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3 \ . X:C4 . P2 . 
0 A1:4|A2:4|XA2:|XA3:|A3:4 \ A1:P0|A3:P-2 ." - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ + " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3 \ . X1 . P2 . 0 A1:0-3|A2:1-3|XA1:1|XA2:|XA3:|A3:2-3 \ A1:P0|A3:P-2 ." - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ + " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3 \ . . X3 P2 . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3 \ A1:P0|A3:P2 3" - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ + " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3:P2 \ . . X3 . . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3|XA3:3 \ A1:P0|A3:P2 3" - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ + " C0-3:X2-3 C1-3:X2-3 C2-3:X2-3:P2 \ . X4 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:4|XA2:|XA3 \ A1:P0|A3:P-2" @@ -356,37 +356,37 @@ # # Adding CPUs to partition root that are not in parent's # cpuset.cpus is allowed, but those extra CPUs are ignored. - "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:|A2:2-3 A1:P1|A2:P1" + " C2-3:P1 C3:P1 . . . C2-4 . . 0 A1:|A2:2-3 A1:P1|A2:P1" # Taking away all CPUs from parent or itself if there are tasks # will make the partition invalid. - "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3|A2:2-3 A1:P1|A2:P-1" - " C3:P1:S+ C3 . . T P1 . . 0 A1:3|A2:3 A1:P1|A2:P-1" + " C2-3:P1 C3:P1 . . T C2-3 . . 0 A1:2-3|A2:2-3 A1:P1|A2:P-1" + " C3:P1 C3 . . T P1 . . 0 A1:3|A2:3 A1:P1|A2:P-1" "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" # Changing a partition root to member makes child partitions invalid - "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3|A2:3 A1:P0|A2:P-1" + " C2-3:P1 C3:P1 . . P0 . . . 0 A1:2-3|A2:3 A1:P0|A2:P-1" "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P0|A3:P-1" # cpuset.cpus can contains cpus not in parent's cpuset.cpus as long # as they overlap. - "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + " C2-3:P1 . . . . C3-4:P1 . . 0 A1:2|A2:3 A1:P1|A2:P1" # Deletion of CPUs distributed to child cgroup is allowed. - "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5|A2:4-5" + " C0-1:P1 C1 . C2-3 C4-5 . . . 0 A1:4-5|A2:4-5" # To become a valid partition root, cpuset.cpus must overlap parent's # cpuset.cpus. - " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1|A2:0-1 A1:P1|A2:P-1" + " C0-1:P1 . . C2-3 . C4-5:P1 . . 0 A1:0-1|A2:0-1 A1:P1|A2:P-1" # Enabling partition with child cpusets is allowed - " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1|A2:1 A1:P1" + " C0-1 C1 . C2-3 P1 . . . 0 A1:0-1|A2:1 A1:P1" # A partition root with non-partition root parent is invalid| but it # can be made valid if its parent becomes a partition root too. - " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2" - " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1" + " C0-1 C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2" + " C0-1 C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1" # A non-exclusive cpuset.cpus change will not invalidate its siblings partition. " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:3 A1:P1|B1:P0" @@ -398,23 +398,23 @@ # Child partition root that try to take all CPUs from parent partition # with tasks will remain invalid. - " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" - " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1|A2:1-4 A1:P1|A2:P1" - " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" + " C1-4:P1 P1 . . . . . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" + " C1-4:P1 P1 . . . C1-4 . . 0 A1|A2:1-4 A1:P1|A2:P1" + " C1-4:P1 P1 . . T C1-4 . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" # Clearing of cpuset.cpus with a preset cpuset.cpus.exclusive shouldn't # affect cpuset.cpus.exclusive.effective. - " C1-4:X3:S+ C1:X3 . . . C . . 
0 A2:1-4|XA2:3" + " C1-4:X3 C1:X3 . . . C . . 0 A2:1-4|XA2:3" # cpuset.cpus can contain CPUs that overlap a sibling cpuset with cpus.exclusive # but creating a local partition out of it is not allowed. Similarly and change # in cpuset.cpus of a local partition that overlaps sibling exclusive CPUs will # invalidate it. - " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1 0 A1:1|A2:2-4|B1:5-6|XB1:5-6 \ + " CX1-4 CX2-4:P2 . C5-6 . . . P1 0 A1:1|A2:2-4|B1:5-6|XB1:5-6 \ A1:P0|A2:P2:B1:P1 2-4" - " CX1-4:S+ CX2-4:P2 . C3-6 . . . P1 0 A1:1|A2:2-4|B1:5-6 \ + " CX1-4 CX2-4:P2 . C3-6 . . . P1 0 A1:1|A2:2-4|B1:5-6 \ A1:P0|A2:P2:B1:P-1 2-4" - " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \ + " CX1-4 CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \ A1:P0|A2:P2:B1:P-1 2-4" # When multiple partitions with conflicting cpuset.cpus are created, the @@ -426,14 +426,14 @@ " C1-3:X1-3 . . C4-5 . . . C1-2 0 A1:1-3|B1:1-2" # cpuset.cpus can become empty with task in it as it inherits parent's effective CPUs - " C1-3:S+ C2 . . . T:C . . 0 A1:1-3|A2:1-3" + " C1-3 C2 . . . T:C . . 0 A1:1-3|A2:1-3" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # Failure cases: # A task cannot be added to a partition with no cpu - "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:|A2:3 A1:P1|A2:P1" + " C2-3:P1 C3:P1 . . O2=0:T . . . 1 A1:|A2:3 A1:P1|A2:P1" # Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3|B1:4-5" @@ -465,31 +465,31 @@ # old-p1 old-p2 old-c11 old-c12 old-c21 old-c22 # new-p1 new-p2 new-c11 new-c12 new-c21 new-c22 ECPUs Pstate ISOLCPUS # ------ ------ ------- ------- ------- ------- ----- ------ -------- - " X1-3:S+ X4-6:S+ X1-2 X3 X4-5 X6 \ + " X1-3 X4-6 X1-2 X3 X4-5 X6 \ . . P2 P2 P2 P2 c11:1-2|c12:3|c21:4-5|c22:6 \ c11:P2|c12:P2|c21:P2|c22:P2 1-6" - " CX1-4:S+ . X1-2:P2 C3 . . \ + " CX1-4 . X1-2:P2 C3 . . \ . . . C3-4 . . p1:3-4|c11:1-2|c12:3-4 \ p1:P0|c11:P2|c12:P0 1-2" - " CX1-4:S+ . X1-2:P2 . . . \ + " CX1-4 . X1-2:P2 . . . \ X2-4 . . . . . p1:1,3-4|c11:2 \ p1:P0|c11:P2 2" - " CX1-5:S+ . X1-2:P2 X3-5:P1 . . \ + " CX1-5 . X1-2:P2 X3-5:P1 . . \ X2-4 . . . . . p1:1,5|c11:2|c12:3-4 \ p1:P0|c11:P2|c12:P1 2" - " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \ + " CX1-4 . X1-2:P2 X3-4:P1 . . \ . . X2 . . . p1:1|c11:2|c12:3-4 \ p1:P0|c11:P2|c12:P1 2" # p1 as member, will get its effective CPUs from its parent rtest - " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \ + " CX1-4 . X1-2:P2 X3-4:P1 . . \ . . X1 CX2-4 . . p1:5-7|c11:1|c12:2-4 \ p1:P0|c11:P2|c12:P1 1" - " CX1-4:S+ X5-6:P1:S+ . . . . \ - . . X1-2:P2 X4-5:P1 . X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \ + " CX1-4 X5-6:P1 . . . . \ + . . X1-2:P2 X4-5:P1 . X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \ p1:P0|p2:P1|c11:P2|c12:P1|c22:P2 \ 1-2,4-6|1-2,5-6" # c12 whose cpuset.cpus CPUs are all granted to c11 will become invalid partition - " C1-5:P1:S+ . C1-4:P1 C2-3 . . \ + " C1-5:P1 . C1-4:P1 C2-3 . . \ . . . P1 . . p1:5|c11:1-4|c12:5 \ p1:P1|c11:P1|c12:P-1" ) @@ -530,7 +530,6 @@ CGRP=$1 STATE=$2 SHOWERR=${3} - CTRL=${CTRL:=$CONTROLLER} HASERR=0 REDIRECT="2> $TMPMSG" [[ -z "$STATE" || "$STATE" = '.' 
]] && return 0 @@ -540,15 +539,16 @@ for CMD in $(echo $STATE | sed -e "s/:/ /g") do TFILE=$CGRP/cgroup.procs - SFILE=$CGRP/cgroup.subtree_control PFILE=$CGRP/cpuset.cpus.partition CFILE=$CGRP/cpuset.cpus XFILE=$CGRP/cpuset.cpus.exclusive - case $CMD in - S*) PREFIX=${CMD#?} - COMM="echo ${PREFIX}${CTRL} > $SFILE" + + # Enable cpuset controller if not enabled yet + [[ -f $CFILE ]] || { + COMM="echo +cpuset > $CGRP/../cgroup.subtree_control" eval $COMM $REDIRECT - ;; + } + case $CMD in X*) CPUS=${CMD#?} COMM="echo $CPUS > $XFILE" @@ -764,7 +764,7 @@ # only CPUs in isolated partitions as well as those that are isolated at # boot time. # -# $1 - expected isolated cpu list(s) <isolcpus1>{,<isolcpus2>} +# $1 - expected isolated cpu list(s) <isolcpus1>{|<isolcpus2>} # <isolcpus1> - expected sched/domains value # <isolcpus2> - cpuset.cpus.isolated value = <isolcpus1> if not defined # @@ -773,6 +773,7 @@ EXPECTED_ISOLCPUS=$1 ISCPUS=${CGROUP2}/cpuset.cpus.isolated ISOLCPUS=$(cat $ISCPUS) + HKICPUS=$(cat /sys/devices/system/cpu/isolated) LASTISOLCPU= SCHED_DOMAINS=/sys/kernel/debug/sched/domains if [[ $EXPECTED_ISOLCPUS = . ]] @@ -811,6 +812,11 @@ EXPECTED_ISOLCPUS=$EXPECTED_SDOMAIN # + # The inverse of HK_TYPE_DOMAIN cpumask in $HKICPUS should match $ISOLCPUS + # + [[ "$ISOLCPUS" != "$HKICPUS" ]] && return 1 + + # # Use the sched domain in debugfs to check isolated CPUs, if available # [[ -d $SCHED_DOMAINS ]] || return 0 @@ -947,7 +953,6 @@ run_state_test() { TEST=$1 - CONTROLLER=cpuset CGROUP_LIST=". A1 A1/A2 A1/A2/A3 B1" RESET_LIST="A1/A2/A3 A1/A2 A1 B1" I=0 @@ -1003,7 +1008,6 @@ run_remote_state_test() { TEST=$1 - CONTROLLER=cpuset [[ -d rtest ]] || mkdir rtest cd rtest echo +cpuset > cgroup.subtree_control
diff --git a/tools/testing/selftests/cgroup/test_kill.c b/tools/testing/selftests/cgroup/test_kill.c index c8c9d30..f6cd23a 100644 --- a/tools/testing/selftests/cgroup/test_kill.c +++ b/tools/testing/selftests/cgroup/test_kill.c
@@ -86,7 +86,7 @@ static int test_cgkill_simple(const char *root) wait_for_pid(pids[i]); if (ret == KSFT_PASS && - cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n")) + cg_read_strcmp_wait(cgroup, "cgroup.events", "populated 0\n")) ret = KSFT_FAIL; if (cgroup) @@ -190,7 +190,8 @@ static int test_cgkill_tree(const char *root) wait_for_pid(pids[i]); if (ret == KSFT_PASS && - cg_read_strcmp(cgroup[0], "cgroup.events", "populated 0\n")) + cg_read_strcmp_wait(cgroup[0], "cgroup.events", + "populated 0\n")) ret = KSFT_FAIL; for (i = 9; i >= 0 && cgroup[i]; i--) { @@ -251,7 +252,7 @@ static int test_cgkill_forkbomb(const char *root) wait_for_pid(pid); if (ret == KSFT_PASS && - cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n")) + cg_read_strcmp_wait(cgroup, "cgroup.events", "populated 0\n")) ret = KSFT_FAIL; if (cgroup)
diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index ed7e405..b9b7527 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -4,13 +4,15 @@ import datetime import random import re +import time from lib.py import ksft_run, ksft_pr, ksft_exit from lib.py import ksft_eq, ksft_ne, ksft_ge, ksft_in, ksft_lt, ksft_true, ksft_raises from lib.py import NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily from lib.py import KsftSkipEx, KsftFailEx +from lib.py import ksft_disruptive from lib.py import rand_port -from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure +from lib.py import cmd, ethtool, ip, defer, GenerateTraffic, CmdExitFailure, wait_file def _rss_key_str(key): @@ -809,6 +811,98 @@ 'noise' : (0, 1) }) +@ksft_disruptive +def test_rss_context_persist_ifupdown(cfg, pre_down=False): + """ + Test that RSS contexts and their associated ntuple filters persist across + an interface down/up cycle. + + """ + + require_ntuple(cfg) + + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 6: + try: + ethtool(f"-L {cfg.ifname} combined 6") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + except Exception as exc: + raise KsftSkipEx("Not enough queues for the test") from exc + + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + ifup = defer(ip, f"link set dev {cfg.ifname} up") + if pre_down: + ip(f"link set dev {cfg.ifname} down") + + try: + ctx1_id = ethtool_create(cfg, "-X", "context new start 2 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx1_id} delete") + except CmdExitFailure as exc: + raise KsftSkipEx("Create context not supported with interface down") from exc + + ctx2_id = ethtool_create(cfg, "-X", "context new start 4 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx2_id} delete") + + port_ctx2 = rand_port() + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_ctx2} context {ctx2_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + if not pre_down: + ip(f"link set dev {cfg.ifname} down") + ifup.exec() + + wait_file(f"/sys/class/net/{cfg.ifname}/carrier", + lambda x: x.strip() == "1", deadline=20) + + remote_addr = cfg.remote_addr_v[cfg.addr_ipver] + for _ in range(10): + if cmd(f"ping -c 1 -W 1 {remote_addr}", fail=False).ret == 0: + break + time.sleep(1) + else: + raise KsftSkipEx("Cannot reach remote host after interface up") + + ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}, dump=True) + + data1 = [c for c in ctxs if c.get('context') == ctx1_id] + ksft_eq(len(data1), 1, f"Context {ctx1_id} should persist after ifup") + + data2 = [c for c in ctxs if c.get('context') == ctx2_id] + ksft_eq(len(data2), 1, f"Context {ctx2_id} should persist after ifup") + + _ntuple_rule_check(cfg, ntuple_id, ctx2_id) + + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + main_traffic = sum(cnts[0:2]) + ksft_ge(main_traffic, 18000, f"Main context traffic distribution: {cnts}") + ksft_lt(sum(cnts[2:6]), 500, f"Other context queues should be mostly empty: {cnts}") + + _send_traffic_check(cfg, port_ctx2, f"context {ctx2_id}", + {'target': (4, 5), + 'noise': (0, 1), + 'empty': (2, 3)}) + + +def test_rss_context_persist_create_and_ifdown(cfg): + """ + Create RSS contexts then cycle the interface down and up. + """ + test_rss_context_persist_ifupdown(cfg, pre_down=False) + + +def test_rss_context_persist_ifdown_and_create(cfg): + """ + Bring interface down first, then create RSS contexts and bring up. 
+ """ + test_rss_context_persist_ifupdown(cfg, pre_down=True) + + def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: cfg.context_cnt = None @@ -823,7 +917,9 @@ test_rss_context_out_of_order, test_rss_context4_create_with_cfg, test_flow_add_context_missing, test_delete_rss_context_busy, test_rss_ntuple_addition, - test_rss_default_context_rule], + test_rss_default_context_rule, + test_rss_context_persist_create_and_ifdown, + test_rss_context_persist_ifdown_and_create], args=(cfg, )) ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile index 1340b3df9..02d6f51 100644 --- a/tools/testing/selftests/drivers/net/team/Makefile +++ b/tools/testing/selftests/drivers/net/team/Makefile
@@ -3,8 +3,10 @@ TEST_PROGS := \ dev_addr_lists.sh \ + non_ether_header_ops.sh \ options.sh \ propagation.sh \ + refleak.sh \ # end of TEST_PROGS TEST_INCLUDES := \
diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config index 558e1d0..5d36a22 100644 --- a/tools/testing/selftests/drivers/net/team/config +++ b/tools/testing/selftests/drivers/net/team/config
@@ -1,7 +1,9 @@ +CONFIG_BONDING=y CONFIG_DUMMY=y CONFIG_IPV6=y CONFIG_MACVLAN=y CONFIG_NETDEVSIM=m +CONFIG_NET_IPGRE=y CONFIG_NET_TEAM=y CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y
diff --git a/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh b/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh new file mode 100755 index 0000000..948a435 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/non_ether_header_ops.sh
@@ -0,0 +1,41 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2154 +# +# Reproduce the non-Ethernet header_ops confusion scenario with: +# g0 (gre) -> b0 (bond) -> t0 (team) +# +# Before the fix, direct header_ops inheritance in this stack could call +# callbacks with the wrong net_device context and crash. + +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/lib.sh + +trap cleanup_all_ns EXIT + +setup_ns ns1 + +ip -n "$ns1" link add d0 type dummy +ip -n "$ns1" addr add 10.10.10.1/24 dev d0 +ip -n "$ns1" link set d0 up + +ip -n "$ns1" link add g0 type gre local 10.10.10.1 +ip -n "$ns1" link add b0 type bond mode active-backup +ip -n "$ns1" link add t0 type team + +ip -n "$ns1" link set g0 master b0 +ip -n "$ns1" link set b0 master t0 + +ip -n "$ns1" link set g0 up +ip -n "$ns1" link set b0 up +ip -n "$ns1" link set t0 up + +# IPv6 address assignment triggers MLD join reports that call +# dev_hard_header() on t0, exercising the inherited header_ops path. +ip -n "$ns1" -6 addr add 2001:db8:1::1/64 dev t0 nodad +for i in $(seq 1 20); do + ip netns exec "$ns1" ping -6 -I t0 ff02::1 -c1 -W1 &>/dev/null || true +done + +echo "PASS: non-Ethernet header_ops stacking did not crash" +exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/team/refleak.sh b/tools/testing/selftests/drivers/net/team/refleak.sh new file mode 100755 index 0000000..ef08213 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/refleak.sh
@@ -0,0 +1,17 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2154 + +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/lib.sh + +trap cleanup_all_ns EXIT + +# Test that there is no reference count leak and that dummy1 can be deleted. +# https://lore.kernel.org/netdev/4d69abe1-ca8d-4f0b-bcf8-13899b211e57@I-love.SAKURA.ne.jp/ +setup_ns ns1 ns2 +ip -n "$ns1" link add name team1 type team +ip -n "$ns1" link add name dummy1 mtu 1499 type dummy +ip -n "$ns1" link set dev dummy1 master team1 +ip -n "$ns1" link set dev dummy1 netns "$ns2" +ip -n "$ns2" link del dev dummy1
diff --git a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c index 61e55df..e19ff81 100644 --- a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c +++ b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
@@ -37,17 +37,20 @@ FIXTURE(iterate_mount_namespaces) { __u64 mnt_ns_id[MNT_NS_COUNT]; }; +static inline bool mntns_in_list(__u64 *mnt_ns_id, struct mnt_ns_info *info) +{ + for (int i = 0; i < MNT_NS_COUNT; i++) { + if (mnt_ns_id[i] == info->mnt_ns_id) + return true; + } + return false; +} + FIXTURE_SETUP(iterate_mount_namespaces) { for (int i = 0; i < MNT_NS_COUNT; i++) self->fd_mnt_ns[i] = -EBADF; - /* - * Creating a new user namespace let's us guarantee that we only see - * mount namespaces that we did actually create. - */ - ASSERT_EQ(unshare(CLONE_NEWUSER), 0); - for (int i = 0; i < MNT_NS_COUNT; i++) { struct mnt_ns_info info = {}; @@ -75,13 +78,15 @@ TEST_F(iterate_mount_namespaces, iterate_all_forward) fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[0], F_DUPFD_CLOEXEC); ASSERT_GE(fd_mnt_ns_cur, 0); - for (;; count++) { + for (;;) { struct mnt_ns_info info = {}; int fd_mnt_ns_next; fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info); if (fd_mnt_ns_next < 0 && errno == ENOENT) break; + if (mntns_in_list(self->mnt_ns_id, &info)) + count++; ASSERT_GE(fd_mnt_ns_next, 0); ASSERT_EQ(close(fd_mnt_ns_cur), 0); fd_mnt_ns_cur = fd_mnt_ns_next; @@ -96,13 +101,15 @@ TEST_F(iterate_mount_namespaces, iterate_all_backwards) fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[MNT_NS_LAST_INDEX], F_DUPFD_CLOEXEC); ASSERT_GE(fd_mnt_ns_cur, 0); - for (;; count++) { + for (;;) { struct mnt_ns_info info = {}; int fd_mnt_ns_prev; fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info); if (fd_mnt_ns_prev < 0 && errno == ENOENT) break; + if (mntns_in_list(self->mnt_ns_id, &info)) + count++; ASSERT_GE(fd_mnt_ns_prev, 0); ASSERT_EQ(close(fd_mnt_ns_cur), 0); fd_mnt_ns_cur = fd_mnt_ns_prev; @@ -125,7 +132,6 @@ TEST_F(iterate_mount_namespaces, iterate_forward) ASSERT_GE(fd_mnt_ns_next, 0); ASSERT_EQ(close(fd_mnt_ns_cur), 0); fd_mnt_ns_cur = fd_mnt_ns_next; - ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]); } } @@ -144,7 +150,6 @@ TEST_F(iterate_mount_namespaces, iterate_backward) ASSERT_GE(fd_mnt_ns_prev, 0); ASSERT_EQ(close(fd_mnt_ns_cur), 0); fd_mnt_ns_cur = fd_mnt_ns_prev; - ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]); } }
diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h index 80ab609..cdca912 100644 --- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h +++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
@@ -6,8 +6,10 @@ #define __HID_BPF_HELPERS_H /* "undefine" structs and enums in vmlinux.h, because we "override" them below */ +#define bpf_wq bpf_wq___not_used #define hid_bpf_ctx hid_bpf_ctx___not_used #define hid_bpf_ops hid_bpf_ops___not_used +#define hid_device hid_device___not_used #define hid_report_type hid_report_type___not_used #define hid_class_request hid_class_request___not_used #define hid_bpf_attach_flags hid_bpf_attach_flags___not_used @@ -27,8 +29,10 @@ #include "vmlinux.h" +#undef bpf_wq #undef hid_bpf_ctx #undef hid_bpf_ops +#undef hid_device #undef hid_report_type #undef hid_class_request #undef hid_bpf_attach_flags @@ -55,6 +59,14 @@ enum hid_report_type { HID_REPORT_TYPES, }; +struct hid_device { + unsigned int id; +} __attribute__((preserve_access_index)); + +struct bpf_wq { + __u64 __opaque[2]; +}; + struct hid_bpf_ctx { struct hid_device *hid; __u32 allocated_size;
diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py index 2d6d04f..3903f47 100644 --- a/tools/testing/selftests/hid/tests/test_wacom_generic.py +++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py
@@ -598,18 +598,6 @@ if unit_set: assert required[usage].contains(field) - def test_prop_direct(self): - """ - Todo: Verify that INPUT_PROP_DIRECT is set on display devices. - """ - pass - - def test_prop_pointer(self): - """ - Todo: Verify that INPUT_PROP_POINTER is set on opaque devices. - """ - pass - class PenTabletTest(BaseTest.TestTablet): def assertName(self, uhdev): @@ -677,6 +665,15 @@ uhdev.event(130, 240, pressure=0), [], auto_syn=False, strict=True ) + def test_prop_pointer(self): + """ + Verify that INPUT_PROP_POINTER is set and INPUT_PROP_DIRECT + is not set on opaque devices. + """ + evdev = self.uhdev.get_evdev() + assert libevdev.INPUT_PROP_POINTER in evdev.properties + assert libevdev.INPUT_PROP_DIRECT not in evdev.properties + class TestOpaqueCTLTablet(TestOpaqueTablet): def create_device(self): @@ -862,7 +859,18 @@ ) -class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest): +class DirectTabletTest(): + def test_prop_direct(self): + """ + Verify that INPUT_PROP_DIRECT is set and INPUT_PROP_POINTER + is not set on display devices. + """ + evdev = self.uhdev.get_evdev() + assert libevdev.INPUT_PROP_DIRECT in evdev.properties + assert libevdev.INPUT_PROP_POINTER not in evdev.properties + + +class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest, DirectTabletTest): ContactIds = namedtuple("ContactIds", "contact_id, tracking_id, slot_num") def create_device(self):
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 16a119a..4afaef0 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h
@@ -76,6 +76,9 @@ static inline void __kselftest_memset_safe(void *s, int c, size_t n) memset(s, c, n); } +#define KSELFTEST_PRIO_TEST_F 20000 +#define KSELFTEST_PRIO_XFAIL 20001 + #define TEST_TIMEOUT_DEFAULT 30 /* Utilities exposed to the test definitions */ @@ -465,7 +468,7 @@ static inline void __kselftest_memset_safe(void *s, int c, size_t n) fixture_name##_teardown(_metadata, self, variant); \ } \ static struct __test_metadata *_##fixture_name##_##test_name##_object; \ - static void __attribute__((constructor)) \ + static void __attribute__((constructor(KSELFTEST_PRIO_TEST_F))) \ _register_##fixture_name##_##test_name(void) \ { \ struct __test_metadata *object = mmap(NULL, sizeof(*object), \ @@ -880,7 +883,7 @@ struct __test_xfail { .fixture = &_##fixture_name##_fixture_object, \ .variant = &_##fixture_name##_##variant_name##_object, \ }; \ - static void __attribute__((constructor)) \ + static void __attribute__((constructor(KSELFTEST_PRIO_XFAIL))) \ _register_##fixture_name##_##variant_name##_##test_name##_xfail(void) \ { \ _##fixture_name##_##variant_name##_##test_name##_xfail.test = \
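The two priorities make registration order deterministic: with GCC and Clang, constructor(P) functions run in ascending P (values up to 100 are reserved for the implementation), so TEST_F registration at 20000 is guaranteed to run before XFAIL registration at 20001. A standalone sketch of the mechanism:

#include <stdio.h>

/* Lower priority value = runs earlier among constructors. */
static void __attribute__((constructor(20000))) register_test(void)
{
	puts("test registered first");
}

static void __attribute__((constructor(20001))) register_xfail(void)
{
	/* Safe to assume register_test() has already run here. */
	puts("xfail registered second");
}

int main(void)
{
	return 0;
}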
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index fdec90e..6471fa2 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -71,6 +71,7 @@ TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test TEST_GEN_PROGS_x86 += x86/feature_msrs_test +TEST_GEN_PROGS_x86 += x86/evmcs_smm_controls_test TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test TEST_GEN_PROGS_x86 += x86/fastops_test TEST_GEN_PROGS_x86 += x86/fix_hypercall_test @@ -205,6 +206,7 @@ TEST_GEN_PROGS_s390 += s390/user_operexec TEST_GEN_PROGS_s390 += s390/keyop TEST_GEN_PROGS_s390 += rseq_test +TEST_GEN_PROGS_s390 += s390/irq_routing TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 618c937..cc329b5 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -80,7 +80,7 @@ static void test_mbind(int fd, size_t total_size) { const unsigned long nodemask_0 = 1; /* nid: 0 */ unsigned long nodemask = 0; - unsigned long maxnode = 8; + unsigned long maxnode = BITS_PER_TYPE(nodemask); int policy; char *mem; int ret;
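BITS_PER_TYPE() replaces the magic 8 with the real width of the nodemask, since mbind()'s maxnode argument must cover at least the number of bits the mask can represent. In the kernel's tools headers the macro is just sizeof() scaled to bits; a self-contained sketch (the fallback definition is for illustration only):

#include <limits.h>
#include <stdio.h>

#ifndef BITS_PER_TYPE
#define BITS_PER_TYPE(t)	(sizeof(t) * CHAR_BIT)
#endif

int main(void)
{
	unsigned long nodemask = 0;

	/* 64 on LP64 targets - the value now passed as maxnode */
	printf("BITS_PER_TYPE(nodemask) = %zu\n", BITS_PER_TYPE(nodemask));
	return 0;
}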
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 4ebae42..469a221 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -557,6 +557,11 @@ static inline uint64_t get_cr0(void) return cr0; } +static inline void set_cr0(uint64_t val) +{ + __asm__ __volatile__("mov %0, %%cr0" : : "r" (val) : "memory"); +} + static inline uint64_t get_cr3(void) { uint64_t cr3; @@ -566,6 +571,11 @@ static inline uint64_t get_cr3(void) return cr3; } +static inline void set_cr3(uint64_t val) +{ + __asm__ __volatile__("mov %0, %%cr3" : : "r" (val) : "memory"); +} + static inline uint64_t get_cr4(void) { uint64_t cr4; @@ -580,6 +590,19 @@ static inline void set_cr4(uint64_t val) __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); } +static inline uint64_t get_cr8(void) +{ + uint64_t cr8; + + __asm__ __volatile__("mov %%cr8, %[cr8]" : [cr8]"=r"(cr8)); + return cr8; +} + +static inline void set_cr8(uint64_t val) +{ + __asm__ __volatile__("mov %0, %%cr8" : : "r" (val) : "memory"); +} + static inline void set_idt(const struct desc_ptr *idt_desc) { __asm__ __volatile__("lidt %0"::"m"(*idt_desc));
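The new setters pair with the existing getters for read-modify-write sequences on control registers from guest code. A hypothetical guest-side helper sketched on top of them (X86_CR0_WP is assumed to be the CR0 write-protect bit define from this header):

#include "processor.h"

/* Briefly clear CR0.WP around a supervisor write to read-only memory,
 * then restore the original CR0 value.
 */
static inline void guest_write_with_wp_clear(uint64_t *p, uint64_t val)
{
	uint64_t cr0 = get_cr0();

	set_cr0(cr0 & ~X86_CR0_WP);
	*p = val;
	set_cr0(cr0);
}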
diff --git a/tools/testing/selftests/kvm/include/x86/smm.h b/tools/testing/selftests/kvm/include/x86/smm.h new file mode 100644 index 0000000..19337c3 --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/smm.h
@@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-only +#ifndef SELFTEST_KVM_SMM_H +#define SELFTEST_KVM_SMM_H + +#include "kvm_util.h" + +#define SMRAM_SIZE 65536 +#define SMRAM_MEMSLOT ((1 << 16) | 1) +#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE) + +void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + uint64_t smram_gpa, + const void *smi_handler, size_t handler_size); + +void inject_smi(struct kvm_vcpu *vcpu); + +#endif /* SELFTEST_KVM_SMM_H */
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index fab18e9..23a4494 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -8,6 +8,7 @@ #include "kvm_util.h" #include "pmu.h" #include "processor.h" +#include "smm.h" #include "svm_util.h" #include "sev.h" #include "vmx.h" @@ -1444,3 +1445,28 @@ bool kvm_arch_has_default_irqchip(void) { return true; } + +void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + uint64_t smram_gpa, + const void *smi_handler, size_t handler_size) +{ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, smram_gpa, + SMRAM_MEMSLOT, SMRAM_PAGES, 0); + TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, smram_gpa, + SMRAM_MEMSLOT) == smram_gpa, + "Could not allocate guest physical addresses for SMRAM"); + + memset(addr_gpa2hva(vm, smram_gpa), 0x0, SMRAM_SIZE); + memcpy(addr_gpa2hva(vm, smram_gpa) + 0x8000, smi_handler, handler_size); + vcpu_set_msr(vcpu, MSR_IA32_SMBASE, smram_gpa); +} + +void inject_smi(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events; + + vcpu_events_get(vcpu, &events); + events.smi.pending = 1; + events.flags |= KVM_VCPUEVENT_VALID_SMM; + vcpu_events_set(vcpu, &events); +}
diff --git a/tools/testing/selftests/kvm/s390/irq_routing.c b/tools/testing/selftests/kvm/s390/irq_routing.c new file mode 100644 index 0000000..7819a0a --- /dev/null +++ b/tools/testing/selftests/kvm/s390/irq_routing.c
@@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IRQ routing offset tests. + * + * Copyright IBM Corp. 2026 + * + * Authors: + * Janosch Frank <frankja@linux.ibm.com> + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "kselftest.h" +#include "ucall_common.h" + +extern char guest_code[]; +asm("guest_code:\n" + "diag %r0,%r0,0\n" + "j .\n"); + +static void test(void) +{ + struct kvm_irq_routing *routing; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + vm_paddr_t mem; + int ret; + + struct kvm_irq_routing_entry ue = { + .type = KVM_IRQ_ROUTING_S390_ADAPTER, + .gsi = 1, + }; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + mem = vm_phy_pages_alloc(vm, 2, 4096 * 42, 0); + + routing = kvm_gsi_routing_create(); + routing->nr = 1; + routing->entries[0] = ue; + routing->entries[0].u.adapter.summary_addr = (uintptr_t)mem; + routing->entries[0].u.adapter.ind_addr = (uintptr_t)mem; + + routing->entries[0].u.adapter.summary_offset = 4096 * 8; + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); + ksft_test_result(ret == -1 && errno == EINVAL, "summary offset outside of page\n"); + + routing->entries[0].u.adapter.summary_offset -= 4; + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); + ksft_test_result(ret == 0, "summary offset inside of page\n"); + + routing->entries[0].u.adapter.ind_offset = 4096 * 8; + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); + ksft_test_result(ret == -1 && errno == EINVAL, "ind offset outside of page\n"); + + routing->entries[0].u.adapter.ind_offset -= 4; + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); + ksft_test_result(ret == 0, "ind offset inside of page\n"); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_IRQ_ROUTING)); + + ksft_print_header(); + ksft_set_plan(4); + test(); + + ksft_finished(); /* Print results and exit() accordingly */ +}
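The four checks encode one rule: the adapter's summary and indicator offsets appear to be bit offsets into a single 4 KiB page, so PAGE_SIZE * 8 (32768) is the first invalid value and anything below it is accepted. A sketch of the validation the test expects the ioctl to perform (an assumption about the kernel side, not the actual s390 code):

    #include <errno.h>

    #define PAGE_SIZE 4096

    static int validate_adapter_offsets(unsigned long summary_offset,
                                        unsigned long ind_offset)
    {
            /* Offsets index bits within the indicator page; a value of
             * PAGE_SIZE * 8 or more points past the page. */
            if (summary_offset >= PAGE_SIZE * 8 || ind_offset >= PAGE_SIZE * 8)
                    return -EINVAL;
            return 0;
    }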
diff --git a/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c new file mode 100644 index 0000000..af7c901 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c
@@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2026, Red Hat, Inc. + * + * Test that vmx_leave_smm() validates vmcs12 controls before re-entering + * nested guest mode on RSM. + */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "smm.h" +#include "hyperv.h" +#include "vmx.h" + +#define SMRAM_GPA 0x1000000 +#define SMRAM_STAGE 0xfe + +#define SYNC_PORT 0xe + +#define STR(x) #x +#define XSTR(s) STR(s) + +/* + * SMI handler: runs in real-address mode. + * Reports SMRAM_STAGE via port IO, then does RSM. + */ +static uint8_t smi_handler[] = { + 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */ + 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */ + 0x0f, 0xaa, /* rsm */ +}; + +static inline void sync_with_host(uint64_t phase) +{ + asm volatile("in $" XSTR(SYNC_PORT) ", %%al \n" + : "+a" (phase)); +} + +static void l2_guest_code(void) +{ + sync_with_host(1); + + /* After SMI+RSM with invalid controls, we should not reach here. */ + vmcall(); +} + +static void guest_code(struct vmx_pages *vmx_pages, + struct hyperv_test_pages *hv_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + /* Set up Hyper-V enlightenments and eVMCS */ + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist); + evmcs_enable(); + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_evmcs(hv_pages)); + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_ASSERT(!vmlaunch()); + + /* L2 exits via vmcall if test fails */ + sync_with_host(2); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0; + struct hyperv_test_pages *hv; + struct hv_enlightened_vmcs *evmcs; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct kvm_regs regs; + int stage_reported; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + setup_smram(vm, vcpu, SMRAM_GPA, smi_handler, sizeof(smi_handler)); + + vcpu_set_hv_cpuid(vcpu); + vcpu_enable_evmcs(vcpu); + vcpu_alloc_vmx(vm, &vmx_pages_gva); + hv = vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); + vcpu_args_set(vcpu, 2, vmx_pages_gva, hv_pages_gva); + + vcpu_run(vcpu); + + /* L2 is running and syncs with host. */ + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + vcpu_regs_get(vcpu, ®s); + stage_reported = regs.rax & 0xff; + TEST_ASSERT(stage_reported == 1, + "Expected stage 1, got %d", stage_reported); + + /* Inject SMI while L2 is running. */ + inject_smi(vcpu); + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + vcpu_regs_get(vcpu, ®s); + stage_reported = regs.rax & 0xff; + TEST_ASSERT(stage_reported == SMRAM_STAGE, + "Expected SMM handler stage %#x, got %#x", + SMRAM_STAGE, stage_reported); + + /* + * Guest is now paused in the SMI handler, about to execute RSM. + * Hack the eVMCS page to set-up invalid pin-based execution + * control (PIN_BASED_VIRTUAL_NMIS without PIN_BASED_NMI_EXITING). 
+ */ + evmcs = hv->enlightened_vmcs_hva; + evmcs->pin_based_vm_exec_control |= PIN_BASED_VIRTUAL_NMIS; + evmcs->hv_clean_fields = 0; + + /* + * Trigger copy_enlightened_to_vmcs12() via KVM_GET_NESTED_STATE, + * copying the invalid pin_based_vm_exec_control into cached_vmcs12. + */ + union { + struct kvm_nested_state state; + char state_[16384]; + } nested_state_buf; + + memset(&nested_state_buf, 0, sizeof(nested_state_buf)); + nested_state_buf.state.size = sizeof(nested_state_buf); + vcpu_nested_state_get(vcpu, &nested_state_buf.state); + + /* + * Resume the guest. The SMI handler executes RSM, which calls + * vmx_leave_smm(). nested_vmx_check_controls() should detect + * VIRTUAL_NMIS without NMI_EXITING and cause a triple fault. + */ + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN); + + kvm_vm_free(vm); + return 0; +}
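The invariant the test corrupts comes straight from the VMX spec: a VMCS with "virtual NMIs" set must also set "NMI exiting". A simplified sketch of the consistency check that vmx_leave_smm() is expected to apply on RSM (constants as defined in the selftests' vmx.h; the real nested_vmx_check_controls() validates far more than this one pair):

    static bool nested_pin_controls_valid(unsigned int pin_controls)
    {
            /* SDM rule: "virtual NMIs" requires "NMI exiting"; RSM back
             * into guest mode must fail if the cached vmcs12 violates it. */
            if ((pin_controls & PIN_BASED_VIRTUAL_NMIS) &&
                !(pin_controls & PIN_BASED_NMI_EXITING))
                    return false;
            return true;
    }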
diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index 86ad1c7..8bd37a47 100644 --- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -13,6 +13,30 @@ #include "linux/psp-sev.h" #include "sev.h" +static void guest_sev_test_msr(uint32_t msr) +{ + uint64_t val = rdmsr(msr); + + wrmsr(msr, val); + GUEST_ASSERT(val == rdmsr(msr)); +} + +#define guest_sev_test_reg(reg) \ +do { \ + uint64_t val = get_##reg(); \ + \ + set_##reg(val); \ + GUEST_ASSERT(val == get_##reg()); \ +} while (0) + +static void guest_sev_test_regs(void) +{ + guest_sev_test_msr(MSR_EFER); + guest_sev_test_reg(cr0); + guest_sev_test_reg(cr3); + guest_sev_test_reg(cr4); + guest_sev_test_reg(cr8); +} #define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM) @@ -24,6 +48,8 @@ static void guest_snp_code(void) GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ES_ENABLED); GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_SNP_ENABLED); + guest_sev_test_regs(); + wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ); vmgexit(); } @@ -34,6 +60,8 @@ static void guest_sev_es_code(void) GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED); GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED); + guest_sev_test_regs(); + /* * TODO: Add GHCB and ucall support for SEV-ES guests. For now, simply * force "termination" to signal "done" via the GHCB MSR protocol. @@ -47,6 +75,8 @@ static void guest_sev_code(void) GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV)); GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED); + guest_sev_test_regs(); + GUEST_DONE(); }
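The guest_sev_test_reg() macro token-pastes its argument into the accessor names, so one definition covers every control register that has get_/set_ helpers. For reference, guest_sev_test_reg(cr4) expands to roughly:

    do {
            uint64_t val = get_cr4();

            set_cr4(val);                   /* write back the value read */
            GUEST_ASSERT(val == get_cr4()); /* re-read and compare       */
    } while (0);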
diff --git a/tools/testing/selftests/kvm/x86/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c index 55c88d6..ade8412 100644 --- a/tools/testing/selftests/kvm/x86/smm_test.c +++ b/tools/testing/selftests/kvm/x86/smm_test.c
@@ -14,13 +14,11 @@ #include "test_util.h" #include "kvm_util.h" +#include "smm.h" #include "vmx.h" #include "svm_util.h" -#define SMRAM_SIZE 65536 -#define SMRAM_MEMSLOT ((1 << 16) | 1) -#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE) #define SMRAM_GPA 0x1000000 #define SMRAM_STAGE 0xfe @@ -113,18 +111,6 @@ static void guest_code(void *arg) sync_with_host(DONE); } -void inject_smi(struct kvm_vcpu *vcpu) -{ - struct kvm_vcpu_events events; - - vcpu_events_get(vcpu, &events); - - events.smi.pending = 1; - events.flags |= KVM_VCPUEVENT_VALID_SMM; - - vcpu_events_set(vcpu, &events); -} - int main(int argc, char *argv[]) { vm_vaddr_t nested_gva = 0; @@ -140,16 +126,7 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA, - SMRAM_MEMSLOT, SMRAM_PAGES, 0); - TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT) - == SMRAM_GPA, "could not allocate guest physical addresses?"); - - memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE); - memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler, - sizeof(smi_handler)); - - vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA); + setup_smram(vm, vcpu, SMRAM_GPA, smi_handler, sizeof(smi_handler)); if (kvm_has_cap(KVM_CAP_NESTED_STATE)) { if (kvm_cpu_has(X86_FEATURE_SVM))
diff --git a/tools/testing/selftests/landlock/tsync_test.c b/tools/testing/selftests/landlock/tsync_test.c index 37ef0d2..2b9ad4f 100644 --- a/tools/testing/selftests/landlock/tsync_test.c +++ b/tools/testing/selftests/landlock/tsync_test.c
@@ -6,9 +6,10 @@ */ #define _GNU_SOURCE -#include <pthread.h> -#include <sys/prctl.h> #include <linux/landlock.h> +#include <pthread.h> +#include <signal.h> +#include <sys/prctl.h> #include "common.h" @@ -158,4 +159,92 @@ TEST(competing_enablement) EXPECT_EQ(0, close(ruleset_fd)); } +static void signal_nop_handler(int sig) +{ +} + +struct signaler_data { + pthread_t target; + volatile bool stop; +}; + +static void *signaler_thread(void *data) +{ + struct signaler_data *sd = data; + + while (!sd->stop) + pthread_kill(sd->target, SIGUSR1); + + return NULL; +} + +/* + * Number of idle sibling threads. This must be large enough that even on + * machines with many cores, the sibling threads cannot all complete their + * credential preparation in a single parallel wave, otherwise the signaler + * thread has no window to interrupt wait_for_completion_interruptible(). + * 200 threads on a 64-core machine yields ~3 serialized waves, giving the + * tight signal loop enough time to land an interruption. + */ +#define NUM_IDLE_THREADS 200 + +/* + * Exercises the tsync interruption and cancellation paths in tsync.c. + * + * When a signal interrupts the calling thread while it waits for sibling + * threads to finish their credential preparation + * (wait_for_completion_interruptible in landlock_restrict_sibling_threads), + * the kernel sets ERESTARTNOINTR, cancels queued task works that have not + * started yet (cancel_tsync_works), then waits for the remaining works to + * finish. On the error return, syscalls.c aborts the prepared credentials. + * The kernel automatically restarts the syscall, so userspace sees success. + */ +TEST(tsync_interrupt) +{ + size_t i; + pthread_t threads[NUM_IDLE_THREADS]; + pthread_t signaler; + struct signaler_data sd; + struct sigaction sa = {}; + const int ruleset_fd = create_ruleset(_metadata); + + disable_caps(_metadata); + + /* Install a no-op SIGUSR1 handler so the signal does not kill us. */ + sa.sa_handler = signal_nop_handler; + sigemptyset(&sa.sa_mask); + ASSERT_EQ(0, sigaction(SIGUSR1, &sa, NULL)); + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + + for (i = 0; i < NUM_IDLE_THREADS; i++) + ASSERT_EQ(0, pthread_create(&threads[i], NULL, idle, NULL)); + + /* + * Start a signaler thread that continuously sends SIGUSR1 to the + * calling thread. This maximizes the chance of interrupting + * wait_for_completion_interruptible() in the kernel's tsync path. + */ + sd.target = pthread_self(); + sd.stop = false; + ASSERT_EQ(0, pthread_create(&signaler, NULL, signaler_thread, &sd)); + + /* + * The syscall may be interrupted and transparently restarted by the + * kernel (ERESTARTNOINTR). From userspace, it should always succeed. + */ + EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, + LANDLOCK_RESTRICT_SELF_TSYNC)); + + sd.stop = true; + ASSERT_EQ(0, pthread_join(signaler, NULL)); + + for (i = 0; i < NUM_IDLE_THREADS; i++) { + ASSERT_EQ(0, pthread_cancel(threads[i])); + ASSERT_EQ(0, pthread_join(threads[i], NULL)); + } + + EXPECT_EQ(0, close(ruleset_fd)); +} + TEST_HARNESS_MAIN
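The test comment above describes the kernel path in prose; the shape of that path, under the stated assumptions (the helper names here are hypothetical, not the actual tsync.c symbols), is roughly:

    static int wait_for_sibling_preparation(struct completion *all_prepared)
    {
            if (wait_for_completion_interruptible(all_prepared)) {
                    /* A signal arrived: cancel works that have not
                     * started yet, then wait for the in-flight ones. */
                    cancel_unstarted_tsync_works();   /* hypothetical */
                    wait_for_started_tsync_works();   /* hypothetical */
                    /* The caller aborts the prepared credentials; with
                     * -ERESTARTNOINTR the syscall restarts transparently,
                     * so userspace still observes a plain success. */
                    return -ERESTARTNOINTR;
            }
            return 0;
    }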
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index 7aec3ae..c6dafb3 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -1020,7 +1020,7 @@ FIXTURE_SETUP(mount_setattr_idmapped) "size=100000,mode=700"), 0); ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV, - "size=2m,mode=700"), 0); + "size=256m,mode=700"), 0); ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index afdea6d..605c54c 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile
@@ -15,6 +15,7 @@ big_tcp.sh \ bind_bhash.sh \ bpf_offload.py \ + bridge_vlan_dump.sh \ broadcast_ether_dst.sh \ broadcast_pmtu.sh \ busy_poll_test.sh \
diff --git a/tools/testing/selftests/net/bridge_vlan_dump.sh b/tools/testing/selftests/net/bridge_vlan_dump.sh new file mode 100755 index 0000000..ad66731 --- /dev/null +++ b/tools/testing/selftests/net/bridge_vlan_dump.sh
@@ -0,0 +1,204 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test bridge VLAN range grouping. VLANs are collapsed into a range entry in +# the dump if they have the same per-VLAN options. These tests verify that +# VLANs with different per-VLAN option values are not grouped together. + +# shellcheck disable=SC1091,SC2034,SC2154,SC2317 +source lib.sh + +ALL_TESTS=" + vlan_range_neigh_suppress + vlan_range_mcast_max_groups + vlan_range_mcast_n_groups + vlan_range_mcast_enabled +" + +setup_prepare() +{ + setup_ns NS + defer cleanup_all_ns + + ip -n "$NS" link add name br0 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 1 mcast_vlan_snooping 1 + ip -n "$NS" link set dev br0 up + + ip -n "$NS" link add name dummy0 type dummy + ip -n "$NS" link set dev dummy0 master br0 + ip -n "$NS" link set dev dummy0 up +} + +vlan_range_neigh_suppress() +{ + RET=0 + + # Add two new consecutive VLANs for range grouping test + bridge -n "$NS" vlan add vid 10 dev dummy0 + defer bridge -n "$NS" vlan del vid 10 dev dummy0 + + bridge -n "$NS" vlan add vid 11 dev dummy0 + defer bridge -n "$NS" vlan del vid 11 dev dummy0 + + # Configure different neigh_suppress values and verify no range grouping + bridge -n "$NS" vlan set vid 10 dev dummy0 neigh_suppress on + check_err $? "Failed to set neigh_suppress for VLAN 10" + + bridge -n "$NS" vlan set vid 11 dev dummy0 neigh_suppress off + check_err $? "Failed to set neigh_suppress for VLAN 11" + + # Verify VLANs are not shown as a range, but individual entries exist + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_fail $? "VLANs with different neigh_suppress incorrectly grouped" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$" + check_err $? "VLAN 10 individual entry not found" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$" + check_err $? "VLAN 11 individual entry not found" + + # Configure same neigh_suppress value and verify range grouping + bridge -n "$NS" vlan set vid 11 dev dummy0 neigh_suppress on + check_err $? "Failed to set neigh_suppress for VLAN 11" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_err $? "VLANs with same neigh_suppress not grouped" + + log_test "VLAN range grouping with neigh_suppress" +}
+ +vlan_range_mcast_max_groups() +{ + RET=0 + + # Add two new consecutive VLANs for range grouping test + bridge -n "$NS" vlan add vid 10 dev dummy0 + defer bridge -n "$NS" vlan del vid 10 dev dummy0 + + bridge -n "$NS" vlan add vid 11 dev dummy0 + defer bridge -n "$NS" vlan del vid 11 dev dummy0 + + # Configure different mcast_max_groups values and verify no range grouping + bridge -n "$NS" vlan set vid 10 dev dummy0 mcast_max_groups 100 + check_err $? "Failed to set mcast_max_groups for VLAN 10" + + bridge -n "$NS" vlan set vid 11 dev dummy0 mcast_max_groups 200 + check_err $? "Failed to set mcast_max_groups for VLAN 11" + + # Verify VLANs are not shown as a range, but individual entries exist + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_fail $? "VLANs with different mcast_max_groups incorrectly grouped" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$" + check_err $? "VLAN 10 individual entry not found" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$" + check_err $? "VLAN 11 individual entry not found" + + # Configure same mcast_max_groups value and verify range grouping + bridge -n "$NS" vlan set vid 11 dev dummy0 mcast_max_groups 100 + check_err $? "Failed to set mcast_max_groups for VLAN 11" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_err $? "VLANs with same mcast_max_groups not grouped" + + log_test "VLAN range grouping with mcast_max_groups" +}
+ +vlan_range_mcast_n_groups() +{ + RET=0 + + # Add two new consecutive VLANs for range grouping test + bridge -n "$NS" vlan add vid 10 dev dummy0 + defer bridge -n "$NS" vlan del vid 10 dev dummy0 + + bridge -n "$NS" vlan add vid 11 dev dummy0 + defer bridge -n "$NS" vlan del vid 11 dev dummy0 + + # Add different numbers of multicast groups to each VLAN + bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.1 vid 10 + check_err $? "Failed to add mdb entry to VLAN 10" + defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.1 vid 10 + + bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.2 vid 10 + check_err $? "Failed to add second mdb entry to VLAN 10" + defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.2 vid 10 + + bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.1 vid 11 + check_err $? "Failed to add mdb entry to VLAN 11" + defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.1 vid 11 + + # Verify VLANs are not shown as a range due to different mcast_n_groups + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_fail $? "VLANs with different mcast_n_groups incorrectly grouped" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$" + check_err $? "VLAN 10 individual entry not found" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$" + check_err $? "VLAN 11 individual entry not found" + + # Add another group to VLAN 11 to match VLAN 10's count + bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.2 vid 11 + check_err $? "Failed to add second mdb entry to VLAN 11" + defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.2 vid 11 + + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_err $? "VLANs with same mcast_n_groups not grouped" + + log_test "VLAN range grouping with mcast_n_groups" +}
+ +vlan_range_mcast_enabled() +{ + RET=0 + + # Add two new consecutive VLANs for range grouping test + bridge -n "$NS" vlan add vid 10 dev br0 self + defer bridge -n "$NS" vlan del vid 10 dev br0 self + + bridge -n "$NS" vlan add vid 11 dev br0 self + defer bridge -n "$NS" vlan del vid 11 dev br0 self + + bridge -n "$NS" vlan add vid 10 dev dummy0 + defer bridge -n "$NS" vlan del vid 10 dev dummy0 + + bridge -n "$NS" vlan add vid 11 dev dummy0 + defer bridge -n "$NS" vlan del vid 11 dev dummy0 + + # Configure different mcast_snooping for bridge VLANs + # Port VLANs inherit BR_VLFLAG_MCAST_ENABLED from bridge VLANs + bridge -n "$NS" vlan global set dev br0 vid 10 mcast_snooping 1 + bridge -n "$NS" vlan global set dev br0 vid 11 mcast_snooping 0 + + # Verify port VLANs are not grouped due to different mcast_enabled + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_fail $? "VLANs with different mcast_enabled incorrectly grouped" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$" + check_err $? "VLAN 10 individual entry not found" + + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$" + check_err $? "VLAN 11 individual entry not found" + + # Configure same mcast_snooping and verify range grouping + bridge -n "$NS" vlan global set dev br0 vid 11 mcast_snooping 1 + + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" + check_err $? "VLANs with same mcast_enabled not grouped" + + log_test "VLAN range grouping with mcast_enabled" +}
+ +# Verify the newest tested option is supported +if ! bridge vlan help 2>&1 | grep -q "neigh_suppress"; then + echo "SKIP: iproute2 too old, missing per-VLAN neighbor suppression support" + exit "$ksft_skip" +fi + +trap defer_scopes_cleanup EXIT +setup_prepare +tests_run + +exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 21026b6..6eb7f95 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -1672,6 +1672,17 @@ run_cmd "$IP ro replace 172.16.101.1/32 via inet6 2001:db8:50::1 dev veth1" log_test $? 2 "IPv4 route with invalid IPv6 gateway" + + # Test IPv4 route with loopback IPv6 nexthop + # Regression test: loopback IPv6 nexthop was misclassified as reject + # route, skipping nhc_pcpu_rth_output allocation, causing panic when + # an IPv4 route references it and triggers __mkroute_output(). + run_cmd "$IP -6 nexthop add id 20 dev lo" + run_cmd "$IP ro add 172.20.20.0/24 nhid 20" + run_cmd "ip netns exec $me ping -c1 -W1 172.20.20.1" + log_test $? 1 "IPv4 route with loopback IPv6 nexthop (no crash)" + run_cmd "$IP ro del 172.20.20.0/24" + run_cmd "$IP nexthop del id 20" } ipv4_fcnal_runtime()
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index c5694cc..829f72c 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh
@@ -868,6 +868,64 @@ check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l) log_test $ret 0 "ipv6 route garbage collection (replace with permanent)" + # Delete dummy_10 and remove all routes + $IP link del dev dummy_10 + + # rd6 is required for the next test. (ipv6toolkit) + if [ ! -x "$(command -v rd6)" ]; then + echo "SKIP: rd6 not found." + set +e + cleanup &> /dev/null + return + fi + + setup_ns ns2 + $IP link add veth1 type veth peer veth2 netns $ns2 + $IP link set veth1 up + ip -netns $ns2 link set veth2 up + $IP addr add fe80:dead::1/64 dev veth1 + ip -netns $ns2 addr add fe80:dead::2/64 dev veth2 + + # Add NTF_ROUTER neighbour to prevent rt6_age_examine_exception() + # from removing not-yet-expired exceptions. + ip -netns $ns2 link set veth2 address 00:11:22:33:44:55 + $IP neigh add fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router + + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_redirects=1 + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.forwarding=0 + + # Temporary routes + for i in $(seq 1 5); do + # Expire route after $EXPIRE seconds + $IP -6 route add 2001:10::$i \ + via fe80:dead::2 dev veth1 expires $EXPIRE + + ip netns exec $ns2 rd6 -i veth2 \ + -s fe80:dead::2 -d fe80:dead::1 \ + -r 2001:10::$i -t fe80:dead::3 -p ICMP6 + done + + check_rt_num 5 $($IP -6 route list | grep expires | grep 2001:10:: | wc -l) + + # Promote to permanent routes by "prepend" (w/o NLM_F_EXCL and NLM_F_REPLACE) + for i in $(seq 1 5); do + # -EEXIST, but the temporary route becomes the permanent route. + $IP -6 route append 2001:10::$i \ + via fe80:dead::2 dev veth1 2>/dev/null || true + done + + check_rt_num 5 $($IP -6 route list | grep -v expires | grep 2001:10:: | wc -l) + check_rt_num 5 $($IP -6 route list cache | grep 2001:10:: | wc -l) + + # Trigger GC instead of waiting $GC_WAIT_TIME. + # rt6_nh_dump_exceptions() just skips expired exceptions. + $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + check_rt_num 0 $($IP -6 route list cache | grep 2001:10:: | wc -l) + log_test $ret 0 "ipv6 route garbage collection (promote to permanent routes)" + + $IP neigh del fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router + $IP link del veth1 + # ra6 is required for the next test. (ipv6toolkit) if [ ! -x "$(command -v ra6)" ]; then echo "SKIP: ra6 not found." @@ -876,9 +934,6 @@ return fi - # Delete dummy_10 and remove all routes - $IP link del dev dummy_10 - # Create a pair of veth devices to send a RA message from one # device to another. $IP link add veth1 type veth peer name veth2
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index dc1f200..a3144d7 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -104,6 +104,24 @@ 6 0 0 65535, 6 0 0 0" +# IPv4: TCP hdr of 48B, a first suboption of 12B (DACK8), the RM_ADDR suboption +# generated using "nfbpf_compile '(ip[32] & 0xf0) == 0xc0 && ip[53] == 0x0c && +# (ip[66] & 0xf0) == 0x40'" +CBPF_MPTCP_SUBOPTION_RM_ADDR="13, + 48 0 0 0, + 84 0 0 240, + 21 0 9 64, + 48 0 0 32, + 84 0 0 240, + 21 0 6 192, + 48 0 0 53, + 21 0 4 12, + 48 0 0 66, + 84 0 0 240, + 21 0 1 64, + 6 0 0 65535, + 6 0 0 0" + init_partial() { capout=$(mktemp) @@ -2608,6 +2626,19 @@ chk_rst_nr 0 0 fi + # signal+subflow with limits, remove + if reset "remove signal+subflow with limits"; then + pm_nl_set_limits $ns1 0 0 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,subflow + pm_nl_set_limits $ns2 0 0 + addr_nr_ns1=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_add_nr 1 1 + chk_rm_nr 1 0 invert + chk_rst_nr 0 0 + fi + # addresses remove if reset "remove addresses"; then pm_nl_set_limits $ns1 3 3 @@ -4217,6 +4248,14 @@ chk_subflow_nr "after no reject" 3 chk_mptcp_info subflows 2 subflows 2 + # To make sure RM_ADDR are sent over a different subflow, but + # allow the rest to quickly and cleanly close the subflow + local ipt=1 + ip netns exec "${ns2}" ${iptables} -I OUTPUT -s "10.0.1.2" \ + -p tcp -m tcp --tcp-option 30 \ + -m bpf --bytecode \ + "$CBPF_MPTCP_SUBOPTION_RM_ADDR" \ + -j DROP || ipt=0 local i for i in $(seq 3); do pm_nl_del_endpoint $ns2 1 10.0.1.2 @@ -4229,6 +4268,7 @@ chk_subflow_nr "after re-add id 0 ($i)" 3 chk_mptcp_info subflows 3 subflows 3 done + [ ${ipt} = 1 ] && ip netns exec "${ns2}" ${iptables} -D OUTPUT 1 mptcp_lib_kill_group_wait $tests_pid @@ -4288,11 +4328,20 @@ chk_mptcp_info subflows 2 subflows 2 chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 + # To make sure RM_ADDR are sent over a different subflow, but + # allow the rest to quickly and cleanly close the subflow + local ipt=1 + ip netns exec "${ns1}" ${iptables} -I OUTPUT -s "10.0.1.1" \ + -p tcp -m tcp --tcp-option 30 \ + -m bpf --bytecode \ + "$CBPF_MPTCP_SUBOPTION_RM_ADDR" \ + -j DROP || ipt=0 pm_nl_del_endpoint $ns1 42 10.0.1.1 sleep 0.5 chk_subflow_nr "after delete ID 0" 2 chk_mptcp_info subflows 2 subflows 2 chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 + [ ${ipt} = 1 ] && ip netns exec "${ns1}" ${iptables} -D OUTPUT 1 pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal wait_mpj 4
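The raw cBPF quadruples above are (opcode, jt, jf, k). Decoded into struct sock_filter form, with offsets relative to the IP header as the comment describes (the iptables -m bpf context), the IPv4 RM_ADDR filter reads:

    #include <linux/filter.h>       /* struct sock_filter */

    static struct sock_filter mptcp_rm_addr_filter[] = {
            { 0x30, 0, 0, 0    },   /* ldb [0]    ; IP version/IHL byte    */
            { 0x54, 0, 0, 0xf0 },   /* and #0xf0                           */
            { 0x15, 0, 9, 0x40 },   /* jeq #0x40, else no match  (IPv4)    */
            { 0x30, 0, 0, 32   },   /* ldb [32]   ; TCP data offset byte   */
            { 0x54, 0, 0, 0xf0 },   /* and #0xf0                           */
            { 0x15, 0, 6, 0xc0 },   /* jeq #0xc0  (48-byte TCP header)     */
            { 0x30, 0, 0, 53   },   /* ldb [53]   ; first suboption length */
            { 0x15, 0, 4, 0x0c },   /* jeq #12    (a 12-byte DACK8)        */
            { 0x30, 0, 0, 66   },   /* ldb [66]   ; next suboption subtype */
            { 0x54, 0, 0, 0xf0 },   /* and #0xf0                           */
            { 0x15, 0, 1, 0x40 },   /* jeq #0x40  (RM_ADDR, subtype 4)     */
            { 0x06, 0, 0, 0xffff }, /* ret #65535 ; match                  */
            { 0x06, 0, 0, 0    },   /* ret #0     ; no match               */
    };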
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 806aaa7..d11a8b9 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -237,10 +237,13 @@ for dev in ns2eth1 ns2eth2; do tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1 done - tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 - tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 - tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 - tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 + + # keep the number of queued packets low, or the RTT estimator will see + # increasing latency over time. + tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 limit 50 + tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 limit 50 + tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 limit 50 + tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 limit 50 # time is measured in ms, account for transfer size, aggregated link speed # and header overhead (10%)
diff --git a/tools/testing/selftests/net/netfilter/nf_queue.c b/tools/testing/selftests/net/netfilter/nf_queue.c index 9e56b9d..116c0ca 100644 --- a/tools/testing/selftests/net/netfilter/nf_queue.c +++ b/tools/testing/selftests/net/netfilter/nf_queue.c
@@ -18,6 +18,7 @@ struct options { bool count_packets; bool gso_enabled; + bool failopen; int verbose; unsigned int queue_num; unsigned int timeout; @@ -30,7 +31,7 @@ static struct options opts; static void help(const char *p) { - printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p); + printf("Usage: %s [-c|-v [-vv] ] [-o] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p); } static int parse_attr_cb(const struct nlattr *attr, void *data) @@ -236,6 +237,8 @@ struct mnl_socket *open_queue(void) flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0; flags |= NFQA_CFG_F_UID_GID; + if (opts.failopen) + flags |= NFQA_CFG_F_FAIL_OPEN; mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags)); mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags)); @@ -329,7 +332,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) { + while ((c = getopt(argc, argv, "chvot:q:Q:d:G")) != -1) { switch (c) { case 'c': opts.count_packets = true; @@ -366,6 +369,9 @@ static void parse_opts(int argc, char **argv) case 'G': opts.gso_enabled = false; break; + case 'o': + opts.failopen = true; + break; case 'v': opts.verbose++; break;
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index 394166f..ffdc6cc 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -29,7 +29,8 @@ net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate insert_overlap" +BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate + insert_overlap load_flush_load4 load_flush_load8" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -432,6 +433,30 @@ perf_duration 0 " +TYPE_load_flush_load4=" +display reload with flush, 4bit groups +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_load_flush_load8=" +display reload with flush, 8bit groups +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1997,6 +2022,49 @@ return 0 } +test_bug_load_flush_load4() +{ + local i + + setup veth send_"${proto}" set || return ${ksft_skip} + + for i in $(seq 0 255); do + local addelem="add element inet filter test" + local j + + for j in $(seq 0 20); do + echo "$addelem { 10.$j.0.$i . 10.$j.1.$i }" + echo "$addelem { 10.$j.0.$i . 10.$j.2.$i }" + done + done > "$tmp" + + nft -f "$tmp" || return 1 + + ( echo "flush set inet filter test";cat "$tmp") | nft -f - + [ $? -eq 0 ] || return 1 + + return 0 +} + +test_bug_load_flush_load8() +{ + local i + + setup veth send_"${proto}" set || return ${ksft_skip} + + for i in $(seq 1 100); do + echo "add element inet filter test { 10.0.0.$i . 10.0.1.$i }" + echo "add element inet filter test { 10.0.0.$i . 10.0.2.$i }" + done > "$tmp" + + nft -f "$tmp" || return 1 + + ( echo "flush set inet filter test";cat "$tmp") | nft -f - + [ $? -eq 0 ] || return 1 + + return 0 +} + test_reported_issues() { eval test_bug_"${subtest}" }
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index 139bc12..ea766bd 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -591,6 +591,7 @@ test_udp_gro_ct() { local errprefix="FAIL: test_udp_gro_ct:" + local timeout=5 ip netns exec "$nsrouter" conntrack -F 2>/dev/null @@ -630,10 +631,10 @@ } } EOF - timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12346,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc & + timeout "$timeout" ip netns exec "$ns2" socat UDP-LISTEN:12346,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc & local rpid=$! - ip netns exec "$nsrouter" ./nf_queue -G -c -q 1 -t 2 > "$TMPFILE2" & + ip netns exec "$nsrouter" nice -n -19 ./nf_queue -G -c -q 1 -o -t 2 > "$TMPFILE2" & local nfqpid=$! ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding on rx-gro-list on generic-receive-offload on @@ -643,8 +644,12 @@ local bs=512 local count=$(((32 * 1024 * 1024) / bs)) - dd if=/dev/zero bs="$bs" count="$count" 2>/dev/null | for i in $(seq 1 16); do - timeout 5 ip netns exec "$ns1" \ + + local nprocs=$(nproc) + [ $nprocs -gt 1 ] && nprocs=$((nprocs - 1)) + + dd if=/dev/zero bs="$bs" count="$count" 2>/dev/null | for i in $(seq 1 $nprocs); do + timeout "$timeout" nice -n 19 ip netns exec "$ns1" \ socat -u -b 512 STDIN UDP-DATAGRAM:10.0.2.99:12346,reuseport,bind=0.0.0.0:55221 & done
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt index 3848b41..6c0f32c 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
@@ -38,7 +38,7 @@ // If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd +0 < P. 4001:54001(50000) ack 1 win 257 - +0 > . 1:1(0) ack 54001 win 0 + * > . 1:1(0) ack 54001 win 0 // Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times. +0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt deleted file mode 100644 index f575c0f..0000000 --- a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt +++ /dev/null
@@ -1,33 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - ---mss=1000 - -`./defaults.sh` - - 0 `nstat -n` - -// Establish a connection. - +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 - +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 - +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0 - +0 bind(3, ..., ...) = 0 - +0 listen(3, 1) = 0 - - +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> - +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0> - +.1 < . 1:1(0) ack 1 win 257 - - +0 accept(3, ..., ...) = 4 - - +0 < P. 1:20001(20000) ack 1 win 257 - +.04 > . 1:1(0) ack 20001 win 18000 - - +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0 - +0 < P. 20001:80001(60000) ack 1 win 257 - +0 > . 1:1(0) ack 20001 win 18000 - - +0 read(4, ..., 20000) = 20000 -// A too big packet is accepted if the receive queue is empty - +0 < P. 20001:80001(60000) ack 1 win 257 - +0 > . 1:1(0) ack 80001 win 0 -
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_zero_wnd_fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_zero_wnd_fin.pkt new file mode 100644 index 0000000..e245359 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_zero_wnd_fin.pkt
@@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Some TCP stacks send FINs even though the window is closed. We break +// a possible FIN/ACK loop by accepting the FIN. + +--mss=1000 + +`./defaults.sh` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:60001(60000) ack 1 win 257 + * > . 1:1(0) ack 60001 win 0 + + +0 < F. 60001:60001(0) ack 1 win 257 + +0 > . 1:1(0) ack 60002 win 0
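The scenario the script encodes: with the receive window at zero, a retransmitted FIN that lands exactly at rcv_nxt is accepted (the ack moves from 60001 to 60002) instead of being re-acked forever. A sketch of that acceptance rule (an assumption about the receive path, not the actual tcp_input.c code):

    #include <stdbool.h>
    #include <stdint.h>

    static bool accept_fin_in_zero_window(uint32_t seq, uint32_t end_seq,
                                          uint32_t rcv_nxt, bool fin)
    {
            /* A bare FIN at the exact expected sequence consumes only
             * its one sequence number, so taking it cannot overrun the
             * receive buffer, and it ends the FIN/ACK retransmit loop. */
            return fin && seq == rcv_nxt && end_seq == seq + 1;
    }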
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 248c2b91..5a5ff88 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -28,6 +28,7 @@ kci_test_fdb_get kci_test_fdb_del kci_test_neigh_get + kci_test_neigh_update kci_test_bridge_parent_id kci_test_address_proto kci_test_enslave_bonding @@ -1160,6 +1161,60 @@ end_test "PASS: neigh get" } +kci_test_neigh_update() +{ + dstip=10.0.2.4 + dstmac=de:ad:be:ef:13:37 + local ret=0 + + for proxy in "" "proxy" ; do + # add a neighbour entry without any flags + run_cmd ip neigh add $proxy $dstip dev "$devdummy" lladdr $dstmac nud permanent + run_cmd_grep $dstip ip neigh show $proxy + run_cmd_grep_fail "$dstip dev $devdummy .*\(managed\|use\|router\|extern\)" ip neigh show $proxy + + # set the extern_learn flag, but no other + run_cmd ip neigh change $proxy $dstip dev "$devdummy" extern_learn + run_cmd_grep "$dstip dev $devdummy .* extern_learn" ip neigh show $proxy + run_cmd_grep_fail "$dstip dev $devdummy .* \(managed\|use\|router\)" ip neigh show $proxy + + # flags are reset when not provided + run_cmd ip neigh change $proxy $dstip dev "$devdummy" + run_cmd_grep $dstip ip neigh show $proxy + run_cmd_grep_fail "$dstip dev $devdummy .* extern_learn" ip neigh show $proxy + + # add a protocol + run_cmd ip neigh change $proxy $dstip dev "$devdummy" protocol boot + run_cmd_grep "$dstip dev $devdummy .* proto boot" ip neigh show $proxy + + # protocol is retained when not provided + run_cmd ip neigh change $proxy $dstip dev "$devdummy" + run_cmd_grep "$dstip dev $devdummy .* proto boot" ip neigh show $proxy + + # change protocol + run_cmd ip neigh change $proxy $dstip dev "$devdummy" protocol static + run_cmd_grep "$dstip dev $devdummy .* proto static" ip neigh show $proxy + + # also check an extended flag for non-proxy neighs + if [ "$proxy" = "" ]; then + run_cmd ip neigh change $proxy $dstip dev "$devdummy" managed + run_cmd_grep "$dstip dev $devdummy managed" ip neigh show $proxy + + run_cmd ip neigh change $proxy $dstip dev "$devdummy" lladdr $dstmac + run_cmd_grep_fail "$dstip dev $devdummy managed" ip neigh show $proxy + fi + + run_cmd ip neigh del $proxy $dstip dev "$devdummy" + done + + if [ $ret -ne 0 ];then + end_test "FAIL: neigh update" + return 1 + fi + + end_test "PASS: neigh update" +} + kci_test_bridge_parent_id() { local ret=0
diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c index 8a5cd5c..cf106a4 100644 --- a/tools/testing/selftests/net/tun.c +++ b/tools/testing/selftests/net/tun.c
@@ -944,8 +944,8 @@ TEST_F(tun_vnet_udptnl, send_gso_packet) ASSERT_EQ(ret, off); ret = receive_gso_packet_from_tunnel(self, variant, &r_num_mss); - ASSERT_EQ(ret, variant->data_size); - ASSERT_EQ(r_num_mss, variant->r_num_mss); + EXPECT_EQ(ret, variant->data_size); + EXPECT_EQ(r_num_mss, variant->r_num_mss); } TEST_F(tun_vnet_udptnl, recv_gso_packet) @@ -955,18 +955,18 @@ TEST_F(tun_vnet_udptnl, recv_gso_packet) int ret, gso_type = VIRTIO_NET_HDR_GSO_UDP_L4; ret = send_gso_packet_into_tunnel(self, variant); - ASSERT_EQ(ret, variant->data_size); + EXPECT_EQ(ret, variant->data_size); memset(&vnet_hdr, 0, sizeof(vnet_hdr)); ret = receive_gso_packet_from_tun(self, variant, &vnet_hdr); - ASSERT_EQ(ret, variant->data_size); + EXPECT_EQ(ret, variant->data_size); if (!variant->no_gso) { - ASSERT_EQ(vh->gso_size, variant->gso_size); + EXPECT_EQ(vh->gso_size, variant->gso_size); gso_type |= (variant->tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4) : (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6); - ASSERT_EQ(vh->gso_type, gso_type); + EXPECT_EQ(vh->gso_type, gso_type); } }
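The ASSERT-to-EXPECT conversion changes failure behavior, not the checks themselves: in the kselftest harness, ASSERT_* aborts the test body at the first failure, while EXPECT_* records the failure and keeps executing, so a single GSO mismatch no longer hides the results of the remaining comparisons. Illustration:

    #include "kselftest_harness.h"

    TEST(assert_vs_expect)
    {
            EXPECT_EQ(1, 2);        /* marks the test failed, keeps going */
            EXPECT_EQ(3, 3);        /* still evaluated and reported       */
            ASSERT_EQ(1, 2);        /* aborts the test body right here    */
            EXPECT_EQ(4, 4);        /* never reached                      */
    }

    TEST_HARNESS_MAIN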
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore index 7283e8b..80d4270 100644 --- a/tools/testing/selftests/powerpc/copyloops/.gitignore +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -2,8 +2,8 @@ copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 -copyuser_p7_t0 -copyuser_p7_t1 +copyuser_p7 +copyuser_p7_vmx memcpy_64_t0 memcpy_64_t1 memcpy_64_t2
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 42940f9..0c8efb0 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ - copyuser_p7_t0 copyuser_p7_t1 \ + copyuser_p7 copyuser_p7_vmx \ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \ copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 \ @@ -28,10 +28,15 @@ -D SELFTEST_CASE=$(subst copyuser_64_t,,$(notdir $@)) \ -o $@ $^ -$(OUTPUT)/copyuser_p7_t%: copyuser_power7.S $(EXTRA_SOURCES) +$(OUTPUT)/copyuser_p7: copyuser_power7.S $(EXTRA_SOURCES) $(CC) $(CPPFLAGS) $(CFLAGS) \ -D COPY_LOOP=test___copy_tofrom_user_power7 \ - -D SELFTEST_CASE=$(subst copyuser_p7_t,,$(notdir $@)) \ + -o $@ $^ + +$(OUTPUT)/copyuser_p7_vmx: copyuser_power7.S $(EXTRA_SOURCES) ../utils.c + $(CC) $(CPPFLAGS) $(CFLAGS) \ + -D COPY_LOOP=test___copy_tofrom_user_power7_vmx \ + -D VMX_TEST \ -o $@ $^ # Strictly speaking, we only need the memcpy_64 test cases for big-endian
diff --git a/tools/testing/selftests/powerpc/copyloops/stubs.S b/tools/testing/selftests/powerpc/copyloops/stubs.S index ec8bcf2..3a9cb8c 100644 --- a/tools/testing/selftests/powerpc/copyloops/stubs.S +++ b/tools/testing/selftests/powerpc/copyloops/stubs.S
@@ -1,13 +1,5 @@ #include <asm/ppc_asm.h> -FUNC_START(enter_vmx_usercopy) - li r3,1 - blr - -FUNC_START(exit_vmx_usercopy) - li r3,0 - blr - FUNC_START(enter_vmx_ops) li r3,1 blr
diff --git a/tools/testing/selftests/powerpc/copyloops/validate.c b/tools/testing/selftests/powerpc/copyloops/validate.c index 0f68736..fb82253 100644 --- a/tools/testing/selftests/powerpc/copyloops/validate.c +++ b/tools/testing/selftests/powerpc/copyloops/validate.c
@@ -12,6 +12,10 @@ #define BUFLEN (MAX_LEN+MAX_OFFSET+2*MIN_REDZONE) #define POISON 0xa5 +#ifdef VMX_TEST +#define VMX_COPY_THRESHOLD 3328 +#endif + unsigned long COPY_LOOP(void *to, const void *from, unsigned long size); static void do_one(char *src, char *dst, unsigned long src_off, @@ -81,8 +85,12 @@ int test_copy_loop(void) /* Fill with sequential bytes */ for (i = 0; i < BUFLEN; i++) fill[i] = i & 0xff; - +#ifdef VMX_TEST + /* Force sizes above kernel VMX threshold (3328) */ + for (len = VMX_COPY_THRESHOLD + 1; len < MAX_LEN; len++) { +#else for (len = 1; len < MAX_LEN; len++) { +#endif for (src_off = 0; src_off < MAX_OFFSET; src_off++) { for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) { do_one(src, dst, src_off, dst_off, len, @@ -96,5 +104,10 @@ int test_copy_loop(void) int main(void) { +#ifdef VMX_TEST + /* Skip if Altivec not present */ + SKIP_IF_MSG(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC), "ALTIVEC not supported"); +#endif + return test_harness(test_copy_loop, str(COPY_LOOP)); }
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N index 07f5e0a..f943cdf 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
@@ -2,7 +2,9 @@ CONFIG_SMP=y CONFIG_NR_CPUS=4 CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_DYNAMIC=n +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n #CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T index c70cf04..06e4d10 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T
@@ -1,5 +1,6 @@ CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U index bc9eeab..71da6e3 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U
@@ -1,5 +1,6 @@ CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n @@ -7,4 +8,3 @@ CONFIG_RCU_TRACE=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_PREEMPT_COUNT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 index 2f9fcff..dd2bd4e 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
@@ -1,5 +1,6 @@ CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY01 b/tools/testing/selftests/rcutorture/configs/rcu/TINY01 index 0953c52..2be53bf 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TINY01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY01
@@ -1,5 +1,6 @@ CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n @@ -11,4 +12,3 @@ #CHECK#CONFIG_RCU_STALL_COMMON=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_PREEMPT_COUNT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 index 30439f6..be88603 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
@@ -1,5 +1,6 @@ CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 index 18efab3..8fb124c 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -1,7 +1,8 @@ CONFIG_SMP=y CONFIG_NR_CPUS=5 CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 index 67caf42..ac857d5 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -1,10 +1,12 @@ CONFIG_SMP=y CONFIG_NR_CPUS=8 +CONFIG_PREEMPT_LAZY=y CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TREE_RCU=y +#CHECK#CONFIG_PREEMPT_RCU=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 index 9f48c73..61d15b1 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
@@ -1,6 +1,8 @@ CONFIG_SMP=y CONFIG_NR_CPUS=8 -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_DYNAMIC=n +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n #CHECK#CONFIG_TREE_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 index db27651..0e090bb 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
@@ -1,9 +1,12 @@ CONFIG_SMP=y CONFIG_NR_CPUS=8 -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_DYNAMIC=n +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n #CHECK#CONFIG_TREE_RCU=y +#CHECK#CONFIG_PREEMPT_RCU=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 index 420632b..b2ce378 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
@@ -6,6 +6,7 @@ CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TREE_RCU=y +CONFIG_PREEMPT_RCU=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL index 5d546ef..696fba9 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL
@@ -1,6 +1,8 @@ CONFIG_SMP=y CONFIG_NR_CPUS=8 -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_DYNAMIC=n +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TINY b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY index 0fa2dc0..c1a7d38 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TINY +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY
@@ -1,5 +1,6 @@ CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 index 0059592..f3f1368 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
@@ -1,5 +1,6 @@ CONFIG_SMP=y -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT index 67f9d29..1f215e1 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT +++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
@@ -1,5 +1,6 @@ CONFIG_SMP=y -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/TINY b/tools/testing/selftests/rcutorture/configs/refscale/TINY index 75934398..9a11b57 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/TINY +++ b/tools/testing/selftests/rcutorture/configs/refscale/TINY
@@ -1,5 +1,6 @@ CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n
diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT index 6133f54..f9621e2 100644 --- a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT +++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
@@ -1,5 +1,6 @@ CONFIG_SMP=y -CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_LAZY=y +CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n CONFIG_PREEMPT_DYNAMIC=n
diff --git a/tools/testing/selftests/riscv/vector/validate_v_ptrace.c b/tools/testing/selftests/riscv/vector/validate_v_ptrace.c index 3589549..74b6f6b 100644 --- a/tools/testing/selftests/riscv/vector/validate_v_ptrace.c +++ b/tools/testing/selftests/riscv/vector/validate_v_ptrace.c
@@ -290,10 +290,11 @@ TEST(ptrace_v_syscall_clobbering) /* verify initial vsetvli settings */ - if (is_xtheadvector_supported()) + if (is_xtheadvector_supported()) { EXPECT_EQ(5UL, regset_data->vtype); - else + } else { EXPECT_EQ(9UL, regset_data->vtype); + } EXPECT_EQ(regset_data->vlenb, regset_data->vl); EXPECT_EQ(vlenb, regset_data->vlenb); @@ -346,8 +347,8 @@ FIXTURE_TEARDOWN(v_csr_invalid) { } -#define VECTOR_1_0 BIT(0) -#define XTHEAD_VECTOR_0_7 BIT(1) +#define VECTOR_1_0 _BITUL(0) +#define XTHEAD_VECTOR_0_7 _BITUL(1) #define vector_test(x) ((x) & VECTOR_1_0) #define xthead_test(x) ((x) & XTHEAD_VECTOR_0_7) @@ -619,10 +620,11 @@ TEST_F(v_csr_invalid, ptrace_v_invalid_values) /* verify initial vsetvli settings */ - if (is_xtheadvector_supported()) + if (is_xtheadvector_supported()) { EXPECT_EQ(5UL, regset_data->vtype); - else + } else { EXPECT_EQ(9UL, regset_data->vtype); + } EXPECT_EQ(regset_data->vlenb, regset_data->vl); EXPECT_EQ(vlenb, regset_data->vlenb); @@ -827,10 +829,11 @@ TEST_F(v_csr_valid, ptrace_v_valid_values) /* verify initial vsetvli settings */ - if (is_xtheadvector_supported()) + if (is_xtheadvector_supported()) { EXPECT_EQ(5UL, regset_data->vtype); - else + } else { EXPECT_EQ(9UL, regset_data->vtype); + } EXPECT_EQ(regset_data->vlenb, regset_data->vl); EXPECT_EQ(vlenb, regset_data->vlenb);
diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile index 2c601a7..1c9ca32 100644 --- a/tools/testing/selftests/sched_ext/Makefile +++ b/tools/testing/selftests/sched_ext/Makefile
@@ -93,6 +93,8 @@ $(CLANG_SYS_INCLUDES) \ -Wall -Wno-compare-distinct-pointer-types \ -Wno-incompatible-function-pointer-types \ + -Wno-microsoft-anon-tag \ + -fms-extensions \ -O2 -mcpu=v3 # sort removes libbpf duplicates when not cross-building @@ -186,6 +188,7 @@ rt_stall \ test_example \ total_bw \ + cyclic_kick_wait \ testcase-targets := $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-test-targets)))
diff --git a/tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c b/tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c new file mode 100644 index 0000000..cb34d33 --- /dev/null +++ b/tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c
@@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Stress concurrent SCX_KICK_WAIT calls to reproduce wait-cycle deadlock. + * + * Three CPUs are designated from userspace. Every enqueue from one of the + * three CPUs kicks the next CPU in the ring with SCX_KICK_WAIT, creating a + * persistent A -> B -> C -> A wait cycle pressure. + */ +#include <scx/common.bpf.h> + +char _license[] SEC("license") = "GPL"; + +const volatile s32 test_cpu_a; +const volatile s32 test_cpu_b; +const volatile s32 test_cpu_c; + +u64 nr_enqueues; +u64 nr_wait_kicks; + +UEI_DEFINE(uei); + +static s32 target_cpu(s32 cpu) +{ + if (cpu == test_cpu_a) + return test_cpu_b; + if (cpu == test_cpu_b) + return test_cpu_c; + if (cpu == test_cpu_c) + return test_cpu_a; + return -1; +} + +void BPF_STRUCT_OPS(cyclic_kick_wait_enqueue, struct task_struct *p, + u64 enq_flags) +{ + s32 this_cpu = bpf_get_smp_processor_id(); + s32 tgt; + + __sync_fetch_and_add(&nr_enqueues, 1); + + if (p->flags & PF_KTHREAD) { + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_INF, + enq_flags | SCX_ENQ_PREEMPT); + return; + } + + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + + tgt = target_cpu(this_cpu); + if (tgt < 0 || tgt == this_cpu) + return; + + __sync_fetch_and_add(&nr_wait_kicks, 1); + scx_bpf_kick_cpu(tgt, SCX_KICK_WAIT); +} + +void BPF_STRUCT_OPS(cyclic_kick_wait_exit, struct scx_exit_info *ei) +{ + UEI_RECORD(uei, ei); +} + +SEC(".struct_ops.link") +struct sched_ext_ops cyclic_kick_wait_ops = { + .enqueue = cyclic_kick_wait_enqueue, + .exit = cyclic_kick_wait_exit, + .name = "cyclic_kick_wait", + .timeout_ms = 1000U, +};
diff --git a/tools/testing/selftests/sched_ext/cyclic_kick_wait.c b/tools/testing/selftests/sched_ext/cyclic_kick_wait.c
new file mode 100644
index 0000000..c2e5aa9
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/cyclic_kick_wait.c
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Test SCX_KICK_WAIT forward progress under cyclic wait pressure.
+ *
+ * SCX_KICK_WAIT busy-waits until the target CPU enters the scheduling path.
+ * If multiple CPUs form a wait cycle (A waits for B, B waits for C, C waits
+ * for A), all CPUs deadlock unless the implementation breaks the cycle.
+ *
+ * This test creates that scenario: three CPUs are arranged in a ring. The BPF
+ * scheduler's ops.enqueue() kicks the next CPU in the ring with SCX_KICK_WAIT
+ * on every enqueue. Userspace pins 4 worker threads per CPU that loop calling
+ * sched_yield(), generating a steady stream of enqueues and thus sustained
+ * A->B->C->A kick_wait cycle pressure. The test passes if the system remains
+ * responsive for 5 seconds without the scheduler being killed by the watchdog.
+ */
+#define _GNU_SOURCE
+
+#include <bpf/bpf.h>
+#include <errno.h>
+#include <pthread.h>
+#include <sched.h>
+#include <scx/common.h>
+#include <stdint.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "scx_test.h"
+#include "cyclic_kick_wait.bpf.skel.h"
+
+#define WORKERS_PER_CPU		4
+#define NR_TEST_CPUS		3
+#define NR_WORKERS		(NR_TEST_CPUS * WORKERS_PER_CPU)
+
+struct worker_ctx {
+	pthread_t tid;
+	int cpu;
+	volatile bool stop;
+	volatile __u64 iters;
+	bool started;
+};
+
+static void *worker_fn(void *arg)
+{
+	struct worker_ctx *worker = arg;
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(worker->cpu, &mask);
+
+	if (sched_setaffinity(0, sizeof(mask), &mask))
+		return (void *)(uintptr_t)errno;
+
+	while (!worker->stop) {
+		sched_yield();
+		worker->iters++;
+	}
+
+	return NULL;
+}
+
+static int join_worker(struct worker_ctx *worker)
+{
+	void *ret;
+	struct timespec ts;
+	int err;
+
+	if (!worker->started)
+		return 0;
+
+	if (clock_gettime(CLOCK_REALTIME, &ts))
+		return -errno;
+
+	ts.tv_sec += 2;
+	err = pthread_timedjoin_np(worker->tid, &ret, &ts);
+	if (err == ETIMEDOUT)
+		pthread_detach(worker->tid);
+	if (err)
+		return -err;
+
+	if ((uintptr_t)ret)
+		return -(int)(uintptr_t)ret;
+
+	return 0;
+}
+
+static enum scx_test_status setup(void **ctx)
+{
+	struct cyclic_kick_wait *skel;
+
+	skel = cyclic_kick_wait__open();
+	SCX_FAIL_IF(!skel, "Failed to open skel");
+	SCX_ENUM_INIT(skel);
+
+	*ctx = skel;
+	return SCX_TEST_PASS;
+}
+
+static enum scx_test_status run(void *ctx)
+{
+	struct cyclic_kick_wait *skel = ctx;
+	struct worker_ctx workers[NR_WORKERS] = {};
+	struct bpf_link *link = NULL;
+	enum scx_test_status status = SCX_TEST_PASS;
+	int test_cpus[NR_TEST_CPUS];
+	int nr_cpus = 0;
+	cpu_set_t mask;
+	int ret, i;
+
+	if (sched_getaffinity(0, sizeof(mask), &mask)) {
+		SCX_ERR("Failed to get affinity (%d)", errno);
+		return SCX_TEST_FAIL;
+	}
+
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (CPU_ISSET(i, &mask))
+			test_cpus[nr_cpus++] = i;
+		if (nr_cpus == NR_TEST_CPUS)
+			break;
+	}
+
+	if (nr_cpus < NR_TEST_CPUS)
+		return SCX_TEST_SKIP;
+
+	skel->rodata->test_cpu_a = test_cpus[0];
+	skel->rodata->test_cpu_b = test_cpus[1];
+	skel->rodata->test_cpu_c = test_cpus[2];
+
+	if (cyclic_kick_wait__load(skel)) {
+		SCX_ERR("Failed to load skel");
+		return SCX_TEST_FAIL;
+	}
+
+	link = bpf_map__attach_struct_ops(skel->maps.cyclic_kick_wait_ops);
+	if (!link) {
+		SCX_ERR("Failed to attach scheduler");
+		return SCX_TEST_FAIL;
+	}
+
+	for (i = 0; i < NR_WORKERS; i++)
+		workers[i].cpu = test_cpus[i / WORKERS_PER_CPU];
+
+	for (i = 0; i < NR_WORKERS; i++) {
+		ret = pthread_create(&workers[i].tid, NULL, worker_fn, &workers[i]);
+		if (ret) {
+			SCX_ERR("Failed to create worker thread %d (%d)", i, ret);
+			status = SCX_TEST_FAIL;
+			goto out;
+		}
+		workers[i].started = true;
+	}
+
+	sleep(5);
+
+	if (skel->data->uei.kind != EXIT_KIND(SCX_EXIT_NONE)) {
+		SCX_ERR("Scheduler exited unexpectedly (kind=%llu code=%lld)",
+			(unsigned long long)skel->data->uei.kind,
+			(long long)skel->data->uei.exit_code);
+		status = SCX_TEST_FAIL;
+	}
+
+out:
+	for (i = 0; i < NR_WORKERS; i++)
+		workers[i].stop = true;
+
+	for (i = 0; i < NR_WORKERS; i++) {
+		ret = join_worker(&workers[i]);
+		if (ret && status == SCX_TEST_PASS) {
+			SCX_ERR("Failed to join worker thread %d (%d)", i, ret);
+			status = SCX_TEST_FAIL;
+		}
+	}
+
+	if (link)
+		bpf_link__destroy(link);
+
+	return status;
+}
+
+static void cleanup(void *ctx)
+{
+	struct cyclic_kick_wait *skel = ctx;
+
+	cyclic_kick_wait__destroy(skel);
+}
+
+struct scx_test cyclic_kick_wait = {
+	.name = "cyclic_kick_wait",
+	.description = "Verify SCX_KICK_WAIT forward progress under a 3-CPU wait cycle",
+	.setup = setup,
+	.run = run,
+	.cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&cyclic_kick_wait)
diff --git a/tools/testing/selftests/sched_ext/init_enable_count.c b/tools/testing/selftests/sched_ext/init_enable_count.c
index 82c7165..44577e3 100644
--- a/tools/testing/selftests/sched_ext/init_enable_count.c
+++ b/tools/testing/selftests/sched_ext/init_enable_count.c
@@ -57,7 +57,8 @@ static enum scx_test_status run_test(bool global)
 		char buf;
 
 		close(pipe_fds[1]);
-		read(pipe_fds[0], &buf, 1);
+		if (read(pipe_fds[0], &buf, 1) < 0)
+			exit(1);
 		close(pipe_fds[0]);
 		exit(0);
 	}
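The read() fix is more than style: glibc marks read() warn_unused_result, so the bare call can fail -Werror builds, and checking it means a broken pipe handshake now surfaces through the child's exit status instead of being silently treated as success.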
diff --git a/tools/testing/selftests/sched_ext/peek_dsq.bpf.c b/tools/testing/selftests/sched_ext/peek_dsq.bpf.c
index a3faf5b..784f2f6 100644
--- a/tools/testing/selftests/sched_ext/peek_dsq.bpf.c
+++ b/tools/testing/selftests/sched_ext/peek_dsq.bpf.c
@@ -58,14 +58,14 @@ static void record_peek_result(long pid)
 {
 	u32 slot_key;
 	long *slot_pid_ptr;
-	int ix;
+	u32 ix;
 
 	if (pid <= 0)
 		return;
 
 	/* Find an empty slot or one with the same PID */
 	bpf_for(ix, 0, 10) {
-		slot_key = (pid + ix) % MAX_SAMPLES;
+		slot_key = ((u64)pid + ix) % MAX_SAMPLES;
 		slot_pid_ptr = bpf_map_lookup_elem(&peek_results, &slot_key);
 		if (!slot_pid_ptr)
 			continue;
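The two changes in this hunk work together: bpf_for()'s induction variable is friendlier to the verifier's bounds tracking as a u32, and widening pid before the addition keeps the whole modulo in unsigned arithmetic, so slot_key provably lands in [0, MAX_SAMPLES). The underlying C pitfall, sketched outside BPF with MAX_SAMPLES stood in by 10:

	#include <stdio.h>

	int main(void)
	{
		long pid = -7;	/* record_peek_result() filters pid <= 0 first, but the
				 * verifier still reasons about the expression's sign */
		printf("%ld\n", pid % 10);				/* prints -7: signed % keeps the sign */
		printf("%llu\n", ((unsigned long long)pid + 3) % 10);	/* unsigned: always within [0,10) */
		return 0;
	}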
diff --git a/tools/testing/selftests/sched_ext/rt_stall.c b/tools/testing/selftests/sched_ext/rt_stall.c
index ab772e3..81ea9b4 100644
--- a/tools/testing/selftests/sched_ext/rt_stall.c
+++ b/tools/testing/selftests/sched_ext/rt_stall.c
@@ -15,7 +15,6 @@
 #include <signal.h>
 #include <bpf/bpf.h>
 #include <scx/common.h>
-#include <unistd.h>
 #include "rt_stall.bpf.skel.h"
 #include "scx_test.h"
 #include "../kselftest.h"
diff --git a/tools/testing/selftests/sched_ext/runner.c b/tools/testing/selftests/sched_ext/runner.c
index 5748d2c..761c21f 100644
--- a/tools/testing/selftests/sched_ext/runner.c
+++ b/tools/testing/selftests/sched_ext/runner.c
@@ -166,6 +166,9 @@ int main(int argc, char **argv)
 		enum scx_test_status status;
 		struct scx_test *test = &__scx_tests[i];
 
+		if (exit_req)
+			break;
+
 		if (list) {
 			printf("%s\n", test->name);
 			if (i == (__scx_num_tests - 1))
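Small but user-visible: checking exit_req at the top of the loop lets the runner stop between test cases, so an interrupt (exit_req is presumably raised by runner.c's signal handler) quits after the current test rather than ploughing through the rest of the suite.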
diff --git a/tools/testing/selftests/sched_ext/util.c b/tools/testing/selftests/sched_ext/util.c
index e47769c..2111329 100644
--- a/tools/testing/selftests/sched_ext/util.c
+++ b/tools/testing/selftests/sched_ext/util.c
@@ -60,11 +60,11 @@ int file_write_long(const char *path, long val)
 	char buf[64];
 	int ret;
 
-	ret = sprintf(buf, "%lu", val);
+	ret = sprintf(buf, "%ld", val);
 	if (ret < 0)
 		return ret;
 
-	if (write_text(path, buf, sizeof(buf)) <= 0)
+	if (write_text(path, buf, ret) <= 0)
 		return -1;
 
 	return 0;
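Both fixes in file_write_long() bite for negative values: %lu prints a negative long as a huge unsigned number, and passing sizeof(buf) pushed the uninitialized tail of the buffer into the target file. A sketch of the difference (write_text() is this suite's helper, assumed here to be a thin wrapper around write()):

	#include <stdio.h>

	int main(void)
	{
		char buf[64];
		int len = sprintf(buf, "%ld", -42L);	/* len == 3; buf[3..63] stays uninitialized */

		/* old: write(fd, buf, sizeof(buf)) pushed 64 bytes, 61 of them junk;
		 * new: write(fd, buf, len) writes exactly "-42" */
		printf("meaningful bytes: %d of %zu\n", len, sizeof(buf));
		return 0;
	}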
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
index 7d07c55..33bb8f3 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
@@ -505,5 +505,164 @@
         "teardown": [
             "$TC qdisc del dev $DEV1 ingress"
         ]
+    },
+    {
+        "id": "8883",
+        "name": "Try to attach act_ct to an ets qdisc",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ],
+            "$TC qdisc add dev $DEV1 root handle 1: ets bands 2"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent 1: prio 1 protocol ip matchall action ct index 42",
+        "expExitCode": "2",
+        "verifyCmd": "$TC -j filter ls dev $DEV1 parent 1: prio 1 protocol ip",
+        "matchJSON": [],
+        "teardown": [
+            "$TC qdisc del dev $DEV1 root"
+        ]
+    },
+    {
+        "id": "3b10",
+        "name": "Attach act_ct to an ingress qdisc",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ],
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 ingress prio 1 protocol ip matchall action ct index 42",
+        "expExitCode": "0",
+        "verifyCmd": "$TC -j filter ls dev $DEV1 ingress prio 1 protocol ip",
+        "matchJSON": [
+            {
+                "kind": "matchall"
+            },
+            {
+                "options": {
+                    "actions": [
+                        {
+                            "order": 1,
+                            "kind": "ct",
+                            "index": 42,
+                            "ref": 1,
+                            "bind": 1
+                        }
+                    ]
+                }
+            }
+        ],
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "0337",
+        "name": "Attach act_ct to a clsact/egress qdisc",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ],
+            "$TC qdisc add dev $DEV1 clsact"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 egress prio 1 protocol ip matchall action ct index 42",
+        "expExitCode": "0",
+        "verifyCmd": "$TC -j filter ls dev $DEV1 egress prio 1 protocol ip",
+        "matchJSON": [
+            {
+                "kind": "matchall"
+            },
+            {
+                "options": {
+                    "actions": [
+                        {
+                            "order": 1,
+                            "kind": "ct",
+                            "index": 42,
+                            "ref": 1,
+                            "bind": 1
+                        }
+                    ]
+                }
+            }
+        ],
+        "teardown": [
+            "$TC qdisc del dev $DEV1 clsact"
+        ]
+    },
+    {
+        "id": "4f60",
+        "name": "Attach act_ct to a shared block",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ],
+            "$TC qdisc add dev $DEV1 ingress_block 21 clsact"
+        ],
+        "cmdUnderTest": "$TC filter add block 21 prio 1 protocol ip matchall action ct index 42",
+        "expExitCode": "0",
+        "verifyCmd": "$TC -j filter ls block 21 prio 1 protocol ip",
+        "matchJSON": [
+            {
+                "kind": "matchall"
+            },
+            {
+                "options": {
+                    "actions": [
+                        {
+                            "order": 1,
+                            "kind": "ct",
+                            "index": 42,
+                            "ref": 1,
+                            "bind": 1
+                        }
+                    ]
+                }
+            }
+        ],
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress_block 21 clsact"
+        ]
     }
 ]
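The four cases split one policy two ways: attaching act_ct under a plain ets root is refused at configuration time (exit code 2, empty filter list), while ingress, clsact egress, and a shared ingress block all accept it and dump the action back with the expected index. Each case should be runnable on its own by ID, e.g. with tdc's execute selector ("./tdc.py -e 8883", assuming the usual tdc invocation).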
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
index f587a32..808aef4 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
@@ -1279,5 +1279,104 @@
         "teardown": [
             "$TC actions flush action ife"
         ]
+    },
+    {
+        "id": "f2a0",
+        "name": "Update decode ife action with encode metadata",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action ife decode index 10"
+        ],
+        "cmdUnderTest": "$TC actions replace action ife encode use tcindex 1 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC -j actions get action ife index 10",
+        "matchJSON": [
+            {
+                "total acts": 0
+            },
+            {
+                "actions": [
+                    {
+                        "order": 1,
+                        "kind": "ife",
+                        "mode": "encode",
+                        "control_action": {
+                            "type": "pipe"
+                        },
+                        "type": "0xed3e",
+                        "tcindex": 1,
+                        "index": 10,
+                        "ref": 1,
+                        "bind": 0,
+                        "not_in_hw": true
+                    }
+                ]
+            }
+        ],
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "d352",
+        "name": "Update decode ife action into encode with multiple metadata",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action ife decode index 10"
+        ],
+        "cmdUnderTest": "$TC actions replace action ife encode use tcindex 1 use mark 22 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC -j actions get action ife index 10",
+        "matchJSON": [
+            {
+                "total acts": 0
+            },
+            {
+                "actions": [
+                    {
+                        "order": 1,
+                        "kind": "ife",
+                        "mode": "encode",
+                        "control_action": {
+                            "type": "pipe"
+                        },
+                        "type": "0xed3e",
+                        "tcindex": 1,
+                        "mark": 22,
+                        "index": 10,
+                        "ref": 1,
+                        "bind": 0,
+                        "not_in_hw": true
+                    }
+                ]
+            }
+        ],
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json b/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json
index 8d10042..dbce643 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json
@@ -22,5 +22,49 @@
         "teardown": [
             "$TC qdisc del dev $DUMMY root handle 1: htb default 1"
         ]
+    },
+    {
+        "id": "b7e3",
+        "name": "Empty fw filter on shared block - rejected at config time",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 egress_block 1 clsact"
+        ],
+        "cmdUnderTest": "$TC filter add block 1 protocol ip prio 1 fw",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter show block 1",
+        "matchPattern": "fw",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 clsact"
+        ]
+    },
+    {
+        "id": "c8f4",
+        "name": "Flow filter on shared block without baseclass - rejected at config time",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress_block 1 clsact"
+        ],
+        "cmdUnderTest": "$TC filter add block 1 protocol ip prio 1 handle 1 flow map key dst",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter show block 1",
+        "matchPattern": "flow",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 clsact"
+        ]
     }
 ]
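Both new cases encode the same shift: configurations that previously could only misbehave at packet-processing time on a shared block, an fw filter created with no arguments and a flow filter whose map key has no baseclass to resolve against, are now refused when installed, hence the expected exit code 2 and a matchCount of 0.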
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index 6a39640..1e5efb2 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -1111,5 +1111,30 @@
         "teardown": [
             "$TC qdisc del dev $DUMMY root handle 1:"
         ]
+    },
+    {
+        "id": "a3d7",
+        "name": "HFSC with large m1 - no divide-by-zero on class reactivation",
+        "category": [
+            "qdisc",
+            "hfsc"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc replace dev $DUMMY root handle 1: hfsc default 1",
+            "$TC class replace dev $DUMMY parent 1: classid 1:1 hfsc rt m1 32gbit d 1ms m2 0bit ls m1 32gbit d 1ms m2 0bit",
+            "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true",
+            "sleep 1"
+        ],
+        "cmdUnderTest": "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc hfsc 1: root",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root"
+        ]
     }
 ]
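For readers not fluent in HFSC: rt and ls take two-piece linear service curves, rate m1 for the first d of backlog, then rate m2. The case deliberately pairs an extreme m1 (32gbit over 1ms) with an m2 of zero and reactivates the class after it goes idle (the second ping, after the sleep); the pass criterion is simply that the qdisc is still standing, guarding against the divide-by-zero the test's name refers to when slopes are recomputed on reactivation.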
diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py
index 0440d25..bc447ca 100644
--- a/tools/testing/selftests/tc-testing/tdc_helper.py
+++ b/tools/testing/selftests/tc-testing/tdc_helper.py
@@ -38,10 +38,14 @@
 def list_categories(testlist):
-    """ Show all categories that are present in a test case file. """
-    categories = set(map(lambda x: x['category'], testlist))
+    """Show all unique categories present in the test cases."""
+    categories = set()
+    for t in testlist:
+        if 'category' in t:
+            categories.update(t['category'])
+
     print("Available categories:")
-    print(", ".join(str(s) for s in categories))
+    print(", ".join(sorted(categories)))
     print("")
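The rewrite fixes a real bug, not just style: each test's 'category' is itself a list, and lists are unhashable, so the old set(map(...)) raised TypeError as soon as a category list hit the set. Iterating with categories.update() flattens the lists into individual names, the 'category' in t guard tolerates malformed cases, and sorted() makes the printed list deterministic.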
diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh
index dc8dbe7..86e3388 100755
--- a/tools/testing/selftests/vsock/vmtest.sh
+++ b/tools/testing/selftests/vsock/vmtest.sh
@@ -210,16 +210,21 @@
 }
 
 add_namespaces() {
-	local orig_mode
-	orig_mode=$(cat /proc/sys/net/vsock/child_ns_mode)
+	ip netns add "global-parent" 2>/dev/null
+	echo "global" | ip netns exec "global-parent" \
+		tee /proc/sys/net/vsock/child_ns_mode &>/dev/null
+	ip netns add "local-parent" 2>/dev/null
+	echo "local" | ip netns exec "local-parent" \
+		tee /proc/sys/net/vsock/child_ns_mode &>/dev/null
 
-	for mode in "${NS_MODES[@]}"; do
-		echo "${mode}" > /proc/sys/net/vsock/child_ns_mode
-		ip netns add "${mode}0" 2>/dev/null
-		ip netns add "${mode}1" 2>/dev/null
-	done
-
-	echo "${orig_mode}" > /proc/sys/net/vsock/child_ns_mode
+	nsenter --net=/var/run/netns/global-parent \
+		ip netns add "global0" 2>/dev/null
+	nsenter --net=/var/run/netns/global-parent \
+		ip netns add "global1" 2>/dev/null
+	nsenter --net=/var/run/netns/local-parent \
+		ip netns add "local0" 2>/dev/null
+	nsenter --net=/var/run/netns/local-parent \
+		ip netns add "local1" 2>/dev/null
 }
 
 init_namespaces() {
@@ -237,6 +242,8 @@
 		log_host "removed ns ${mode}0"
 		log_host "removed ns ${mode}1"
 	done
+	ip netns del "global-parent" &>/dev/null
+	ip netns del "local-parent" &>/dev/null
 }
 
 vm_ssh() {
@@ -287,7 +294,7 @@
 }
 
 check_deps() {
-	for dep in vng ${QEMU} busybox pkill ssh ss socat; do
+	for dep in vng ${QEMU} busybox pkill ssh ss socat nsenter; do
 		if [[ ! -x $(command -v "${dep}") ]]; then
 			echo -e "skip: dependency ${dep} not found!\n"
 			exit "${KSFT_SKIP}"
@@ -1231,12 +1238,8 @@
 }
 
 test_ns_host_vsock_child_ns_mode_ok() {
-	local orig_mode
-	local rc
+	local rc="${KSFT_PASS}"
 
-	orig_mode=$(cat /proc/sys/net/vsock/child_ns_mode)
-
-	rc="${KSFT_PASS}"
 	for mode in "${NS_MODES[@]}"; do
 		local ns="${mode}0"
 
@@ -1246,15 +1249,13 @@
 			continue
 		fi
 
-		if ! echo "${mode}" > /proc/sys/net/vsock/child_ns_mode; then
-			log_host "child_ns_mode should be writable to ${mode}"
+		if ! echo "${mode}" | ip netns exec "${ns}" \
+			tee /proc/sys/net/vsock/child_ns_mode &>/dev/null; then
 			rc="${KSFT_FAIL}"
 			continue
 		fi
 	done
 
-	echo "${orig_mode}" > /proc/sys/net/vsock/child_ns_mode
-
 	return "${rc}"
 }
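The restructuring removes the save-and-restore dance on the host's /proc/sys/net/vsock/child_ns_mode. Since child_ns_mode is a per-network-namespace setting, creating one dedicated parent namespace per mode and writing the sysctl from inside it (the ip netns exec ... tee pipelines) lets global0/1 and local0/1 inherit the right mode at creation without ever touching, or racing on, the host's value; the nsenter added to check_deps() is what creates each child from within its parent's namespace.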
diff --git a/tools/tracing/rtla/src/timerlat_bpf.h b/tools/tracing/rtla/src/timerlat_bpf.h
index 169abea..f7c5675 100644
--- a/tools/tracing/rtla/src/timerlat_bpf.h
+++ b/tools/tracing/rtla/src/timerlat_bpf.h
@@ -12,7 +12,6 @@ enum summary_field {
 };
 
 #ifndef __bpf__
-#include <bpf/libbpf.h>
 #ifdef HAVE_BPF_SKEL
 int timerlat_bpf_init(struct timerlat_params *params);
 int timerlat_bpf_attach(void);
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 267c736..794976b 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -5,6 +5,7 @@
 	bool
 	select EVENTFD
 	select INTERVAL_TREE
+	select MMU_NOTIFIER
 	select PREEMPT_NOTIFIERS
 
 config HAVE_KVM_PFNCACHE
@@ -93,24 +94,16 @@ config KVM_GENERIC_HARDWARE_ENABLING
 	bool
 
-config KVM_GENERIC_MMU_NOTIFIER
-	select MMU_NOTIFIER
-	bool
-
 config KVM_ELIDE_TLB_FLUSH_IF_YOUNG
-	depends on KVM_GENERIC_MMU_NOTIFIER
 	bool
 
 config KVM_MMU_LOCKLESS_AGING
-	depends on KVM_GENERIC_MMU_NOTIFIER
 	bool
 
 config KVM_GENERIC_MEMORY_ATTRIBUTES
-	depends on KVM_GENERIC_MMU_NOTIFIER
 	bool
 
 config KVM_GUEST_MEMFD
-	depends on KVM_GENERIC_MMU_NOTIFIER
 	select XARRAY_MULTI
 	bool
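The Kconfig change removes an option rather than adding behavior: every KVM architecture now uses the generic MMU-notifier path, so KVM_GENERIC_MMU_NOTIFIER had become a tautology. KVM selects MMU_NOTIFIER directly, the four dependent options lose their depends clauses, and, in the kvm_main.c hunks below, the #ifdef scaffolding and the kvm_flush_shadow_all() fallback disappear while KVM_CAP_SYNC_MMU is reported unconditionally from generic code.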
diff --git a/virt/kvm/binary_stats.c b/virt/kvm/binary_stats.c
index eefca6c..76ce697 100644
--- a/virt/kvm/binary_stats.c
+++ b/virt/kvm/binary_stats.c
@@ -50,7 +50,7 @@
  * Return: the number of bytes that have been successfully read
  */
 ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
-		       const struct _kvm_stats_desc *desc,
+		       const struct kvm_stats_desc *desc,
 		       void *stats, size_t size_stats,
 		       char __user *user_buffer, size_t size, loff_t *offset)
 {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 22f8a67..9093251 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -502,7 +502,6 @@ void kvm_destroy_vcpus(struct kvm *kvm)
 }
 EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_destroy_vcpus);
 
-#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
 static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
 {
 	return container_of(mn, struct kvm, mmu_notifier);
@@ -901,15 +900,6 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
 	return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
 }
 
-#else  /* !CONFIG_KVM_GENERIC_MMU_NOTIFIER */
-
-static int kvm_init_mmu_notifier(struct kvm *kvm)
-{
-	return 0;
-}
-
-#endif /* CONFIG_KVM_GENERIC_MMU_NOTIFIER */
-
 #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
 static int kvm_pm_notifier_call(struct notifier_block *bl,
 				unsigned long state,
@@ -983,9 +973,9 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
 		kvm_free_memslot(kvm, memslot);
 }
 
-static umode_t kvm_stats_debugfs_mode(const struct _kvm_stats_desc *pdesc)
+static umode_t kvm_stats_debugfs_mode(const struct kvm_stats_desc *desc)
 {
-	switch (pdesc->desc.flags & KVM_STATS_TYPE_MASK) {
+	switch (desc->flags & KVM_STATS_TYPE_MASK) {
 	case KVM_STATS_TYPE_INSTANT:
 		return 0444;
 	case KVM_STATS_TYPE_CUMULATIVE:
@@ -1020,7 +1010,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname)
 	struct dentry *dent;
 	char dir_name[ITOA_MAX_LEN * 2];
 	struct kvm_stat_data *stat_data;
-	const struct _kvm_stats_desc *pdesc;
+	const struct kvm_stats_desc *pdesc;
 	int i, ret = -ENOMEM;
 	int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
 				      kvm_vcpu_stats_header.num_desc;
@@ -1226,10 +1216,8 @@
 out_err_no_debugfs:
 	kvm_coalesced_mmio_free(kvm);
 out_no_coalesced_mmio:
-#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
 	if (kvm->mmu_notifier.ops)
 		mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
-#endif
 out_err_no_mmu_notifier:
 	kvm_disable_virtualization();
 out_err_no_disable:
@@ -1292,7 +1280,6 @@ static void kvm_destroy_vm(struct kvm *kvm)
 		kvm->buses[i] = NULL;
 	}
 	kvm_coalesced_mmio_free(kvm);
-#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
 	/*
 	 * At this point, pending calls to invalidate_range_start()
@@ -1311,9 +1298,6 @@
 		kvm->mn_active_invalidate_count = 0;
 	else
 		WARN_ON(kvm->mmu_invalidate_in_progress);
-#else
-	kvm_flush_shadow_all(kvm);
-#endif
 	kvm_arch_destroy_vm(kvm);
 	kvm_destroy_devices(kvm);
 	for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
@@ -4886,6 +4870,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 {
 	switch (arg) {
+	case KVM_CAP_SYNC_MMU:
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_USER_MEMORY2:
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
@@ -6186,11 +6171,11 @@ static int kvm_stat_data_get(void *data, u64 *val)
 
 	switch (stat_data->kind) {
 	case KVM_STAT_VM:
 		r = kvm_get_stat_per_vm(stat_data->kvm,
-					stat_data->desc->desc.offset, val);
+					stat_data->desc->offset, val);
 		break;
 	case KVM_STAT_VCPU:
 		r = kvm_get_stat_per_vcpu(stat_data->kvm,
-					  stat_data->desc->desc.offset, val);
+					  stat_data->desc->offset, val);
 		break;
 	}
@@ -6208,11 +6193,11 @@ static int kvm_stat_data_clear(void *data, u64 val)
 
 	switch (stat_data->kind) {
 	case KVM_STAT_VM:
 		r = kvm_clear_stat_per_vm(stat_data->kvm,
-					  stat_data->desc->desc.offset);
+					  stat_data->desc->offset);
 		break;
 	case KVM_STAT_VCPU:
 		r = kvm_clear_stat_per_vcpu(stat_data->kvm,
-					    stat_data->desc->desc.offset);
+					    stat_data->desc->offset);
 		break;
 	}
@@ -6360,7 +6345,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
 static void kvm_init_debug(void)
 {
 	const struct file_operations *fops;
-	const struct _kvm_stats_desc *pdesc;
+	const struct kvm_stats_desc *pdesc;
 	int i;
 
 	kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
@@ -6373,7 +6358,7 @@
 			fops = &vm_stat_readonly_fops;
 		debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc),
 				    kvm_debugfs_dir,
-				    (void *)(long)pdesc->desc.offset, fops);
+				    (void *)(long)pdesc->offset, fops);
 	}
 
 	for (i = 0; i < kvm_vcpu_stats_header.num_desc; ++i) {
@@ -6384,7 +6369,7 @@
 			fops = &vcpu_stat_readonly_fops;
 		debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc),
 				    kvm_debugfs_dir,
-				    (void *)(long)pdesc->desc.offset, fops);
+				    (void *)(long)pdesc->offset, fops);
 	}
 }
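The stats rename is mechanical once the old layout is recalled; the following is a reconstruction from the accessor changes alone, with field types and the 48-byte name size as assumptions rather than quotes from the tree:

	/* before: a kernel-side wrapper kept the fixed-size name out of the
	 * UAPI descriptor */
	struct kvm_stats_desc_uapi {		/* stand-in for the old UAPI struct */
		unsigned int flags;
		int offset;
		/* ... */
	};

	struct _kvm_stats_desc {		/* the old kernel-side wrapper */
		struct kvm_stats_desc_uapi desc;
		char name[48];			/* KVM_STATS_NAME_SIZE */
	};

Every hunk above is then one substitution: the wrapper goes away, pdesc->desc.offset becomes pdesc->offset, and pdesc->name is unchanged because the flattened kvm_stats_desc now carries the name itself.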