Merge tag 'probes-fixes-v7.1-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace Pull probes fixes from Masami Hiramatsu: - kprobes: skip non-symbol addresses in kprobe_add_ksym_blacklist() Since the ftrace adds its NOPs at .kprobes.text section (which stores an array), a wrong entry is added when loading a module which uses "__kprobes" attribute. To solve this, add "notrace" to __kprobes functions - test_kprobes: clear kprobes between test runs Clear all kprobes in the test program after running a test set, because Kunit test can run several times - fprobe: Fix unregister_fprobe() to wait for RCU grace period Since the fprobe data structure is removed with hlist_del_rcu(), it should wait for the RCU grace period. If the caller waits for RCU, we can use the async variant (e.g. eBPF) * tag 'probes-fixes-v7.1-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: fprobe: Fix unregister_fprobe() to wait for RCU grace period test_kprobes: clear kprobes between test runs kprobes: skip non-symbol addresses in kprobe_add_ksym_blacklist()
diff --git a/.mailmap b/.mailmap index b78aa09..eec4a74 100644 --- a/.mailmap +++ b/.mailmap
@@ -682,6 +682,7 @@ Peter Hilber <peter.hilber@oss.qualcomm.com> <quic_philber@quicinc.com> Peter Oruba <peter.oruba@amd.com> Peter Oruba <peter@oruba.de> +Peter Rosin <peda@lysator.liu.se> <peda@axentia.se> Pierre-Louis Bossart <pierre-louis.bossart@linux.dev> <pierre-louis.bossart@linux.intel.com> Pratyush Anand <pratyush.anand@gmail.com> <pratyush.anand@st.com> Pratyush Yadav <pratyush@kernel.org> <ptyadav@amazon.de>
diff --git a/Documentation/ABI/obsolete/sysfs-selinux-user b/Documentation/ABI/removed/sysfs-selinux-user similarity index 100% rename from Documentation/ABI/obsolete/sysfs-selinux-user rename to Documentation/ABI/removed/sysfs-selinux-user
diff --git a/Documentation/admin-guide/cgroup-v1/memcg_test.rst b/Documentation/admin-guide/cgroup-v1/memcg_test.rst index 9f8e273..7c7cd45 100644 --- a/Documentation/admin-guide/cgroup-v1/memcg_test.rst +++ b/Documentation/admin-guide/cgroup-v1/memcg_test.rst
@@ -47,21 +47,19 @@ Called when swp_entry's refcnt goes down to 0. A charge against swap disappears. -3. charge-commit-cancel +3. charge-commit ======================= Memcg pages are charged in two steps: - mem_cgroup_try_charge() - - mem_cgroup_commit_charge() or mem_cgroup_cancel_charge() + - commit_charge() At try_charge(), there are no flags to say "this page is charged". at this point, usage += PAGE_SIZE. At commit(), the page is associated with the memcg. - At cancel(), simply usage -= PAGE_SIZE. - Under below explanation, we assume CONFIG_SWAP=y. 4. Anonymous
diff --git a/Documentation/devicetree/bindings/i2c/amlogic,meson6-i2c.yaml b/Documentation/devicetree/bindings/i2c/amlogic,meson6-i2c.yaml index c4cc8af..7b59b60 100644 --- a/Documentation/devicetree/bindings/i2c/amlogic,meson6-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/amlogic,meson6-i2c.yaml
@@ -16,10 +16,15 @@ properties: compatible: - enum: - - amlogic,meson6-i2c # Meson6, Meson8 and compatible SoCs - - amlogic,meson-gxbb-i2c # GXBB and compatible SoCs - - amlogic,meson-axg-i2c # AXG and compatible SoCs + oneOf: + - items: + - enum: + - amlogic,t7-i2c + - const: amlogic,meson-axg-i2c + - enum: + - amlogic,meson6-i2c # Meson6, Meson8 and compatible SoCs + - amlogic,meson-gxbb-i2c # GXBB and compatible SoCs + - amlogic,meson-axg-i2c # AXG and compatible SoCs reg: maxItems: 1
diff --git a/Documentation/devicetree/bindings/i2c/apple,i2c.yaml b/Documentation/devicetree/bindings/i2c/apple,i2c.yaml index 500a965..9e59200 100644 --- a/Documentation/devicetree/bindings/i2c/apple,i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/apple,i2c.yaml
@@ -22,7 +22,9 @@ compatible: oneOf: - items: - - const: apple,t6020-i2c + - enum: + - apple,t6020-i2c + - apple,t8122-i2c - const: apple,t8103-i2c - items: - enum:
diff --git a/Documentation/hwmon/yogafan.rst b/Documentation/hwmon/yogafan.rst index c553a38..6876194 100644 --- a/Documentation/hwmon/yogafan.rst +++ b/Documentation/hwmon/yogafan.rst
@@ -135,4 +135,4 @@ 4. **Lenovo IdeaPad Laptop Driver:** Reference for DMI-based hardware feature gating in Lenovo laptops. - https://github.com/torvalds/linux/blob/master/drivers/platform/x86/ideapad-laptop.c + https://github.com/torvalds/linux/blob/master/drivers/platform/x86/lenovo/ideapad-laptop.c
diff --git a/Documentation/userspace-api/rseq.rst b/Documentation/userspace-api/rseq.rst index 3cd27a3..8549a6c 100644 --- a/Documentation/userspace-api/rseq.rst +++ b/Documentation/userspace-api/rseq.rst
@@ -24,6 +24,97 @@ Allows to implement per CPU data efficiently. Documentation is in code and selftests. :( +Optimized RSEQ V2 +----------------- + +On architectures which utilize the generic entry code and generic TIF bits +the kernel supports runtime optimizations for RSEQ, which also enable +enhanced features like scheduler time slice extensions. + +To enable them a task has to register the RSEQ region with at least the +length advertised by getauxval(AT_RSEQ_FEATURE_SIZE). + +If existing binaries register with RSEQ_ORIG_SIZE (32 bytes), the kernel +keeps the legacy low performance mode enabled to fulfil the expectations +of existing users regarding the original RSEQ implementation behaviour. + +The following table documents the ABI and behavioral guarantees of the +legacy and the optimized V2 mode. + +.. list-table:: RSEQ modes + :header-rows: 1 + + * - Nr + - What + + - Legacy + - Optimized V2 + + * - 1 + - The cpu_id_start, cpu_id, node_id and mm_cid fields (User mode read + only) + .. Legacy + - Updated by the kernel unconditionally after each context switch and + before signal delivery + .. Optimized V2 + - Updated by the kernel if and only if they change, i.e. if the task + is migrated or mm_cid changes + + * - 2 + - The rseq_cs critical section field + .. Legacy + - Evaluated and handled unconditionally after each context switch and + before signal delivery + .. Optimized V2 + - Evaluated and handled conditionally only when user space was + interrupted and was scheduled out or before delivering a signal in + the interrupted context. + + * - 3 + - Read only fields + .. Legacy + - No strict enforcement except in debug mode + .. Optimized V2 + - Strict enforcement + + * - 4 + - membarrier(...RSEQ) + .. Legacy + - All running threads of the process are interrupted and the ID fields + are rewritten and eventually active critical sections are aborted + before they return to user space. 
All threads which are scheduled + out whether voluntary or not are covered by #1/#2 above. + .. Optimized V2 + - All running threads of the process are interrupted and eventually + active critical sections are aborted before these threads return to + user space. The ID fields are only updated if changed as a + consequence of the interrupt. All threads which are scheduled out + whether voluntary or not are covered by #1/#2 above. + + * - 5 + - Time slice extensions + .. Legacy + - Not supported + .. Optimized V2 + - Supported + +The legacy mode is obviously less performant as it does unconditional +updates and critical section checks even if not strictly required by the +ABI contract. That can't be changed anymore as some users depend on that +observed behavior, which in turn enables them to violate the ABI and +overwrite the cpu_id_start field for their own purposes. This is obviously +discouraged as it renders RSEQ incompatible with the intended usage and +breaks the expectation of other libraries in the same application. + +The ABI compliant optimized v2 mode, which respects the read only fields, +does not require unconditional updates and therefore is way more +performant. The kernel validates the read only fields for compliance. If +user space modifies them, the process is killed. Compliant usage allows +multiple libraries in the same application to benefit from the RSEQ +functionality without disturbing each other. The ABI compliant optimized v2 +mode also enables extended RSEQ features like time slice extensions. + + Scheduler time slice extensions ------------------------------- @@ -37,7 +128,8 @@ * Enabled at boot time (default is enabled) - * A rseq userspace pointer has been registered for the thread + * A rseq userspace pointer has been registered for the thread in + optimized V2 mode The thread has to enable the functionality via prctl(2)::
diff --git a/MAINTAINERS b/MAINTAINERS index 882214b..b204001 100644 --- a/MAINTAINERS +++ b/MAINTAINERS
@@ -4299,18 +4299,16 @@ F: drivers/video/backlight/aw99706.c AXENTIA ARM DEVICES -M: Peter Rosin <peda@axentia.se> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Maintained +S: Orphan F: arch/arm/boot/dts/microchip/at91-linea.dtsi F: arch/arm/boot/dts/microchip/at91-natte.dtsi F: arch/arm/boot/dts/microchip/at91-nattis-2-natte-2.dts F: arch/arm/boot/dts/microchip/at91-tse850-3.dts AXENTIA ASOC DRIVERS -M: Peter Rosin <peda@axentia.se> L: linux-sound@vger.kernel.org -S: Maintained +S: Orphan F: Documentation/devicetree/bindings/sound/axentia,* F: sound/soc/atmel/tse850-pcm5142.c @@ -6358,6 +6356,7 @@ COMMON CLK FRAMEWORK M: Michael Turquette <mturquette@baylibre.com> M: Stephen Boyd <sboyd@kernel.org> +R: Brian Masney <bmasney@redhat.com> L: linux-clk@vger.kernel.org S: Maintained Q: http://patchwork.kernel.org/project/linux-clk/list/ @@ -7077,6 +7076,12 @@ F: include/linux/debugobjects.h F: lib/debugobjects.c +DEC LANCE NETWORK DRIVER +M: "Maciej W. Rozycki" <macro@orcam.me.uk> +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/amd/declance.c + DECSTATION PLATFORM SUPPORT M: "Maciej W. 
Rozycki" <macro@orcam.me.uk> L: linux-mips@vger.kernel.org @@ -8193,10 +8198,9 @@ CORE DRIVER FOR NVIDIA GPUS [RUST] M: Danilo Krummrich <dakr@kernel.org> M: Alexandre Courbot <acourbot@nvidia.com> -L: nouveau@lists.freedesktop.org +L: nova-gpu@lists.linux.dev S: Supported W: https://rust-for-linux.com/nova-gpu-driver -Q: https://patchwork.freedesktop.org/project/nouveau/ B: https://gitlab.freedesktop.org/drm/nova/-/issues C: irc://irc.oftc.net/nouveau T: git https://gitlab.freedesktop.org/drm/rust/kernel.git drm-rust-next @@ -8205,10 +8209,9 @@ DRM DRIVER FOR NVIDIA GPUS [RUST] M: Danilo Krummrich <dakr@kernel.org> -L: nouveau@lists.freedesktop.org +L: nova-gpu@lists.linux.dev S: Supported W: https://rust-for-linux.com/nova-gpu-driver -Q: https://patchwork.freedesktop.org/project/nouveau/ B: https://gitlab.freedesktop.org/drm/nova/-/issues C: irc://irc.oftc.net/nouveau T: git https://gitlab.freedesktop.org/drm/rust/kernel.git drm-rust-next @@ -12046,7 +12049,7 @@ F: drivers/i2c/busses/i2c-nvidia-gpu.c I2C MUXES -M: Peter Rosin <peda@axentia.se> +M: Peter Rosin <peda@lysator.liu.se> L: linux-i2c@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/i2c/i2c-arb* @@ -12447,7 +12450,7 @@ F: include/linux/iio/backend.h IIO DIGITAL POTENTIOMETER DAC -M: Peter Rosin <peda@axentia.se> +M: Peter Rosin <peda@lysator.liu.se> L: linux-iio@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-bus-iio-dac-dpot-dac @@ -12455,7 +12458,7 @@ F: drivers/iio/dac/dpot-dac.c IIO ENVELOPE DETECTOR -M: Peter Rosin <peda@axentia.se> +M: Peter Rosin <peda@lysator.liu.se> L: linux-iio@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-bus-iio-adc-envelope-detector @@ -12471,7 +12474,7 @@ F: drivers/iio/test/iio-test-gts.c IIO MULTIPLEXER -M: Peter Rosin <peda@axentia.se> +M: Peter Rosin <peda@lysator.liu.se> L: linux-iio@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.yaml @@ -12502,7 +12505,7 @@ F: 
tools/iio/ IIO UNIT CONVERTER -M: Peter Rosin <peda@axentia.se> +M: Peter Rosin <peda@lysator.liu.se> L: linux-iio@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/iio/afe/current-sense-amplifier.yaml @@ -15718,7 +15721,7 @@ F: drivers/media/i2c/max96717.c MAX9860 MONO AUDIO VOICE CODEC DRIVER -M: Peter Rosin <peda@axentia.se> +M: Peter Rosin <peda@lysator.liu.se> L: linux-sound@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/sound/max9860.txt @@ -15933,7 +15936,7 @@ F: drivers/net/can/spi/mcp251xfd/ MCP4018 AND MCP4531 MICROCHIP DIGITAL POTENTIOMETER DRIVERS -M: Peter Rosin <peda@axentia.se> +M: Peter Rosin <peda@lysator.liu.se> L: linux-iio@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-bus-iio-potentiometer-mcp4531 @@ -18238,7 +18241,7 @@ F: include/uapi/linux/mmc/ MULTIPLEXER SUBSYSTEM -M: Peter Rosin <peda@axentia.se> +M: Peter Rosin <peda@lysator.liu.se> S: Odd Fixes F: Documentation/ABI/testing/sysfs-class-mux* F: Documentation/devicetree/bindings/mux/ @@ -19347,7 +19350,7 @@ K: "nxp,tda998x" NXP TFA9879 DRIVER -M: Peter Rosin <peda@axentia.se> +M: Peter Rosin <peda@lysator.liu.se> L: linux-sound@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/sound/trivial-codec.yaml @@ -20348,13 +20351,14 @@ F: drivers/pci/controller/dwc/pcie-armada8k.c PCI DRIVER FOR CADENCE PCIE IP +R: Aksh Garg <a-garg7@ti.com> L: linux-pci@vger.kernel.org S: Orphan F: Documentation/devicetree/bindings/pci/cdns,* -F: drivers/pci/controller/cadence/*cadence* +F: drivers/pci/controller/cadence/ PCI DRIVER FOR CIX Sky1 -M: Hans Zhang <hans.zhang@cixtech.com> +M: Hans Zhang <18255117159@163.com> L: linux-pci@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pci/cix,sky1-pcie-*.yaml @@ -20466,7 +20470,7 @@ F: drivers/pci/controller/plda/pcie-plda.h PCI DRIVER FOR RENESAS R-CAR -M: Marek Vasut <marek.vasut+renesas@gmail.com> +M: Marek Vasut <marek.vasut+renesas@mailbox.org> M: Yoshihiro Shimoda 
<yoshihiro.shimoda.uh@renesas.com> L: linux-pci@vger.kernel.org L: linux-renesas-soc@vger.kernel.org @@ -24650,6 +24654,7 @@ F: fs/smb/client/smbdirect.* F: fs/smb/smbdirect/ F: fs/smb/server/transport_rdma.* +F: include/linux/smbdirect.h SMC91x ETHERNET DRIVER M: Nicolas Pitre <nico@fluxnic.net>
diff --git a/Makefile b/Makefile index 9f88dca..b7b80e8 100644 --- a/Makefile +++ b/Makefile
@@ -2,7 +2,7 @@ VERSION = 7 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* @@ -486,6 +486,8 @@ -Wclippy::as_ptr_cast_mut \ -Wclippy::as_underscore \ -Wclippy::cast_lossless \ + -Aclippy::collapsible_if \ + -Aclippy::collapsible_match \ -Wclippy::ignored_unit_patterns \ -Aclippy::incompatible_msrv \ -Wclippy::mut_mut \
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index ba5eab2..4d08598 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c
@@ -983,8 +983,8 @@ static int sve_set_common(struct task_struct *target, } /* Always zero V regs, FPSR, and FPCR */ - memset(¤t->thread.uw.fpsimd_state, 0, - sizeof(current->thread.uw.fpsimd_state)); + memset(&target->thread.uw.fpsimd_state, 0, + sizeof(target->thread.uw.fpsimd_state)); /* Registers: FPSIMD-only case */
diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild index beb8499..1c7a0db 100644 --- a/arch/loongarch/Kbuild +++ b/arch/loongarch/Kbuild
@@ -3,7 +3,7 @@ obj-y += net/ obj-y += vdso/ -obj-$(CONFIG_KVM) += kvm/ +obj-$(subst m,y,$(CONFIG_KVM)) += kvm/ # for cleaning subdir- += boot
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 3b042db..606597d 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig
@@ -220,6 +220,7 @@ choice prompt "Kernel type" + default 64BIT # Keep existing behavior config 32BIT bool "32-bit kernel"
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 47516ae..54fcfa1e 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile
@@ -55,9 +55,11 @@ ifdef CONFIG_32BIT tool-archpref = $(32bit-tool-archpref) UTS_MACHINE := loongarch32 +cflags-y += $(call cc-option,-m32) else tool-archpref = $(64bit-tool-archpref) UTS_MACHINE := loongarch64 +cflags-y += $(call cc-option,-m64) endif ifneq ($(SUBARCH),$(ARCH))
diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h index 704066b..de0c17f 100644 --- a/arch/loongarch/include/asm/asm-prototypes.h +++ b/arch/loongarch/include/asm/asm-prototypes.h
@@ -20,3 +20,23 @@ asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_ struct pt_regs *regs, int (*fn)(void *), void *fn_arg); + +struct kvm_run; +struct kvm_vcpu; +struct loongarch_fpu; + +void kvm_exc_entry(void); +int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu); + +void kvm_save_fpu(struct loongarch_fpu *fpu); +void kvm_restore_fpu(struct loongarch_fpu *fpu); + +#ifdef CONFIG_CPU_HAS_LSX +void kvm_save_lsx(struct loongarch_fpu *fpu); +void kvm_restore_lsx(struct loongarch_fpu *fpu); +#endif + +#ifdef CONFIG_CPU_HAS_LASX +void kvm_save_lasx(struct loongarch_fpu *fpu); +void kvm_restore_lasx(struct loongarch_fpu *fpu); +#endif
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 130cedbb..776bc48 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h
@@ -87,7 +87,6 @@ struct kvm_context { struct kvm_world_switch { int (*exc_entry)(void); int (*enter_guest)(struct kvm_run *run, struct kvm_vcpu *vcpu); - unsigned long page_order; }; #define MAX_PGTABLE_LEVELS 4 @@ -359,8 +358,6 @@ void kvm_exc_entry(void); int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu); extern unsigned long vpid_mask; -extern const unsigned long kvm_exception_size; -extern const unsigned long kvm_enter_guest_size; extern struct kvm_world_switch *kvm_loongarch_ops; #define SW_GCSR (1 << 0)
diff --git a/arch/loongarch/include/asm/linkage.h b/arch/loongarch/include/asm/linkage.h index a1bd6a3..ae937d1 100644 --- a/arch/loongarch/include/asm/linkage.h +++ b/arch/loongarch/include/asm/linkage.h
@@ -69,7 +69,7 @@ 9, 10, 11, 12, 13, 14, 15, 16, \ 17, 18, 19, 20, 21, 22, 23, 24, \ 25, 26, 27, 28, 29, 30, 31; \ - .cfi_offset \num, SC_REGS + \num * SZREG; \ + .cfi_offset \num, SC_REGS + \num * 8; \ .endr; \ \ nop; \
diff --git a/arch/loongarch/include/asm/vdso/gettimeofday.h b/arch/loongarch/include/asm/vdso/gettimeofday.h index bae7676..18ba403 100644 --- a/arch/loongarch/include/asm/vdso/gettimeofday.h +++ b/arch/loongarch/include/asm/vdso/gettimeofday.h
@@ -85,12 +85,6 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, return count; } -static inline bool loongarch_vdso_hres_capable(void) -{ - return true; -} -#define __arch_vdso_hres_capable loongarch_vdso_hres_capable - #endif /* CONFIG_GENERIC_GETTIMEOFDAY */ #endif /* !__ASSEMBLER__ */
diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile index ae469ed..a4d044d 100644 --- a/arch/loongarch/kvm/Makefile +++ b/arch/loongarch/kvm/Makefile
@@ -7,11 +7,12 @@ obj-$(CONFIG_KVM) += kvm.o +obj-y += switch.o + kvm-y += exit.o kvm-y += interrupt.o kvm-y += main.o kvm-y += mmu.o -kvm-y += switch.o kvm-y += timer.o kvm-y += tlb.o kvm-y += vcpu.o
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index da0ad89f..3b95cd0 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c
@@ -390,6 +390,7 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) run->mmio.len = 8; break; default: + ret = EMULATE_FAIL; break; } break;
diff --git a/arch/loongarch/kvm/interrupt.c b/arch/loongarch/kvm/interrupt.c index 3293095..a18c60d 100644 --- a/arch/loongarch/kvm/interrupt.c +++ b/arch/loongarch/kvm/interrupt.c
@@ -28,23 +28,29 @@ static unsigned int priority_to_irq[EXCCODE_INT_NUM] = { static int kvm_irq_deliver(struct kvm_vcpu *vcpu, unsigned int priority) { unsigned int irq = 0; + unsigned long old, new; clear_bit(priority, &vcpu->arch.irq_pending); if (priority < EXCCODE_INT_NUM) irq = priority_to_irq[priority]; - if (kvm_guest_has_msgint(&vcpu->arch) && (priority == INT_AVEC)) { - dmsintc_inject_irq(vcpu); - set_gcsr_estat(irq); - return 1; - } - switch (priority) { + case INT_AVEC: + if (!kvm_guest_has_msgint(&vcpu->arch)) + break; + dmsintc_inject_irq(vcpu); + fallthrough; case INT_TI: case INT_IPI: case INT_SWI0: case INT_SWI1: + old = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL); set_gcsr_estat(irq); + new = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL); + + /* Inject TI if TVAL inverted */ + if (new > old) + set_gcsr_estat(CPU_TIMER); break; case INT_HWI0 ... INT_HWI7: @@ -61,22 +67,28 @@ static int kvm_irq_deliver(struct kvm_vcpu *vcpu, unsigned int priority) static int kvm_irq_clear(struct kvm_vcpu *vcpu, unsigned int priority) { unsigned int irq = 0; + unsigned long old, new; clear_bit(priority, &vcpu->arch.irq_clear); if (priority < EXCCODE_INT_NUM) irq = priority_to_irq[priority]; - if (kvm_guest_has_msgint(&vcpu->arch) && (priority == INT_AVEC)) { - clear_gcsr_estat(irq); - return 1; - } - switch (priority) { + case INT_AVEC: + if (!kvm_guest_has_msgint(&vcpu->arch)) + break; + fallthrough; case INT_TI: case INT_IPI: case INT_SWI0: case INT_SWI1: + old = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL); clear_gcsr_estat(irq); + new = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL); + + /* Inject TI if TVAL inverted */ + if (new > old) + set_gcsr_estat(CPU_TIMER); break; case INT_HWI0 ... INT_HWI7:
diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 76ebff2..f105a86 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c
@@ -348,8 +348,7 @@ void kvm_arch_disable_virtualization_cpu(void) static int kvm_loongarch_env_init(void) { - int cpu, order, ret; - void *addr; + int cpu, ret; struct kvm_context *context; vmcs = alloc_percpu(struct kvm_context); @@ -365,30 +364,8 @@ static int kvm_loongarch_env_init(void) return -ENOMEM; } - /* - * PGD register is shared between root kernel and kvm hypervisor. - * So world switch entry should be in DMW area rather than TLB area - * to avoid page fault reenter. - * - * In future if hardware pagetable walking is supported, we won't - * need to copy world switch code to DMW area. - */ - order = get_order(kvm_exception_size + kvm_enter_guest_size); - addr = (void *)__get_free_pages(GFP_KERNEL, order); - if (!addr) { - free_percpu(vmcs); - vmcs = NULL; - kfree(kvm_loongarch_ops); - kvm_loongarch_ops = NULL; - return -ENOMEM; - } - - memcpy(addr, kvm_exc_entry, kvm_exception_size); - memcpy(addr + kvm_exception_size, kvm_enter_guest, kvm_enter_guest_size); - flush_icache_range((unsigned long)addr, (unsigned long)addr + kvm_exception_size + kvm_enter_guest_size); - kvm_loongarch_ops->exc_entry = addr; - kvm_loongarch_ops->enter_guest = addr + kvm_exception_size; - kvm_loongarch_ops->page_order = order; + kvm_loongarch_ops->exc_entry = (void *)kvm_exc_entry; + kvm_loongarch_ops->enter_guest = (void *)kvm_enter_guest; vpid_mask = read_csr_gstat(); vpid_mask = (vpid_mask & CSR_GSTAT_GIDBIT) >> CSR_GSTAT_GIDBIT_SHIFT; @@ -428,16 +405,10 @@ static int kvm_loongarch_env_init(void) static void kvm_loongarch_env_exit(void) { - unsigned long addr; - if (vmcs) free_percpu(vmcs); if (kvm_loongarch_ops) { - if (kvm_loongarch_ops->exc_entry) { - addr = (unsigned long)kvm_loongarch_ops->exc_entry; - free_pages(addr, kvm_loongarch_ops->page_order); - } kfree(kvm_loongarch_ops); }
diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index a7fa458..e104897 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c
@@ -95,7 +95,7 @@ static int kvm_flush_pte(kvm_pte_t *pte, phys_addr_t addr, kvm_ptw_ctx *ctx) else kvm->stat.pages--; - *pte = ctx->invalid_entry; + kvm_set_pte(pte, ctx->invalid_entry); return 1; }
diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S index f1768b7..936e4ae 100644 --- a/arch/loongarch/kvm/switch.S +++ b/arch/loongarch/kvm/switch.S
@@ -4,9 +4,11 @@ */ #include <linux/linkage.h> +#include <linux/kvm_types.h> #include <asm/asm.h> #include <asm/asmmacro.h> #include <asm/loongarch.h> +#include <asm/page.h> #include <asm/regdef.h> #include <asm/unwind_hints.h> @@ -100,11 +102,16 @@ * - is still in guest mode, such as pgd table/vmid registers etc, * - will fix with hw page walk enabled in future * load kvm_vcpu from reserved CSR KVM_VCPU_KS, and save a2 to KVM_TEMP_KS + * + * PGD register is shared between root kernel and kvm hypervisor. + * So world switch entry should be in DMW area rather than TLB area + * to avoid page fault re-enter. */ .text + .p2align PAGE_SHIFT .cfi_sections .debug_frame SYM_CODE_START(kvm_exc_entry) - UNWIND_HINT_UNDEFINED + UNWIND_HINT_END_OF_STACK csrwr a2, KVM_TEMP_KS csrrd a2, KVM_VCPU_KS addi.d a2, a2, KVM_VCPU_ARCH @@ -190,8 +197,8 @@ kvm_restore_host_gpr a2 jr ra -SYM_INNER_LABEL(kvm_exc_entry_end, SYM_L_LOCAL) SYM_CODE_END(kvm_exc_entry) +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry) /* * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu) @@ -215,8 +222,8 @@ /* Save kvm_vcpu to kscratch */ csrwr a1, KVM_VCPU_KS kvm_switch_to_guest -SYM_INNER_LABEL(kvm_enter_guest_end, SYM_L_LOCAL) SYM_FUNC_END(kvm_enter_guest) +EXPORT_SYMBOL_FOR_KVM(kvm_enter_guest) SYM_FUNC_START(kvm_save_fpu) fpu_save_csr a0 t1 @@ -224,6 +231,7 @@ fpu_save_cc a0 t1 t2 jr ra SYM_FUNC_END(kvm_save_fpu) +EXPORT_SYMBOL_FOR_KVM(kvm_save_fpu) SYM_FUNC_START(kvm_restore_fpu) fpu_restore_double a0 t1 @@ -231,6 +239,7 @@ fpu_restore_cc a0 t1 t2 jr ra SYM_FUNC_END(kvm_restore_fpu) +EXPORT_SYMBOL_FOR_KVM(kvm_restore_fpu) #ifdef CONFIG_CPU_HAS_LSX SYM_FUNC_START(kvm_save_lsx) @@ -239,6 +248,7 @@ lsx_save_data a0 t1 jr ra SYM_FUNC_END(kvm_save_lsx) +EXPORT_SYMBOL_FOR_KVM(kvm_save_lsx) SYM_FUNC_START(kvm_restore_lsx) lsx_restore_data a0 t1 @@ -246,6 +256,7 @@ fpu_restore_csr a0 t1 t2 jr ra SYM_FUNC_END(kvm_restore_lsx) +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lsx) #endif #ifdef CONFIG_CPU_HAS_LASX @@ -255,6 
+266,7 @@ lasx_save_data a0 t1 jr ra SYM_FUNC_END(kvm_save_lasx) +EXPORT_SYMBOL_FOR_KVM(kvm_save_lasx) SYM_FUNC_START(kvm_restore_lasx) lasx_restore_data a0 t1 @@ -262,10 +274,8 @@ fpu_restore_csr a0 t1 t2 jr ra SYM_FUNC_END(kvm_restore_lasx) +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lasx) #endif - .section ".rodata" -SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry) -SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest) #ifdef CONFIG_CPU_HAS_LBT STACK_FRAME_NON_STANDARD kvm_restore_fpu
diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index 29c2aaba..8356fce 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c
@@ -96,15 +96,21 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) * and set CSR TVAL with -1 */ write_gcsr_timertick(0); - __delay(2); /* Wait cycles until timer interrupt injected */ /* * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear * timer interrupt, and CSR TVAL keeps unchanged with -1, it * avoids spurious timer interrupt */ - if (!(estat & CPU_TIMER)) + if (!(estat & CPU_TIMER)) { + __delay(2); /* Wait cycles until timer interrupt injected */ + + /* Write TVAL with max value if no TI shot */ + estat = kvm_read_hw_gcsr(LOONGARCH_CSR_ESTAT); + if (!(estat & CPU_TIMER)) + write_gcsr_timertick(CSR_TCFG_VAL); gcsr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); + } return; }
diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 8cc5ee1..1317c71 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c
@@ -125,7 +125,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; case KVM_CAP_NR_VCPUS: - r = num_online_cpus(); + r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS); break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS;
diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index 0dde3dd..b02698a 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c
@@ -61,11 +61,16 @@ static void acpi_release_root_info(struct acpi_pci_root_info *ci) static int acpi_prepare_root_resources(struct acpi_pci_root_info *ci) { int status; + unsigned long long pci_h = 0; struct resource_entry *entry, *tmp; struct acpi_device *device = ci->bridge; status = acpi_pci_probe_root_resources(ci); if (status > 0) { + acpi_evaluate_integer(device->handle, "PCIH", NULL, &pci_h); + if (pci_h) + return status; + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { if (entry->res->flags & IORESOURCE_MEM) { entry->offset = ci->root->mcfg_addr & GENMASK_ULL(63, 40);
diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c index d233ea2..f33c7ea 100644 --- a/arch/loongarch/pci/pci.c +++ b/arch/loongarch/pci/pci.c
@@ -132,6 +132,9 @@ static void loongson_gpu_fixup_dma_hang(struct pci_dev *pdev, bool on) crtc_reg = regbase; crtc_offset = 0x400; break; + default: + iounmap(regbase); + return; } for (i = 0; i < CRTC_NUM_MAX; i++, crtc_reg += crtc_offset) {
diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 42aa962..9c9181b 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile
@@ -12,6 +12,8 @@ ccflags-vdso := \ $(filter -I%,$(KBUILD_CFLAGS)) \ $(filter -E%,$(KBUILD_CFLAGS)) \ + $(filter -m32,$(KBUILD_CFLAGS)) \ + $(filter -m64,$(KBUILD_CFLAGS)) \ $(filter -march=%,$(KBUILD_CFLAGS)) \ $(filter -m%-float,$(KBUILD_CFLAGS)) \ $(CLANG_FLAGS) \
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index edab2a9..4391783 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile
@@ -174,15 +174,21 @@ # this hack. prepare: vdso_prepare vdso_prepare: prepare0 - $(if $(CONFIG_64BIT),$(Q)$(MAKE) \ - $(build)=arch/parisc/kernel/vdso64 include/generated/vdso64-offsets.h) - $(if $(CONFIG_PA11)$(CONFIG_COMPAT),$(Q)$(MAKE) \ +ifdef CONFIG_64BIT + $(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso64 include/generated/vdso64-offsets.h + $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \ $(build)=arch/parisc/kernel/vdso32 include/generated/vdso32-offsets.h) +else + $(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso32 include/generated/vdso32-offsets.h +endif endif -vdso-install-$(CONFIG_PA11) += arch/parisc/kernel/vdso32/vdso32.so +ifdef CONFIG_64BIT +vdso-install-y += arch/parisc/kernel/vdso64/vdso64.so vdso-install-$(CONFIG_COMPAT) += arch/parisc/kernel/vdso32/vdso32.so -vdso-install-$(CONFIG_64BIT) += arch/parisc/kernel/vdso64/vdso64.so +else +vdso-install-y += arch/parisc/kernel/vdso32/vdso32.so +endif install: KBUILD_IMAGE := vmlinux zinstall: KBUILD_IMAGE := vmlinuz
diff --git a/arch/parisc/include/asm/vdso.h b/arch/parisc/include/asm/vdso.h index 5501560..e5cca3c 100644 --- a/arch/parisc/include/asm/vdso.h +++ b/arch/parisc/include/asm/vdso.h
@@ -6,13 +6,14 @@ #ifdef CONFIG_64BIT #include <generated/vdso64-offsets.h> +#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name)) #endif #if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT) #include <generated/vdso32-offsets.h> -#endif - -#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name)) #define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name)) +#else +#define VDSO32_SYMBOL(tsk, name) 0UL +#endif #endif /* __ASSEMBLER__ */
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index 2f34417..49f937c 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile
@@ -46,6 +46,9 @@ # vdso obj-y += vdso.o -obj-$(CONFIG_64BIT) += vdso64/ -obj-$(CONFIG_PA11) += vdso32/ +ifdef CONFIG_64BIT +obj-y += vdso64/ obj-$(CONFIG_COMPAT) += vdso32/ +else +obj-y += vdso32/ +endif
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index bc47bbe..b52ad70 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c
@@ -41,9 +41,7 @@ const struct dma_map_ops *hppa_dma_ops __ro_after_init; EXPORT_SYMBOL(hppa_dma_ops); -static struct device root = { - .init_name = "parisc", -}; +static struct device *root; static inline int check_dev(struct device *dev) { @@ -89,7 +87,7 @@ static int for_each_padev(int (*fn)(struct device *, void *), void * data) .obj = data, .fn = fn, }; - return device_for_each_child(&root, &recurse_data, descend_children); + return device_for_each_child(root, &recurse_data, descend_children); } /** @@ -290,7 +288,7 @@ const struct parisc_device * find_pa_parent_type(const struct parisc_device *padev, int type) { const struct device *dev = &padev->dev; - while (dev != &root) { + while (dev != root) { struct parisc_device *candidate = to_parisc_device(dev); if (candidate->id.hw_type == type) return candidate; @@ -319,7 +317,7 @@ static void get_node_path(struct device *dev, struct hardware_path *path) dev = dev->parent; } - while (dev != &root) { + while (dev != root) { if (dev_is_pci(dev)) { unsigned int devfn = to_pci_dev(dev)->devfn; path->bc[i--] = PCI_SLOT(devfn) | (PCI_FUNC(devfn)<< 5); @@ -482,7 +480,7 @@ static struct parisc_device * __init alloc_tree_node( static struct parisc_device *create_parisc_device(struct hardware_path *modpath) { int i; - struct device *parent = &root; + struct device *parent = root; for (i = 0; i < 6; i++) { if (modpath->bc[i] == -1) continue; @@ -755,7 +753,7 @@ parse_tree_node(struct device *parent, int index, struct hardware_path *modpath) struct device *hwpath_to_device(struct hardware_path *modpath) { int i; - struct device *parent = &root; + struct device *parent = root; for (i = 0; i < 6; i++) { if (modpath->bc[i] == -1) continue; @@ -880,7 +878,7 @@ void __init walk_central_bus(void) { walk_native_bus(CENTRAL_BUS_ADDR, CENTRAL_BUS_ADDR + (MAX_NATIVE_DEVICES * NATIVE_DEVICE_OFFSET), - &root); + root); } static __init void print_parisc_device(struct parisc_device *dev) @@ -907,9 +905,10 @@ void __init 
init_parisc_bus(void) { if (bus_register(&parisc_bus_type)) panic("Could not register PA-RISC bus type\n"); - if (device_register(&root)) + + root = root_device_register("parisc"); + if (IS_ERR(root)) panic("Could not register PA-RISC root device\n"); - get_device(&root); } static __init void qemu_header(void)
diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig index 69ef3dc..7a51539 100644 --- a/arch/powerpc/configs/amigaone_defconfig +++ b/arch/powerpc/configs/amigaone_defconfig
@@ -76,7 +76,6 @@ # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_RADEON=y CONFIG_FB_3DFX=y
diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig index b799c95..66eae5b 100644 --- a/arch/powerpc/configs/chrp32_defconfig +++ b/arch/powerpc/configs/chrp32_defconfig
@@ -76,7 +76,6 @@ CONFIG_NVRAM=y # CONFIG_HWMON is not set CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y CONFIG_FB_OF=y CONFIG_FB_MATROX=y CONFIG_FB_MATROX_MILLENIUM=y
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 04bbb37..e999671 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig
@@ -121,7 +121,6 @@ CONFIG_AGP=m CONFIG_AGP_UNINORTH=m CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_OF=y CONFIG_FB_NVIDIA=y
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 8bbf51b3..89bcbeb 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig
@@ -98,7 +98,6 @@ CONFIG_SENSORS_LM90=y CONFIG_DRM=y CONFIG_DRM_RADEON=y -CONFIG_FIRMWARE_EDID=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_VGA16=y CONFIG_FB_NVIDIA=y
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index cc98024..5d32c27 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig
@@ -196,7 +196,6 @@ # CONFIG_PTP_1588_CLOCK is not set CONFIG_DRM=y CONFIG_DRM_AST=y -CONFIG_FIRMWARE_EDID=y CONFIG_FB_OF=y CONFIG_FB_MATROX=m CONFIG_FB_MATROX_MILLENIUM=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 3bf518e..6316ca4 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig
@@ -249,7 +249,6 @@ CONFIG_I2C_AMD8111=y CONFIG_I2C_PASEMI=y CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y CONFIG_FB_OF=y CONFIG_FB_MATROX=y CONFIG_FB_MATROX_MILLENIUM=y
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 0fd49f6..20cc17d 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -118,7 +118,6 @@ CONFIG_I2C_CHARDEV=y CONFIG_I2C_AMD8111=y CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y CONFIG_FB_OF=y CONFIG_FB_MATROX=y CONFIG_FB_MATROX_MILLENIUM=y
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index ff1bed4..005536e 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig
@@ -214,7 +214,6 @@ CONFIG_DRM=m CONFIG_DRM_AST=m CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 8834dfe..368759f 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile
@@ -62,6 +62,12 @@ # 32-bit one. clang validates the values passed to these arguments during # parsing, even when -fno-stack-protector is passed afterwards. CC32FLAGSREMOVE += -mstack-protector-guard% +# ftrace is disabled for the vdso but arch/powerpc/Makefile adds this define to +# KBUILD_CPPFLAGS, which enables use of the 'patchable_function_entry' +# attribute in the 'inline' define via 'notrace'. This attribute is not +# supported for the powerpcle target, resulting in many instances of +# -Wunknown-attributes. +CC32FLAGSREMOVE += -DCC_USING_PATCHABLE_FUNCTION_ENTRY endif LD32FLAGS := -Wl,-soname=linux-vdso32.so.1 AS32FLAGS := -D__VDSO32__
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile index 470eb04..ec7a0ee 100644 --- a/arch/powerpc/kexec/Makefile +++ b/arch/powerpc/kexec/Makefile
@@ -16,4 +16,4 @@ KCOV_INSTRUMENT_core_$(BITS).o := n UBSAN_SANITIZE_core_$(BITS).o := n KASAN_SANITIZE_core.o := n -KASAN_SANITIZE_core_$(BITS) := n +KASAN_SANITIZE_core_$(BITS).o := n
diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index 554b248..57e897b 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c
@@ -52,7 +52,14 @@ int exit_vmx_usercopy(void) } EXPORT_SYMBOL(exit_vmx_usercopy); -int enter_vmx_ops(void) +/* + * Can be called from kexec copy_page() path with MMU off. The kexec + * code sets preempt_count to HARDIRQ_OFFSET so we return early here. + * Since in_interrupt() is always inline, __no_sanitize_address on this + * function is sufficient to avoid KASAN shadow memory accesses in real + * mode. + */ +int __no_sanitize_address enter_vmx_ops(void) { if (in_interrupt()) return 0;
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 8b00814..2e6adf5 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c
@@ -2242,6 +2242,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, const u64 last_period = event->hw.last_period; s64 prev, delta, left; int record = 0; + int mark_event = regs->dsisr & MMCRA_SAMPLE_ENABLE; if (event->hw.state & PERF_HES_STOPPED) { write_pmc(event->hw.idx, 0); @@ -2304,9 +2305,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * In ISA v3.0 and before values "0" and "7" are considered reserved. * In ISA v3.1, value "7" has been used to indicate "larx/stcx". * Drop the sample if "type" has reserved values for this field with a - * ISA version check. + * ISA version check for marked events. */ - if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC && + if (mark_event && event->attr.sample_type & PERF_SAMPLE_DATA_SRC && ppmu->get_mem_data_src) { val = (regs->dar & SIER_TYPE_MASK) >> SIER_TYPE_SHIFT; if (val == 0 || (val == 7 && !cpu_has_feature(CPU_FTR_ARCH_31))) {
diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index 7433be7..f00734f 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c
@@ -477,7 +477,7 @@ int cpm1_gpiochip_add16(struct device *dev) struct device_node *np = dev->of_node; struct cpm1_gpio16_chip *cpm1_gc; struct gpio_chip *gc; - u16 mask; + u32 mask; cpm1_gc = devm_kzalloc(dev, sizeof(*cpm1_gc), GFP_KERNEL); if (!cpm1_gc) @@ -485,7 +485,7 @@ int cpm1_gpiochip_add16(struct device *dev) spin_lock_init(&cpm1_gc->lock); - if (!of_property_read_u16(np, "fsl,cpm1-gpio-irq-mask", &mask)) { + if (!of_property_read_u32(np, "fsl,cpm1-gpio-irq-mask", &mask)) { int i, j; for (i = 0, j = 0; i < 16; i++)
diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c index 60f990a..2df9552 100644 --- a/arch/powerpc/platforms/pasemi/pci.c +++ b/arch/powerpc/platforms/pasemi/pci.c
@@ -272,13 +272,12 @@ void __init pas_pci_init(void) { struct device_node *root = of_find_node_by_path("/"); struct device_node *np; - int res; pci_set_flags(PCI_SCAN_ALL_PCIE_DEVS); np = of_find_compatible_node(root, NULL, "pasemi,rootbus"); if (np) { - res = pas_add_bridge(np); + pas_add_bridge(np); of_node_put(np); } of_node_put(root);
diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index 12c4737..9109c21 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -950,8 +950,6 @@ static int __init ps3_start_probe_thread(enum ps3_bus_type bus_type) static int __init ps3_register_devices(void) { - int result; - if (!firmware_has_feature(FW_FEATURE_PS3_LV1)) return -ENODEV; @@ -959,7 +957,7 @@ static int __init ps3_register_devices(void) /* ps3_repository_dump_bus_info(); */ - result = ps3_start_probe_thread(PS3_BUS_TYPE_STORAGE); + ps3_start_probe_thread(PS3_BUS_TYPE_STORAGE); ps3_register_vuart_devices();
diff --git a/arch/powerpc/platforms/pseries/htmdump.c b/arch/powerpc/platforms/pseries/htmdump.c index 742ec52..489a80e 100644 --- a/arch/powerpc/platforms/pseries/htmdump.c +++ b/arch/powerpc/platforms/pseries/htmdump.c
@@ -16,6 +16,7 @@ static void *htm_buf; static void *htm_status_buf; static void *htm_info_buf; static void *htm_caps_buf; +static void *htm_mem_buf; static u32 nodeindex; static u32 nodalchipindex; static u32 coreindexonchip; @@ -86,7 +87,7 @@ static ssize_t htm_return_check(long rc) static ssize_t htmdump_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { - void *htm_buf = filp->private_data; + void *htm_buf_data = filp->private_data; unsigned long page, read_size, available; loff_t offset; long rc, ret; @@ -100,7 +101,7 @@ static ssize_t htmdump_read(struct file *filp, char __user *ubuf, * - last three values are address, size and offset */ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, - htmtype, H_HTM_OP_DUMP_DATA, virt_to_phys(htm_buf), + htmtype, H_HTM_OP_DUMP_DATA, virt_to_phys(htm_buf_data), PAGE_SIZE, page); ret = htm_return_check(rc); @@ -112,7 +113,61 @@ static ssize_t htmdump_read(struct file *filp, char __user *ubuf, available = PAGE_SIZE; read_size = min(count, available); *ppos += read_size; - return simple_read_from_buffer(ubuf, count, &offset, htm_buf, available); + return simple_read_from_buffer(ubuf, count, &offset, htm_buf_data, available); +} + +static ssize_t htmsystem_mem_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + void *htm_mem_data = filp->private_data; + long rc, ret; + u64 *num_entries; + u64 to_copy = 0; + loff_t offset = 0; + u64 mem_offset = 0; + + /* + * Invoke H_HTM call with: + * - operation as htm status (H_HTM_OP_STATUS) + * - last three values as addr, size and offset. "offset" + * is value from output buffer header that points to next + * entry to dump. 0 is the first entry to dump. next entry + * is read from the output bufferbyte offset 0x8. + * + * When first time hcall is invoked, mem_offset should be + * zero because zero is the first entry. 
+ * In the next hcall, offset of next entry to read from is + * picked from output buffer header itself. So don't fill + * mem_offset for first read. + * + * If there is no further data to read in next iteration, + * offset value from output buffer header will point to -1. + */ + if (*ppos) { + mem_offset = *(u64 *)(htm_mem_data + 0x8); + if (mem_offset == -1) + return 0; + } + rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, + htmtype, H_HTM_OP_DUMP_SYSMEM_CONF, virt_to_phys(htm_mem_data), + PAGE_SIZE, be64_to_cpu(mem_offset)); + ret = htm_return_check(rc); + if (ret <= 0) { + pr_debug("H_HTM hcall returned for op: H_HTM_OP_DUMP_SYSMEM_CONF with hcall returning %ld\n", ret); + return ret; + } + + /* + * HTM system mem buffer, start of buffer + 0x10 gives the + * number of HTM entries in the buffer. + * So total count to copy is: + * 32 bytes (for first 5 fields) + (number of HTM entries * entry size) + */ + num_entries = htm_mem_data + 0x10; + to_copy = 32 + (be64_to_cpu(*num_entries) * 32); + + *ppos += to_copy; + return simple_read_from_buffer(ubuf, count, &offset, htm_mem_data, to_copy); } static const struct file_operations htmdump_fops = { @@ -121,6 +176,12 @@ static const struct file_operations htmdump_fops = { .open = simple_open, }; +static const struct file_operations htmsystem_mem_fops = { + .llseek = NULL, + .read = htmsystem_mem_read, + .open = simple_open, +}; + static int htmconfigure_set(void *data, u64 val) { long rc, ret; @@ -226,20 +287,31 @@ static int htmstart_get(void *data, u64 *val) static ssize_t htmstatus_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { - void *htm_status_buf = filp->private_data; + void *htm_status_data = filp->private_data; long rc, ret; u64 *num_entries; u64 to_copy; int htmstatus_flag; + loff_t offset = 0; + u64 status_offset = 0; /* * Invoke H_HTM call with: * - operation as htm status (H_HTM_OP_STATUS) - * - last three values as addr, size and offset + * - last 
three values as addr, size and offset. + * "offset" is value from output buffer header + * that points to next entry to dump. 0 is the first + * entry to dump. next entry is read from the output + * bufferbyte offset 0x8. */ + if (*ppos) { + status_offset = *(u64 *)(htm_status_data + 0x8); + if (status_offset == -1) + return 0; + } rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, - htmtype, H_HTM_OP_STATUS, virt_to_phys(htm_status_buf), - PAGE_SIZE, 0); + htmtype, H_HTM_OP_STATUS, virt_to_phys(htm_status_data), + PAGE_SIZE, be64_to_cpu(status_offset)); ret = htm_return_check(rc); if (ret <= 0) { @@ -255,13 +327,15 @@ static ssize_t htmstatus_read(struct file *filp, char __user *ubuf, * So total count to copy is: * 32 bytes (for first 7 fields) + (number of HTM entries * entry size) */ - num_entries = htm_status_buf + 0x10; + num_entries = htm_status_data + 0x10; if (htmtype == 0x2) htmstatus_flag = 0x8; else htmstatus_flag = 0x6; to_copy = 32 + (be64_to_cpu(*num_entries) * htmstatus_flag); - return simple_read_from_buffer(ubuf, count, ppos, htm_status_buf, to_copy); + *ppos += to_copy; + + return simple_read_from_buffer(ubuf, count, &offset, htm_status_data, to_copy); } static const struct file_operations htmstatus_fops = { @@ -273,19 +347,30 @@ static const struct file_operations htmstatus_fops = { static ssize_t htminfo_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { - void *htm_info_buf = filp->private_data; + void *htm_info_data = filp->private_data; long rc, ret; u64 *num_entries; u64 to_copy; + loff_t offset = 0; + u64 info_offset = 0; /* * Invoke H_HTM call with: * - operation as htm status (H_HTM_OP_STATUS) * - last three values as addr, size and offset + * "offset" is value from output buffer header + * that points to next entry to dump. 0 is the first + * entry to dump. next entry is read from the output + * bufferbyte offset 0x8. 
*/ + if (*ppos) { + info_offset = *(u64 *)(htm_info_data + 0x8); + if (info_offset == -1) + return 0; + } rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, - htmtype, H_HTM_OP_DUMP_SYSPROC_CONF, virt_to_phys(htm_info_buf), - PAGE_SIZE, 0); + htmtype, H_HTM_OP_DUMP_SYSPROC_CONF, virt_to_phys(htm_info_data), + PAGE_SIZE, be64_to_cpu(info_offset)); ret = htm_return_check(rc); if (ret <= 0) { @@ -301,15 +386,17 @@ static ssize_t htminfo_read(struct file *filp, char __user *ubuf, * So total count to copy is: * 32 bytes (for first 5 fields) + (number of HTM entries * entry size) */ - num_entries = htm_info_buf + 0x10; + num_entries = htm_info_data + 0x10; to_copy = 32 + (be64_to_cpu(*num_entries) * 16); - return simple_read_from_buffer(ubuf, count, ppos, htm_info_buf, to_copy); + + *ppos += to_copy; + return simple_read_from_buffer(ubuf, count, &offset, htm_info_data, to_copy); } static ssize_t htmcaps_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { - void *htm_caps_buf = filp->private_data; + void *htm_caps_data = filp->private_data; long rc, ret; /* @@ -319,7 +406,7 @@ static ssize_t htmcaps_read(struct file *filp, char __user *ubuf, * and zero */ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, - htmtype, H_HTM_OP_CAPABILITIES, virt_to_phys(htm_caps_buf), + htmtype, H_HTM_OP_CAPABILITIES, virt_to_phys(htm_caps_data), 0x80, 0); ret = htm_return_check(rc); @@ -328,7 +415,7 @@ static ssize_t htmcaps_read(struct file *filp, char __user *ubuf, return ret; } - return simple_read_from_buffer(ubuf, count, ppos, htm_caps_buf, 0x80); + return simple_read_from_buffer(ubuf, count, ppos, htm_caps_data, 0x80); } static const struct file_operations htminfo_fops = { @@ -457,9 +544,17 @@ static int htmdump_init_debugfs(void) return -ENOMEM; } + /* Memory to present HTM system memory configuration */ + htm_mem_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!htm_mem_buf) { + pr_err("Failed to allocate htm mem 
buf\n"); + return -ENOMEM; + } + debugfs_create_file("htmstatus", 0400, htmdump_debugfs_dir, htm_status_buf, &htmstatus_fops); debugfs_create_file("htminfo", 0400, htmdump_debugfs_dir, htm_info_buf, &htminfo_fops); debugfs_create_file("htmcaps", 0400, htmdump_debugfs_dir, htm_caps_buf, &htmcaps_fops); + debugfs_create_file("htmsystem_mem", 0400, htmdump_debugfs_dir, htm_mem_buf, &htmsystem_mem_fops); return 0; } @@ -482,6 +577,10 @@ static void __exit htmdump_exit(void) { debugfs_remove_recursive(htmdump_debugfs_dir); kfree(htm_buf); + kfree(htm_status_buf); + kfree(htm_info_buf); + kfree(htm_caps_buf); + kfree(htm_mem_buf); } module_init(htmdump_init);
diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.c b/arch/powerpc/platforms/pseries/papr-hvpipe.c index 14ae480..0c40bdd 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.c +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.c
@@ -190,33 +190,34 @@ static int hvpipe_rtas_recv_msg(char __user *buf, int size) return -ENOMEM; } - ret = rtas_ibm_receive_hvpipe_msg(work_area, &srcID, - &bytes_written); - if (!ret) { - /* - * Recv HVPIPE RTAS is successful. - * When releasing FD or no one is waiting on the - * specific source, issue recv HVPIPE RTAS call - * so that pipe is not blocked - this func is called - * with NULL buf. - */ - if (buf) { - if (size < bytes_written) { - pr_err("Received the payload size = %d, but the buffer size = %d\n", - bytes_written, size); - bytes_written = size; - } - ret = copy_to_user(buf, - rtas_work_area_raw_buf(work_area), - bytes_written); - if (!ret) - ret = bytes_written; - } - } else { - pr_err("ibm,receive-hvpipe-msg failed with %d\n", - ret); + /* + * Recv HVPIPE RTAS is successful. + * When releasing FD or no one is waiting on the + * specific source, issue recv HVPIPE RTAS call + * so that pipe is not blocked - this func is called + * with NULL buf. + */ + ret = rtas_ibm_receive_hvpipe_msg(work_area, &srcID, &bytes_written); + if (ret) { + pr_err("ibm,receive-hvpipe-msg failed with %d\n", ret); + goto out; } + if (!buf) + goto out; + + if (size < bytes_written) { + pr_err("Received the payload size = %d, but the buffer size = %d\n", + bytes_written, size); + bytes_written = size; + } + + if (copy_to_user(buf, rtas_work_area_raw_buf(work_area), bytes_written)) + ret = -EFAULT; + else + ret = bytes_written; + +out: rtas_work_area_free(work_area); return ret; } @@ -327,8 +328,8 @@ static ssize_t papr_hvpipe_handle_read(struct file *file, { struct hvpipe_source_info *src_info = file->private_data; - struct papr_hvpipe_hdr hdr; - long ret; + struct papr_hvpipe_hdr hdr = {}; + ssize_t ret = 0; /* * Return -ENXIO during migration @@ -376,7 +377,7 @@ static ssize_t papr_hvpipe_handle_read(struct file *file, ret = copy_to_user(buf, &hdr, HVPIPE_HDR_LEN); if (ret) - return ret; + return -EFAULT; /* * Message event has payload, so get the payload with @@ -385,19 
+386,23 @@ static ssize_t papr_hvpipe_handle_read(struct file *file, if (hdr.flags & HVPIPE_MSG_AVAILABLE) { ret = hvpipe_rtas_recv_msg(buf + HVPIPE_HDR_LEN, size - HVPIPE_HDR_LEN); - if (ret > 0) { + /* + * Always clear MSG_AVAILABLE once the RTAS call has drained + * the message, regardless of whether copy_to_user succeeded. + */ + if (ret >= 0 || ret == -EFAULT) src_info->hvpipe_status &= ~HVPIPE_MSG_AVAILABLE; - ret += HVPIPE_HDR_LEN; - } } else if (hdr.flags & HVPIPE_LOST_CONNECTION) { /* * Hypervisor is closing the pipe for the specific * source. So notify user space. */ src_info->hvpipe_status &= ~HVPIPE_LOST_CONNECTION; - ret = HVPIPE_HDR_LEN; } + if (ret >= 0) + ret += HVPIPE_HDR_LEN; + return ret; } @@ -444,16 +449,18 @@ static int papr_hvpipe_handle_release(struct inode *inode, struct file *file) { struct hvpipe_source_info *src_info; + unsigned long flags; /* * Hold the lock, remove source from src_list, reset the * hvpipe status and release the lock to prevent any race * with message event IRQ. */ - spin_lock(&hvpipe_src_list_lock); + spin_lock_irqsave(&hvpipe_src_list_lock, flags); src_info = file->private_data; list_del(&src_info->list); file->private_data = NULL; + spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); /* * If the pipe for this specific source has any pending * payload, issue recv HVPIPE RTAS so that pipe will not @@ -461,10 +468,8 @@ static int papr_hvpipe_handle_release(struct inode *inode, */ if (src_info->hvpipe_status & HVPIPE_MSG_AVAILABLE) { src_info->hvpipe_status = 0; - spin_unlock(&hvpipe_src_list_lock); hvpipe_rtas_recv_msg(NULL, 0); - } else - spin_unlock(&hvpipe_src_list_lock); + } kfree(src_info); return 0; @@ -479,50 +484,53 @@ static const struct file_operations papr_hvpipe_handle_ops = { static int papr_hvpipe_dev_create_handle(u32 srcID) { - struct hvpipe_source_info *src_info __free(kfree) = NULL; - - spin_lock(&hvpipe_src_list_lock); - /* - * Do not allow more than one process communicates with - * each source. 
- */ - src_info = hvpipe_find_source(srcID); - if (src_info) { - spin_unlock(&hvpipe_src_list_lock); - pr_err("pid(%d) is already using the source(%d)\n", - src_info->tsk->pid, srcID); - return -EALREADY; - } - spin_unlock(&hvpipe_src_list_lock); + struct hvpipe_source_info *src_info; + int fd; + unsigned long flags; src_info = kzalloc_obj(*src_info, GFP_KERNEL_ACCOUNT); if (!src_info) return -ENOMEM; src_info->srcID = srcID; - src_info->tsk = current; init_waitqueue_head(&src_info->recv_wqh); - FD_PREPARE(fdf, O_RDONLY | O_CLOEXEC, - anon_inode_getfile("[papr-hvpipe]", &papr_hvpipe_handle_ops, - (void *)src_info, O_RDWR)); - if (fdf.err) - return fdf.err; - - retain_and_null_ptr(src_info); - spin_lock(&hvpipe_src_list_lock); /* - * If two processes are executing ioctl() for the same - * source ID concurrently, prevent the second process to - * acquire FD. + * Do not allow more than one process communicates with + * each source. */ + spin_lock_irqsave(&hvpipe_src_list_lock, flags); if (hvpipe_find_source(srcID)) { - spin_unlock(&hvpipe_src_list_lock); + spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); + pr_err("pid(%s:%d) could not get the source(%d)\n", + current->comm, task_pid_nr(current), srcID); + kfree(src_info); return -EALREADY; } list_add(&src_info->list, &hvpipe_src_list); - spin_unlock(&hvpipe_src_list_lock); - return fd_publish(fdf); + spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); + + fd = FD_ADD(O_RDONLY | O_CLOEXEC, + anon_inode_getfile("[papr-hvpipe]", &papr_hvpipe_handle_ops, + (void *)src_info, O_RDWR)); + if (fd < 0) { + spin_lock_irqsave(&hvpipe_src_list_lock, flags); + list_del(&src_info->list); + spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); + /* + * if we fail to add FD, that means no userspace program is + * polling. 
In that case if there is a msg pending because the + * interrupt was fired after the src_info was added to the + * global list, then let's consume it here, to unblock the + * hvpipe + */ + if (src_info->hvpipe_status & HVPIPE_MSG_AVAILABLE) + hvpipe_rtas_recv_msg(NULL, 0); + kfree(src_info); + return fd; + } + + return fd; } /* @@ -685,20 +693,19 @@ static int __init enable_hvpipe_IRQ(void) struct device_node *np; hvpipe_check_exception_token = rtas_function_token(RTAS_FN_CHECK_EXCEPTION); - if (hvpipe_check_exception_token == RTAS_UNKNOWN_SERVICE) + if (hvpipe_check_exception_token == RTAS_UNKNOWN_SERVICE) return -ENODEV; /* hvpipe events */ np = of_find_node_by_path("/event-sources/ibm,hvpipe-msg-events"); - if (np != NULL) { - request_event_sources_irqs(np, hvpipe_event_interrupt, - "HPIPE_EVENT"); - of_node_put(np); - } else { - pr_err("Can not enable hvpipe event IRQ\n"); + if (!np) { + pr_err("No device node found, could not enable hvpipe event IRQ\n"); return -ENODEV; } + request_event_sources_irqs(np, hvpipe_event_interrupt, "HPIPE_EVENT"); + of_node_put(np); + return 0; } @@ -775,23 +782,29 @@ static int __init papr_hvpipe_init(void) } ret = enable_hvpipe_IRQ(); - if (!ret) { - ret = set_hvpipe_sys_param(1); - if (!ret) - ret = misc_register(&papr_hvpipe_dev); - } + if (ret) + goto out_wq; - if (!ret) { - pr_info("hvpipe feature is enabled\n"); - hvpipe_feature = true; - return 0; - } + ret = misc_register(&papr_hvpipe_dev); + if (ret) + goto out_wq; - pr_err("hvpipe feature is not enabled %d\n", ret); + ret = set_hvpipe_sys_param(1); + if (ret) + goto out_misc; + + pr_info("hvpipe feature is enabled\n"); + hvpipe_feature = true; + return 0; + +out_misc: + misc_deregister(&papr_hvpipe_dev); +out_wq: destroy_workqueue(papr_hvpipe_wq); out: kfree(papr_hvpipe_work); papr_hvpipe_work = NULL; + pr_err("hvpipe feature is not enabled %d\n", ret); return ret; } machine_device_initcall(pseries, papr_hvpipe_init);
diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.h b/arch/powerpc/platforms/pseries/papr-hvpipe.h index c343f42..4bdf7bb 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.h +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.h
@@ -21,7 +21,6 @@ struct hvpipe_source_info { u32 srcID; u32 hvpipe_status; wait_queue_head_t recv_wqh; /* wake up poll() waitq */ - struct task_struct *tsk; }; /*
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 810ab21..4b9e105 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c
@@ -1294,13 +1294,16 @@ int x86_perf_rdpmc_index(struct perf_event *event) return event->hw.event_base_rdpmc; } -static inline int match_prev_assignment(struct hw_perf_event *hwc, +static inline int match_prev_assignment(struct perf_event *event, struct cpu_hw_events *cpuc, int i) { + struct hw_perf_event *hwc = &event->hw; + return hwc->idx == cpuc->assign[i] && - hwc->last_cpu == smp_processor_id() && - hwc->last_tag == cpuc->tags[i]; + hwc->last_cpu == smp_processor_id() && + hwc->last_tag == cpuc->tags[i] && + !is_acr_event_group(event); } static void x86_pmu_start(struct perf_event *event, int flags); @@ -1346,7 +1349,7 @@ static void x86_pmu_enable(struct pmu *pmu) * - no other event has used the counter since */ if (hwc->idx == -1 || - match_prev_assignment(hwc, cpuc, i)) + match_prev_assignment(event, cpuc, i)) continue; /* @@ -1367,7 +1370,7 @@ static void x86_pmu_enable(struct pmu *pmu) event = cpuc->event_list[i]; hwc = &event->hw; - if (!match_prev_assignment(hwc, cpuc, i)) + if (!match_prev_assignment(event, cpuc, i)) x86_assign_hw_event(event, cpuc, i); else if (i < n_running) continue;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index d9488ad..dd1e3aa 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c
@@ -3118,11 +3118,11 @@ static void intel_pmu_enable_fixed(struct perf_event *event) intel_set_masks(event, idx); /* - * Enable IRQ generation (0x8), if not PEBS, - * and enable ring-3 counting (0x2) and ring-0 counting (0x1) - * if requested: + * Enable IRQ generation (0x8), if not PEBS or self-reloaded + * ACR event, and enable ring-3 counting (0x2) and ring-0 + * counting (0x1) if requested: */ - if (!event->attr.precise_ip) + if (!event->attr.precise_ip && !is_acr_self_reload_event(event)) bits |= INTEL_FIXED_0_ENABLE_PMI; if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) bits |= INTEL_FIXED_0_USER; @@ -3306,6 +3306,15 @@ static void intel_pmu_enable_event(struct perf_event *event) intel_set_masks(event, idx); static_call_cond(intel_pmu_enable_acr_event)(event); static_call_cond(intel_pmu_enable_event_ext)(event); + /* + * For self-reloaded ACR event, don't enable PMI since + * HW won't set overflow bit in GLOBAL_STATUS. Otherwise, + * the PMI would be recognized as a suspicious NMI. + */ + if (is_acr_self_reload_event(event)) + hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; + else if (!event->attr.precise_ip) + hwc->config |= ARCH_PERFMON_EVENTSEL_INT; __x86_pmu_enable_event(hwc, enable_mask); break; case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1: @@ -3332,23 +3341,41 @@ static void intel_pmu_enable_event(struct perf_event *event) static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc) { struct perf_event *event, *leader; - int i, j, idx; + int i, j, k, bit, idx; + /* + * FIXME: ACR mask parsing relies on cpuc->event_list[] (active events only). + * Disabling an ACR event causes bit-shifting errors in the acr_mask of + * remaining group members. As ACR sampling requires all events to be active, + * this limitation is acceptable for now. Revisit if independent event toggling + * is required. + */ for (i = 0; i < cpuc->n_events; i++) { leader = cpuc->event_list[i]; if (!is_acr_event_group(leader)) continue; - /* The ACR events must be contiguous. 
*/ + /* Find the last event of the ACR group. */ for (j = i; j < cpuc->n_events; j++) { event = cpuc->event_list[j]; if (event->group_leader != leader->group_leader) break; - for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) { - if (i + idx >= cpuc->n_events || - !is_acr_event_group(cpuc->event_list[i + idx])) - return; - __set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1); + } + + /* + * Translate the user-space ACR mask (attr.config2) into the physical + * counter bitmask (hw.config1) for each ACR event in the group. + * NOTE: ACR event contiguity is guaranteed by intel_pmu_hw_config(). + */ + for (k = i; k < j; k++) { + event = cpuc->event_list[k]; + event->hw.config1 = 0; + for_each_set_bit(bit, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) { + idx = i + bit; + /* Event index of ACR group must locate in [i, j). */ + if (idx >= j || !is_acr_event_group(cpuc->event_list[idx])) + continue; + __set_bit(cpuc->assign[idx], (unsigned long *)&event->hw.config1); } } i = j - 1; @@ -7504,6 +7531,7 @@ static __always_inline void intel_pmu_init_pnc(struct pmu *pmu) hybrid(pmu, event_constraints) = intel_pnc_event_constraints; hybrid(pmu, pebs_constraints) = intel_pnc_pebs_event_constraints; hybrid(pmu, extra_regs) = intel_pnc_extra_regs; + static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr); } static __always_inline void intel_pmu_init_skt(struct pmu *pmu)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index fad87d3..524668d 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h
@@ -137,6 +137,16 @@ static inline bool is_acr_event_group(struct perf_event *event) return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR); } +static inline bool is_acr_self_reload_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->idx < 0) + return false; + + return test_bit(hwc->idx, (unsigned long *)&hwc->config1); +} + struct amd_nb { int nb_id; /* NorthBridge id */ int refcnt; /* reference count */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index dc8fe13..be58b7f 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h
@@ -137,7 +137,8 @@ extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); extern int __init efi_reuse_config(u64 tables, int nr_tables); extern void efi_delete_dummy_variable(void); -extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr); +extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr, + const struct pt_regs *regs); extern void efi_unmap_boot_services(void); void arch_efi_call_virt_setup(void);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index a14a0f4..86554de 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h
@@ -803,9 +803,10 @@ #define MSR_AMD64_LBR_SELECT 0xc000010e /* Zen4 */ -#define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG 0xc001102e #define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4 #define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 +#define MSR_ZEN2_BP_CFG_BUG_FIX_BIT 33 /* Fam 19h MSRs */ #define MSR_F19H_UMC_PERF_CTL 0xc0010800
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2d9ae6a..2f8e8ff 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c
@@ -989,6 +989,9 @@ static void init_amd_zen2(struct cpuinfo_x86 *c) /* Correct misconfigured CPUID on some clients. */ clear_cpu_cap(c, X86_FEATURE_INVLPGB); + + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN2_BP_CFG_BUG_FIX_BIT); } static void init_amd_zen3(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 2a99927..eb72537 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c
@@ -450,6 +450,10 @@ __init static int append_e820_table(struct boot_e820_entry *entries, u32 nr_entr { struct boot_e820_entry *entry = entries; + /* If there aren't any entries, we'll want to fall back to another source: */ + if (!nr_entries) + return -ENOENT; + while (nr_entries) { u64 start = entry->addr; u64 size = entry->size; @@ -458,7 +462,7 @@ __init static int append_e820_table(struct boot_e820_entry *entries, u32 nr_entr /* Ignore the remaining entries on 64-bit overflow: */ if (start > end && likely(size)) - return -1; + return -EINVAL; e820__range_add(start, size, type);
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 9b140bb..4438eca 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c
@@ -2040,7 +2040,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) * flush). Translate the address here so the memory can be uniformly * read with kvm_read_guest(). */ - if (!hc->fast && is_guest_mode(vcpu)) { + if (!hc->fast && mmu_is_nested(vcpu)) { hc->ingpa = translate_nested_gpa(vcpu, hc->ingpa, 0, NULL); if (unlikely(hc->ingpa == INVALID_GPA)) return HV_STATUS_INVALID_HYPERCALL_INPUT;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e3ec4d8..4078e62 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c
@@ -667,13 +667,15 @@ bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr) u32 *__pir = (void *)pir_vals; u32 i, vec; u32 irr_val, prev_irr_val; - int max_updated_irr; + int max_new_irr; - max_updated_irr = -1; - *max_irr = -1; - - if (!pi_harvest_pir(pir, pir_vals)) + if (!pi_harvest_pir(pir, pir_vals)) { + *max_irr = apic_find_highest_vector(regs + APIC_IRR); return false; + } + + max_new_irr = -1; + *max_irr = -1; for (i = vec = 0; i <= 7; i++, vec += 32) { u32 *p_irr = (u32 *)(regs + APIC_IRR + i * 0x10); @@ -688,25 +690,25 @@ bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr) !try_cmpxchg(p_irr, &prev_irr_val, irr_val)); if (prev_irr_val != irr_val) - max_updated_irr = __fls(irr_val ^ prev_irr_val) + vec; + max_new_irr = __fls(irr_val ^ prev_irr_val) + vec; } if (irr_val) *max_irr = __fls(irr_val) + vec; } - return ((max_updated_irr != -1) && - (max_updated_irr == *max_irr)); + return max_new_irr != -1 && max_new_irr == *max_irr; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_apic_update_irr); bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr) { struct kvm_lapic *apic = vcpu->arch.apic; - bool irr_updated = __kvm_apic_update_irr(pir, apic->regs, max_irr); + bool max_irr_is_from_pir; - if (unlikely(!apic->apicv_active && irr_updated)) + max_irr_is_from_pir = __kvm_apic_update_irr(pir, apic->regs, max_irr); + if (unlikely(!apic->apicv_active && max_irr_is_from_pir)) apic->irr_pending = true; - return irr_updated; + return max_irr_is_from_pir; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_update_irr);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 24fbc9e..8922462 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c
@@ -182,6 +182,8 @@ static struct kmem_cache *pte_list_desc_cache; struct kmem_cache *mmu_page_header_cache; static void mmu_spte_set(u64 *sptep, u64 spte); +static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, + u64 *spte, struct list_head *invalid_list); struct kvm_mmu_role_regs { const unsigned long cr0; @@ -1287,19 +1289,6 @@ static void drop_spte(struct kvm *kvm, u64 *sptep) rmap_remove(kvm, sptep); } -static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush) -{ - struct kvm_mmu_page *sp; - - sp = sptep_to_sp(sptep); - WARN_ON_ONCE(sp->role.level == PG_LEVEL_4K); - - drop_spte(kvm, sptep); - - if (flush) - kvm_flush_remote_tlbs_sptep(kvm, sptep); -} - /* * Write-protect on the specified @sptep, @pt_protect indicates whether * spte write-protection is caused by protecting shadow page table. @@ -2466,7 +2455,8 @@ static struct kvm_mmu_page *kvm_mmu_get_child_sp(struct kvm_vcpu *vcpu, { union kvm_mmu_page_role role; - if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) + if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep) && + spte_to_child_sp(*sptep) && spte_to_child_sp(*sptep)->gfn == gfn) return ERR_PTR(-EEXIST); role = kvm_mmu_child_role(sptep, direct, access); @@ -2544,13 +2534,16 @@ static void __link_shadow_page(struct kvm *kvm, BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); - /* - * If an SPTE is present already, it must be a leaf and therefore - * a large one. Drop it, and flush the TLB if needed, before - * installing sp. - */ - if (is_shadow_present_pte(*sptep)) - drop_large_spte(kvm, sptep, flush); + if (is_shadow_present_pte(*sptep)) { + struct kvm_mmu_page *parent_sp; + LIST_HEAD(invalid_list); + + parent_sp = sptep_to_sp(sptep); + WARN_ON_ONCE(parent_sp->role.level == PG_LEVEL_4K); + + mmu_page_zap_pte(kvm, parent_sp, sptep, &invalid_list); + kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, true); + } spte = make_nonleaf_spte(sp->spt, sp_ad_disabled(sp));
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a29896a..5c2c33a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c
@@ -7029,8 +7029,8 @@ static void vmx_set_rvi(int vector) int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) { struct vcpu_vt *vt = to_vt(vcpu); + bool max_irr_is_from_pir; int max_irr; - bool got_posted_interrupt; if (KVM_BUG_ON(!enable_apicv, vcpu->kvm)) return -EIO; @@ -7042,17 +7042,22 @@ int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) * But on x86 this is just a compiler barrier anyway. */ smp_mb__after_atomic(); - got_posted_interrupt = - kvm_apic_update_irr(vcpu, vt->pi_desc.pir, &max_irr); + max_irr_is_from_pir = kvm_apic_update_irr(vcpu, vt->pi_desc.pir, + &max_irr); } else { max_irr = kvm_lapic_find_highest_irr(vcpu); - got_posted_interrupt = false; + max_irr_is_from_pir = false; } /* - * Newly recognized interrupts are injected via either virtual interrupt - * delivery (RVI) or KVM_REQ_EVENT. Virtual interrupt delivery is - * disabled in two cases: + * If APICv is enabled and L2 is not active, then update the Requesting + * Virtual Interrupt (RVI) portion of vmcs01.GUEST_INTR_STATUS with the + * highest priority IRR to deliver the IRQ via Virtual Interrupt + * Delivery. Note, this is required even if the highest priority IRQ + * was already pending in the IRR, as RVI isn't updated in lockstep with + * the IRR (unlike apic->irr_pending). + * + * For the cases where Virtual Interrupt Delivery can't be used: * * 1) If L2 is running and the vCPU has a new pending interrupt. If L1 * wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a @@ -7063,10 +7068,29 @@ int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) * 2) If APICv is disabled for this vCPU, assigned devices may still * attempt to post interrupts. The posted interrupt vector will cause * a VM-Exit and the subsequent entry will call sync_pir_to_irr. + * + * In both cases, set KVM_REQ_EVENT if and only if the highest priority + * pending IRQ came from the PIR, as setting KVM_REQ_EVENT if any IRQ + * is pending may put the vCPU into an infinite loop, e.g. 
if the IRQ + * is blocked, then it will stay pending until an IRQ window is opened. + * + * Note! It's possible that one or more IRQs were moved from the PIR + * to the IRR _without_ max_irr_is_from_pir being true! I.e. if there + * was a higher priority IRQ already pending in the IRR. Not setting + * KVM_REQ_EVENT in this case is intentional and safe. If APICv is + * inactive, or L2 is running with exit-on-interrupt off (in vmcs12), + * i.e. without nested virtual interrupt delivery, then there's no need + * to request an IRQ window as the lower priority IRQ only needs to be + * delivered when the higher priority IRQ is dismissed from the ISR, + * i.e. on the next EOI, and EOIs are always intercepted if APICv is + * disabled or if L2 is running without nested VID. If L2 is running + * exit-on-interrupt on (in vmcs12), then the higher priority IRQ will + * trigger a nested VM-Exit, at which point KVM will re-evaluate L1's + * pending IRQs. */ if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu)) vmx_set_rvi(max_irr); - else if (got_posted_interrupt) + else if (max_irr_is_from_pir) kvm_make_request(KVM_REQ_EVENT, vcpu); return max_irr;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f0e77e0..63de8e8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c
@@ -686,7 +686,7 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code, * avoid hanging the system. */ if (IS_ENABLED(CONFIG_EFI)) - efi_crash_gracefully_on_page_fault(address); + efi_crash_gracefully_on_page_fault(address, regs); /* Only not-present faults should be handled by KFENCE. */ if (!(error_code & X86_PF_PROT) &&
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index df24ffc..90a065f 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c
@@ -761,7 +761,8 @@ int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, * @return: Returns, if the page fault is not handled. This function * will never return if the page fault is handled successfully. */ -void efi_crash_gracefully_on_page_fault(unsigned long phys_addr) +void efi_crash_gracefully_on_page_fault(unsigned long phys_addr, + const struct pt_regs *regs) { if (!IS_ENABLED(CONFIG_X86_64)) return; @@ -770,7 +771,7 @@ void efi_crash_gracefully_on_page_fault(unsigned long phys_addr) * If we get an interrupt/NMI while processing an EFI runtime service * then this is a regular OOPS, not an EFI failure. */ - if (in_interrupt()) + if (!in_task()) return; /* @@ -811,6 +812,14 @@ void efi_crash_gracefully_on_page_fault(unsigned long phys_addr) } /* + * The API does not permit entering a kernel mode FPU section with + * interrupts enabled and leaving it with interrupts disabled. So + * re-enable interrupts now if they were enabled when the page fault + * occurred. + */ + local_irq_restore(regs->flags); + + /* * Before calling EFI Runtime Service, the kernel has switched the * calling process to efi_mm. Hence, switch back to task_mm. */
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index ac8021c..bb95a05 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c
@@ -695,17 +695,22 @@ static void __init xen_e820_resolve_conflicts(phys_addr_t start, return; end = start + size; - entry = xen_e820_table.entries; + mapcnt = 0; - for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++) { + while (mapcnt < xen_e820_table.nr_entries) { + entry = xen_e820_table.entries + mapcnt; if (entry->addr >= end) return; if (entry->addr + entry->size > start && - entry->type == E820_TYPE_NVS) + entry->type == E820_TYPE_NVS) { xen_e820_swap_entry_with_ram(entry); + /* E820 map has been changed, restart loop! */ + mapcnt = 0; + continue; + } - entry++; + mapcnt++; } }
diff --git a/block/ioctl.c b/block/ioctl.c index fc3be05..ab2c9ed 100644 --- a/block/ioctl.c +++ b/block/ioctl.c
@@ -857,6 +857,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) #endif struct blk_iou_cmd { + u64 start; + u64 len; int res; bool nowait; }; @@ -946,23 +948,27 @@ int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) { struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); - const struct io_uring_sqe *sqe = cmd->sqe; u32 cmd_op = cmd->cmd_op; - uint64_t start, len; - if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || - sqe->rw_flags || sqe->file_index)) - return -EINVAL; + /* Read what we need from the SQE on the first issue */ + if (!(issue_flags & IORING_URING_CMD_REISSUE)) { + const struct io_uring_sqe *sqe = cmd->sqe; + + if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || + sqe->rw_flags || sqe->file_index)) + return -EINVAL; + + bic->start = READ_ONCE(sqe->addr); + bic->len = READ_ONCE(sqe->addr3); + } bic->res = 0; bic->nowait = issue_flags & IO_URING_F_NONBLOCK; - start = READ_ONCE(sqe->addr); - len = READ_ONCE(sqe->addr3); - switch (cmd_op) { case BLOCK_URING_CMD_DISCARD: - return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); + return blkdev_cmd_discard(cmd, bdev, bic->start, bic->len, + bic->nowait); } return -EINVAL; }
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 2801378..3b7b008 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -537,6 +537,26 @@ static const struct file_operations ivpu_fops = { #endif }; +static int ivpu_gem_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv, + u32 handle, u32 flags, int *prime_fd) +{ + struct drm_gem_object *obj; + + obj = drm_gem_object_lookup(file_priv, handle); + if (!obj) + return -ENOENT; + + if (drm_gem_is_imported(obj)) { + /* Do not allow re-exporting */ + drm_gem_object_put(obj); + return -EOPNOTSUPP; + } + + drm_gem_object_put(obj); + + return drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, prime_fd); +} + static const struct drm_driver driver = { .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL, @@ -545,6 +565,7 @@ static const struct drm_driver driver = { .gem_create_object = ivpu_gem_create_object, .gem_prime_import = ivpu_gem_prime_import, + .prime_handle_to_fd = ivpu_gem_prime_handle_to_fd, .ioctls = ivpu_drm_ioctls, .num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
diff --git a/drivers/accel/qaic/qaic_ras.c b/drivers/accel/qaic/qaic_ras.c index cc0b754..6791af3 100644 --- a/drivers/accel/qaic/qaic_ras.c +++ b/drivers/accel/qaic/qaic_ras.c
@@ -497,11 +497,11 @@ static void decode_ras_msg(struct qaic_device *qdev, struct ras_data *msg) qdev->ce_count++; break; case UE: - if (qdev->ce_count != UINT_MAX) + if (qdev->ue_count != UINT_MAX) qdev->ue_count++; break; case UE_NF: - if (qdev->ce_count != UINT_MAX) + if (qdev->ue_nf_count != UINT_MAX) qdev->ue_nf_count++; break; default:
diff --git a/drivers/android/binder/range_alloc/array.rs b/drivers/android/binder/range_alloc/array.rs index ada1d1b..081d19b 100644 --- a/drivers/android/binder/range_alloc/array.rs +++ b/drivers/android/binder/range_alloc/array.rs
@@ -204,7 +204,6 @@ pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> // caller will mark them as unused, which means that they can be freed if the system comes // under memory pressure. let mut freed_range = FreedRange::interior_pages(offset, size); - #[expect(clippy::collapsible_if)] // reads better like this if offset % PAGE_SIZE != 0 { if i == 0 || self.ranges[i - 1].endpoint() <= (offset & PAGE_MASK) { freed_range.start_page_idx -= 1;
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 8e5f373..6d13f14 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c
@@ -900,6 +900,20 @@ static int ublk_validate_params(const struct ublk_device *ub) if (p->logical_bs_shift > PAGE_SHIFT || p->logical_bs_shift < 9) return -EINVAL; + /* + * 256M is a reasonable upper bound for physical block size, + * io_min and io_opt; it aligns with the maximum physical + * block size possible in NVMe. + */ + if (p->physical_bs_shift > ilog2(SZ_256M)) + return -EINVAL; + + if (p->io_min_shift > ilog2(SZ_256M)) + return -EINVAL; + + if (p->io_opt_shift > ilog2(SZ_256M)) + return -EINVAL; + if (p->logical_bs_shift > p->physical_bs_shift) return -EINVAL; @@ -2397,8 +2411,14 @@ static void ublk_reset_ch_dev(struct ublk_device *ub) { int i; - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) - ublk_queue_reinit(ub, ublk_get_queue(ub, i)); + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + struct ublk_queue *ubq = ublk_get_queue(ub, i); + + /* Sync with ublk_cancel_cmd() */ + spin_lock(&ubq->cancel_lock); + ublk_queue_reinit(ub, ubq); + spin_unlock(&ubq->cancel_lock); + } /* set to NULL, otherwise new tasks cannot mmap io_cmd_buf */ ub->mm = NULL; @@ -2739,6 +2759,7 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, { struct ublk_io *io = &ubq->ios[tag]; struct ublk_device *ub = ubq->dev; + struct io_uring_cmd *cmd = NULL; struct request *req; bool done; @@ -2761,12 +2782,15 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, spin_lock(&ubq->cancel_lock); done = !!(io->flags & UBLK_IO_FLAG_CANCELED); - if (!done) + if (!done) { io->flags |= UBLK_IO_FLAG_CANCELED; + cmd = io->cmd; + io->cmd = NULL; + } spin_unlock(&ubq->cancel_lock); - if (!done) - io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, issue_flags); + if (!done && cmd) + io_uring_cmd_done(cmd, UBLK_IO_RES_ABORT, issue_flags); } /* @@ -3496,8 +3520,10 @@ static void ublk_ch_uring_cmd_cb(struct io_tw_req tw_req, io_tw_token_t tw) { unsigned int issue_flags = IO_URING_CMD_TASK_WORK_ISSUE_FLAGS; struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req); - int ret = 
ublk_ch_uring_cmd_local(cmd, issue_flags); + int ret = -ECANCELED; + if (!tw.cancel) + ret = ublk_ch_uring_cmd_local(cmd, issue_flags); if (ret != -EIOCBQUEUED) io_uring_cmd_done(cmd, ret, issue_flags); } @@ -4990,13 +5016,15 @@ static int ublk_ctrl_set_params(struct ublk_device *ub, */ ret = -EACCES; } else if (copy_from_user(&ub->params, argp, ph.len)) { + /* zero out partial copy so no stale params survive */ + memset(&ub->params, 0, sizeof(ub->params)); ret = -EFAULT; } else { /* clear all we don't support yet */ ub->params.types &= UBLK_PARAM_TYPE_ALL; ret = ublk_validate_params(ub); if (ret) - ub->params.types = 0; + memset(&ub->params, 0, sizeof(ub->params)); } mutex_unlock(&ub->mutex);
diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 2f59c0d..a3643e6 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c
@@ -289,6 +289,9 @@ static inline void btintel_pcie_dump_debug_registers(struct hci_dev *hdev) skb_put_data(skb, buf, strlen(buf)); data->boot_stage_cache = reg; + if (reg & BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_WARNING) + bt_dev_warn(hdev, "Controller device warning (boot_stage: 0x%8.8x)", reg); + reg = btintel_pcie_rd_reg32(data, BTINTEL_PCIE_CSR_IPC_STATUS_REG); snprintf(buf, sizeof(buf), "ipc status: 0x%8.8x", reg); skb_put_data(skb, buf, strlen(buf)); @@ -880,8 +883,11 @@ static inline bool btintel_pcie_in_lockdown(struct btintel_pcie_data *data) static inline bool btintel_pcie_in_error(struct btintel_pcie_data *data) { - return (data->boot_stage_cache & BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_ERR) || - (data->boot_stage_cache & BTINTEL_PCIE_CSR_BOOT_STAGE_ABORT_HANDLER); + if (data->boot_stage_cache & BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_WARNING) + bt_dev_warn(data->hdev, "Controller device warning (boot_stage: 0x%8.8x)", + data->boot_stage_cache); + + return data->boot_stage_cache & BTINTEL_PCIE_CSR_BOOT_STAGE_ABORT_HANDLER; } static void btintel_pcie_msix_gp1_handler(struct btintel_pcie_data *data) @@ -914,7 +920,8 @@ static void btintel_pcie_msix_gp0_handler(struct btintel_pcie_data *data) data->img_resp_cache = reg; if (btintel_pcie_in_error(data)) { - bt_dev_err(data->hdev, "Controller in error state"); + bt_dev_err(data->hdev, "Controller in error state (boot_stage: 0x%8.8x)", + data->boot_stage_cache); btintel_pcie_dump_debug_registers(data->hdev); return; }
diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h index 3c7bb70..f922abd 100644 --- a/drivers/bluetooth/btintel_pcie.h +++ b/drivers/bluetooth/btintel_pcie.h
@@ -48,7 +48,7 @@ #define BTINTEL_PCIE_CSR_BOOT_STAGE_OPFW (BIT(2)) #define BTINTEL_PCIE_CSR_BOOT_STAGE_ROM_LOCKDOWN (BIT(10)) #define BTINTEL_PCIE_CSR_BOOT_STAGE_IML_LOCKDOWN (BIT(11)) -#define BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_ERR (BIT(12)) +#define BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_WARNING (BIT(12)) #define BTINTEL_PCIE_CSR_BOOT_STAGE_ABORT_HANDLER (BIT(13)) #define BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_HALTED (BIT(14)) #define BTINTEL_PCIE_CSR_BOOT_STAGE_MAC_ACCESS_ON (BIT(16))
diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index 6fb6ca2..f70c1b0 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c
@@ -695,8 +695,13 @@ static int btmtk_usb_hci_wmt_sync(struct hci_dev *hdev, if (data->evt_skb == NULL) goto err_free_wc; - /* Parse and handle the return WMT event */ - wmt_evt = (struct btmtk_hci_wmt_evt *)data->evt_skb->data; + wmt_evt = skb_pull_data(data->evt_skb, sizeof(*wmt_evt)); + if (!wmt_evt) { + bt_dev_err(hdev, "WMT event too short (%u bytes)", + data->evt_skb->len); + err = -EINVAL; + goto err_free_skb; + } if (wmt_evt->whdr.op != hdr->op) { bt_dev_err(hdev, "Wrong op received %d expected %d", wmt_evt->whdr.op, hdr->op); @@ -712,6 +717,12 @@ static int btmtk_usb_hci_wmt_sync(struct hci_dev *hdev, status = BTMTK_WMT_PATCH_DONE; break; case BTMTK_WMT_FUNC_CTRL: + if (!skb_pull_data(data->evt_skb, + sizeof(wmt_evt_funcc->status))) { + err = -EINVAL; + goto err_free_skb; + } + wmt_evt_funcc = (struct btmtk_hci_wmt_evt_funcc *)wmt_evt; if (be16_to_cpu(wmt_evt_funcc->status) == 0x404) status = BTMTK_WMT_ON_DONE;
diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c index fa679ad..8201fa7 100644 --- a/drivers/bluetooth/hci_ath.c +++ b/drivers/bluetooth/hci_ath.c
@@ -191,6 +191,9 @@ static int ath_recv(struct hci_uart *hu, const void *data, int count) { struct ath_struct *ath = hu->priv; + if (!ath) + return -ENODEV; + ath->rx_skb = h4_recv_buf(hu, ath->rx_skb, data, count, ath_recv_pkts, ARRAY_SIZE(ath_recv_pkts)); if (IS_ERR(ath->rx_skb)) {
diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index b386f91..db56eea 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c
@@ -585,6 +585,9 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; + if (!bcsp) + return -ENODEV; + BT_DBG("hu %p count %d rx_state %d rx_count %ld", hu, count, bcsp->rx_state, bcsp->rx_count);
diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index a889a66..7673727 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c
@@ -109,6 +109,9 @@ static int h4_recv(struct hci_uart *hu, const void *data, int count) { struct h4_struct *h4 = hu->priv; + if (!h4) + return -ENODEV; + h4->rx_skb = h4_recv_buf(hu, h4->rx_skb, data, count, h4_recv_pkts, ARRAY_SIZE(h4_recv_pkts)); if (IS_ERR(h4->rx_skb)) {
diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index cfdf75d..d353837 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c
@@ -587,6 +587,9 @@ static int h5_recv(struct hci_uart *hu, const void *data, int count) struct h5 *h5 = hu->priv; const unsigned char *ptr = data; + if (!h5) + return -ENODEV; + BT_DBG("%s pending %zu count %d", hu->hdev->name, h5->rx_pending, count);
diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c index 76d61af..140ab55 100644 --- a/drivers/bluetooth/virtio_bt.c +++ b/drivers/bluetooth/virtio_bt.c
@@ -12,6 +12,7 @@ #include <net/bluetooth/hci_core.h> #define VERSION "0.1" +#define VIRTBT_RX_BUF_SIZE 1000 enum { VIRTBT_VQ_TX, @@ -33,11 +34,11 @@ static int virtbt_add_inbuf(struct virtio_bluetooth *vbt) struct sk_buff *skb; int err; - skb = alloc_skb(1000, GFP_KERNEL); + skb = alloc_skb(VIRTBT_RX_BUF_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; - sg_init_one(sg, skb->data, 1000); + sg_init_one(sg, skb->data, VIRTBT_RX_BUF_SIZE); err = virtqueue_add_inbuf(vq, sg, 1, skb, GFP_KERNEL); if (err < 0) { @@ -197,6 +198,7 @@ static int virtbt_shutdown_generic(struct hci_dev *hdev) static void virtbt_rx_handle(struct virtio_bluetooth *vbt, struct sk_buff *skb) { + size_t min_hdr; __u8 pkt_type; pkt_type = *((__u8 *) skb->data); @@ -204,16 +206,32 @@ static void virtbt_rx_handle(struct virtio_bluetooth *vbt, struct sk_buff *skb) switch (pkt_type) { case HCI_EVENT_PKT: + min_hdr = sizeof(struct hci_event_hdr); + break; case HCI_ACLDATA_PKT: + min_hdr = sizeof(struct hci_acl_hdr); + break; case HCI_SCODATA_PKT: + min_hdr = sizeof(struct hci_sco_hdr); + break; case HCI_ISODATA_PKT: - hci_skb_pkt_type(skb) = pkt_type; - hci_recv_frame(vbt->hdev, skb); + min_hdr = sizeof(struct hci_iso_hdr); break; default: kfree_skb(skb); - break; + return; } + + if (skb->len < min_hdr) { + bt_dev_err_ratelimited(vbt->hdev, + "rx pkt_type 0x%02x payload %u < hdr %zu\n", + pkt_type, skb->len, min_hdr); + kfree_skb(skb); + return; + } + + hci_skb_pkt_type(skb) = pkt_type; + hci_recv_frame(vbt->hdev, skb); } static void virtbt_rx_work(struct work_struct *work) @@ -227,8 +245,15 @@ static void virtbt_rx_work(struct work_struct *work) if (!skb) return; - skb_put(skb, len); - virtbt_rx_handle(vbt, skb); + if (!len || len > VIRTBT_RX_BUF_SIZE) { + bt_dev_err_ratelimited(vbt->hdev, + "rx reply len %u outside [1, %u]\n", + len, VIRTBT_RX_BUF_SIZE); + kfree_skb(skb); + } else { + skb_put(skb, len); + virtbt_rx_handle(vbt, skb); + } if (virtbt_add_inbuf(vbt) < 0) return;
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 4a9e9de..9a9d12b 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -168,6 +168,10 @@ struct smi_info { OEM2_DATA_AVAIL) unsigned char msg_flags; + /* When requesting events and messages, don't do it forever. */ + unsigned int num_requests_in_a_row; + bool last_was_flag_fetch; + /* Does the BMC have an event buffer? */ bool has_event_buffer; @@ -410,7 +414,10 @@ static void start_getting_msg_queue(struct smi_info *smi_info) start_new_msg(smi_info, smi_info->curr_msg->data, smi_info->curr_msg->data_size); - smi_info->si_state = SI_GETTING_MESSAGES; + if (smi_info->si_state != SI_GETTING_MESSAGES) { + smi_info->num_requests_in_a_row = 0; + smi_info->si_state = SI_GETTING_MESSAGES; + } } static void start_getting_events(struct smi_info *smi_info) @@ -421,7 +428,10 @@ static void start_getting_events(struct smi_info *smi_info) start_new_msg(smi_info, smi_info->curr_msg->data, smi_info->curr_msg->data_size); - smi_info->si_state = SI_GETTING_EVENTS; + if (smi_info->si_state != SI_GETTING_EVENTS) { + smi_info->num_requests_in_a_row = 0; + smi_info->si_state = SI_GETTING_EVENTS; + } } /* @@ -487,15 +497,19 @@ static void handle_flags(struct smi_info *smi_info) } else if (smi_info->msg_flags & RECEIVE_MSG_AVAIL) { /* Messages available. */ smi_info->curr_msg = alloc_msg_handle_irq(smi_info); - if (!smi_info->curr_msg) + if (!smi_info->curr_msg) { + smi_info->si_state = SI_NORMAL; return; + } start_getting_msg_queue(smi_info); } else if (smi_info->msg_flags & EVENT_MSG_BUFFER_FULL) { /* Events available. 
*/ smi_info->curr_msg = alloc_msg_handle_irq(smi_info); - if (!smi_info->curr_msg) + if (!smi_info->curr_msg) { + smi_info->si_state = SI_NORMAL; return; + } start_getting_events(smi_info); } else if (smi_info->msg_flags & OEM_DATA_AVAIL && @@ -595,6 +609,7 @@ static void handle_transaction_done(struct smi_info *smi_info) smi_info->si_state = SI_NORMAL; } else { smi_info->msg_flags = msg[3]; + smi_info->last_was_flag_fetch = true; handle_flags(smi_info); } break; @@ -630,7 +645,13 @@ static void handle_transaction_done(struct smi_info *smi_info) */ msg = smi_info->curr_msg; smi_info->curr_msg = NULL; - if (msg->rsp[2] != 0) { + /* + * It appears some BMCs, with no event data, return no + * data in the message and not a 0x80 error as the + * spec says they should. Shut down processing if + * the data is not the right length. + */ + if (msg->rsp[2] != 0 || msg->rsp_size != 19) { /* Error getting event, probably done. */ msg->done(msg); @@ -640,6 +661,11 @@ static void handle_transaction_done(struct smi_info *smi_info) } else { smi_inc_stat(smi_info, events); + smi_info->num_requests_in_a_row++; + if (smi_info->num_requests_in_a_row > 10) + /* Stop if we do this too many times. */ + smi_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL; + /* * Do this before we deliver the message * because delivering the message releases the @@ -678,6 +704,11 @@ static void handle_transaction_done(struct smi_info *smi_info) } else { smi_inc_stat(smi_info, incoming_messages); + smi_info->num_requests_in_a_row++; + if (smi_info->num_requests_in_a_row > 10) + /* Stop if we do this too many times. */ + smi_info->msg_flags &= ~RECEIVE_MSG_AVAIL; + /* * Do this before we deliver the message * because delivering the message releases the @@ -820,6 +851,26 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, } /* + * If we are currently idle, or if the last thing that was + * done was a flag fetch and there is a message pending, try + * to start the next message. 
+ * + * We do the waiting message check to avoid a stuck flag + * completely wedging the driver. Let a message through + * in between flag operations if that happens. + */ + if (si_sm_result == SI_SM_IDLE || + (si_sm_result == SI_SM_ATTN && smi_info->waiting_msg && + smi_info->last_was_flag_fetch)) { + smi_info->last_was_flag_fetch = false; + smi_inc_stat(smi_info, idles); + + si_sm_result = start_next_msg(smi_info); + if (si_sm_result != SI_SM_IDLE) + goto restart; + } + + /* * We prefer handling attn over new messages. But don't do * this if there is not yet an upper layer to handle anything. */ @@ -846,15 +897,6 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, } } - /* If we are currently idle, try to start the next message. */ - if (si_sm_result == SI_SM_IDLE) { - smi_inc_stat(smi_info, idles); - - si_sm_result = start_next_msg(smi_info); - if (si_sm_result != SI_SM_IDLE) - goto restart; - } - if ((si_sm_result == SI_SM_IDLE) && (atomic_read(&smi_info->req_events))) { /*
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index b49500a..f419b46 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -225,6 +225,9 @@ struct ssif_info { bool has_event_buffer; bool supports_alert; + /* When requesting events and messages, don't do it forever. */ + unsigned int num_requests_in_a_row; + /* * Used to tell what we should do with alerts. If we are * waiting on a response, read the data immediately. @@ -413,7 +416,10 @@ static void start_event_fetch(struct ssif_info *ssif_info, unsigned long *flags) } ssif_info->curr_msg = msg; - ssif_info->ssif_state = SSIF_GETTING_EVENTS; + if (ssif_info->ssif_state != SSIF_GETTING_EVENTS) { + ssif_info->num_requests_in_a_row = 0; + ssif_info->ssif_state = SSIF_GETTING_EVENTS; + } ipmi_ssif_unlock_cond(ssif_info, flags); msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2); @@ -436,7 +442,10 @@ static void start_recv_msg_fetch(struct ssif_info *ssif_info, } ssif_info->curr_msg = msg; - ssif_info->ssif_state = SSIF_GETTING_MESSAGES; + if (ssif_info->ssif_state != SSIF_GETTING_MESSAGES) { + ssif_info->num_requests_in_a_row = 0; + ssif_info->ssif_state = SSIF_GETTING_MESSAGES; + } ipmi_ssif_unlock_cond(ssif_info, flags); msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2); @@ -843,6 +852,11 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, ssif_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL; handle_flags(ssif_info, flags); } else { + ssif_info->num_requests_in_a_row++; + if (ssif_info->num_requests_in_a_row > 10) + /* Stop if we do this too many times. */ + ssif_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL; + handle_flags(ssif_info, flags); ssif_inc_stat(ssif_info, events); deliver_recv_msg(ssif_info, msg); @@ -876,6 +890,11 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, ssif_info->msg_flags &= ~RECEIVE_MSG_AVAIL; handle_flags(ssif_info, flags); } else { + ssif_info->num_requests_in_a_row++; + if (ssif_info->num_requests_in_a_row > 10) + /* Stop if we do this too many times. 
*/ + ssif_info->msg_flags &= ~RECEIVE_MSG_AVAIL; + ssif_inc_stat(ssif_info, incoming_messages); handle_flags(ssif_info, flags); deliver_recv_msg(ssif_info, msg); @@ -1886,6 +1905,7 @@ static int ssif_probe(struct i2c_client *client) "kssif%4.4x", thread_num); if (IS_ERR(ssif_info->thread)) { rv = PTR_ERR(ssif_info->thread); + ssif_info->thread = NULL; dev_notice(&ssif_info->client->dev, "Could not start kernel thread: error %d\n", rv);
diff --git a/drivers/clk/clk-eyeq.c b/drivers/clk/clk-eyeq.c index c1dcced..d9303c2 100644 --- a/drivers/clk/clk-eyeq.c +++ b/drivers/clk/clk-eyeq.c
@@ -110,6 +110,7 @@ struct eqc_match_data { const char *reset_auxdev_name; const char *pinctrl_auxdev_name; + const char *eth_phy_auxdev_name; unsigned int early_clk_count; }; @@ -321,38 +322,18 @@ static void eqc_probe_init_fixed_factors(struct device *dev, } } -static void eqc_auxdev_release(struct device *dev) -{ - struct auxiliary_device *adev = to_auxiliary_dev(dev); - - kfree(adev); -} - -static int eqc_auxdev_create(struct device *dev, void __iomem *base, - const char *name, u32 id) +static void eqc_auxdev_create_optional(struct device *dev, void __iomem *base, + const char *name) { struct auxiliary_device *adev; - int ret; - adev = kzalloc_obj(*adev); - if (!adev) - return -ENOMEM; - - adev->name = name; - adev->dev.parent = dev; - adev->dev.platform_data = (void __force *)base; - adev->dev.release = eqc_auxdev_release; - adev->id = id; - - ret = auxiliary_device_init(adev); - if (ret) - return ret; - - ret = auxiliary_device_add(adev); - if (ret) - auxiliary_device_uninit(adev); - - return ret; + if (name) { + adev = devm_auxiliary_device_create(dev, name, + (void __force *)base); + if (!adev) + dev_warn(dev, "failed creating auxiliary device %s.%s\n", + KBUILD_MODNAME, name); + } } static int eqc_probe(struct platform_device *pdev) @@ -364,7 +345,6 @@ static int eqc_probe(struct platform_device *pdev) unsigned int i, clk_count; struct resource *res; void __iomem *base; - int ret; data = device_get_match_data(dev); if (!data) @@ -378,21 +358,10 @@ static int eqc_probe(struct platform_device *pdev) if (!base) return -ENOMEM; - /* Init optional reset auxiliary device. */ - if (data->reset_auxdev_name) { - ret = eqc_auxdev_create(dev, base, data->reset_auxdev_name, 0); - if (ret) - dev_warn(dev, "failed creating auxiliary device %s.%s: %d\n", - KBUILD_MODNAME, data->reset_auxdev_name, ret); - } - - /* Init optional pinctrl auxiliary device. 
*/ - if (data->pinctrl_auxdev_name) { - ret = eqc_auxdev_create(dev, base, data->pinctrl_auxdev_name, 0); - if (ret) - dev_warn(dev, "failed creating auxiliary device %s.%s: %d\n", - KBUILD_MODNAME, data->pinctrl_auxdev_name, ret); - } + /* Init optional auxiliary devices. */ + eqc_auxdev_create_optional(dev, base, data->reset_auxdev_name); + eqc_auxdev_create_optional(dev, base, data->pinctrl_auxdev_name); + eqc_auxdev_create_optional(dev, base, data->eth_phy_auxdev_name); if (data->pll_count + data->div_count + data->fixed_factor_count == 0) return 0; /* Zero clocks, we are done. */ @@ -553,6 +522,7 @@ static const struct eqc_match_data eqc_eyeq5_match_data = { .reset_auxdev_name = "reset", .pinctrl_auxdev_name = "pinctrl", + .eth_phy_auxdev_name = "phy", .early_clk_count = ARRAY_SIZE(eqc_eyeq5_early_plls) + ARRAY_SIZE(eqc_eyeq5_early_fixed_factors),
diff --git a/drivers/clk/clk-rk808.c b/drivers/clk/clk-rk808.c index f7412b1..5a75b5c 100644 --- a/drivers/clk/clk-rk808.c +++ b/drivers/clk/clk-rk808.c
@@ -153,7 +153,7 @@ static int rk808_clkout_probe(struct platform_device *pdev) struct rk808_clkout *rk808_clkout; int ret; - dev->of_node = pdev->dev.parent->of_node; + device_set_of_node_from_dev(dev, dev->parent); rk808_clkout = devm_kzalloc(dev, sizeof(*rk808_clkout), GFP_KERNEL);
diff --git a/drivers/clk/spacemit/ccu-k3.c b/drivers/clk/spacemit/ccu-k3.c index e98afd5..bb8b75b 100644 --- a/drivers/clk/spacemit/ccu-k3.c +++ b/drivers/clk/spacemit/ccu-k3.c
@@ -846,7 +846,7 @@ static const struct clk_parent_data top_parents[] = { CCU_PARENT_HW(pll6_d3), }; CCU_MUX_DIV_GATE_FC_DEFINE(top_dclk, top_parents, APMU_TOP_DCLK_CTRL, 5, 3, - BIT(8), 2, 3, BIT(1), 0); + BIT(8), 2, 3, BIT(1), CLK_IS_CRITICAL); static const struct clk_parent_data ucie_parents[] = { CCU_PARENT_HW(pll1_d8_307p2),
diff --git a/drivers/edac/versalnet_edac.c b/drivers/edac/versalnet_edac.c index ec13155..97ec05d 100644 --- a/drivers/edac/versalnet_edac.c +++ b/drivers/edac/versalnet_edac.c
@@ -777,9 +777,9 @@ static int init_one_mc(struct mc_priv *priv, struct platform_device *pdev, int i u32 num_chans, rank, dwidth, config; struct edac_mc_layer layers[2]; struct mem_ctl_info *mci; + char name[MC_NAME_LEN]; struct device *dev; enum dev_type dt; - char *name; int rc; config = priv->adec[CONF + i * ADEC_NUM]; @@ -813,13 +813,9 @@ static int init_one_mc(struct mc_priv *priv, struct platform_device *pdev, int i layers[1].is_virt_csrow = false; rc = -ENOMEM; - name = kzalloc(MC_NAME_LEN, GFP_KERNEL); - if (!name) - return rc; - dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) - goto err_name_free; + return rc; mci = edac_mc_alloc(i, ARRAY_SIZE(layers), layers, sizeof(struct mc_priv)); if (!mci) { @@ -858,8 +854,6 @@ static int init_one_mc(struct mc_priv *priv, struct platform_device *pdev, int i edac_mc_free(mci); err_dev_free: kfree(dev); -err_name_free: - kfree(name); return rc; }
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index a253b61..a5db353 100644 --- a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c
@@ -60,8 +60,10 @@ static int efi_pstore_open(struct pstore_info *psi) return err; psi->data = kzalloc(record_size, GFP_KERNEL); - if (!psi->data) + if (!psi->data) { + efivar_unlock(); return -ENOMEM; + } return 0; }
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 983a438..cfedb30 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile
@@ -66,7 +66,7 @@ lib-y := efi-stub-helper.o gop.o secureboot.o tpm.o \ file.o mem.o random.o randomalloc.o pci.o \ skip_spaces.o lib-cmdline.o lib-ctype.o \ - alignedmem.o relocate.o printk.o vsprintf.o + alignedmem.o printk.o vsprintf.o # include the stub's libfdt dependencies from lib/ when needed libfdt-deps := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c \
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 979a218..fd91fc1 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h
@@ -1104,13 +1104,6 @@ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, unsigned long *addr, unsigned long min); -efi_status_t efi_relocate_kernel(unsigned long *image_addr, - unsigned long image_size, - unsigned long alloc_size, - unsigned long preferred_addr, - unsigned long alignment, - unsigned long min_addr); - efi_status_t efi_parse_options(char const *cmdline); void efi_parse_option_graphics(char *option);
diff --git a/drivers/firmware/efi/libstub/loongarch-stub.c b/drivers/firmware/efi/libstub/loongarch-stub.c index 736b6aa..c87ac70 100644 --- a/drivers/firmware/efi/libstub/loongarch-stub.c +++ b/drivers/firmware/efi/libstub/loongarch-stub.c
@@ -14,6 +14,86 @@ extern int kernel_asize; extern int kernel_fsize; extern int kernel_entry; +/** + * efi_relocate_kernel() - copy memory area + * @image_addr: pointer to address of memory area to copy + * @image_size: size of memory area to copy + * @alloc_size: minimum size of memory to allocate, must be greater or + * equal to image_size + * @preferred_addr: preferred target address + * @alignment: minimum alignment of the allocated memory area. It + * should be a power of two. + * @min_addr: minimum target address + * + * Copy a memory area to a newly allocated memory area aligned according + * to @alignment but at least EFI_ALLOC_ALIGN. If the preferred address + * is not available, the allocated address will not be below @min_addr. + * On exit, @image_addr is updated to the target copy address that was used. + * + * This function is used to copy the Linux kernel verbatim. It does not apply + * any relocation changes. + * + * Return: status code + */ +static +efi_status_t efi_relocate_kernel(unsigned long *image_addr, + unsigned long image_size, + unsigned long alloc_size, + unsigned long preferred_addr, + unsigned long alignment, + unsigned long min_addr) +{ + unsigned long cur_image_addr; + unsigned long new_addr = 0; + efi_status_t status; + unsigned long nr_pages; + efi_physical_addr_t efi_addr = preferred_addr; + + if (!image_addr || !image_size || !alloc_size) + return EFI_INVALID_PARAMETER; + if (alloc_size < image_size) + return EFI_INVALID_PARAMETER; + + cur_image_addr = *image_addr; + + /* + * The EFI firmware loader could have placed the kernel image + * anywhere in memory, but the kernel has restrictions on the + * max physical address it can run at. Some architectures + * also have a preferred address, so first try to relocate + * to the preferred address. If that fails, allocate as low + * as possible while respecting the required alignment. 
+ */ + nr_pages = round_up(alloc_size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; + status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, + EFI_LOADER_DATA, nr_pages, &efi_addr); + new_addr = efi_addr; + /* + * If preferred address allocation failed allocate as low as + * possible. + */ + if (status != EFI_SUCCESS) { + status = efi_low_alloc_above(alloc_size, alignment, &new_addr, + min_addr); + } + if (status != EFI_SUCCESS) { + efi_err("Failed to allocate usable memory for kernel.\n"); + return status; + } + + /* + * We know source/dest won't overlap since both memory ranges + * have been allocated by UEFI, so we can safely use memcpy. + */ + memcpy((void *)new_addr, (void *)cur_image_addr, image_size); + efi_cache_sync_image(new_addr, image_size); + + /* Return the new address of the relocated image. */ + *image_addr = new_addr; + + return status; +} + efi_status_t handle_kernel_image(unsigned long *image_addr, unsigned long *image_size, unsigned long *reserve_addr,
diff --git a/drivers/firmware/efi/libstub/loongarch.c b/drivers/firmware/efi/libstub/loongarch.c index 9825f52..f7938d5 100644 --- a/drivers/firmware/efi/libstub/loongarch.c +++ b/drivers/firmware/efi/libstub/loongarch.c
@@ -18,6 +18,11 @@ efi_status_t check_platform_features(void) return EFI_SUCCESS; } +void efi_cache_sync_image(unsigned long image_base, unsigned long alloc_size) +{ + asm volatile ("ibar 0" ::: "memory"); +} + struct exit_boot_struct { efi_memory_desc_t *runtime_map; int runtime_entry_count;
diff --git a/drivers/firmware/efi/libstub/mem.c b/drivers/firmware/efi/libstub/mem.c index 9c82259..59f3f83 100644 --- a/drivers/firmware/efi/libstub/mem.c +++ b/drivers/firmware/efi/libstub/mem.c
@@ -124,3 +124,85 @@ void efi_free(unsigned long size, unsigned long addr) nr_pages = round_up(size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; efi_bs_call(free_pages, addr, nr_pages); } + +/** + * efi_low_alloc_above() - allocate pages at or above given address + * @size: size of the memory area to allocate + * @align: minimum alignment of the allocated memory area. It should + * a power of two. + * @addr: on exit the address of the allocated memory + * @min: minimum address to used for the memory allocation + * + * Allocate at the lowest possible address that is not below @min as + * EFI_LOADER_DATA. The allocated pages are aligned according to @align but at + * least EFI_ALLOC_ALIGN. The first allocated page will not below the address + * given by @min. + * + * Return: status code + */ +efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, + unsigned long *addr, unsigned long min) +{ + struct efi_boot_memmap *map __free(efi_pool) = NULL; + efi_status_t status; + unsigned long nr_pages; + int i; + + status = efi_get_memory_map(&map, false); + if (status != EFI_SUCCESS) + return status; + + /* + * Enforce minimum alignment that EFI or Linux requires when + * requesting a specific address. We are doing page-based (or + * larger) allocations, and both the address and size must meet + * alignment constraints. 
+ */ + if (align < EFI_ALLOC_ALIGN) + align = EFI_ALLOC_ALIGN; + + size = round_up(size, EFI_ALLOC_ALIGN); + nr_pages = size / EFI_PAGE_SIZE; + for (i = 0; i < map->map_size / map->desc_size; i++) { + efi_memory_desc_t *desc; + unsigned long m = (unsigned long)map->map; + u64 start, end; + + desc = efi_memdesc_ptr(m, map->desc_size, i); + + if (desc->type != EFI_CONVENTIONAL_MEMORY) + continue; + + if (desc->attribute & EFI_MEMORY_HOT_PLUGGABLE) + continue; + + if (efi_soft_reserve_enabled() && + (desc->attribute & EFI_MEMORY_SP)) + continue; + + if (desc->num_pages < nr_pages) + continue; + + start = desc->phys_addr; + end = start + desc->num_pages * EFI_PAGE_SIZE; + + if (start < min) + start = min; + + start = round_up(start, align); + if ((start + size) > end) + continue; + + status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, + EFI_LOADER_DATA, nr_pages, &start); + if (status == EFI_SUCCESS) { + *addr = start; + break; + } + } + + if (i == map->map_size / map->desc_size) + return EFI_NOT_FOUND; + + return EFI_SUCCESS; +}
diff --git a/drivers/firmware/efi/libstub/relocate.c b/drivers/firmware/efi/libstub/relocate.c deleted file mode 100644 index d4264bf..0000000 --- a/drivers/firmware/efi/libstub/relocate.c +++ /dev/null
@@ -1,166 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <linux/efi.h> -#include <asm/efi.h> - -#include "efistub.h" - -/** - * efi_low_alloc_above() - allocate pages at or above given address - * @size: size of the memory area to allocate - * @align: minimum alignment of the allocated memory area. It should - * a power of two. - * @addr: on exit the address of the allocated memory - * @min: minimum address to used for the memory allocation - * - * Allocate at the lowest possible address that is not below @min as - * EFI_LOADER_DATA. The allocated pages are aligned according to @align but at - * least EFI_ALLOC_ALIGN. The first allocated page will not below the address - * given by @min. - * - * Return: status code - */ -efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, - unsigned long *addr, unsigned long min) -{ - struct efi_boot_memmap *map __free(efi_pool) = NULL; - efi_status_t status; - unsigned long nr_pages; - int i; - - status = efi_get_memory_map(&map, false); - if (status != EFI_SUCCESS) - return status; - - /* - * Enforce minimum alignment that EFI or Linux requires when - * requesting a specific address. We are doing page-based (or - * larger) allocations, and both the address and size must meet - * alignment constraints. 
- */ - if (align < EFI_ALLOC_ALIGN) - align = EFI_ALLOC_ALIGN; - - size = round_up(size, EFI_ALLOC_ALIGN); - nr_pages = size / EFI_PAGE_SIZE; - for (i = 0; i < map->map_size / map->desc_size; i++) { - efi_memory_desc_t *desc; - unsigned long m = (unsigned long)map->map; - u64 start, end; - - desc = efi_memdesc_ptr(m, map->desc_size, i); - - if (desc->type != EFI_CONVENTIONAL_MEMORY) - continue; - - if (desc->attribute & EFI_MEMORY_HOT_PLUGGABLE) - continue; - - if (efi_soft_reserve_enabled() && - (desc->attribute & EFI_MEMORY_SP)) - continue; - - if (desc->num_pages < nr_pages) - continue; - - start = desc->phys_addr; - end = start + desc->num_pages * EFI_PAGE_SIZE; - - if (start < min) - start = min; - - start = round_up(start, align); - if ((start + size) > end) - continue; - - status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, - EFI_LOADER_DATA, nr_pages, &start); - if (status == EFI_SUCCESS) { - *addr = start; - break; - } - } - - if (i == map->map_size / map->desc_size) - return EFI_NOT_FOUND; - - return EFI_SUCCESS; -} - -/** - * efi_relocate_kernel() - copy memory area - * @image_addr: pointer to address of memory area to copy - * @image_size: size of memory area to copy - * @alloc_size: minimum size of memory to allocate, must be greater or - * equal to image_size - * @preferred_addr: preferred target address - * @alignment: minimum alignment of the allocated memory area. It - * should be a power of two. - * @min_addr: minimum target address - * - * Copy a memory area to a newly allocated memory area aligned according - * to @alignment but at least EFI_ALLOC_ALIGN. If the preferred address - * is not available, the allocated address will not be below @min_addr. - * On exit, @image_addr is updated to the target copy address that was used. - * - * This function is used to copy the Linux kernel verbatim. It does not apply - * any relocation changes. 
- * - * Return: status code - */ -efi_status_t efi_relocate_kernel(unsigned long *image_addr, - unsigned long image_size, - unsigned long alloc_size, - unsigned long preferred_addr, - unsigned long alignment, - unsigned long min_addr) -{ - unsigned long cur_image_addr; - unsigned long new_addr = 0; - efi_status_t status; - unsigned long nr_pages; - efi_physical_addr_t efi_addr = preferred_addr; - - if (!image_addr || !image_size || !alloc_size) - return EFI_INVALID_PARAMETER; - if (alloc_size < image_size) - return EFI_INVALID_PARAMETER; - - cur_image_addr = *image_addr; - - /* - * The EFI firmware loader could have placed the kernel image - * anywhere in memory, but the kernel has restrictions on the - * max physical address it can run at. Some architectures - * also have a preferred address, so first try to relocate - * to the preferred address. If that fails, allocate as low - * as possible while respecting the required alignment. - */ - nr_pages = round_up(alloc_size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; - status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, - EFI_LOADER_DATA, nr_pages, &efi_addr); - new_addr = efi_addr; - /* - * If preferred address allocation failed allocate as low as - * possible. - */ - if (status != EFI_SUCCESS) { - status = efi_low_alloc_above(alloc_size, alignment, &new_addr, - min_addr); - } - if (status != EFI_SUCCESS) { - efi_err("Failed to allocate usable memory for kernel.\n"); - return status; - } - - /* - * We know source/dest won't overlap since both memory ranges - * have been allocated by UEFI, so we can safely use memcpy. - */ - memcpy((void *)new_addr, (void *)cur_image_addr, image_size); - - /* Return the new address of the relocated image. */ - *image_addr = new_addr; - - return status; -}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 46aae3f..60debd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -3149,11 +3149,7 @@ static int __init amdgpu_init(void) r = amdgpu_sync_init(); if (r) - goto error_sync; - - r = amdgpu_userq_fence_slab_init(); - if (r) - goto error_fence; + return r; amdgpu_register_atpx_handler(); amdgpu_acpi_detect(); @@ -3161,7 +3157,7 @@ static int __init amdgpu_init(void) /* Ignore KFD init failures when CONFIG_HSA_AMD is not set. */ r = amdgpu_amdkfd_init(); if (r && r != -ENOENT) - goto error_fence; + goto error_fini_sync; if (amdgpu_pp_feature_mask & PP_OVERDRIVE_MASK) { add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); @@ -3172,10 +3168,8 @@ static int __init amdgpu_init(void) /* let modprobe override vga console setting */ return pci_register_driver(&amdgpu_kms_pci_driver); -error_fence: +error_fini_sync: amdgpu_sync_fini(); - -error_sync: return r; } @@ -3186,7 +3180,6 @@ static void __exit amdgpu_exit(void) amdgpu_unregister_atpx_handler(); amdgpu_acpi_release(); amdgpu_sync_fini(); - amdgpu_userq_fence_slab_fini(); mmu_notifier_synchronize(); amdgpu_xcp_drv_release(); }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index bc772ca..b6f849d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -262,12 +262,19 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev) */ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) { + int r; + if (adev->gart.bo != NULL) return 0; - return amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo, - NULL, (void *)&adev->gart.ptr); + r = amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo, + NULL, (void *)&adev->gart.ptr); + if (r) + return r; + + memset_io(adev->gart.ptr, adev->gart.gart_pte_flags, adev->gart.table_size); + return 0; } /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index da39ac8..e2d5f04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -32,29 +32,9 @@ #include "amdgpu.h" #include "amdgpu_userq_fence.h" -static const struct dma_fence_ops amdgpu_userq_fence_ops; -static struct kmem_cache *amdgpu_userq_fence_slab; - #define AMDGPU_USERQ_MAX_HANDLES (1U << 16) -int amdgpu_userq_fence_slab_init(void) -{ - amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence", - sizeof(struct amdgpu_userq_fence), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!amdgpu_userq_fence_slab) - return -ENOMEM; - - return 0; -} - -void amdgpu_userq_fence_slab_fini(void) -{ - rcu_barrier(); - kmem_cache_destroy(amdgpu_userq_fence_slab); -} +static const struct dma_fence_ops amdgpu_userq_fence_ops; static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f) { @@ -231,7 +211,7 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence) { - *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC); + *userq_fence = kmalloc(sizeof(**userq_fence), GFP_KERNEL); return *userq_fence ? 0 : -ENOMEM; } @@ -342,7 +322,7 @@ static void amdgpu_userq_fence_free(struct rcu_head *rcu) amdgpu_userq_fence_driver_put(fence_drv); kvfree(userq_fence->fence_drv_array); - kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); + kfree(userq_fence); } static void amdgpu_userq_fence_release(struct dma_fence *f) @@ -545,7 +525,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence); if (r) { mutex_unlock(&userq_mgr->userq_mutex); - kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); + kfree(userq_fence); goto put_gobj_write; }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index d56246a..d355a0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
@@ -58,9 +58,6 @@ struct amdgpu_userq_fence_driver { char timeline_name[TASK_COMM_LEN]; }; -int amdgpu_userq_fence_slab_init(void); -void amdgpu_userq_fence_slab_fini(void); - void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv); int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 95be105..86c7c2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5660,9 +5660,6 @@ static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, { struct amdgpu_device *adev = ring->adev; - /* we only allocate 32bit for each seq wb address */ - BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); - /* write fence seq to the "addr" */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 2fc39a6..5b4121d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -31,89 +31,68 @@ #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE static int -mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo) -{ - int ret; - - ret = amdgpu_bo_reserve(bo, true); - if (ret) { - DRM_ERROR("Failed to reserve bo. ret %d\n", ret); - goto err_reserve_bo_failed; - } - - ret = amdgpu_ttm_alloc_gart(&bo->tbo); - if (ret) { - DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); - goto err_map_bo_gart_failed; - } - - amdgpu_bo_unreserve(bo); - bo = amdgpu_bo_ref(bo); - - return 0; - -err_map_bo_gart_failed: - amdgpu_bo_unreserve(bo); -err_reserve_bo_failed: - return ret; -} - -static int mes_userq_create_wptr_mapping(struct amdgpu_device *adev, struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue, uint64_t wptr) { struct amdgpu_bo_va_mapping *wptr_mapping; - struct amdgpu_vm *wptr_vm; struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj; + struct amdgpu_bo *obj; + struct amdgpu_vm *vm = queue->vm; + struct drm_exec exec; int ret; - wptr_vm = queue->vm; - ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); - if (ret) - return ret; - wptr &= AMDGPU_GMC_HOLE_MASK; - wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); - amdgpu_bo_unreserve(wptr_vm->root.bo); - if (!wptr_mapping) { - DRM_ERROR("Failed to lookup wptr bo\n"); - return -EINVAL; + + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 2); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(vm, &exec, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto fail_lock; + + wptr_mapping = amdgpu_vm_bo_lookup_mapping(vm, wptr >> PAGE_SHIFT); + if (!wptr_mapping) { + ret = -EINVAL; + goto fail_lock; + } + + obj = wptr_mapping->bo_va->base.bo; + ret = drm_exec_lock_obj(&exec, &obj->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto fail_lock; } - wptr_obj->obj = wptr_mapping->bo_va->base.bo; + wptr_obj->obj = amdgpu_bo_ref(wptr_mapping->bo_va->base.bo); if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { - DRM_ERROR("Requested 
GART mapping for wptr bo larger than one page\n"); - return -EINVAL; - } - - ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj); - if (ret) { - DRM_ERROR("Failed to map wptr bo to GART\n"); - return ret; - } - - ret = amdgpu_bo_reserve(wptr_obj->obj, true); - if (ret) { - DRM_ERROR("Failed to reserve wptr bo\n"); - return ret; + ret = -EINVAL; + goto fail_map; } /* TODO use eviction fence instead of pinning. */ ret = amdgpu_bo_pin(wptr_obj->obj, AMDGPU_GEM_DOMAIN_GTT); if (ret) { - drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin wptr bo\n"); - goto unresv_bo; + DRM_ERROR("Failed to pin wptr bo. ret %d\n", ret); + goto fail_map; + } + + ret = amdgpu_ttm_alloc_gart(&wptr_obj->obj->tbo); + if (ret) { + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); + goto fail_map; } queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj); - amdgpu_bo_unreserve(wptr_obj->obj); + drm_exec_fini(&exec); return 0; -unresv_bo: - amdgpu_bo_unreserve(wptr_obj->obj); +fail_map: + amdgpu_bo_unref(&wptr_obj->obj); +fail_lock: + drm_exec_fini(&exec); return ret; }
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 44f0f23..e64f2f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -889,7 +889,7 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se /* write the fence */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -899,7 +899,7 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se addr += 4; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(seq));
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f829d65..f95bf6d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1360,7 +1360,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); if (WARN_ON_ONCE(!peer_pdd)) continue; - kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); + kfd_flush_tlb(peer_pdd); } kfree(devices_arr); @@ -1455,7 +1455,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, if (WARN_ON_ONCE(!peer_pdd)) continue; if (flush_tlb) - kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT); + kfd_flush_tlb(peer_pdd); /* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */ err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 8ff97bf..b7f8f7f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -1737,37 +1737,6 @@ bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entr return false; } -/* check if there is kfd process still uses adev */ -static bool kgd2kfd_check_device_idle(struct amdgpu_device *adev) -{ - struct kfd_process *p; - struct hlist_node *p_temp; - unsigned int temp; - struct kfd_node *dev; - - mutex_lock(&kfd_processes_mutex); - - if (hash_empty(kfd_processes_table)) { - mutex_unlock(&kfd_processes_mutex); - return true; - } - - /* check if there is device still use adev */ - hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) { - for (int i = 0; i < p->n_pdds; i++) { - dev = p->pdds[i]->dev; - if (dev->adev == adev) { - mutex_unlock(&kfd_processes_mutex); - return false; - } - } - } - - mutex_unlock(&kfd_processes_mutex); - - return true; -} - /** kgd2kfd_teardown_processes - gracefully tear down existing * kfd processes that use adev * @@ -1800,7 +1769,7 @@ void kgd2kfd_teardown_processes(struct amdgpu_device *adev) mutex_unlock(&kfd_processes_mutex); /* wait all kfd processes use adev terminate */ - while (!kgd2kfd_check_device_idle(adev)) + while (!!atomic_read(&adev->kfd.dev->kfd_processes_count)) cond_resched(); }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ab3b2e7..9185ebe 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -572,7 +572,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, qpd->vmid, qpd->page_table_base); /* invalidate the VM context after pasid and vmid mapping is set up */ - kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); + kfd_flush_tlb(qpd_to_pdd(qpd)); if (dqm->dev->kfd2kgd->set_scratch_backing_va) dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, @@ -610,7 +610,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm, if (flush_texture_cache_nocpsch(q->device, qpd)) dev_err(dev, "Failed to flush TC\n"); - kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); + kfd_flush_tlb(qpd_to_pdd(qpd)); /* Release the vmid mapping */ set_pasid_vmid_mapping(dqm, 0, qpd->vmid); @@ -1284,7 +1284,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, dqm->dev->adev, qpd->vmid, qpd->page_table_base); - kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); + kfd_flush_tlb(pdd); } /* Take a safe reference to the mm_struct, which may otherwise
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 163d665..7b5b122 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1554,13 +1554,13 @@ void kfd_signal_reset_event(struct kfd_node *dev); void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid); void kfd_signal_process_terminate_event(struct kfd_process *p); -static inline void kfd_flush_tlb(struct kfd_process_device *pdd, - enum TLB_FLUSH_TYPE type) +static inline void kfd_flush_tlb(struct kfd_process_device *pdd) { struct amdgpu_device *adev = pdd->dev->adev; struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); - amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask); + amdgpu_vm_flush_compute_tlb(adev, vm, TLB_FLUSH_HEAVYWEIGHT, + pdd->dev->xcc_mask); } static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 38085a0..35ec67d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1424,7 +1424,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, if (r) break; } - kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT); + kfd_flush_tlb(pdd); } return r; @@ -1571,7 +1571,7 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, } } - kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); + kfd_flush_tlb(pdd); } return r;
diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c index 731355b..3650e7b 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c
@@ -1333,12 +1333,13 @@ static int ci_populate_all_memory_levels(struct pp_hwmgr *hwmgr) dev_id = adev->pdev->device; - if ((dpm_table->mclk_table.count >= 2) - && ((dev_id == 0x67B0) || (dev_id == 0x67B1))) { - smu_data->smc_state_table.MemoryLevel[1].MinVddci = - smu_data->smc_state_table.MemoryLevel[0].MinVddci; - smu_data->smc_state_table.MemoryLevel[1].MinMvdd = - smu_data->smc_state_table.MemoryLevel[0].MinMvdd; + if ((dpm_table->mclk_table.count >= 2) && + ((dev_id == 0x67B0) || (dev_id == 0x67B1)) && + (adev->pdev->revision == 0)) { + smu_data->smc_state_table.MemoryLevel[1].MinVddc = + smu_data->smc_state_table.MemoryLevel[0].MinVddc; + smu_data->smc_state_table.MemoryLevel[1].MinVddcPhases = + smu_data->smc_state_table.MemoryLevel[0].MinVddcPhases; } smu_data->smc_state_table.MemoryLevel[0].ActivityLevel = 0x1F; CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.MemoryLevel[0].ActivityLevel);
diff --git a/drivers/gpu/drm/bridge/tda998x_drv.c b/drivers/gpu/drm/bridge/tda998x_drv.c index d9b3881..6c427bc 100644 --- a/drivers/gpu/drm/bridge/tda998x_drv.c +++ b/drivers/gpu/drm/bridge/tda998x_drv.c
@@ -1293,7 +1293,7 @@ static const struct drm_edid *tda998x_edid_read(struct tda998x_priv *priv, * can't handle signals gracefully. */ if (tda998x_edid_delay_wait(priv)) - return 0; + return NULL; if (priv->rev == TDA19988) reg_clear(priv, REG_TX4, TX4_PD_RAM); @@ -1762,7 +1762,7 @@ static const struct drm_bridge_funcs tda998x_bridge_funcs = { static int tda998x_get_audio_ports(struct tda998x_priv *priv, struct device_node *np) { - const u32 *port_data; + const __be32 *port_data; u32 size; int i;
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index a80a335..1541fc8 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -490,7 +490,7 @@ static void drm_fb_helper_memory_range_to_clip(struct fb_info *info, off_t off, * the number of horizontal pixels that need an update. */ off_t bit_off = (off % line_length) * 8; - off_t bit_end = (end % line_length) * 8; + off_t bit_end = bit_off + len * 8; x1 = bit_off / info->var.bits_per_pixel; x2 = DIV_ROUND_UP(bit_end, info->var.bits_per_pixel);
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index d642426..51a887c 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c
@@ -1019,7 +1019,7 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_gem_change_handle *args = data; - struct drm_gem_object *obj; + struct drm_gem_object *obj, *idrobj; int handle, ret; if (!drm_core_check_feature(dev, DRIVER_GEM)) @@ -1042,8 +1042,29 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, mutex_lock(&file_priv->prime.lock); spin_lock(&file_priv->table_lock); + + /* When create_tail allocs an obj idr, it needs to first alloc as NULL, + * then later replace with the correct object. This is not necessary + * here, because the only operations that could race are drm_prime + * bookkeeping, and we hold the prime lock. + */ ret = idr_alloc(&file_priv->object_idr, obj, handle, handle + 1, GFP_NOWAIT); + + if (ret < 0) { + spin_unlock(&file_priv->table_lock); + goto out_unlock; + } + + idrobj = idr_replace(&file_priv->object_idr, NULL, handle); + if (idrobj != obj) { + idr_replace(&file_priv->object_idr, idrobj, handle); + idr_remove(&file_priv->object_idr, args->new_handle); + spin_unlock(&file_priv->table_lock); + ret = -ENOENT; + goto out_unlock; + } + spin_unlock(&file_priv->table_lock); if (ret < 0) @@ -1055,6 +1076,8 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, if (ret < 0) { spin_lock(&file_priv->table_lock); idr_remove(&file_priv->object_idr, handle); + idrobj = idr_replace(&file_priv->object_idr, obj, handle); + WARN_ON(idrobj != NULL); spin_unlock(&file_priv->table_lock); goto out_unlock; }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index df4232d7..3cc50d6 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -116,16 +116,18 @@ int etnaviv_sched_push_job(struct etnaviv_gem_submit *submit) */ mutex_lock(&gpu->sched_lock); + ret = xa_alloc_cyclic(&gpu->user_fences, &submit->out_fence_id, + NULL, xa_limit_32b, &gpu->next_user_fence, + GFP_KERNEL); + if (ret < 0) + goto out_unlock; + drm_sched_job_arm(&submit->sched_job); submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished); - ret = xa_alloc_cyclic(&gpu->user_fences, &submit->out_fence_id, - submit->out_fence, xa_limit_32b, - &gpu->next_user_fence, GFP_KERNEL); - if (ret < 0) { - drm_sched_job_cleanup(&submit->sched_job); - goto out_unlock; - } + + xa_store(&gpu->user_fences, submit->out_fence_id, + submit->out_fence, GFP_KERNEL); /* the scheduler holds on to the job now */ kref_get(&submit->refcount);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c index 29a8366..e68c954 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_mic.c +++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c
@@ -423,7 +423,9 @@ static int exynos_mic_probe(struct platform_device *pdev) mic->bridge.of_node = dev->of_node; - drm_bridge_add(&mic->bridge); + ret = devm_drm_bridge_add(dev, &mic->bridge); + if (ret) + goto err; pm_runtime_enable(dev); @@ -443,12 +445,8 @@ static int exynos_mic_probe(struct platform_device *pdev) static void exynos_mic_remove(struct platform_device *pdev) { - struct exynos_mic *mic = platform_get_drvdata(pdev); - component_del(&pdev->dev, &exynos_mic_component_ops); pm_runtime_disable(&pdev->dev); - - drm_bridge_remove(&mic->bridge); } static const struct of_device_id exynos_mic_of_match[] = {
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 385a634..d9be7a5 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -750,9 +750,8 @@ static bool has_auxccs(struct drm_device *drm) { struct drm_i915_private *i915 = to_i915(drm); - return IS_GRAPHICS_VER(i915, 9, 12) || - IS_ALDERLAKE_P(i915) || - IS_METEORLAKE(i915); + return IS_GRAPHICS_VER(i915, 9, 12) && + !HAS_FLAT_CCS(i915); } static bool has_fenced_regions(struct drm_device *drm)
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 72848ed..b101e14 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2513,6 +2513,7 @@ static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", .bar = { 0x00000001, tu102_bar_new }, + .bios = { 0x00000001, nvkm_bios_new }, .devinit = { 0x00000001, ga100_devinit_new }, .fault = { 0x00000001, tu102_fault_new }, .fb = { 0x00000001, ga100_fb_new }, @@ -2529,7 +2530,6 @@ nv170_chipset = { .vfn = { 0x00000001, ga100_vfn_new }, .ce = { 0x000003ff, ga100_ce_new }, .fifo = { 0x00000001, ga100_fifo_new }, - .sec2 = { 0x00000001, tu102_sec2_new }, }; static const struct nvkm_device_chip @@ -3341,7 +3341,6 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x166: device->chip = &nv166_chipset; break; case 0x167: device->chip = &nv167_chipset; break; case 0x168: device->chip = &nv168_chipset; break; - case 0x170: device->chip = &nv170_chipset; break; case 0x172: device->chip = &nv172_chipset; break; case 0x173: device->chip = &nv173_chipset; break; case 0x174: device->chip = &nv174_chipset; break; @@ -3361,6 +3360,14 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x1b6: device->chip = &nv1b6_chipset; break; case 0x1b7: device->chip = &nv1b7_chipset; break; default: + if (nvkm_boolopt(device->cfgopt, "NvEnableUnsupportedChipsets", false)) { + switch (device->chipset) { + case 0x170: device->chip = &nv170_chipset; break; + default: + break; + } + } + if (!device->chip) { nvdev_error(device, "unknown chipset (%08x)\n", boot0); ret = -ENODEV;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c index fdd820e..27a13ae 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c
@@ -41,11 +41,15 @@ ga100_gsp_flcn = { static const struct nvkm_gsp_func ga100_gsp = { .flcn = &ga100_gsp_flcn, + .fwsec = &tu102_gsp_fwsec, .sig_section = ".fwsignature_ga100", .booter.ctor = tu102_gsp_booter_ctor, + .fwsec_sb.ctor = tu102_gsp_fwsec_sb_ctor, + .fwsec_sb.dtor = tu102_gsp_fwsec_sb_dtor, + .dtor = r535_gsp_dtor, .oneinit = tu102_gsp_oneinit, .init = tu102_gsp_init,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c index dd82c76..19cb269 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c
@@ -318,13 +318,8 @@ tu102_gsp_oneinit(struct nvkm_gsp *gsp) if (ret) return ret; - /* - * Calculate FB layout. FRTS is a memory region created by the FWSEC-FRTS firmware. - * FWSEC comes from VBIOS. So on systems with no VBIOS (e.g. GA100), the FRTS does - * not exist. Therefore, use the existence of VBIOS to determine whether to reserve - * an FRTS region. - */ - gsp->fb.wpr2.frts.size = device->bios ? 0x100000 : 0; + /* Calculate FB layout. */ + gsp->fb.wpr2.frts.size = 0x100000; gsp->fb.wpr2.frts.addr = ALIGN_DOWN(gsp->fb.bios.addr, 0x20000) - gsp->fb.wpr2.frts.size; gsp->fb.wpr2.boot.size = gsp->boot.fw.size; @@ -348,12 +343,9 @@ tu102_gsp_oneinit(struct nvkm_gsp *gsp) if (ret) return ret; - /* Only boot FWSEC-FRTS if it actually exists */ - if (gsp->fb.wpr2.frts.size) { - ret = nvkm_gsp_fwsec_frts(gsp); - if (WARN_ON(ret)) - return ret; - } + ret = nvkm_gsp_fwsec_frts(gsp); + if (WARN_ON(ret)) + return ret; /* Reset GSP into RISC-V mode. */ ret = gsp->func->reset(gsp);
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index d6863b2..d592f4f 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig
@@ -208,6 +208,7 @@ depends on OF depends on DRM_MIPI_DSI depends on BACKLIGHT_CLASS_DEVICE + select DRM_DISPLAY_DSC_HELPER select DRM_KMS_HELPER help Say Y here if you want to enable support for Himax HX83121A-based
diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index d5fe105..658ce64 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
@@ -1324,6 +1324,8 @@ static int boe_panel_disable(struct drm_panel *panel) mipi_dsi_dcs_set_display_off_multi(&ctx); mipi_dsi_dcs_enter_sleep_mode_multi(&ctx); + boe->dsi->mode_flags |= MIPI_DSI_MODE_LPM; + mipi_dsi_msleep(&ctx, 150); return ctx.accum_err;
diff --git a/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c b/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c index 4f8d6d8..dbdb7e3 100644 --- a/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c +++ b/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c
@@ -98,9 +98,7 @@ static int feiyang_enable(struct drm_panel *panel) /* T12 (video & logic signal rise + backlight rise) T12 >= 200ms */ msleep(200); - mipi_dsi_dcs_set_display_on(ctx->dsi); - - return 0; + return mipi_dsi_dcs_set_display_on(ctx->dsi); } static int feiyang_disable(struct drm_panel *panel)
diff --git a/drivers/gpu/drm/panel/panel-himax-hx83102.c b/drivers/gpu/drm/panel/panel-himax-hx83102.c index 8b2a68e..a5e5c9e 100644 --- a/drivers/gpu/drm/panel/panel-himax-hx83102.c +++ b/drivers/gpu/drm/panel/panel-himax-hx83102.c
@@ -937,6 +937,8 @@ static int hx83102_disable(struct drm_panel *panel) mipi_dsi_dcs_set_display_off_multi(&dsi_ctx); mipi_dsi_dcs_enter_sleep_mode_multi(&dsi_ctx); + dsi->mode_flags |= MIPI_DSI_MODE_LPM; + mipi_dsi_msleep(&dsi_ctx, 150); return dsi_ctx.accum_err;
diff --git a/drivers/gpu/drm/panel/panel-himax-hx83121a.c b/drivers/gpu/drm/panel/panel-himax-hx83121a.c index ebe643b..bed79aa 100644 --- a/drivers/gpu/drm/panel/panel-himax-hx83121a.c +++ b/drivers/gpu/drm/panel/panel-himax-hx83121a.c
@@ -596,8 +596,8 @@ static int himax_probe(struct mipi_dsi_device *dsi) ctx = devm_drm_panel_alloc(dev, struct himax, panel, &himax_panel_funcs, DRM_MODE_CONNECTOR_DSI); - if (!ctx) - return -ENOMEM; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); ret = devm_regulator_bulk_get_const(&dsi->dev, ARRAY_SIZE(himax_supplies),
diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index 2bbb116..1e6a239 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c
@@ -118,12 +118,13 @@ qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Complete initialization. */ ret = drm_dev_register(&qdev->ddev, ent->driver_data); if (ret) - goto modeset_cleanup; + goto poll_fini; drm_client_setup(&qdev->ddev, NULL); return 0; -modeset_cleanup: +poll_fini: + drm_kms_helper_poll_fini(&qdev->ddev); qxl_modeset_fini(qdev); unload: qxl_device_fini(qdev); @@ -154,6 +155,7 @@ qxl_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); + drm_kms_helper_poll_fini(dev); drm_dev_unregister(dev); drm_atomic_helper_shutdown(dev); if (pci_is_vga(pdev) && pdev->revision < 5)
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 22321eb..703848f 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -2461,7 +2461,8 @@ static void ci_register_patching_mc_arb(struct radeon_device *rdev, if (patch && ((rdev->pdev->device == 0x67B0) || - (rdev->pdev->device == 0x67B1))) { + (rdev->pdev->device == 0x67B1)) && + (rdev->pdev->revision == 0)) { if ((memory_clock > 100000) && (memory_clock <= 125000)) { tmp2 = (((0x31 * engine_clock) / 125000) - 1) & 0xff; *dram_timimg2 &= ~0x00ff0000; @@ -3304,7 +3305,8 @@ static int ci_populate_all_memory_levels(struct radeon_device *rdev) pi->smc_state_table.MemoryLevel[0].EnabledForActivity = 1; if ((dpm_table->mclk_table.count >= 2) && - ((rdev->pdev->device == 0x67B0) || (rdev->pdev->device == 0x67B1))) { + ((rdev->pdev->device == 0x67B0) || (rdev->pdev->device == 0x67B1)) && + (rdev->pdev->revision == 0)) { pi->smc_state_table.MemoryLevel[1].MinVddc = pi->smc_state_table.MemoryLevel[0].MinVddc; pi->smc_state_table.MemoryLevel[1].MinVddcPhases = @@ -4493,7 +4495,8 @@ static int ci_register_patching_mc_seq(struct radeon_device *rdev, if (patch && ((rdev->pdev->device == 0x67B0) || - (rdev->pdev->device == 0x67B1))) { + (rdev->pdev->device == 0x67B1)) && + (rdev->pdev->revision == 0)) { for (i = 0; i < table->last; i++) { if (table->last >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) return -EINVAL;
diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c index b7397827..360a88c 100644 --- a/drivers/gpu/drm/sti/sti_hda.c +++ b/drivers/gpu/drm/sti/sti_hda.c
@@ -741,6 +741,7 @@ static int sti_hda_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct sti_hda *hda; struct resource *res; + int ret; DRM_INFO("%s\n", __func__); @@ -779,7 +780,9 @@ static int sti_hda_probe(struct platform_device *pdev) return PTR_ERR(hda->clk_hddac); } - drm_bridge_add(&hda->bridge); + ret = devm_drm_bridge_add(dev, &hda->bridge); + if (ret) + return ret; platform_set_drvdata(pdev, hda); @@ -788,10 +791,7 @@ static int sti_hda_probe(struct platform_device *pdev) static void sti_hda_remove(struct platform_device *pdev) { - struct sti_hda *hda = platform_get_drvdata(pdev); - component_del(&pdev->dev, &sti_hda_ops); - drm_bridge_remove(&hda->bridge); } static const struct of_device_id hda_of_match[] = {
diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c index 222e4ae..5d8dc5e 100644 --- a/drivers/gpu/drm/tiny/bochs.c +++ b/drivers/gpu/drm/tiny/bochs.c
@@ -761,25 +761,21 @@ static int bochs_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent ret = pcim_enable_device(pdev); if (ret) - goto err_free_dev; + return ret; pci_set_drvdata(pdev, dev); ret = bochs_load(bochs); if (ret) - goto err_free_dev; + return ret; ret = drm_dev_register(dev, 0); if (ret) - goto err_free_dev; + return ret; drm_client_setup(dev, NULL); return ret; - -err_free_dev: - drm_dev_put(dev); - return ret; } static void bochs_pci_remove(struct pci_dev *pdev)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 26a3689..278bbe7 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -206,6 +206,14 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags, return NULL; } +static void __free_pages_gpu_account(struct page *p, unsigned int order, + bool reclaim) +{ + mod_lruvec_page_state(p, reclaim ? NR_GPU_RECLAIM : NR_GPU_ACTIVE, + -(1 << order)); + __free_pages(p, order); +} + /* Reset the caching and pages of size 1 << order */ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching, unsigned int order, struct page *p, bool reclaim) @@ -223,9 +231,7 @@ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching, #endif if (!pool || !ttm_pool_uses_dma_alloc(pool)) { - mod_lruvec_page_state(p, reclaim ? NR_GPU_RECLAIM : NR_GPU_ACTIVE, - -(1 << order)); - __free_pages(p, order); + __free_pages_gpu_account(p, order, reclaim); return; } @@ -606,7 +612,7 @@ static int ttm_pool_restore_commit(struct ttm_pool_tt_restore *restore, */ ttm_pool_split_for_swap(restore->pool, p); copy_highpage(restore->alloced_page + i, p); - __free_pages(p, 0); + __free_pages_gpu_account(p, 0, false); } restore->restored_pages++; @@ -1068,7 +1074,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt, if (flags->purge) { shrunken += num_pages; page->private = 0; - __free_pages(page, order); + __free_pages_gpu_account(page, order, false); memset(tt->pages + i, 0, num_pages * sizeof(*tt->pages)); } @@ -1109,7 +1115,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt, } handle = shandle; tt->pages[i] = ttm_backup_handle_to_page_ptr(handle); - put_page(page); + __free_pages_gpu_account(page, 0, false); shrunken++; }
diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 29c72aa..33494b8 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -37,9 +37,17 @@ static bool intel_hdcp_gsc_check_status(struct drm_device *drm) struct xe_device *xe = to_xe_device(drm); struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_gt *gt = tile->media_gt; - struct xe_gsc *gsc = >->uc.gsc; + struct xe_gsc *gsc; - if (!gsc || !xe_uc_fw_is_available(&gsc->fw)) { + if (!gt) { + drm_dbg_kms(&xe->drm, + "not checking GSC status for HDCP2.x: media GT not present or disabled\n"); + return false; + } + + gsc = >->uc.gsc; + + if (!xe_uc_fw_is_available(&gsc->fw)) { drm_dbg_kms(&xe->drm, "GSC Components not ready for HDCP2.x\n"); return false;
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c index 87a164e..01fe03b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
@@ -385,10 +385,10 @@ static int pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid, void *buf if (xe_gt_is_media_type(gt)) for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) - regs[n] = xe_mmio_read32(>->mmio, MED_VF_SW_FLAG(n)); + regs[n] = xe_mmio_read32(&mmio, MED_VF_SW_FLAG(n)); else for (n = 0; n < VF_SW_FLAG_COUNT; n++) - regs[n] = xe_mmio_read32(>->mmio, VF_SW_FLAG(n)); + regs[n] = xe_mmio_read32(&mmio, VF_SW_FLAG(n)); return 0; } @@ -407,10 +407,10 @@ static int pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid, if (xe_gt_is_media_type(gt)) for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) - xe_mmio_write32(>->mmio, MED_VF_SW_FLAG(n), regs[n]); + xe_mmio_write32(&mmio, MED_VF_SW_FLAG(n), regs[n]); else for (n = 0; n < VF_SW_FLAG_COUNT; n++) - xe_mmio_write32(>->mmio, VF_SW_FLAG(n), regs[n]); + xe_mmio_write32(&mmio, VF_SW_FLAG(n), regs[n]); return 0; }
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 81b5f01..2b835d4 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -512,12 +512,9 @@ static void guc_golden_lrc_init(struct xe_guc_ads *ads) * that starts after the execlists LRC registers. This is * required to allow the GuC to restore just the engine state * when a watchdog reset occurs. - * We calculate the engine state size by removing the size of - * what comes before it in the context image (which is identical - * on all engines). */ ads_blob_write(ads, ads.eng_state_size[guc_class], - real_size - xe_lrc_skip_size(xe)); + xe_lrc_engine_state_size(gt, class)); ads_blob_write(ads, ads.golden_context_lrca[guc_class], addr_ggtt);
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index c725cde..4af9f0d 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -746,9 +746,16 @@ size_t xe_lrc_reg_size(struct xe_device *xe) return 80 * sizeof(u32); } -size_t xe_lrc_skip_size(struct xe_device *xe) +/** + * xe_lrc_engine_state_size() - Get size of the engine state within LRC + * @gt: the &xe_gt struct instance + * @class: Hardware engine class + * + * Returns: Size of the engine state + */ +size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class) { - return LRC_PPHWSP_SIZE + xe_lrc_reg_size(xe); + return xe_gt_lrc_hang_replay_size(gt, class) - xe_lrc_reg_size(gt_to_xe(gt)); } static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index e7c975f..5440663 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -130,7 +130,7 @@ u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc); struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc); size_t xe_lrc_reg_size(struct xe_device *xe); -size_t xe_lrc_skip_size(struct xe_device *xe); +size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class); void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt,
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c index 6c4b164..150a241 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c
@@ -149,10 +149,11 @@ pf_migration_consume(struct xe_device *xe, unsigned int vfid) for_each_gt(gt, xe, gt_id) { data = xe_gt_sriov_pf_migration_save_consume(gt, vfid); - if (data && PTR_ERR(data) != EAGAIN) + if (!data) + continue; + if (!IS_ERR(data) || PTR_ERR(data) != -EAGAIN) return data; - if (PTR_ERR(data) == -EAGAIN) - more_data = true; + more_data = true; } if (!more_data)
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 4788996..982ee2c6 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile
@@ -201,7 +201,6 @@ obj-$(CONFIG_SENSORS_QNAP_MCU_HWMON) += qnap-mcu-hwmon.o obj-$(CONFIG_SENSORS_RASPBERRYPI_HWMON) += raspberrypi-hwmon.o obj-$(CONFIG_SENSORS_SBTSI) += sbtsi_temp.o -obj-$(CONFIG_SENSORS_SBRMI) += sbrmi.o obj-$(CONFIG_SENSORS_SCH56XX_COMMON)+= sch56xx-common.o obj-$(CONFIG_SENSORS_SCH5627) += sch5627.o obj-$(CONFIG_SENSORS_SCH5636) += sch5636.o
diff --git a/drivers/hwmon/ads7871.c b/drivers/hwmon/ads7871.c index 9bfdf9e..9ee3ce0 100644 --- a/drivers/hwmon/ads7871.c +++ b/drivers/hwmon/ads7871.c
@@ -77,9 +77,13 @@ static int ads7871_read_reg8(struct spi_device *spi, int reg) static int ads7871_read_reg16(struct spi_device *spi, int reg) { int ret; + reg = reg | INST_READ_BM | INST_16BIT_BM; ret = spi_w8r16(spi, reg); - return ret; + if (ret < 0) + return ret; + + return le16_to_cpu((__force __le16)ret); } static int ads7871_write_reg8(struct spi_device *spi, int reg, u8 val)
diff --git a/drivers/hwmon/corsair-psu.c b/drivers/hwmon/corsair-psu.c index dddbd24..76f3e1d 100644 --- a/drivers/hwmon/corsair-psu.c +++ b/drivers/hwmon/corsair-psu.c
@@ -796,13 +796,13 @@ static int corsairpsu_probe(struct hid_device *hdev, const struct hid_device_id ret = corsairpsu_init(priv); if (ret < 0) { dev_err(&hdev->dev, "unable to initialize device (%d)\n", ret); - goto fail_and_stop; + goto fail_and_close; } ret = corsairpsu_fwinfo(priv); if (ret < 0) { dev_err(&hdev->dev, "unable to query firmware (%d)\n", ret); - goto fail_and_stop; + goto fail_and_close; } corsairpsu_get_criticals(priv);
diff --git a/drivers/hwmon/lm63.c b/drivers/hwmon/lm63.c index 035176a..30500b4 100644 --- a/drivers/hwmon/lm63.c +++ b/drivers/hwmon/lm63.c
@@ -333,7 +333,13 @@ static ssize_t show_fan(struct device *dev, struct device_attribute *devattr, { struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); struct lm63_data *data = lm63_update_device(dev); - return sprintf(buf, "%d\n", FAN_FROM_REG(data->fan[attr->index])); + int fan; + + mutex_lock(&data->update_lock); + fan = FAN_FROM_REG(data->fan[attr->index]); + mutex_unlock(&data->update_lock); + + return sprintf(buf, "%d\n", fan); } static ssize_t set_fan(struct device *dev, struct device_attribute *dummy, @@ -366,12 +372,14 @@ static ssize_t show_pwm1(struct device *dev, struct device_attribute *devattr, int nr = attr->index; int pwm; + mutex_lock(&data->update_lock); if (data->pwm_highres) pwm = data->pwm1[nr]; else pwm = data->pwm1[nr] >= 2 * data->pwm1_freq ? 255 : (data->pwm1[nr] * 255 + data->pwm1_freq) / (2 * data->pwm1_freq); + mutex_unlock(&data->update_lock); return sprintf(buf, "%d\n", pwm); } @@ -529,6 +537,7 @@ static ssize_t show_temp11(struct device *dev, struct device_attribute *devattr, int nr = attr->index; int temp; + mutex_lock(&data->update_lock); if (!nr) { /* * Use unsigned temperature unless its value is zero. 
@@ -544,7 +553,10 @@ static ssize_t show_temp11(struct device *dev, struct device_attribute *devattr, else temp = TEMP11_FROM_REG(data->temp11[nr]); } - return sprintf(buf, "%d\n", temp + data->temp2_offset); + temp += data->temp2_offset; + mutex_unlock(&data->update_lock); + + return sprintf(buf, "%d\n", temp); } static ssize_t set_temp11(struct device *dev, struct device_attribute *devattr, @@ -592,9 +604,14 @@ static ssize_t temp2_crit_hyst_show(struct device *dev, struct device_attribute *dummy, char *buf) { struct lm63_data *data = lm63_update_device(dev); - return sprintf(buf, "%d\n", temp8_from_reg(data, 2) - + data->temp2_offset - - TEMP8_FROM_REG(data->temp2_crit_hyst)); + int temp; + + mutex_lock(&data->update_lock); + temp = temp8_from_reg(data, 2) + data->temp2_offset + - TEMP8_FROM_REG(data->temp2_crit_hyst); + mutex_unlock(&data->update_lock); + + return sprintf(buf, "%d\n", temp); } static ssize_t show_lut_temp_hyst(struct device *dev, @@ -602,10 +619,14 @@ static ssize_t show_lut_temp_hyst(struct device *dev, { struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); struct lm63_data *data = lm63_update_device(dev); + int temp; - return sprintf(buf, "%d\n", lut_temp_from_reg(data, attr->index) - + data->temp2_offset - - TEMP8_FROM_REG(data->lut_temp_hyst)); + mutex_lock(&data->update_lock); + temp = lut_temp_from_reg(data, attr->index) + data->temp2_offset + - TEMP8_FROM_REG(data->lut_temp_hyst); + mutex_unlock(&data->update_lock); + + return sprintf(buf, "%d\n", temp); } /* @@ -616,7 +637,7 @@ static ssize_t temp2_crit_hyst_store(struct device *dev, struct device_attribute *dummy, const char *buf, size_t count) { - struct lm63_data *data = dev_get_drvdata(dev); + struct lm63_data *data = lm63_update_device(dev); struct i2c_client *client = data->client; long val; int err;
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index f1a1e5b..c283443 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c
@@ -137,7 +137,7 @@ static const struct lm75_params device_params[] = { }, [as6200] = { .config_reg_16bits = true, - .set_mask = 0x94C0, /* 8 sample/s, 4 CF, positive polarity */ + .set_mask = 0xC010, /* 8 sample/s, 4 CF */ .default_resolution = 12, .default_sample_time = 125, .num_sample_times = 4, @@ -286,8 +286,8 @@ static const struct lm75_params device_params[] = { }, [tmp112] = { .config_reg_16bits = true, - .set_mask = 0x60C0, /* 12-bit mode, 8 samples / second */ - .clr_mask = 1 << 15, /* no one-shot mode*/ + .set_mask = 0xC060, /* 12-bit mode, 8 samples / second */ + .clr_mask = 1 << 7, /* no one-shot mode*/ .default_resolution = 12, .default_sample_time = 125, .num_sample_times = 4, @@ -353,7 +353,7 @@ static inline int lm75_write_config(struct lm75_data *data, u16 set_mask, u16 clr_mask) { return regmap_update_bits(data->regmap, LM75_REG_CONF, - clr_mask | LM75_SHUTDOWN, set_mask); + clr_mask | set_mask | LM75_SHUTDOWN, set_mask); } static irqreturn_t lm75_alarm_handler(int irq, void *private) @@ -416,7 +416,7 @@ static int lm75_read(struct device *dev, enum hwmon_sensor_types type, switch (data->kind) { case as6200: case tmp112: - *val = (regval >> 13) & 0x1; + *val = !!(regval & BIT(13)) == !!(regval & BIT(2)); break; default: return -EINVAL;
diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c index 1fcd320..2617c45 100644 --- a/drivers/hwmon/ltc2992.c +++ b/drivers/hwmon/ltc2992.c
@@ -431,10 +431,16 @@ static int ltc2992_get_voltage(struct ltc2992_state *st, u32 reg, u32 scale, lon static int ltc2992_set_voltage(struct ltc2992_state *st, u32 reg, u32 scale, long val) { - val = DIV_ROUND_CLOSEST(val * 1000, scale); - val = val << 4; + u32 reg_val; + long vmax; - return ltc2992_write_reg(st, reg, 2, val); + vmax = DIV_ROUND_CLOSEST_ULL(0xFFFULL * scale, 1000); + val = max(val, 0L); + val = min(val, vmax); + reg_val = min(DIV_ROUND_CLOSEST_ULL((u64)val * 1000, scale), + 0xFFFULL) << 4; + + return ltc2992_write_reg(st, reg, 2, reg_val); } static int ltc2992_read_gpio_alarm(struct ltc2992_state *st, int nr_gpio, u32 attr, long *val) @@ -559,9 +565,15 @@ static int ltc2992_get_current(struct ltc2992_state *st, u32 reg, u32 channel, l static int ltc2992_set_current(struct ltc2992_state *st, u32 reg, u32 channel, long val) { u32 reg_val; + long cmax; - reg_val = DIV_ROUND_CLOSEST(val * st->r_sense_uohm[channel], LTC2992_IADC_NANOV_LSB); - reg_val = reg_val << 4; + cmax = DIV_ROUND_CLOSEST_ULL(0xFFFULL * LTC2992_IADC_NANOV_LSB, + st->r_sense_uohm[channel]); + val = max(val, 0L); + val = min(val, cmax); + reg_val = min(DIV_ROUND_CLOSEST_ULL((u64)val * st->r_sense_uohm[channel], + LTC2992_IADC_NANOV_LSB), + 0xFFFULL) << 4; return ltc2992_write_reg(st, reg, 2, reg_val); } @@ -625,8 +637,10 @@ static int ltc2992_get_power(struct ltc2992_state *st, u32 reg, u32 channel, lon if (reg_val < 0) return reg_val; - *val = mul_u64_u32_div(reg_val, LTC2992_VADC_UV_LSB * LTC2992_IADC_NANOV_LSB, - st->r_sense_uohm[channel] * 1000); + *val = mul_u64_u32_div(reg_val, + LTC2992_VADC_UV_LSB / 1000 * + LTC2992_IADC_NANOV_LSB, + st->r_sense_uohm[channel]); return 0; } @@ -634,9 +648,18 @@ static int ltc2992_get_power(struct ltc2992_state *st, u32 reg, u32 channel, lon static int ltc2992_set_power(struct ltc2992_state *st, u32 reg, u32 channel, long val) { u32 reg_val; + u64 pmax, uval; - reg_val = mul_u64_u32_div(val, st->r_sense_uohm[channel] * 1000, - 
LTC2992_VADC_UV_LSB * LTC2992_IADC_NANOV_LSB); + uval = max(val, 0L); + pmax = mul_u64_u32_div(0xFFFFFFULL, + LTC2992_VADC_UV_LSB / 1000 * + LTC2992_IADC_NANOV_LSB, + st->r_sense_uohm[channel]); + uval = min(uval, pmax); + reg_val = min(mul_u64_u32_div(uval, st->r_sense_uohm[channel], + LTC2992_VADC_UV_LSB / 1000 * + LTC2992_IADC_NANOV_LSB), + 0xFFFFFFULL); return ltc2992_write_reg(st, reg, 3, reg_val); }
diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index 70cb582..53d9df7 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c
@@ -895,8 +895,6 @@ static void stm32f7_i2c_xfer_msg(struct stm32f7_i2c_dev *i2c_dev, f7_msg->result = 0; f7_msg->stop = (i2c_dev->msg_id >= i2c_dev->msg_num - 1); - reinit_completion(&i2c_dev->complete); - cr1 = readl_relaxed(base + STM32F7_I2C_CR1); cr2 = readl_relaxed(base + STM32F7_I2C_CR2); @@ -1728,6 +1726,8 @@ static int stm32f7_i2c_xfer_core(struct i2c_adapter *i2c_adap, if (ret) goto pm_free; + reinit_completion(&i2c_dev->complete); + stm32f7_i2c_xfer_msg(i2c_dev, msgs); if (!i2c_dev->atomic) @@ -2253,7 +2253,7 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) snprintf(adap->name, sizeof(adap->name), "STM32F7 I2C(%pa)", &res->start); adap->owner = THIS_MODULE; - adap->timeout = 2 * HZ; + adap->timeout = 8 * HZ; adap->retries = 3; adap->algo = &stm32f7_i2c_algo; adap->dev.parent = &pdev->dev;
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index 2cbd31f..28c0e48 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c
@@ -371,6 +371,7 @@ static const struct acpi_device_id i2c_acpi_force_100khz_device_ids[] = { * a 400KHz frequency. The root cause of the issue is not known. */ { "DLL0945", 0 }, + { "ELAN0678", 0 }, { "ELAN06FA", 0 }, {} };
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 9c46147..a2132d7 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c
@@ -445,8 +445,7 @@ static int i2c_init_recovery(struct i2c_adapter *adap) bri->set_scl = set_scl_gpio_value; if (bri->sda_gpiod) { bri->get_sda = get_sda_gpio_value; - /* FIXME: add proper flag instead of '0' once available */ - if (gpiod_get_direction(bri->sda_gpiod) == 0) + if (gpiod_get_direction(bri->sda_gpiod) == GPIO_LINE_DIRECTION_OUT) bri->set_sda = set_sda_gpio_value; } } else if (bri->recover_bus == i2c_generic_scl_recovery) {
diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c index 71eb1ef..ad6acb5 100644 --- a/drivers/i2c/i2c-core-smbus.c +++ b/drivers/i2c/i2c-core-smbus.c
@@ -566,6 +566,18 @@ s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, if (res) return res; + /* Reject invalid caller-supplied block lengths before any + * tracepoint or native smbus_xfer callback runs. + */ + if (data && + (protocol == I2C_SMBUS_I2C_BLOCK_DATA || + protocol == I2C_SMBUS_BLOCK_PROC_CALL || + (protocol == I2C_SMBUS_BLOCK_DATA && + read_write == I2C_SMBUS_WRITE)) && + (data->block[0] == 0 || + data->block[0] > I2C_SMBUS_BLOCK_MAX)) + return -EINVAL; + /* If enabled, the following two tracepoints are conditional on * read_write and protocol. */
diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 7bbe026..ccaac5e 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c
@@ -487,12 +487,13 @@ static long i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) client->adapter->retries = arg; break; case I2C_TIMEOUT: - if (arg > INT_MAX) + /* + * For historical reasons, user-space sets the timeout value in + * units of 10 ms. + */ + if (arg > INT_MAX / 10) return -EINVAL; - /* For historical reasons, user-space sets the timeout - * value in units of 10 ms. - */ client->adapter->timeout = msecs_to_jiffies(arg * 10); break; default:
diff --git a/drivers/i2c/i2c-slave-testunit.c b/drivers/i2c/i2c-slave-testunit.c index 6de4307..871c584 100644 --- a/drivers/i2c/i2c-slave-testunit.c +++ b/drivers/i2c/i2c-slave-testunit.c
@@ -15,7 +15,7 @@ #include <linux/module.h> #include <linux/of.h> #include <linux/slab.h> -#include <linux/workqueue.h> /* FIXME: is system_long_wq the best choice? */ +#include <linux/workqueue.h> #define TU_VERSION_MAX_LENGTH 128 @@ -124,7 +124,7 @@ static int i2c_slave_testunit_slave_cb(struct i2c_client *client, case I2C_SLAVE_STOP: if (tu->reg_idx == TU_NUM_REGS) { set_bit(TU_FLAG_IN_PROCESS, &tu->flags); - queue_delayed_work(system_long_wq, &tu->worker, + queue_delayed_work(system_dfl_long_wq, &tu->worker, msecs_to_jiffies(10 * tu->regs[TU_REG_DELAY])); }
diff --git a/drivers/i2c/i2c-stub.c b/drivers/i2c/i2c-stub.c index fbb0db4..04314e3 100644 --- a/drivers/i2c/i2c-stub.c +++ b/drivers/i2c/i2c-stub.c
@@ -214,6 +214,11 @@ static s32 stub_xfer(struct i2c_adapter *adap, u16 addr, unsigned short flags, * We ignore banks here, because banked chips don't use I2C * block transfers */ + if (data->block[0] == 0 || + data->block[0] > I2C_SMBUS_BLOCK_MAX) { + ret = -EINVAL; + break; + } if (data->block[0] > 256 - command) /* Avoid overrun */ data->block[0] = 256 - command; len = data->block[0];
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index a40a765..27992c3 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c
@@ -149,7 +149,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6; } - len = nla_total_size(sizeof(size)); + len = nla_total_size(size); len += NLMSG_ALIGN(sizeof(*header)); skb = nlmsg_new(len, GFP_KERNEL);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a768436..91a62d2 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -778,6 +778,7 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) struct ib_pd *orig_pd; struct ib_pd *new_pd; struct ib_mr *new_mr; + u32 lkey, rkey; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) @@ -846,6 +847,8 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) new_mr->uobject = uobj; atomic_inc(&new_pd->usecnt); new_uobj->object = new_mr; + lkey = new_mr->lkey; + rkey = new_mr->rkey; rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR); rdma_restrack_set_name(&new_mr->res, NULL); @@ -871,11 +874,13 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) mr->iova = cmd.hca_va; mr->length = cmd.length; } + lkey = mr->lkey; + rkey = mr->rkey; } memset(&resp, 0, sizeof(resp)); - resp.lkey = mr->lkey; - resp.rkey = mr->rkey; + resp.lkey = lkey; + resp.rkey = rkey; ret = uverbs_response(attrs, &resp, sizeof(resp));
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 51afaac..9121d83 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1942,13 +1942,16 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) void free_pio_map(struct hfi1_devdata *dd) { + struct pio_vl_map *map; + /* Free PIO map if allocated */ if (rcu_access_pointer(dd->pio_map)) { spin_lock_irq(&dd->pio_map_lock); - pio_map_free(rcu_access_pointer(dd->pio_map)); + map = rcu_access_pointer(dd->pio_map); RCU_INIT_POINTER(dd->pio_map, NULL); spin_unlock_irq(&dd->pio_map_lock); synchronize_rcu(); + pio_map_free(map); } kfree(dd->kernel_send_context); dd->kernel_send_context = NULL;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index e5f4429..cfd9dd0 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1255,6 +1255,7 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) { size_t i; struct sdma_engine *sde; + struct sdma_vl_map *map; if (dd->sdma_pad_dma) { dma_free_coherent(&dd->pcidev->dev, SDMA_PAD, @@ -1291,10 +1292,11 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) } if (rcu_access_pointer(dd->sdma_map)) { spin_lock_irq(&dd->sde_map_lock); - sdma_map_free(rcu_access_pointer(dd->sdma_map)); + map = rcu_access_pointer(dd->sdma_map); RCU_INIT_POINTER(dd->sdma_map, NULL); spin_unlock_irq(&dd->sde_map_lock); synchronize_rcu(); + sdma_map_free(map); } kfree(dd->per_sdma); dd->per_sdma = NULL;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index a27ea85..bf04ee8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -47,8 +47,8 @@ static struct hns_roce_qp *hns_roce_qp_lookup(struct hns_roce_dev *hr_dev, xa_lock_irqsave(&hr_dev->qp_table_xa, flags); qp = __hns_roce_qp_lookup(hr_dev, qpn); - if (qp) - refcount_inc(&qp->refcount); + if (qp && !refcount_inc_not_zero(&qp->refcount)) + qp = NULL; xa_unlock_irqrestore(&hr_dev->qp_table_xa, flags); if (!qp) @@ -1171,6 +1171,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, struct hns_roce_ib_create_qp_resp resp = {}; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_ib_create_qp ucmd = {}; + unsigned long flags; int ret; mutex_init(&hr_qp->mutex); @@ -1251,13 +1252,19 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hr_qp->ibqp.qp_num = hr_qp->qpn; hr_qp->event = hns_roce_ib_qp_event; - refcount_set(&hr_qp->refcount, 1); init_completion(&hr_qp->free); + refcount_set_release(&hr_qp->refcount, 1); return 0; err_flow_ctrl: + spin_lock_irqsave(&hr_dev->qp_list_lock, flags); + hns_roce_lock_cqs(init_attr->send_cq ? to_hr_cq(init_attr->send_cq) : NULL, + init_attr->recv_cq ? to_hr_cq(init_attr->recv_cq) : NULL); hns_roce_qp_remove(hr_dev, hr_qp); + hns_roce_unlock_cqs(init_attr->send_cq ? to_hr_cq(init_attr->send_cq) : NULL, + init_attr->recv_cq ? to_hr_cq(init_attr->recv_cq) : NULL); + spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags); err_store: free_qpc(hr_dev, hr_qp); err_qpc:
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index cb848e8..8b94cbd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -16,8 +16,8 @@ void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type) xa_lock(&srq_table->xa); srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1)); - if (srq) - refcount_inc(&srq->refcount); + if (srq && !refcount_inc_not_zero(&srq->refcount)) + srq = NULL; xa_unlock(&srq_table->xa); if (!srq) { @@ -470,6 +470,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, if (ret) goto err_srqn; + srq->event = hns_roce_ib_srq_event; + init_completion(&srq->free); + refcount_set_release(&srq->refcount, 1); + if (udata) { resp.cap_flags = srq->cap_flags; resp.srqn = srq->srqn; @@ -480,10 +484,6 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, } } - srq->event = hns_roce_ib_srq_event; - refcount_set(&srq->refcount, 1); - init_completion(&srq->free); - return 0; err_srqc:
diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c index 0382a64..73a616a 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.c +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c
@@ -185,7 +185,7 @@ static ssize_t hca_type_show(struct device *device, struct ionic_ibdev *dev = rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev); - return sysfs_emit(buf, "%s.64\n", dev->ibdev.node_desc); + return sysfs_emit(buf, "%.64s\n", dev->ibdev.node_desc); } static DEVICE_ATTR_RO(hca_type);
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c index f4cbe21..2d68242 100644 --- a/drivers/infiniband/hw/mana/cq.c +++ b/drivers/infiniband/hw/mana/cq.c
@@ -137,8 +137,9 @@ int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq) if (cq->queue.id >= gc->max_num_cqs) return -EINVAL; - /* Create CQ table entry */ - WARN_ON(gc->cq_table[cq->queue.id]); + /* Create CQ table entry, sharing a CQ between WQs is not supported */ + if (gc->cq_table[cq->queue.id]) + return -EINVAL; if (cq->queue.kmem) gdma_cq = cq->queue.kmem; else
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 6455813..0fbcf44 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c
@@ -21,6 +21,9 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, gc = mdev_to_gc(dev); + if (rx_hash_key_len > sizeof(req->hashkey)) + return -EINVAL; + req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_DEF_SIZE); req = kzalloc(req_buf_size, GFP_KERNEL); if (!req) @@ -173,11 +176,8 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ, &wq_spec, &cq_spec, &wq->rx_object); - if (ret) { - /* Do cleanup starting with index i-1 */ - i--; + if (ret) goto fail; - } /* The GDMA regions are now owned by the WQ object */ wq->queue.gdma_region = GDMA_INVALID_DMA_REGION; @@ -197,8 +197,10 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, /* Create CQ table entry */ ret = mana_ib_install_cq_cb(mdev, cq); - if (ret) + if (ret) { + mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object); goto fail; + } } resp.num_entries = i; @@ -215,13 +217,15 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata create rss-qp, %d\n", ret); - goto fail; + goto err_disable_vport_rx; } kfree(mana_ind_table); return 0; +err_disable_vport_rx: + mana_disable_vport_rx(mpc); fail: while (i-- > 0) { ibwq = ind_tbl->ind_tbl[i];
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 5b23e5f..7678407 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -194,13 +194,15 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, if (udata) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { err = -EFAULT; - goto err_wrid; + goto err_srq; } init_attr->attr.max_wr = srq->msrq.max - 1; return 0; +err_srq: + mlx4_srq_free(dev->dev, &srq->msrq); err_wrid: if (udata) mlx4_ib_db_unmap_user(ucontext, &srq->db);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 109661c..6107828 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3310,7 +3310,7 @@ int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev) * devr->c0 is set once, never changed until device unload. * Avoid taking the mutex if initialization is already done. */ - if (devr->c0) + if (smp_load_acquire(&devr->c0)) return 0; mutex_lock(&devr->cq_lock); @@ -3336,7 +3336,7 @@ int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev) } devr->p0 = pd; - devr->c0 = cq; + smp_store_release(&devr->c0, cq); unlock: mutex_unlock(&devr->cq_lock); @@ -3354,7 +3354,7 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev) * devr->s1 is set once, never changed until device unload. * Avoid taking the mutex if initialization is already done. */ - if (devr->s1) + if (smp_load_acquire(&devr->s1)) return 0; mutex_lock(&devr->srq_lock); @@ -3392,10 +3392,11 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev) "Couldn't create SRQ 1 for res init, err=%pe\n", s1); ib_destroy_srq(s0); + goto unlock; } devr->s0 = s0; - devr->s1 = s1; + smp_store_release(&devr->s1, s1); unlock: mutex_unlock(&devr->srq_lock);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8f50e73..8fd0553 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1603,6 +1603,11 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, } if (qp->rq.wqe_cnt) { + if (!rq->base.ubuffer.umem) { + err = -EINVAL; + goto err_destroy_sq; + } + rq->base.container_mibqp = qp; if (qp->flags & IB_QP_CREATE_CVLAN_STRIPPING) @@ -4692,7 +4697,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_modify_qp_resp resp = {}; struct mlx5_ib_qp *qp = to_mqp(ibqp); - struct mlx5_ib_modify_qp ucmd; + struct mlx5_ib_modify_qp ucmd = {}; enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; int err = -EINVAL;
diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index 146d03a..a7a4f94 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c
@@ -314,7 +314,14 @@ int mlx5_core_destroy_dct(struct mlx5_ib_dev *dev, xa_cmpxchg_irq(&table->dct_xa, dct->mqp.qpn, XA_ZERO_ENTRY, dct, 0); return err; } - xa_erase_irq(&table->dct_xa, dct->mqp.qpn); + + /* + * A race can occur where a concurrent create gets the same dctn + * (after hardware released it) and overwrites XA_ZERO_ENTRY with + * its new DCT before we reach here. In that case, we must not erase + * the entry as it now belongs to the new DCT. + */ + xa_cmpxchg_irq(&table->dct_xa, dct->mqp.qpn, XA_ZERO_ENTRY, NULL, 0); return 0; }
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index 8b33853..c1a0881 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -683,7 +683,14 @@ int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, srq, 0); return err; } - xa_erase_irq(&table->array, srq->srqn); + + /* + * A race can occur where a concurrent create gets the same srqn + * (after hardware released it) and overwrites XA_ZERO_ENTRY with + * its new SRQ before we reach here. In that case, we must not erase + * the entry as it now belongs to the new SRQ. + */ + xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, NULL, 0); mlx5_core_res_put(&srq->common); wait_for_completion(&srq->common.free);
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c index 29488fb..f213947 100644 --- a/drivers/infiniband/hw/mlx5/umr.c +++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -147,7 +147,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) * UMR qp is set once, never changed until device unload. * Avoid taking the mutex if initialization is already done. */ - if (dev->umrc.qp) + if (smp_load_acquire(&dev->umrc.qp)) return 0; mutex_lock(&dev->umrc.init_lock); @@ -185,7 +185,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) sema_init(&dev->umrc.sem, MAX_UMR_WR); mutex_init(&dev->umrc.lock); dev->umrc.state = MLX5_UMR_STATE_ACTIVE; - dev->umrc.qp = qp; + smp_store_release(&dev->umrc.qp, qp); mutex_unlock(&dev->umrc.init_lock); return 0;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index c17e2a5..a88cc5d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -215,7 +215,7 @@ static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, mutex_lock(&uctx->mm_list_lock); list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) { - if (len != mm->key.len && phy_addr != mm->key.phy_addr) + if (len != mm->key.len || phy_addr != mm->key.phy_addr) continue; list_del(&mm->entry); @@ -233,7 +233,7 @@ static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, mutex_lock(&uctx->mm_list_lock); list_for_each_entry(mm, &uctx->mm_head, entry) { - if (len != mm->key.len && phy_addr != mm->key.phy_addr) + if (len != mm->key.len || phy_addr != mm->key.phy_addr) continue; found = true; @@ -620,9 +620,9 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd, ucopy_err: if (pd->dpp_enabled) - ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE); + ocrdma_del_mmap(uctx, dpp_page_addr, PAGE_SIZE); dpp_map_err: - ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size); + ocrdma_del_mmap(uctx, db_page_addr, db_page_size); return status; }
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index bcd43dc..c7c2b41 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -322,7 +322,7 @@ int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) uresp.qp_tab_size = vdev->dsr->caps.max_qp; ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (ret) { - pvrdma_uar_free(vdev, &context->uar); + /* pvrdma_dealloc_ucontext() also frees the UAR */ pvrdma_dealloc_ucontext(&context->ibucontext); return -EFAULT; }
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index f792147..2d5e701 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -330,6 +330,17 @@ void rxe_rcv(struct sk_buff *skb) pkt->qp = NULL; pkt->mask |= rxe_opcode[pkt->opcode].mask; + /* + * Unknown opcodes have a zero-initialized rxe_opcode[] entry, so + * both mask and length are 0. Reject them before any length math: + * rxe_icrc_hdr() would otherwise compute length - RXE_BTH_BYTES + * and pass the underflowed value to rxe_crc32(), producing an + * out-of-bounds read. + */ + if (unlikely(!rxe_opcode[pkt->opcode].mask || + !rxe_opcode[pkt->opcode].length)) + goto drop; + if (unlikely(pkt->paylen < header_size(pkt) + bth_pad(pkt) + RXE_ICRC_SIZE)) goto drop;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 9faf8c0..9cb2f6f 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -540,7 +540,19 @@ static enum resp_states check_rkey(struct rxe_qp *qp, } skip_check_range: - if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { + if (pkt->mask & RXE_ATOMIC_WRITE_MASK) { + /* IBA oA19-28: ATOMIC_WRITE payload is exactly 8 bytes. + * Reject any other length before the responder reads + * sizeof(u64) bytes from payload_addr(pkt); a shorter + * payload would read past the logical end of the packet + * into skb->head tailroom. + */ + if (resid != sizeof(u64) || pktlen != sizeof(u64) || + bth_pad(pkt)) { + state = RESPST_ERR_LENGTH; + goto err; + } + } else if (pkt->mask & RXE_WRITE_MASK) { if (resid > mtu) { if (pktlen != mtu || bth_pad(pkt)) { state = RESPST_ERR_LENGTH;
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 1342e76..834d8fa 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h
@@ -11,6 +11,9 @@ #include "amd_iommu_types.h" +extern int amd_iommu_evtlog_size; +extern int amd_iommu_pprlog_size; + irqreturn_t amd_iommu_int_thread(int irq, void *data); irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data); irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data);
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index c685d37..f9f7180 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -15,6 +15,7 @@ #include <linux/mutex.h> #include <linux/msi.h> #include <linux/list.h> +#include <linux/sizes.h> #include <linux/spinlock.h> #include <linux/pci.h> #include <linux/iommufd.h> @@ -141,7 +142,6 @@ #define MMIO_STATUS_GALOG_INT_MASK BIT(10) /* event logging constants */ -#define EVENT_ENTRY_SIZE 0x10 #define EVENT_TYPE_SHIFT 28 #define EVENT_TYPE_MASK 0xf #define EVENT_TYPE_ILL_DEV 0x1 @@ -259,15 +259,20 @@ #define MMIO_CMD_BUFFER_TAIL(x) FIELD_GET(MMIO_CMD_TAIL_MASK, (x)) /* constants for event buffer handling */ -#define EVT_BUFFER_SIZE 8192 /* 512 entries */ -#define EVT_LEN_MASK (0x9ULL << 56) +#define EVTLOG_ENTRY_SIZE 0x10 +#define EVTLOG_SIZE_SHIFT 56 +#define EVTLOG_SIZE_DEF SZ_8K /* 512 entries */ +#define EVTLOG_LEN_MASK_DEF (0x9ULL << EVTLOG_SIZE_SHIFT) +#define EVTLOG_SIZE_MAX SZ_512K /* 32K entries */ +#define EVTLOG_LEN_MASK_MAX (0xFULL << EVTLOG_SIZE_SHIFT) /* Constants for PPR Log handling */ -#define PPR_LOG_ENTRIES 512 -#define PPR_LOG_SIZE_SHIFT 56 -#define PPR_LOG_SIZE_512 (0x9ULL << PPR_LOG_SIZE_SHIFT) -#define PPR_ENTRY_SIZE 16 -#define PPR_LOG_SIZE (PPR_ENTRY_SIZE * PPR_LOG_ENTRIES) +#define PPRLOG_ENTRY_SIZE 0x10 +#define PPRLOG_SIZE_SHIFT 56 +#define PPRLOG_SIZE_DEF SZ_8K /* 512 entries */ +#define PPRLOG_LEN_MASK_DEF (0x9ULL << PPRLOG_SIZE_SHIFT) +#define PPRLOG_SIZE_MAX SZ_512K /* 32K entries */ +#define PPRLOG_LEN_MASK_MAX (0xFULL << PPRLOG_SIZE_SHIFT) /* PAGE_SERVICE_REQUEST PPR Log Buffer Entry flags */ #define PPR_FLAG_EXEC 0x002 /* Execute permission requested */
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 56ad020..3bdb380 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c
@@ -132,6 +132,9 @@ struct ivhd_entry { u8 uid; } __attribute__((packed)); +int amd_iommu_evtlog_size = EVTLOG_SIZE_DEF; +int amd_iommu_pprlog_size = PPRLOG_SIZE_DEF; + /* * An AMD IOMMU memory definition structure. It defines things like exclusion * ranges for devices and regions that should be unity mapped. @@ -865,35 +868,47 @@ void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp, } /* allocates the memory where the IOMMU will log its events to */ -static int __init alloc_event_buffer(struct amd_iommu *iommu) +static int __init alloc_event_buffer(void) { - iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL, - EVT_BUFFER_SIZE); + struct amd_iommu *iommu; - return iommu->evt_buf ? 0 : -ENOMEM; -} - -static void iommu_enable_event_buffer(struct amd_iommu *iommu) -{ - u64 entry; - - BUG_ON(iommu->evt_buf == NULL); - - if (!is_kdump_kernel()) { - /* - * Event buffer is re-used for kdump kernel and setting - * of MMIO register is not required. - */ - entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; - memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, - &entry, sizeof(entry)); + for_each_iommu(iommu) { + iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL, + amd_iommu_evtlog_size); + if (!iommu->evt_buf) + return -ENOMEM; } - /* set head and tail to zero manually */ - writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); - writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); + return 0; +} - iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); +static void iommu_enable_event_buffer(void) +{ + struct amd_iommu *iommu; + u64 entry; + + for_each_iommu(iommu) { + BUG_ON(iommu->evt_buf == NULL); + + if (!is_kdump_kernel()) { + /* + * Event buffer is re-used for kdump kernel and setting + * of MMIO register is not required. + */ + entry = iommu_virt_to_phys(iommu->evt_buf); + entry |= (amd_iommu_evtlog_size == EVTLOG_SIZE_DEF) ? 
+ EVTLOG_LEN_MASK_DEF : EVTLOG_LEN_MASK_MAX; + + memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, + &entry, sizeof(entry)); + } + + /* set head and tail to zero manually */ + writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); + + iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); + } } /* @@ -984,15 +999,20 @@ static int __init alloc_cwwb_sem(struct amd_iommu *iommu) return 0; } -static int __init remap_event_buffer(struct amd_iommu *iommu) +static int __init remap_event_buffer(void) { + struct amd_iommu *iommu; u64 paddr; pr_info_once("Re-using event buffer from the previous kernel\n"); - paddr = readq(iommu->mmio_base + MMIO_EVT_BUF_OFFSET) & PM_ADDR_MASK; - iommu->evt_buf = iommu_memremap(paddr, EVT_BUFFER_SIZE); + for_each_iommu(iommu) { + paddr = readq(iommu->mmio_base + MMIO_EVT_BUF_OFFSET) & PM_ADDR_MASK; + iommu->evt_buf = iommu_memremap(paddr, amd_iommu_evtlog_size); + if (!iommu->evt_buf) + return -ENOMEM; + } - return iommu->evt_buf ? 
0 : -ENOMEM; + return 0; } static int __init remap_command_buffer(struct amd_iommu *iommu) @@ -1044,10 +1064,6 @@ static int __init alloc_iommu_buffers(struct amd_iommu *iommu) ret = remap_command_buffer(iommu); if (ret) return ret; - - ret = remap_event_buffer(iommu); - if (ret) - return ret; } else { ret = alloc_cwwb_sem(iommu); if (ret) @@ -1056,10 +1072,6 @@ static int __init alloc_iommu_buffers(struct amd_iommu *iommu) ret = alloc_command_buffer(iommu); if (ret) return ret; - - ret = alloc_event_buffer(iommu); - if (ret) - return ret; } return 0; @@ -2893,7 +2905,6 @@ static void early_enable_iommu(struct amd_iommu *iommu) iommu_init_flags(iommu); iommu_set_device_table(iommu); iommu_enable_command_buffer(iommu); - iommu_enable_event_buffer(iommu); iommu_set_exclusion_range(iommu); iommu_enable_gt(iommu); iommu_enable_ga(iommu); @@ -2957,7 +2968,6 @@ static void early_enable_iommus(void) iommu_disable_event_buffer(iommu); iommu_disable_irtcachedis(iommu); iommu_enable_command_buffer(iommu); - iommu_enable_event_buffer(iommu); iommu_enable_ga(iommu); iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); @@ -3070,6 +3080,7 @@ static void amd_iommu_resume(void *data) for_each_iommu(iommu) early_enable_iommu(iommu); + iommu_enable_event_buffer(); amd_iommu_enable_interrupts(); } @@ -3399,6 +3410,33 @@ static __init void iommu_snp_enable(void) #endif } +static void amd_iommu_apply_erratum_snp(void) +{ +#ifdef CONFIG_KVM_AMD_SEV + if (!amd_iommu_snp_en) + return; + + /* Errata fix for Family 0x19 */ + if (boot_cpu_data.x86 != 0x19) + return; + + /* Set event log buffer size to max */ + amd_iommu_evtlog_size = EVTLOG_SIZE_MAX; + pr_info("Applying erratum: Increase Event log size to 0x%x\n", + amd_iommu_evtlog_size); + + /* + * Set PPR log buffer size to max. + * (Family 0x19, model < 0x10 doesn't support PPR when SNP is enabled). 
+ */ + if (boot_cpu_data.x86_model >= 0x10) { + amd_iommu_pprlog_size = PPRLOG_SIZE_MAX; + pr_info("Applying erratum: Increase PPR log size to 0x%x\n", + amd_iommu_pprlog_size); + } +#endif +} + /**************************************************************************** * * AMD IOMMU Initialization State Machine @@ -3435,6 +3473,21 @@ static int __init state_next(void) case IOMMU_ENABLED: register_syscore(&amd_iommu_syscore); iommu_snp_enable(); + + amd_iommu_apply_erratum_snp(); + + /* Allocate/enable event log buffer */ + if (is_kdump_kernel()) + ret = remap_event_buffer(); + else + ret = alloc_event_buffer(); + + if (ret) { + init_state = IOMMU_INIT_ERROR; + break; + } + iommu_enable_event_buffer(); + ret = amd_iommu_init_pci(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; break; @@ -4037,11 +4090,11 @@ int amd_iommu_snp_disable(void) return 0; for_each_iommu(iommu) { - ret = iommu_make_shared(iommu->evt_buf, EVT_BUFFER_SIZE); + ret = iommu_make_shared(iommu->evt_buf, amd_iommu_evtlog_size); if (ret) return ret; - ret = iommu_make_shared(iommu->ppr_log, PPR_LOG_SIZE); + ret = iommu_make_shared(iommu->ppr_log, amd_iommu_pprlog_size); if (ret) return ret;
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 0117136..f78e23f 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c
@@ -1010,7 +1010,7 @@ static void iommu_poll_events(struct amd_iommu *iommu) iommu_print_event(iommu, iommu->evt_buf + head); /* Update head pointer of hardware ring-buffer */ - head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE; + head = (head + EVTLOG_ENTRY_SIZE) % amd_iommu_evtlog_size; writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); } @@ -2149,7 +2149,8 @@ static void set_dte_passthrough(struct iommu_dev_data *dev_data, new->data[0] |= DTE_FLAG_TV | DTE_FLAG_IR | DTE_FLAG_IW; new->data[1] |= FIELD_PREP(DTE_DOMID_MASK, domain->id) | - (dev_data->ats_enabled) ? DTE_FLAG_IOTLB : 0; + (dev_data->ats_enabled ? DTE_FLAG_IOTLB : 0); + } static void set_dte_entry(struct amd_iommu *iommu,
diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c index e6767c0..1f8d282 100644 --- a/drivers/iommu/amd/ppr.c +++ b/drivers/iommu/amd/ppr.c
@@ -20,7 +20,7 @@ int __init amd_iommu_alloc_ppr_log(struct amd_iommu *iommu) { iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, - PPR_LOG_SIZE); + amd_iommu_pprlog_size); return iommu->ppr_log ? 0 : -ENOMEM; } @@ -33,7 +33,9 @@ void amd_iommu_enable_ppr_log(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_PPR_EN); - entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; + entry = iommu_virt_to_phys(iommu->ppr_log); + entry |= (amd_iommu_pprlog_size == PPRLOG_SIZE_DEF) ? + PPRLOG_LEN_MASK_DEF : PPRLOG_LEN_MASK_MAX; memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, &entry, sizeof(entry)); @@ -201,7 +203,7 @@ void amd_iommu_poll_ppr_log(struct amd_iommu *iommu) raw[0] = raw[1] = 0UL; /* Update head pointer of hardware ring-buffer */ - head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; + head = (head + PPRLOG_ENTRY_SIZE) % amd_iommu_pprlog_size; writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); /* Handle PPR entry */
diff --git a/drivers/iommu/iommu-pages.h b/drivers/iommu/iommu-pages.h index ae9da4f..e9e605b 100644 --- a/drivers/iommu/iommu-pages.h +++ b/drivers/iommu/iommu-pages.h
@@ -137,7 +137,7 @@ static inline void iommu_pages_flush_incoherent(struct device *dma_dev, void *virt, size_t offset, size_t len) { - dma_sync_single_for_device(dma_dev, (uintptr_t)virt + offset, len, + dma_sync_single_for_device(dma_dev, virt_to_phys(virt) + offset, len, DMA_TO_DEVICE); } void iommu_pages_stop_incoherent_list(struct iommu_pages_list *list,
diff --git a/drivers/media/platform/qcom/camss/camss-csid-gen3.c b/drivers/media/platform/qcom/camss/camss-csid-gen3.c index 664245c..bd05924 100644 --- a/drivers/media/platform/qcom/camss/camss-csid-gen3.c +++ b/drivers/media/platform/qcom/camss/camss-csid-gen3.c
@@ -48,9 +48,9 @@ #define IS_CSID_690(csid) ((csid->camss->res->version == CAMSS_8775P) \ || (csid->camss->res->version == CAMSS_8300)) #define CSID_BUF_DONE_IRQ_STATUS 0x8C -#define BUF_DONE_IRQ_STATUS_RDI_OFFSET (csid_is_lite(csid) ?\ - 1 : (IS_CSID_690(csid) ?\ - 13 : 14)) +#define BUF_DONE_IRQ_STATUS_RDI_OFFSET (csid_is_lite(csid) ? \ + ((IS_CSID_690(csid) ? 0 : 1)) : \ + ((IS_CSID_690(csid) ? 13 : 14))) #define CSID_BUF_DONE_IRQ_MASK 0x90 #define CSID_BUF_DONE_IRQ_CLEAR 0x94 #define CSID_BUF_DONE_IRQ_SET 0x98
diff --git a/drivers/media/platform/qcom/camss/camss-csiphy.c b/drivers/media/platform/qcom/camss/camss-csiphy.c index 6262339..78a1b56 100644 --- a/drivers/media/platform/qcom/camss/camss-csiphy.c +++ b/drivers/media/platform/qcom/camss/camss-csiphy.c
@@ -558,12 +558,16 @@ static int csiphy_init_formats(struct v4l2_subdev *sd, return csiphy_set_format(sd, fh ? fh->state : NULL, &format); } -static bool csiphy_match_clock_name(const char *clock_name, const char *format, - int index) +static bool __printf(2, 3) +csiphy_match_clock_name(const char *clock_name, const char *format, ...) { char name[16]; /* csiphyXXX_timer\0 */ + va_list args; - snprintf(name, sizeof(name), format, index); + va_start(args, format); + vsnprintf(name, sizeof(name), format, args); + va_end(args); + return !strcmp(clock_name, name); }
diff --git a/drivers/media/platform/qcom/camss/camss.c b/drivers/media/platform/qcom/camss/camss.c index 00b87fd..9335636 100644 --- a/drivers/media/platform/qcom/camss/camss.c +++ b/drivers/media/platform/qcom/camss/camss.c
@@ -3598,12 +3598,10 @@ static const struct camss_subdev_resources csid_res_8775p[] = { /* CSID2 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", - "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + .clock = { "vfe_lite_csid", "vfe_lite_cphy_rx" }, .clock_rate = { - { 0, 0, 400000000, 400000000, 0}, - { 0, 0, 400000000, 480000000, 0} + { 400000000, 480000000 }, + { 400000000, 480000000 } }, .reg = { "csid_lite0" }, .interrupt = { "csid_lite0" }, @@ -3617,12 +3615,10 @@ static const struct camss_subdev_resources csid_res_8775p[] = { /* CSID3 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", - "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + .clock = { "vfe_lite_csid", "vfe_lite_cphy_rx" }, .clock_rate = { - { 0, 0, 400000000, 400000000, 0}, - { 0, 0, 400000000, 480000000, 0} + { 400000000, 480000000 }, + { 400000000, 480000000 } }, .reg = { "csid_lite1" }, .interrupt = { "csid_lite1" }, @@ -3636,12 +3632,10 @@ static const struct camss_subdev_resources csid_res_8775p[] = { /* CSID4 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", - "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + .clock = { "vfe_lite_csid", "vfe_lite_cphy_rx" }, .clock_rate = { - { 0, 0, 400000000, 400000000, 0}, - { 0, 0, 400000000, 480000000, 0} + { 400000000, 480000000 }, + { 400000000, 480000000 } }, .reg = { "csid_lite2" }, .interrupt = { "csid_lite2" }, @@ -3655,12 +3649,10 @@ static const struct camss_subdev_resources csid_res_8775p[] = { /* CSID5 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", - "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + .clock = { "vfe_lite_csid", "vfe_lite_cphy_rx" }, .clock_rate = { - { 0, 0, 400000000, 400000000, 0}, - { 0, 0, 400000000, 480000000, 0} + { 400000000, 480000000 }, + { 400000000, 480000000 } }, .reg = { "csid_lite3" }, .interrupt = { "csid_lite3" }, @@ -3674,12 +3666,10 @@ static const struct camss_subdev_resources 
csid_res_8775p[] = { /* CSID6 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", - "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + .clock = { "vfe_lite_csid", "vfe_lite_cphy_rx" }, .clock_rate = { - { 0, 0, 400000000, 400000000, 0}, - { 0, 0, 400000000, 480000000, 0} + { 400000000, 480000000 }, + { 400000000, 480000000 } }, .reg = { "csid_lite4" }, .interrupt = { "csid_lite4" }, @@ -3752,15 +3742,17 @@ static const struct camss_subdev_resources vfe_res_8775p[] = { /* VFE2 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", + .clock = { "cpas_ahb", "cpas_vfe_lite", "vfe_lite_ahb", "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + "vfe_lite", "camnoc_axi"}, .clock_rate = { - { 0, 0, 0, 0 }, + { 0 }, + { 0 }, { 300000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 480000000, 600000000, 600000000, 600000000 }, + { 400000000 }, }, .reg = { "vfe_lite0" }, .interrupt = { "vfe_lite0" }, @@ -3775,15 +3767,17 @@ static const struct camss_subdev_resources vfe_res_8775p[] = { /* VFE3 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", + .clock = { "cpas_ahb", "cpas_vfe_lite", "vfe_lite_ahb", "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + "vfe_lite", "camnoc_axi"}, .clock_rate = { - { 0, 0, 0, 0 }, + { 0 }, + { 0 }, { 300000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 480000000, 600000000, 600000000, 600000000 }, + { 400000000 }, }, .reg = { "vfe_lite1" }, .interrupt = { "vfe_lite1" }, @@ -3798,15 +3792,17 @@ static const struct camss_subdev_resources vfe_res_8775p[] = { /* VFE4 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", + .clock = { "cpas_ahb", "cpas_vfe_lite", "vfe_lite_ahb", "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + "vfe_lite", "camnoc_axi"}, .clock_rate = { - { 
0, 0, 0, 0 }, + { 0 }, + { 0 }, { 300000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 480000000, 600000000, 600000000, 600000000 }, + { 400000000 }, }, .reg = { "vfe_lite2" }, .interrupt = { "vfe_lite2" }, @@ -3821,15 +3817,17 @@ static const struct camss_subdev_resources vfe_res_8775p[] = { /* VFE5 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", + .clock = { "cpas_ahb", "cpas_vfe_lite", "vfe_lite_ahb", "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + "vfe_lite", "camnoc_axi"}, .clock_rate = { - { 0, 0, 0, 0 }, + { 0 }, + { 0 }, { 300000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 480000000, 600000000, 600000000, 600000000 }, + { 400000000 }, }, .reg = { "vfe_lite3" }, .interrupt = { "vfe_lite3" }, @@ -3844,15 +3842,17 @@ static const struct camss_subdev_resources vfe_res_8775p[] = { /* VFE6 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", + .clock = { "cpas_ahb", "cpas_vfe_lite", "vfe_lite_ahb", "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + "vfe_lite", "camnoc_axi"}, .clock_rate = { - { 0, 0, 0, 0 }, + { 0 }, + { 0 }, { 300000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 480000000, 600000000, 600000000, 600000000 }, + { 400000000 }, }, .reg = { "vfe_lite4" }, .interrupt = { "vfe_lite4" },
diff --git a/drivers/media/platform/qcom/iris/Kconfig b/drivers/media/platform/qcom/iris/Kconfig index 3c803a0..5498f48 100644 --- a/drivers/media/platform/qcom/iris/Kconfig +++ b/drivers/media/platform/qcom/iris/Kconfig
@@ -3,7 +3,7 @@ depends on VIDEO_DEV depends on ARCH_QCOM || COMPILE_TEST select V4L2_MEM2MEM_DEV - select QCOM_MDT_LOADER if ARCH_QCOM + select QCOM_MDT_LOADER select QCOM_SCM select VIDEOBUF2_DMA_CONTIG help
diff --git a/drivers/media/platform/qcom/iris/iris_buffer.c b/drivers/media/platform/qcom/iris/iris_buffer.c index 9151f43..1d53c74 100644 --- a/drivers/media/platform/qcom/iris/iris_buffer.c +++ b/drivers/media/platform/qcom/iris/iris_buffer.c
@@ -582,10 +582,12 @@ static int iris_release_internal_buffers(struct iris_inst *inst, continue; if (!(buffer->attr & BUF_ATTR_QUEUED)) continue; - ret = hfi_ops->session_release_buf(inst, buffer); - if (ret) - return ret; buffer->attr |= BUF_ATTR_PENDING_RELEASE; + ret = hfi_ops->session_release_buf(inst, buffer); + if (ret) { + buffer->attr &= ~BUF_ATTR_PENDING_RELEASE; + return ret; + } } return 0;
diff --git a/drivers/media/platform/qcom/iris/iris_core.c b/drivers/media/platform/qcom/iris/iris_core.c index 8406c48..dbaac01 100644 --- a/drivers/media/platform/qcom/iris/iris_core.c +++ b/drivers/media/platform/qcom/iris/iris_core.c
@@ -75,6 +75,10 @@ int iris_core_init(struct iris_core *core) if (ret) goto error_unload_fw; + ret = iris_vpu_switch_to_hwmode(core); + if (ret) + goto error_unload_fw; + ret = iris_hfi_core_init(core); if (ret) goto error_unload_fw;
diff --git a/drivers/media/platform/qcom/iris/iris_hfi_common.c b/drivers/media/platform/qcom/iris/iris_hfi_common.c index 92112eb..621c665 100644 --- a/drivers/media/platform/qcom/iris/iris_hfi_common.c +++ b/drivers/media/platform/qcom/iris/iris_hfi_common.c
@@ -159,6 +159,10 @@ int iris_hfi_pm_resume(struct iris_core *core) if (ret) goto err_suspend_hw; + ret = iris_vpu_switch_to_hwmode(core); + if (ret) + goto err_suspend_hw; + ret = ops->sys_interframe_powercollapse(core); if (ret) goto err_suspend_hw;
diff --git a/drivers/media/platform/qcom/iris/iris_hfi_queue.c b/drivers/media/platform/qcom/iris/iris_hfi_queue.c index b3ed062..bf6db23 100644 --- a/drivers/media/platform/qcom/iris/iris_hfi_queue.c +++ b/drivers/media/platform/qcom/iris/iris_hfi_queue.c
@@ -263,7 +263,7 @@ int iris_hfi_queues_init(struct iris_core *core) GFP_KERNEL, DMA_ATTR_WRITE_COMBINE); if (!core->sfr_vaddr) { dev_err(core->dev, "sfr alloc and map failed\n"); - dma_free_attrs(core->dev, sizeof(*q_tbl_hdr), core->iface_q_table_vaddr, + dma_free_attrs(core->dev, queue_size, core->iface_q_table_vaddr, core->iface_q_table_daddr, DMA_ATTR_WRITE_COMBINE); return -ENOMEM; }
diff --git a/drivers/media/platform/qcom/iris/iris_vdec.c b/drivers/media/platform/qcom/iris/iris_vdec.c index 7192173..99d544e 100644 --- a/drivers/media/platform/qcom/iris/iris_vdec.c +++ b/drivers/media/platform/qcom/iris/iris_vdec.c
@@ -61,12 +61,6 @@ int iris_vdec_inst_init(struct iris_inst *inst) return iris_ctrls_init(inst); } -void iris_vdec_inst_deinit(struct iris_inst *inst) -{ - kfree(inst->fmt_dst); - kfree(inst->fmt_src); -} - static const struct iris_fmt iris_vdec_formats_cap[] = { [IRIS_FMT_NV12] = { .pixfmt = V4L2_PIX_FMT_NV12,
diff --git a/drivers/media/platform/qcom/iris/iris_vdec.h b/drivers/media/platform/qcom/iris/iris_vdec.h index ec1ce55..5123d2a 100644 --- a/drivers/media/platform/qcom/iris/iris_vdec.h +++ b/drivers/media/platform/qcom/iris/iris_vdec.h
@@ -9,7 +9,6 @@ struct iris_inst; int iris_vdec_inst_init(struct iris_inst *inst); -void iris_vdec_inst_deinit(struct iris_inst *inst); int iris_vdec_enum_fmt(struct iris_inst *inst, struct v4l2_fmtdesc *f); int iris_vdec_try_fmt(struct iris_inst *inst, struct v4l2_format *f); int iris_vdec_s_fmt(struct iris_inst *inst, struct v4l2_format *f);
diff --git a/drivers/media/platform/qcom/iris/iris_venc.c b/drivers/media/platform/qcom/iris/iris_venc.c index aa27b22..4d88676 100644 --- a/drivers/media/platform/qcom/iris/iris_venc.c +++ b/drivers/media/platform/qcom/iris/iris_venc.c
@@ -79,12 +79,6 @@ int iris_venc_inst_init(struct iris_inst *inst) return iris_ctrls_init(inst); } -void iris_venc_inst_deinit(struct iris_inst *inst) -{ - kfree(inst->fmt_dst); - kfree(inst->fmt_src); -} - static const struct iris_fmt iris_venc_formats_cap[] = { [IRIS_FMT_H264] = { .pixfmt = V4L2_PIX_FMT_H264,
diff --git a/drivers/media/platform/qcom/iris/iris_venc.h b/drivers/media/platform/qcom/iris/iris_venc.h index c4db743..00c1716b 100644 --- a/drivers/media/platform/qcom/iris/iris_venc.h +++ b/drivers/media/platform/qcom/iris/iris_venc.h
@@ -9,7 +9,6 @@ struct iris_inst; int iris_venc_inst_init(struct iris_inst *inst); -void iris_venc_inst_deinit(struct iris_inst *inst); int iris_venc_enum_fmt(struct iris_inst *inst, struct v4l2_fmtdesc *f); int iris_venc_try_fmt(struct iris_inst *inst, struct v4l2_format *f); int iris_venc_s_fmt(struct iris_inst *inst, struct v4l2_format *f);
diff --git a/drivers/media/platform/qcom/iris/iris_vidc.c b/drivers/media/platform/qcom/iris/iris_vidc.c index bd38d84..5eb1786 100644 --- a/drivers/media/platform/qcom/iris/iris_vidc.c +++ b/drivers/media/platform/qcom/iris/iris_vidc.c
@@ -289,10 +289,6 @@ int iris_close(struct file *filp) v4l2_m2m_ctx_release(inst->m2m_ctx); v4l2_m2m_release(inst->m2m_dev); mutex_lock(&inst->lock); - if (inst->domain == DECODER) - iris_vdec_inst_deinit(inst); - else if (inst->domain == ENCODER) - iris_venc_inst_deinit(inst); iris_session_close(inst); iris_inst_change_state(inst, IRIS_INST_DEINIT); iris_v4l2_fh_deinit(inst, filp); @@ -304,6 +300,8 @@ int iris_close(struct file *filp) mutex_unlock(&inst->lock); mutex_destroy(&inst->ctx_q_lock); mutex_destroy(&inst->lock); + kfree(inst->fmt_src); + kfree(inst->fmt_dst); kfree(inst); return 0;
diff --git a/drivers/media/platform/qcom/iris/iris_vpu2.c b/drivers/media/platform/qcom/iris/iris_vpu2.c index 9c103a2e..01ef40f 100644 --- a/drivers/media/platform/qcom/iris/iris_vpu2.c +++ b/drivers/media/platform/qcom/iris/iris_vpu2.c
@@ -44,4 +44,5 @@ const struct vpu_ops iris_vpu2_ops = { .power_off_controller = iris_vpu_power_off_controller, .power_on_controller = iris_vpu_power_on_controller, .calc_freq = iris_vpu2_calc_freq, + .set_hwmode = iris_vpu_set_hwmode, };
diff --git a/drivers/media/platform/qcom/iris/iris_vpu3x.c b/drivers/media/platform/qcom/iris/iris_vpu3x.c index fe4423b..3dad47b 100644 --- a/drivers/media/platform/qcom/iris/iris_vpu3x.c +++ b/drivers/media/platform/qcom/iris/iris_vpu3x.c
@@ -234,14 +234,8 @@ static int iris_vpu35_power_on_hw(struct iris_core *core) if (ret) goto err_disable_hw_free_clk; - ret = dev_pm_genpd_set_hwmode(core->pmdomain_tbl->pd_devs[IRIS_HW_POWER_DOMAIN], true); - if (ret) - goto err_disable_hw_clk; - return 0; -err_disable_hw_clk: - iris_disable_unprepare_clock(core, IRIS_HW_CLK); err_disable_hw_free_clk: iris_disable_unprepare_clock(core, IRIS_HW_FREERUN_CLK); err_disable_axi_clk: @@ -266,6 +260,7 @@ const struct vpu_ops iris_vpu3_ops = { .power_off_controller = iris_vpu_power_off_controller, .power_on_controller = iris_vpu_power_on_controller, .calc_freq = iris_vpu3x_vpu4x_calculate_frequency, + .set_hwmode = iris_vpu_set_hwmode, }; const struct vpu_ops iris_vpu33_ops = { @@ -274,6 +269,7 @@ const struct vpu_ops iris_vpu33_ops = { .power_off_controller = iris_vpu33_power_off_controller, .power_on_controller = iris_vpu_power_on_controller, .calc_freq = iris_vpu3x_vpu4x_calculate_frequency, + .set_hwmode = iris_vpu_set_hwmode, }; const struct vpu_ops iris_vpu35_ops = { @@ -283,4 +279,5 @@ const struct vpu_ops iris_vpu35_ops = { .power_on_controller = iris_vpu35_vpu4x_power_on_controller, .program_bootup_registers = iris_vpu35_vpu4x_program_bootup_registers, .calc_freq = iris_vpu3x_vpu4x_calculate_frequency, + .set_hwmode = iris_vpu_set_hwmode, };
diff --git a/drivers/media/platform/qcom/iris/iris_vpu4x.c b/drivers/media/platform/qcom/iris/iris_vpu4x.c index a8db02c..02e100a 100644 --- a/drivers/media/platform/qcom/iris/iris_vpu4x.c +++ b/drivers/media/platform/qcom/iris/iris_vpu4x.c
@@ -252,21 +252,10 @@ static int iris_vpu4x_power_on_hardware(struct iris_core *core) ret = iris_vpu4x_power_on_apv(core); if (ret) goto disable_hw_clocks; - - iris_vpu4x_ahb_sync_reset_apv(core); } - iris_vpu4x_ahb_sync_reset_hardware(core); - - ret = iris_vpu4x_genpd_set_hwmode(core, true, efuse_value); - if (ret) - goto disable_apv_power_domain; - return 0; -disable_apv_power_domain: - if (!(efuse_value & DISABLE_VIDEO_APV_BIT)) - iris_vpu4x_power_off_apv(core); disable_hw_clocks: iris_vpu4x_disable_hardware_clocks(core, efuse_value); disable_vpp1_power_domain: @@ -359,6 +348,18 @@ static void iris_vpu4x_power_off_hardware(struct iris_core *core) iris_disable_power_domains(core, core->pmdomain_tbl->pd_devs[IRIS_HW_POWER_DOMAIN]); } +static int iris_vpu4x_set_hwmode(struct iris_core *core) +{ + u32 efuse_value = readl(core->reg_base + WRAPPER_EFUSE_MONITOR); + + if (!(efuse_value & DISABLE_VIDEO_APV_BIT)) + iris_vpu4x_ahb_sync_reset_apv(core); + + iris_vpu4x_ahb_sync_reset_hardware(core); + + return iris_vpu4x_genpd_set_hwmode(core, true, efuse_value); +} + const struct vpu_ops iris_vpu4x_ops = { .power_off_hw = iris_vpu4x_power_off_hardware, .power_on_hw = iris_vpu4x_power_on_hardware, @@ -366,4 +367,5 @@ const struct vpu_ops iris_vpu4x_ops = { .power_on_controller = iris_vpu35_vpu4x_power_on_controller, .program_bootup_registers = iris_vpu35_vpu4x_program_bootup_registers, .calc_freq = iris_vpu3x_vpu4x_calculate_frequency, + .set_hwmode = iris_vpu4x_set_hwmode, };
diff --git a/drivers/media/platform/qcom/iris/iris_vpu_buffer.h b/drivers/media/platform/qcom/iris/iris_vpu_buffer.h index 12640eb..8c0d6b7 100644 --- a/drivers/media/platform/qcom/iris/iris_vpu_buffer.h +++ b/drivers/media/platform/qcom/iris/iris_vpu_buffer.h
@@ -67,7 +67,7 @@ struct iris_inst; #define SIZE_DOLBY_RPU_METADATA (41 * 1024) #define H264_CABAC_HDR_RATIO_HD_TOT 1 #define H264_CABAC_RES_RATIO_HD_TOT 3 -#define H265D_MAX_SLICE 1200 +#define H265D_MAX_SLICE 3600 #define SIZE_H265D_HW_PIC_T SIZE_H264D_HW_PIC_T #define H265_CABAC_HDR_RATIO_HD_TOT 2 #define H265_CABAC_RES_RATIO_HD_TOT 2
diff --git a/drivers/media/platform/qcom/iris/iris_vpu_common.c b/drivers/media/platform/qcom/iris/iris_vpu_common.c index 548e5f1..69e6126 100644 --- a/drivers/media/platform/qcom/iris/iris_vpu_common.c +++ b/drivers/media/platform/qcom/iris/iris_vpu_common.c
@@ -292,14 +292,8 @@ int iris_vpu_power_on_hw(struct iris_core *core) if (ret && ret != -ENOENT) goto err_disable_hw_clock; - ret = dev_pm_genpd_set_hwmode(core->pmdomain_tbl->pd_devs[IRIS_HW_POWER_DOMAIN], true); - if (ret) - goto err_disable_hw_ahb_clock; - return 0; -err_disable_hw_ahb_clock: - iris_disable_unprepare_clock(core, IRIS_HW_AHB_CLK); err_disable_hw_clock: iris_disable_unprepare_clock(core, IRIS_HW_CLK); err_disable_power: @@ -308,6 +302,16 @@ int iris_vpu_power_on_hw(struct iris_core *core) return ret; } +int iris_vpu_set_hwmode(struct iris_core *core) +{ + return dev_pm_genpd_set_hwmode(core->pmdomain_tbl->pd_devs[IRIS_HW_POWER_DOMAIN], true); +} + +int iris_vpu_switch_to_hwmode(struct iris_core *core) +{ + return core->iris_platform_data->vpu_ops->set_hwmode(core); +} + int iris_vpu35_vpu4x_power_off_controller(struct iris_core *core) { u32 clk_rst_tbl_size = core->iris_platform_data->clk_rst_tbl_size;
diff --git a/drivers/media/platform/qcom/iris/iris_vpu_common.h b/drivers/media/platform/qcom/iris/iris_vpu_common.h index f6dffc6..dee3b13 100644 --- a/drivers/media/platform/qcom/iris/iris_vpu_common.h +++ b/drivers/media/platform/qcom/iris/iris_vpu_common.h
@@ -21,6 +21,7 @@ struct vpu_ops { int (*power_on_controller)(struct iris_core *core); void (*program_bootup_registers)(struct iris_core *core); u64 (*calc_freq)(struct iris_inst *inst, size_t data_size); + int (*set_hwmode)(struct iris_core *core); }; int iris_vpu_boot_firmware(struct iris_core *core); @@ -30,6 +31,8 @@ int iris_vpu_watchdog(struct iris_core *core, u32 intr_status); int iris_vpu_prepare_pc(struct iris_core *core); int iris_vpu_power_on_controller(struct iris_core *core); int iris_vpu_power_on_hw(struct iris_core *core); +int iris_vpu_set_hwmode(struct iris_core *core); +int iris_vpu_switch_to_hwmode(struct iris_core *core); int iris_vpu_power_on(struct iris_core *core); int iris_vpu_power_off_controller(struct iris_core *core); void iris_vpu_power_off_hw(struct iris_core *core);
diff --git a/drivers/media/platform/qcom/venus/Kconfig b/drivers/media/platform/qcom/venus/Kconfig index ffb731ec..63ee8c7 100644 --- a/drivers/media/platform/qcom/venus/Kconfig +++ b/drivers/media/platform/qcom/venus/Kconfig
@@ -4,7 +4,7 @@ depends on VIDEO_DEV && QCOM_SMEM depends on (ARCH_QCOM && ARM64 && IOMMU_API) || COMPILE_TEST select OF_DYNAMIC if ARCH_QCOM - select QCOM_MDT_LOADER if ARCH_QCOM + select QCOM_MDT_LOADER select QCOM_SCM select VIDEOBUF2_DMA_CONTIG select V4L2_MEM2MEM_DEV
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index b942338..44d6709 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c
@@ -25,6 +25,9 @@ #include "mt7530.h" +#define MT7530_STATS_POLL_INTERVAL (1 * HZ) +#define MT7530_STATS_RATE_LIMIT (HZ / 10) + static struct mt753x_pcs *pcs_to_mt753x_pcs(struct phylink_pcs *pcs) { return container_of(pcs, struct mt753x_pcs, pcs); @@ -906,10 +909,9 @@ static void mt7530_get_rmon_stats(struct dsa_switch *ds, int port, *ranges = mt7530_rmon_ranges; } -static void mt7530_get_stats64(struct dsa_switch *ds, int port, - struct rtnl_link_stats64 *storage) +static void mt7530_read_port_stats64(struct mt7530_priv *priv, int port, + struct rtnl_link_stats64 *storage) { - struct mt7530_priv *priv = ds->priv; uint64_t data; /* MIB counter doesn't provide a FramesTransmittedOK but instead @@ -951,6 +953,54 @@ static void mt7530_get_stats64(struct dsa_switch *ds, int port, &storage->rx_crc_errors); } +static void mt7530_stats_refresh(struct mt7530_priv *priv) +{ + struct rtnl_link_stats64 stats = {}; + struct dsa_port *dp; + int port; + + dsa_switch_for_each_user_port(dp, priv->ds) { + port = dp->index; + + mt7530_read_port_stats64(priv, port, &stats); + + spin_lock_bh(&priv->stats_lock); + priv->ports[port].stats = stats; + priv->stats_last = jiffies; + spin_unlock_bh(&priv->stats_lock); + } +} + +static void mt7530_stats_poll(struct work_struct *work) +{ + struct mt7530_priv *priv = container_of(work, struct mt7530_priv, + stats_work.work); + + mt7530_stats_refresh(priv); + schedule_delayed_work(&priv->stats_work, + MT7530_STATS_POLL_INTERVAL); +} + +static void mt7530_get_stats64(struct dsa_switch *ds, int port, + struct rtnl_link_stats64 *storage) +{ + struct mt7530_priv *priv = ds->priv; + bool refresh; + + if (priv->bus) { + spin_lock_bh(&priv->stats_lock); + *storage = priv->ports[port].stats; + refresh = time_after(jiffies, priv->stats_last + + MT7530_STATS_RATE_LIMIT); + spin_unlock_bh(&priv->stats_lock); + if (refresh) + mod_delayed_work(system_percpu_wq, + &priv->stats_work, 0); + } else { + mt7530_read_port_stats64(priv, port, storage); + } +} + 
static void mt7530_get_eth_ctrl_stats(struct dsa_switch *ds, int port, struct ethtool_eth_ctrl_stats *ctrl_stats) { @@ -3137,9 +3187,24 @@ mt753x_setup(struct dsa_switch *ds) if (ret && priv->irq_domain) mt7530_free_mdio_irq(priv); + if (!ret && priv->bus) { + mt7530_stats_refresh(priv); + schedule_delayed_work(&priv->stats_work, + MT7530_STATS_POLL_INTERVAL); + } + return ret; } +static void +mt753x_teardown(struct dsa_switch *ds) +{ + struct mt7530_priv *priv = ds->priv; + + if (priv->bus) + cancel_delayed_work_sync(&priv->stats_work); +} + static int mt753x_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e) { @@ -3257,6 +3322,7 @@ static int mt7988_setup(struct dsa_switch *ds) static const struct dsa_switch_ops mt7530_switch_ops = { .get_tag_protocol = mtk_get_tag_protocol, .setup = mt753x_setup, + .teardown = mt753x_teardown, .preferred_default_local_cpu_port = mt753x_preferred_default_local_cpu_port, .get_strings = mt7530_get_strings, .get_ethtool_stats = mt7530_get_ethtool_stats, @@ -3395,6 +3461,9 @@ mt7530_probe_common(struct mt7530_priv *priv) priv->ds->ops = &mt7530_switch_ops; priv->ds->phylink_mac_ops = &mt753x_phylink_mac_ops; mutex_init(&priv->reg_mutex); + spin_lock_init(&priv->stats_lock); + INIT_DELAYED_WORK(&priv->stats_work, mt7530_stats_poll); + dev_set_drvdata(dev, priv); return 0;
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 3e0090b..dd33b0d 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h
@@ -796,6 +796,7 @@ struct mt7530_fdb { * @pvid: The VLAN specified is to be considered a PVID at ingress. Any * untagged frames will be assigned to the related VLAN. * @sgmii_pcs: Pointer to PCS instance for SerDes ports + * @stats: Cached port statistics for MDIO-connected switches */ struct mt7530_port { bool enable; @@ -803,6 +804,7 @@ struct mt7530_port { u32 pm; u16 pvid; struct phylink_pcs *sgmii_pcs; + struct rtnl_link_stats64 stats; }; /* Port 5 mode definitions of the MT7530 switch */ @@ -875,6 +877,9 @@ struct mt753x_info { * @create_sgmii: Pointer to function creating SGMII PCS instance(s) * @active_cpu_ports: Holding the active CPU ports * @mdiodev: The pointer to the MDIO device structure + * @stats_lock: Protects cached per-port stats from concurrent access + * @stats_work: Delayed work for polling MIB counters on MDIO switches + * @stats_last: Jiffies timestamp of last MIB counter poll */ struct mt7530_priv { struct device *dev; @@ -900,6 +905,9 @@ struct mt7530_priv { int (*create_sgmii)(struct mt7530_priv *priv); u8 active_cpu_ports; struct mdio_device *mdiodev; + spinlock_t stats_lock; /* protects cached stats counters */ + struct delayed_work stats_work; + unsigned long stats_last; }; struct mt7530_hw_vlan_entry {
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index f8b3d53..d0c0c0e 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -2120,14 +2120,12 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, return NETDEV_TX_OK; error_unmap: - while (!list_empty(&tx_list)) { - e = list_first_entry(&tx_list, struct airoha_queue_entry, - list); + list_for_each_entry(e, &tx_list, list) { dma_unmap_single(dev->dev.parent, e->dma_addr, e->dma_len, DMA_TO_DEVICE); e->dma_addr = 0; - list_move_tail(&e->list, &q->tx_list); } + list_splice(&tx_list, &q->tx_list); spin_unlock_bh(&q->lock); error:
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 60b7e53..3d3b090 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -135,11 +135,11 @@ */ #define XGBE_TSTAMP_SSINC 20 #define XGBE_TSTAMP_SNSINC 0 -#define XGBE_PTP_ACT_CLK_FREQ 500000000 +#define XGBE_PTP_ACT_CLK_FREQ (NSEC_PER_SEC / XGBE_TSTAMP_SSINC) #define XGBE_V2_TSTAMP_SSINC 0xA #define XGBE_V2_TSTAMP_SNSINC 0 -#define XGBE_V2_PTP_ACT_CLK_FREQ 1000000000 +#define XGBE_V2_PTP_ACT_CLK_FREQ (NSEC_PER_SEC / XGBE_V2_TSTAMP_SSINC) /* Define maximum supported values */ #define XGBE_MAX_PPS_OUT 4
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8c55874..008c34c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3825,7 +3825,10 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp) if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { if (!bp->max_tpa_v2) return 0; - bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5); + bp->max_tpa = min_t(u16, bp->max_tpa_v2, MAX_TPA_P5); + /* Older P5 FW sets max_tpa_v2 low by mistake except NPAR */ + if (bp->max_tpa <= 32 && BNXT_CHIP_P5(bp) && !BNXT_NPAR(bp)) + bp->max_tpa = MAX_TPA_P5; } for (i = 0; i < bp->rx_nr_rings; i++) { @@ -17360,9 +17363,14 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) netdev_info(bp->dev, "PCI Slot Reset\n"); - if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && - test_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state)) - msleep(900); + if (test_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state)) { + /* After DPC, the chip should return CRS when the vendor ID + * config register is read until it is ready. On all chips, + * this is not happening reliably so add a 5-second delay as a + * workaround. + */ + msleep(5000); + } netdev_lock(netdev);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 53f336d..5d41dc1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -419,31 +419,13 @@ void bnxt_ptp_reapply_pps(struct bnxt *bp) } } -static int bnxt_get_target_cycles(struct bnxt_ptp_cfg *ptp, u64 target_ns, - u64 *cycles_delta) -{ - u64 cycles_now; - u64 nsec_now, nsec_delta; - int rc; - - rc = bnxt_refclk_read(ptp->bp, NULL, &cycles_now); - if (rc) - return rc; - - nsec_now = bnxt_timecounter_cyc2time(ptp, cycles_now); - - nsec_delta = target_ns - nsec_now; - *cycles_delta = div64_u64(nsec_delta << ptp->cc.shift, ptp->cc.mult); - return 0; -} - static int bnxt_ptp_perout_cfg(struct bnxt_ptp_cfg *ptp, struct ptp_clock_request *rq) { struct hwrm_func_ptp_cfg_input *req; struct bnxt *bp = ptp->bp; struct timespec64 ts; - u64 target_ns, delta; + u64 target_ns; u16 enables; int rc; @@ -451,10 +433,6 @@ static int bnxt_ptp_perout_cfg(struct bnxt_ptp_cfg *ptp, ts.tv_nsec = rq->perout.start.nsec; target_ns = timespec64_to_ns(&ts); - rc = bnxt_get_target_cycles(ptp, target_ns, &delta); - if (rc) - return rc; - rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_CFG); if (rc) return rc; @@ -468,7 +446,10 @@ static int bnxt_ptp_perout_cfg(struct bnxt_ptp_cfg *ptp, req->ptp_freq_adj_dll_phase = 0; req->ptp_freq_adj_ext_period = cpu_to_le32(NSEC_PER_SEC); req->ptp_freq_adj_ext_up = 0; - req->ptp_freq_adj_ext_phase_lower = cpu_to_le32(delta); + req->ptp_freq_adj_ext_phase_lower = + cpu_to_le32(lower_32_bits(target_ns)); + req->ptp_freq_adj_ext_phase_upper = + cpu_to_le32(upper_32_bits(target_ns)); return hwrm_req_send(bp, req); }
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 052bf69..5c75193 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -175,8 +175,14 @@ int bnxt_register_dev(struct bnxt_en_dev *edev, ulp->handle = handle; rcu_assign_pointer(ulp->ulp_ops, ulp_ops); - if (test_bit(BNXT_STATE_OPEN, &bp->state)) - bnxt_hwrm_vnic_cfg(bp, &bp->vnic_info[BNXT_VNIC_DEFAULT]); + if (test_bit(BNXT_STATE_OPEN, &bp->state)) { + rc = bnxt_hwrm_vnic_cfg(bp, &bp->vnic_info[BNXT_VNIC_DEFAULT]); + if (rc) { + netdev_err(dev, "Failed to configure dual VNIC mode\n"); + RCU_INIT_POINTER(ulp->ulp_ops, NULL); + goto exit; + } + } edev->ulp_tbl->msix_requested = bnxt_get_ulp_msix_num(bp);
diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 4824232..065cbbf 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c
@@ -1491,6 +1491,11 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) gpage = gmac_get_queue_page(geth, port, mapping + PAGE_SIZE); if (!gpage) { dev_err(geth->dev, "could not find mapping\n"); + if (skb) { + napi_free_frags(&port->napi); + port->stats.rx_dropped++; + skb = NULL; + } continue; } page = gpage->page;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index e663bb5..e691144 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -330,6 +330,7 @@ struct enetc_si { struct workqueue_struct *workqueue; struct work_struct rx_mode_task; struct dentry *debugfs_root; + struct enetc_msg_swbd msg; /* Only valid for VSI */ }; #define ENETC_SI_ALIGN 32
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index 6c4b374..df8e95c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
@@ -17,11 +17,36 @@ static void enetc_msg_vsi_write_msg(struct enetc_hw *hw, enetc_wr(hw, ENETC_VSIMSGSNDAR0, val); } +static void enetc_msg_dma_free(struct device *dev, struct enetc_msg_swbd *msg) +{ + if (msg->vaddr) { + dma_free_coherent(dev, msg->size, msg->vaddr, msg->dma); + msg->vaddr = NULL; + } +} + static int enetc_msg_vsi_send(struct enetc_si *si, struct enetc_msg_swbd *msg) { + struct device *dev = &si->pdev->dev; int timeout = 100; u32 vsimsgsr; + /* The VSI mailbox may be busy if last message was not yet processed + * by PSI. So need to check the mailbox status before sending. + */ + vsimsgsr = enetc_rd(&si->hw, ENETC_VSIMSGSR); + if (vsimsgsr & ENETC_VSIMSGSR_MB) { + /* It is safe to free the DMA buffer here, the caller does + * not access the DMA buffer if enetc_msg_vsi_send() fails. + */ + enetc_msg_dma_free(dev, msg); + dev_err(dev, "VSI mailbox is busy\n"); + return -EIO; + } + + /* Free the DMA buffer of the last message */ + enetc_msg_dma_free(dev, &si->msg); + si->msg = *msg; enetc_msg_vsi_write_msg(&si->hw, msg); do { @@ -32,12 +57,15 @@ static int enetc_msg_vsi_send(struct enetc_si *si, struct enetc_msg_swbd *msg) usleep_range(1000, 2000); } while (--timeout); - if (!timeout) + if (!timeout) { + dev_err(dev, "VSI mailbox timeout\n"); + return -ETIMEDOUT; + } /* check for message delivery error */ if (vsimsgsr & ENETC_VSIMSGSR_MS) { - dev_err(&si->pdev->dev, "VSI command execute error: %d\n", + dev_err(dev, "VSI command execute error: %d\n", ENETC_SIMSGSR_GET_MC(vsimsgsr)); return -EIO; } @@ -50,7 +78,6 @@ static int enetc_msg_vsi_set_primary_mac_addr(struct enetc_ndev_priv *priv, { struct enetc_msg_cmd_set_primary_mac *cmd; struct enetc_msg_swbd msg; - int err; msg.size = ALIGN(sizeof(struct enetc_msg_cmd_set_primary_mac), 64); msg.vaddr = dma_alloc_coherent(priv->dev, msg.size, &msg.dma, @@ -67,11 +94,7 @@ static int enetc_msg_vsi_set_primary_mac_addr(struct enetc_ndev_priv *priv, memcpy(&cmd->mac, saddr, sizeof(struct sockaddr)); /* send 
the command and wait */ - err = enetc_msg_vsi_send(priv->si, &msg); - - dma_free_coherent(priv->dev, msg.size, msg.vaddr, msg.dma); - - return err; + return enetc_msg_vsi_send(priv->si, &msg); } static int enetc_vf_set_mac_addr(struct net_device *ndev, void *addr) @@ -259,6 +282,7 @@ static void enetc_vf_remove(struct pci_dev *pdev) { struct enetc_si *si = pci_get_drvdata(pdev); struct enetc_ndev_priv *priv; + struct enetc_msg_swbd msg; priv = netdev_priv(si->ndev); unregister_netdev(si->ndev); @@ -270,7 +294,9 @@ static void enetc_vf_remove(struct pci_dev *pdev) free_netdev(si->ndev); + msg = si->msg; enetc_pci_remove(pdev); + enetc_msg_dma_free(&pdev->dev, &msg); } static const struct pci_device_id enetc_vf_id_table[] = {
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c index 3debf2f..6f13296 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c
@@ -249,34 +249,21 @@ DEFINE_SHOW_ATTRIBUTE(npc_defrag); int npc_cn20k_debugfs_init(struct rvu *rvu) { struct npc_priv_t *npc_priv = npc_priv_get(); - struct dentry *npc_dentry; - npc_dentry = debugfs_create_file("mcam_layout", 0444, rvu->rvu_dbg.npc, - npc_priv, &npc_mcam_layout_fops); + debugfs_create_file("mcam_layout", 0444, rvu->rvu_dbg.npc, + npc_priv, &npc_mcam_layout_fops); - if (!npc_dentry) - return -EFAULT; + debugfs_create_file("mcam_default", 0444, rvu->rvu_dbg.npc, + rvu, &npc_mcam_default_fops); - npc_dentry = debugfs_create_file("mcam_default", 0444, rvu->rvu_dbg.npc, - rvu, &npc_mcam_default_fops); + debugfs_create_file("vidx2idx", 0444, rvu->rvu_dbg.npc, + npc_priv, &npc_vidx2idx_map_fops); - if (!npc_dentry) - return -EFAULT; + debugfs_create_file("idx2vidx", 0444, rvu->rvu_dbg.npc, + npc_priv, &npc_idx2vidx_map_fops); - npc_dentry = debugfs_create_file("vidx2idx", 0444, rvu->rvu_dbg.npc, - npc_priv, &npc_vidx2idx_map_fops); - if (!npc_dentry) - return -EFAULT; - - npc_dentry = debugfs_create_file("idx2vidx", 0444, rvu->rvu_dbg.npc, - npc_priv, &npc_idx2vidx_map_fops); - if (!npc_dentry) - return -EFAULT; - - npc_dentry = debugfs_create_file("defrag", 0444, rvu->rvu_dbg.npc, - npc_priv, &npc_defrag_fops); - if (!npc_dentry) - return -EFAULT; + debugfs_create_file("defrag", 0444, rvu->rvu_dbg.npc, + npc_priv, &npc_defrag_fops); return 0; }
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index 7291fdb..6b3f453 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c
@@ -798,7 +798,7 @@ void npc_cn20k_load_mkex_profile(struct rvu *rvu, int blkaddr, iounmap(mkex_prfl_addr); } -void +int npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, int index, bool enable) { @@ -808,7 +808,12 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, u64 cfg, hw_prio; u8 kw_type; - npc_mcam_idx_2_key_type(rvu, index, &kw_type); + if (index < 0 || index >= mcam->total_entries) + return -EINVAL; + + if (npc_mcam_idx_2_key_type(rvu, index, &kw_type)) + return -EINVAL; + if (kw_type == NPC_MCAM_KEY_X2) { cfg = rvu_read64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, @@ -819,7 +824,7 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, rvu_write64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), cfg); - return; + return 0; } /* For NPC_CN20K_MCAM_KEY_X4 keys, both the banks @@ -836,10 +841,12 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), cfg); } + + return 0; } -void -npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int bank, int index) +static void +npc_clear_x2_entry(struct rvu *rvu, int blkaddr, int bank, int index) { rvu_write64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CAMX_INTF_EXT(index, bank, 1), @@ -873,6 +880,33 @@ npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int bank, int index) NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(index, bank), 0); } +int +npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int mcam_idx) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + int bank = npc_get_bank(mcam, mcam_idx); + u8 kw_type; + int index; + + if (npc_mcam_idx_2_key_type(rvu, mcam_idx, &kw_type)) + return -EINVAL; + + index = mcam_idx & (mcam->banksize - 1); + + if (kw_type == NPC_MCAM_KEY_X2) { + npc_clear_x2_entry(rvu, blkaddr, bank, index); + return 0; + } + + /* For NPC_MCAM_KEY_X4 keys, both the banks + * need to be programmed with the same value. 
+ */ + for (bank = 0; bank < mcam->banks_per_entry; bank++) + npc_clear_x2_entry(rvu, blkaddr, bank, index); + + return 0; +} + static void npc_cn20k_get_keyword(struct cn20k_mcam_entry *entry, int idx, u64 *cam0, u64 *cam1) { @@ -1014,48 +1048,27 @@ static void npc_cn20k_config_kw_x4(struct rvu *rvu, struct npc_mcam *mcam, kw, req_kw_type); } -static void -npc_cn20k_set_mcam_bank_cfg(struct rvu *rvu, int blkaddr, int mcam_idx, - int bank, u8 kw_type, bool enable, u8 hw_prio) -{ - struct npc_mcam *mcam = &rvu->hw->mcam; - u64 bank_cfg; - - bank_cfg = (u64)hw_prio << 24; - if (enable) - bank_cfg |= 0x1; - - if (kw_type == NPC_MCAM_KEY_X2) { - rvu_write64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), - bank_cfg); - return; - } - - /* For NPC_MCAM_KEY_X4 keys, both the banks - * need to be programmed with the same value. - */ - for (bank = 0; bank < mcam->banks_per_entry; bank++) { - rvu_write64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), - bank_cfg); - } -} - -void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, - u8 intf, struct cn20k_mcam_entry *entry, - bool enable, u8 hw_prio, u8 req_kw_type) +int npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, + u8 intf, struct cn20k_mcam_entry *entry, + bool enable, u8 hw_prio, u8 req_kw_type) { struct npc_mcam *mcam = &rvu->hw->mcam; int mcam_idx = index % mcam->banksize; int bank = index / mcam->banksize; + u64 bank_cfg = (u64)hw_prio << 24; int kw = 0; u8 kw_type; - /* Disable before mcam entry update */ - npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, false); + if (index < 0 || index >= mcam->total_entries) + return -EINVAL; - npc_mcam_idx_2_key_type(rvu, index, &kw_type); + if (npc_mcam_idx_2_key_type(rvu, index, &kw_type)) + return -EINVAL; + + /* Disable before mcam entry update */ + if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, false)) + return -EINVAL; + /* CAM1 takes the comparison value and * CAM0 specifies match for a bit 
in key being '0' or '1' or 'dontcare'. * CAM1<n> = 0 & CAM0<n> = 1 => match if key<n> = 0 @@ -1064,7 +1077,7 @@ void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, */ if (kw_type == NPC_MCAM_KEY_X2) { /* Clear mcam entry to avoid writes being suppressed by NPC */ - npc_cn20k_clear_mcam_entry(rvu, blkaddr, bank, mcam_idx); + npc_clear_x2_entry(rvu, blkaddr, bank, mcam_idx); npc_cn20k_config_kw_x2(rvu, mcam, blkaddr, mcam_idx, intf, entry, bank, kw_type, kw, req_kw_type); @@ -1085,44 +1098,55 @@ void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, bank, 1), entry->vtag_action); - goto set_cfg; + + /* Set HW priority */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), + bank_cfg); + + } else { + /* Clear mcam entry to avoid writes being suppressed by NPC */ + npc_clear_x2_entry(rvu, blkaddr, 0, mcam_idx); + npc_clear_x2_entry(rvu, blkaddr, 1, mcam_idx); + + npc_cn20k_config_kw_x4(rvu, mcam, blkaddr, + mcam_idx, intf, entry, + kw_type, req_kw_type); + for (bank = 0; bank < mcam->banks_per_entry; bank++) { + /* Set 'action' */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, + bank, 0), + entry->action); + + /* Set TAG 'action' */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, + bank, 1), + entry->vtag_action); + + /* Set 'action2' for inline receive */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, + bank, 2), + entry->action2); + + /* Set HW priority */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), + bank_cfg); + } } - /* Clear mcam entry to avoid writes being suppressed by NPC */ - npc_cn20k_clear_mcam_entry(rvu, blkaddr, 0, mcam_idx); - npc_cn20k_clear_mcam_entry(rvu, blkaddr, 1, mcam_idx); - - npc_cn20k_config_kw_x4(rvu, mcam, blkaddr, - mcam_idx, intf, entry, - kw_type, req_kw_type); - for (bank = 0; bank < 
mcam->banks_per_entry; bank++) { - /* Set 'action' */ - rvu_write64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, - bank, 0), - entry->action); - - /* Set TAG 'action' */ - rvu_write64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, - bank, 1), - entry->vtag_action); - - /* Set 'action2' for inline receive */ - rvu_write64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, - bank, 2), - entry->action2); - } - -set_cfg: /* TODO: */ /* PF installing VF rule */ - npc_cn20k_set_mcam_bank_cfg(rvu, blkaddr, mcam_idx, bank, - kw_type, enable, hw_prio); + if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, enable)) + return -EINVAL; + + return 0; } -void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest) +int npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest) { struct npc_mcam *mcam = &rvu->hw->mcam; u64 cfg, sreg, dreg, soff, doff; @@ -1130,12 +1154,20 @@ void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest) int bank, i, sb, db; int dbank, sbank; + if (src >= mcam->total_entries || dest >= mcam->total_entries) + return -EINVAL; + dbank = npc_get_bank(mcam, dest); sbank = npc_get_bank(mcam, src); - npc_mcam_idx_2_key_type(rvu, src, &src_kwtype); - npc_mcam_idx_2_key_type(rvu, dest, &dest_kwtype); + + if (npc_mcam_idx_2_key_type(rvu, src, &src_kwtype)) + return -EINVAL; + + if (npc_mcam_idx_2_key_type(rvu, dest, &dest_kwtype)) + return -EINVAL; + if (src_kwtype != dest_kwtype) - return; + return -EINVAL; src &= (mcam->banksize - 1); dest &= (mcam->banksize - 1); @@ -1170,6 +1202,8 @@ void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest) if (src_kwtype == NPC_MCAM_KEY_X2) break; } + + return 0; } static void npc_cn20k_fill_entryword(struct cn20k_mcam_entry *entry, int idx, @@ -1179,21 +1213,37 @@ static void npc_cn20k_fill_entryword(struct cn20k_mcam_entry *entry, int idx, entry->kw_mask[idx] = cam1 ^ cam0; } -void 
npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, - struct cn20k_mcam_entry *entry, - u8 *intf, u8 *ena, u8 *hw_prio) +int npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, + struct cn20k_mcam_entry *entry, + u8 *intf, u8 *ena, u8 *hw_prio) { struct npc_mcam *mcam = &rvu->hw->mcam; u64 cam0, cam1, bank_cfg, cfg; int kw = 0, bank; u8 kw_type; - npc_mcam_idx_2_key_type(rvu, index, &kw_type); + if (index >= mcam->total_entries) + return -EINVAL; + + if (npc_mcam_idx_2_key_type(rvu, index, &kw_type)) + return -EINVAL; bank = npc_get_bank(mcam, index); index &= (mcam->banksize - 1); cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, bank, 0)); + entry->action = cfg; + + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, bank, 1)); + entry->vtag_action = cfg; + + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, bank, 2)); + entry->action2 = cfg; + + cfg = rvu_read64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CAMX_INTF_EXT(index, bank, 1)) & 3; *intf = cfg; @@ -1242,7 +1292,7 @@ void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, bank, 0)); npc_cn20k_fill_entryword(entry, kw + 3, cam0, cam1); - goto read_action; + return 0; } for (bank = 0; bank < mcam->banks_per_entry; bank++, kw = kw + 4) { @@ -1287,17 +1337,7 @@ void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, npc_cn20k_fill_entryword(entry, kw + 3, cam0, cam1); } -read_action: - /* 'action' is set to same value for both bank '0' and '1'. - * Hence, reading bank '0' should be enough. 
- */ - cfg = rvu_read64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, 0, 0)); - entry->action = cfg; - - cfg = rvu_read64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, 0, 1)); - entry->vtag_action = cfg; + return 0; } int rvu_mbox_handler_npc_cn20k_mcam_write_entry(struct rvu *rvu, @@ -1335,11 +1375,10 @@ int rvu_mbox_handler_npc_cn20k_mcam_write_entry(struct rvu *rvu, if (is_pffunc_af(req->hdr.pcifunc)) nix_intf = req->intf; - npc_cn20k_config_mcam_entry(rvu, blkaddr, req->entry, nix_intf, - &req->entry_data, req->enable_entry, - req->hw_prio, req->req_kw_type); + rc = npc_cn20k_config_mcam_entry(rvu, blkaddr, req->entry, nix_intf, + &req->entry_data, req->enable_entry, + req->hw_prio, req->req_kw_type); - rc = 0; exit: mutex_unlock(&mcam->lock); return rc; @@ -1361,11 +1400,13 @@ int rvu_mbox_handler_npc_cn20k_mcam_read_entry(struct rvu *rvu, mutex_lock(&mcam->lock); rc = npc_mcam_verify_entry(mcam, pcifunc, req->entry); - if (!rc) - npc_cn20k_read_mcam_entry(rvu, blkaddr, req->entry, - &rsp->entry_data, &rsp->intf, - &rsp->enable, &rsp->hw_prio); + if (rc) + goto fail; + rc = npc_cn20k_read_mcam_entry(rvu, blkaddr, req->entry, + &rsp->entry_data, &rsp->intf, + &rsp->enable, &rsp->hw_prio); +fail: mutex_unlock(&mcam->lock); return rc; } @@ -1375,11 +1416,13 @@ int rvu_mbox_handler_npc_cn20k_mcam_alloc_and_write_entry(struct rvu *rvu, struct npc_mcam_alloc_and_write_entry_rsp *rsp) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); + struct npc_mcam_free_entry_req free_req = { 0 }; struct npc_mcam_alloc_entry_req entry_req; struct npc_mcam_alloc_entry_rsp entry_rsp; struct npc_mcam *mcam = &rvu->hw->mcam; u16 entry = NPC_MCAM_ENTRY_INVALID; - int blkaddr, rc; + struct msg_rsp free_rsp; + int blkaddr, rc, err; u8 nix_intf; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); @@ -1415,12 +1458,23 @@ int rvu_mbox_handler_npc_cn20k_mcam_alloc_and_write_entry(struct rvu *rvu, else nix_intf = pfvf->nix_rx_intf; - 
npc_cn20k_config_mcam_entry(rvu, blkaddr, entry, nix_intf, - &req->entry_data, req->enable_entry, - req->hw_prio, req->req_kw_type); + rc = npc_cn20k_config_mcam_entry(rvu, blkaddr, entry, nix_intf, + &req->entry_data, req->enable_entry, + req->hw_prio, req->req_kw_type); mutex_unlock(&mcam->lock); + if (rc) { + free_req.hdr.pcifunc = req->hdr.pcifunc; + free_req.entry = entry_rsp.entry; + err = rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &free_rsp); + if (err) + dev_err(rvu->dev, + "%s: Error to free mcam idx %u\n", + __func__, entry_rsp.entry); + return rc; + } + rsp->entry = entry_rsp.entry; return 0; } @@ -1480,9 +1534,9 @@ int rvu_mbox_handler_npc_cn20k_read_base_steer_rule(struct rvu *rvu, read_entry: /* Read the mcam entry */ - npc_cn20k_read_mcam_entry(rvu, blkaddr, index, - &rsp->entry, &intf, - &enable, &hw_prio); + rc = npc_cn20k_read_mcam_entry(rvu, blkaddr, index, + &rsp->entry, &intf, + &enable, &hw_prio); mutex_unlock(&mcam->lock); out: return rc; @@ -2305,6 +2359,7 @@ static int __npc_subbank_alloc(struct rvu *rvu, struct npc_subbank *sb, __npc_subbank_mark_free(rvu, sb); err1: kfree(save); + *alloc_cnt = 0; return rc; } @@ -3482,7 +3537,7 @@ static int npc_defrag_alloc_free_slots(struct rvu *rvu, { int alloc_cnt1, alloc_cnt2; struct npc_subbank *sb; - int rc, sb_off, i; + int rc, sb_off, i, err; bool deleted; sb = &npc_priv.sb[f->idx]; @@ -3496,6 +3551,7 @@ static int npc_defrag_alloc_free_slots(struct rvu *rvu, NPC_MCAM_LOWER_PRIO, false, cnt, save, cnt, true, &alloc_cnt1); + if (alloc_cnt1 < cnt) { rc = __npc_subbank_alloc(rvu, sb, NPC_MCAM_KEY_X2, sb->b1b, @@ -3511,15 +3567,17 @@ static int npc_defrag_alloc_free_slots(struct rvu *rvu, dev_err(rvu->dev, "%s: Failed to alloc cnt=%u alloc_cnt1=%u alloc_cnt2=%u\n", __func__, cnt, alloc_cnt1, alloc_cnt2); + rc = -ENOSPC; goto fail_free_alloc; } + return 0; fail_free_alloc: for (i = 0; i < alloc_cnt1 + alloc_cnt2; i++) { - rc = npc_mcam_idx_2_subbank_idx(rvu, save[i], - &sb, &sb_off); - if 
(rc) { + err = npc_mcam_idx_2_subbank_idx(rvu, save[i], + &sb, &sb_off); + if (err) { dev_err(rvu->dev, "%s: Error to find subbank for mcam idx=%u\n", __func__, save[i]); @@ -3565,9 +3623,10 @@ int npc_defrag_move_vdx_to_free(struct rvu *rvu, struct npc_defrag_node *v, int cnt, u16 *save) { + u16 new_midx, old_midx, vidx, target_pf; struct npc_mcam *mcam = &rvu->hw->mcam; + struct rvu_npc_mcam_rule *rule, *tmp; int i, vidx_cnt, rc, sb_off; - u16 new_midx, old_midx, vidx; struct npc_subbank *sb; bool deleted; u16 pcifunc; @@ -3607,9 +3666,30 @@ int npc_defrag_move_vdx_to_free(struct rvu *rvu, NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(midx, bank)); - npc_cn20k_enable_mcam_entry(rvu, blkaddr, old_midx, false); - npc_cn20k_copy_mcam_entry(rvu, blkaddr, old_midx, new_midx); - npc_cn20k_enable_mcam_entry(rvu, blkaddr, new_midx, true); + /* If bug happened during copy/enable mcam, then there is a bug in allocation + * algorithm itself. There is no point in rewinding and returning, as it + * will face further issue. 
Return error after printing error + */ + if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, old_midx, false)) { + dev_err(rvu->dev, + "%s: Error happened while disabling old_mid=%u\n", + __func__, old_midx); + return -EFAULT; + } + + if (npc_cn20k_copy_mcam_entry(rvu, blkaddr, old_midx, new_midx)) { + dev_err(rvu->dev, + "%s: Error happened while copying old_midx=%u new_midx=%u\n", + __func__, old_midx, new_midx); + return -EFAULT; + } + + if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, new_midx, true)) { + dev_err(rvu->dev, + "%s: Error happened while enabling new_mid=%u\n", + __func__, new_midx); + return -EFAULT; + } midx = new_midx % mcam->banksize; bank = new_midx / mcam->banksize; @@ -3665,8 +3745,21 @@ int npc_defrag_move_vdx_to_free(struct rvu *rvu, mcam->entry2pfvf_map[new_midx] = pcifunc; /* Counter is not preserved */ mcam->entry2cntr_map[new_midx] = new_midx; + target_pf = mcam->entry2target_pffunc[old_midx]; + mcam->entry2target_pffunc[new_midx] = target_pf; + mcam->entry2target_pffunc[old_midx] = NPC_MCAM_INVALID_MAP; + npc_mcam_set_bit(mcam, new_midx); + /* Note: list order is not functionally required for mcam_rules */ + list_for_each_entry_safe(rule, tmp, &mcam->mcam_rules, list) { + if (rule->entry != old_midx) + continue; + + rule->entry = new_midx; + break; + } + /* Mark as invalid */ v->vidx[vidx_cnt - i - 1] = -1; save[cnt - i - 1] = -1; @@ -3935,6 +4028,13 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, void *val; int i, j; + for (i = 0; i < ARRAY_SIZE(ptr); i++) { + if (!ptr[i]) + continue; + + *ptr[i] = USHRT_MAX; + } + if (!npc_priv.init_done) return 0; @@ -3950,7 +4050,6 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, npc_dft_rule_name[NPC_DFT_RULE_PROMISC_ID], pcifunc); - *ptr[0] = USHRT_MAX; return -ESRCH; } @@ -3970,7 +4069,6 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, npc_dft_rule_name[NPC_DFT_RULE_UCAST_ID], pcifunc); - *ptr[3] = USHRT_MAX; return 
-ESRCH; } @@ -3990,7 +4088,6 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, __func__, npc_dft_rule_name[i], pcifunc); - *ptr[j] = USHRT_MAX; continue; } @@ -4085,7 +4182,7 @@ int rvu_mbox_handler_npc_get_dft_rl_idxs(struct rvu *rvu, struct msg_req *req, return 0; } -static bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc) +bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc) { return is_pf_cgxmapped(rvu, rvu_get_pf(rvu->pdev, pcifunc)) || is_lbk_vf(rvu, pcifunc); @@ -4093,11 +4190,11 @@ static bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc) void npc_cn20k_dft_rules_free(struct rvu *rvu, u16 pcifunc) { - struct npc_mcam_free_entry_req free_req = { 0 }; + struct npc_mcam *mcam = &rvu->hw->mcam; + u16 ptr[4] = {[0 ... 3] = USHRT_MAX}; + struct rvu_npc_mcam_rule *rule, *tmp; unsigned long index; - struct msg_rsp rsp; - u16 ptr[4]; - int rc, i; + int blkaddr, rc, i; void *map; if (!npc_priv.init_done) @@ -4155,14 +4252,43 @@ void npc_cn20k_dft_rules_free(struct rvu *rvu, u16 pcifunc) } free_rules: + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) + return; + for (int i = 0; i < 4; i++) { + if (ptr[i] == USHRT_MAX) + continue; - free_req.hdr.pcifunc = pcifunc; - free_req.all = 1; - rc = rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp); - if (rc) - dev_err(rvu->dev, - "%s: Error deleting default entries (pcifunc=%#x\n", - __func__, pcifunc); + mutex_lock(&mcam->lock); + npc_mcam_clear_bit(mcam, ptr[i]); + mcam->entry2pfvf_map[ptr[i]] = NPC_MCAM_INVALID_MAP; + npc_cn20k_enable_mcam_entry(rvu, blkaddr, ptr[i], false); + mcam->entry2target_pffunc[ptr[i]] = 0x0; + mutex_unlock(&mcam->lock); + + rc = npc_cn20k_idx_free(rvu, &ptr[i], 1); + if (rc) { + /* Non recoverable error. Let us WARN and return. 
Keep system alive to + * enable debugging + */ + WARN(1, "%s Error deleting default entries (pcifunc=%#x) mcam_idx=%u\n", + __func__, pcifunc, ptr[i]); + return; + } + } + + mutex_lock(&mcam->lock); + list_for_each_entry_safe(rule, tmp, &mcam->mcam_rules, list) { + for (int i = 0; i < 4; i++) { + if (ptr[i] != rule->entry) + continue; + + list_del(&rule->list); + kfree(rule); + break; + } + } + mutex_unlock(&mcam->lock); } int npc_cn20k_dft_rules_alloc(struct rvu *rvu, u16 pcifunc)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h index 815d0b2..3d5eb95 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h
@@ -320,21 +320,21 @@ void npc_cn20k_dft_rules_free(struct rvu *rvu, u16 pcifunc); int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, u16 *mcast, u16 *promisc, u16 *ucast); -void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, - u8 intf, struct cn20k_mcam_entry *entry, - bool enable, u8 hw_prio, u8 req_kw_type); -void npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, - int index, bool enable); -void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, - u16 src, u16 dest); -void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, - struct cn20k_mcam_entry *entry, u8 *intf, - u8 *ena, u8 *hw_prio); -void npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, - int bank, int index); +int npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, + u8 intf, struct cn20k_mcam_entry *entry, + bool enable, u8 hw_prio, u8 req_kw_type); +int npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, + int index, bool enable); +int npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, + u16 src, u16 dest); +int npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, + struct cn20k_mcam_entry *entry, u8 *intf, + u8 *ena, u8 *hw_prio); +int npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int index); int npc_mcam_idx_2_key_type(struct rvu *rvu, u16 mcam_idx, u8 *key_type); u16 npc_cn20k_vidx2idx(u16 index); u16 npc_cn20k_idx2vidx(u16 idx); int npc_cn20k_defrag(struct rvu *rvu); +bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc); #endif /* NPC_CN20K_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index ef5b081..f977734 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -3577,6 +3577,9 @@ static int nix_update_mce_rule(struct rvu *rvu, u16 pcifunc, mcam_index = npc_get_nixlf_mcam_index(mcam, pcifunc & ~RVU_PFVF_FUNC_MASK, nixlf, type); + if (mcam_index < 0) + return -EINVAL; + err = nix_update_mce_list(rvu, pcifunc, mce_list, mce_idx, mcam_index, add); return err;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index c2ca5ed..3c814d1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -163,14 +163,35 @@ int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, if (rc) return -EFAULT; + if (is_lbk_vf(rvu, pcifunc)) { + if (promisc == USHRT_MAX) + return -EINVAL; + return promisc; + } + + if (is_cgx_vf(rvu, pcifunc)) { + if (ucast == USHRT_MAX) + return -EINVAL; + + return ucast; + } + switch (type) { case NIXLF_BCAST_ENTRY: + if (bcast == USHRT_MAX) + return -EINVAL; return bcast; case NIXLF_ALLMULTI_ENTRY: + if (mcast == USHRT_MAX) + return -EINVAL; return mcast; case NIXLF_PROMISC_ENTRY: + if (promisc == USHRT_MAX) + return -EINVAL; return promisc; case NIXLF_UCAST_ENTRY: + if (ucast == USHRT_MAX) + return -EINVAL; return ucast; default: return -EINVAL; @@ -238,10 +259,10 @@ void npc_enable_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, int actbank = bank; if (is_cn20k(rvu->pdev)) { - if (index < 0 || index >= mcam->banksize * mcam->banks) - return; - - return npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, enable); + if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, enable)) + dev_err(rvu->dev, "Error to %s mcam %u entry\n", + enable ? 
"enable" : "disable", index); + return; } index &= (mcam->banksize - 1); @@ -258,6 +279,13 @@ static void npc_clear_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, int bank = npc_get_bank(mcam, index); int actbank = bank; + if (is_cn20k(rvu->pdev)) { + if (npc_cn20k_clear_mcam_entry(rvu, blkaddr, index)) + dev_err(rvu->dev, "%s Failed to clear mcam %u\n", + __func__, index); + return; + } + index &= (mcam->banksize - 1); for (; bank < (actbank + mcam->banks_per_entry); bank++) { rvu_write64(rvu, blkaddr, @@ -424,6 +452,15 @@ static u64 npc_get_default_entry_action(struct rvu *rvu, struct npc_mcam *mcam, index = npc_get_nixlf_mcam_index(mcam, pf_func, nixlf, NIXLF_UCAST_ENTRY); + + if (index < 0) { + dev_err(rvu->dev, + "%s: failed to get ucast entry pcifunc:0x%x\n", + __func__, pf_func); + /* Action 0 is drop */ + return 0; + } + bank = npc_get_bank(mcam, index); index &= (mcam->banksize - 1); @@ -589,8 +626,8 @@ void npc_read_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, NPC_AF_MCAMEX_BANKX_CFG(src, sbank)) & 1; } -static void npc_copy_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, - int blkaddr, u16 src, u16 dest) +static int npc_copy_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, + int blkaddr, u16 src, u16 dest) { int dbank = npc_get_bank(mcam, dest); int sbank = npc_get_bank(mcam, src); @@ -630,6 +667,7 @@ static void npc_copy_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, NPC_AF_MCAMEX_BANKX_CFG(src, sbank)); rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_CFG(dest, dbank), cfg); + return 0; } u64 npc_get_mcam_action(struct rvu *rvu, struct npc_mcam *mcam, @@ -689,6 +727,12 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } /* Don't change the action if entry is already enabled * Otherwise RSS action may get overwritten. 
@@ -744,16 +788,38 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_PROMISC_ENTRY); + /* In cn20k, default indexes are installed only for CGX mapped + * and lbk interfaces + */ if (is_cgx_vf(rvu, pcifunc)) index = npc_get_nixlf_mcam_index(mcam, pcifunc & ~RVU_PFVF_FUNC_MASK, nixlf, NIXLF_PROMISC_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get promisc entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } + /* If the corresponding PF's ucast action is RSS, * use the same action for promisc also + * Please note that for lbk(s) "index" and "ucast_idx" + * will be same. */ - ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, - nixlf, NIXLF_UCAST_ENTRY); + if (is_lbk_vf(rvu, pcifunc)) + ucast_idx = index; + else + ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, + nixlf, NIXLF_UCAST_ENTRY); + if (ucast_idx < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast/promisc entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } + if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx)) *(u64 *)&action = npc_get_mcam_action(rvu, mcam, blkaddr, ucast_idx); @@ -827,6 +893,14 @@ void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_PROMISC_ENTRY); + + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get promisc entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } + npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); } @@ -867,6 +941,12 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_BCAST_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get bcast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } if (!hw->cap.nix_rx_multicast) { /* Early silicon doesn't support pkt replication, @@ -931,12 +1011,25 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int 
nixlf, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_ALLMULTI_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get mcast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } /* If the corresponding PF's ucast action is RSS, * use the same action for multicast entry also */ ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); + if (ucast_idx < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } + if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx)) *(u64 *)&action = npc_get_mcam_action(rvu, mcam, blkaddr, ucast_idx); @@ -1001,6 +1094,13 @@ void rvu_npc_enable_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_ALLMULTI_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get mcast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } + npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); } @@ -1113,8 +1213,12 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, index = mcam_index; } - if (index >= mcam->total_entries) + if (index < 0 || index >= mcam->total_entries) { + dev_err(rvu->dev, + "%s: Invalid mcam index, pcifunc=%#x\n", + __func__, pcifunc); return; + } bank = npc_get_bank(mcam, index); index &= (mcam->banksize - 1); @@ -1158,16 +1262,18 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, /* If PF's promiscuous entry is enabled, * Set RSS action for that entry as well */ - npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, - blkaddr, alg_idx); + if (index >= 0) + npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, + blkaddr, alg_idx); index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_ALLMULTI_ENTRY); /* If PF's allmulti entry is enabled, * Set RSS action for that entry as well */ - npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, - blkaddr, alg_idx); 
+ if (index >= 0) + npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, + blkaddr, alg_idx); } } @@ -1180,12 +1286,22 @@ void npc_enadis_default_mce_entry(struct rvu *rvu, u16 pcifunc, int index, blkaddr, mce_idx; struct rvu_pfvf *pfvf; + /* multicast pkt replication is not enabled for AF's VFs & SDP links */ + if (is_lbk_vf(rvu, pcifunc) || is_sdp_pfvf(rvu, pcifunc)) + return; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return; index = npc_get_nixlf_mcam_index(mcam, pcifunc & ~RVU_PFVF_FUNC_MASK, nixlf, type); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get entry for pcifunc=%#x, type=%u\n", + __func__, pcifunc, type); + return; + } /* disable MCAM entry when packet replication is not supported by hw */ if (!hw->cap.nix_rx_multicast && !is_vf(pcifunc)) { @@ -1214,6 +1330,10 @@ static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc, struct npc_mcam *mcam = &rvu->hw->mcam; int index, blkaddr; + /* only CGX or LBK interfaces have default entries */ + if (is_cn20k(rvu->pdev) && !npc_is_cgx_or_lbk(rvu, pcifunc)) + return; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return; @@ -1223,6 +1343,12 @@ static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc, pfvf->nix_rx_intf)) { index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); } @@ -2504,33 +2630,58 @@ void npc_mcam_clear_bit(struct npc_mcam *mcam, u16 index) static void npc_mcam_free_all_entries(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, u16 pcifunc) { + u16 dft_idxs[NPC_DFT_RULE_MAX_ID] = {[0 ... 
NPC_DFT_RULE_MAX_ID - 1] = USHRT_MAX}; + bool cn20k_dft_rl; u16 index, cntr; int rc; + npc_cn20k_dft_rules_idx_get(rvu, pcifunc, + &dft_idxs[NPC_DFT_RULE_BCAST_ID], + &dft_idxs[NPC_DFT_RULE_MCAST_ID], + &dft_idxs[NPC_DFT_RULE_PROMISC_ID], + &dft_idxs[NPC_DFT_RULE_UCAST_ID]); + /* Scan all MCAM entries and free the ones mapped to 'pcifunc' */ for (index = 0; index < mcam->bmap_entries; index++) { - if (mcam->entry2pfvf_map[index] == pcifunc) { + if (mcam->entry2pfvf_map[index] != pcifunc) + continue; + + cn20k_dft_rl = false; + + if (is_cn20k(rvu->pdev)) { + if (dft_idxs[NPC_DFT_RULE_BCAST_ID] == index || + dft_idxs[NPC_DFT_RULE_MCAST_ID] == index || + dft_idxs[NPC_DFT_RULE_PROMISC_ID] == index || + dft_idxs[NPC_DFT_RULE_UCAST_ID] == index) { + cn20k_dft_rl = true; + } + } + + /* Disable the entry */ + npc_enable_mcam_entry(rvu, mcam, blkaddr, index, false); + + if (!cn20k_dft_rl) { mcam->entry2pfvf_map[index] = NPC_MCAM_INVALID_MAP; /* Free the entry in bitmap */ npc_mcam_clear_bit(mcam, index); - /* Disable the entry */ - npc_enable_mcam_entry(rvu, mcam, blkaddr, index, false); - - /* Update entry2counter mapping */ - cntr = mcam->entry2cntr_map[index]; - if (cntr != NPC_MCAM_INVALID_MAP) - npc_unmap_mcam_entry_and_cntr(rvu, mcam, - blkaddr, index, - cntr); mcam->entry2target_pffunc[index] = 0x0; - if (is_cn20k(rvu->pdev)) { - rc = npc_cn20k_idx_free(rvu, &index, 1); - if (rc) - dev_err(rvu->dev, - "Failed to free mcam idx=%u pcifunc=%#x\n", - index, pcifunc); - } } + + /* Update entry2counter mapping */ + cntr = mcam->entry2cntr_map[index]; + if (cntr != NPC_MCAM_INVALID_MAP) + npc_unmap_mcam_entry_and_cntr(rvu, mcam, + blkaddr, index, + cntr); + + if (!is_cn20k(rvu->pdev) || cn20k_dft_rl) + continue; + + rc = npc_cn20k_idx_free(rvu, &index, 1); + if (rc) + dev_err(rvu->dev, + "Failed to free mcam idx=%u pcifunc=%#x\n", + index, pcifunc); } } @@ -3266,7 +3417,10 @@ int rvu_mbox_handler_npc_mcam_shift_entry(struct rvu *rvu, npc_enable_mcam_entry(rvu, mcam, 
blkaddr, new_entry, false); /* Copy rule from old entry to new entry */ - npc_copy_mcam_entry(rvu, mcam, blkaddr, old_entry, new_entry); + if (npc_copy_mcam_entry(rvu, mcam, blkaddr, old_entry, new_entry)) { + rc = NPC_MCAM_INVALID_REQ; + break; + } /* Copy counter mapping, if any */ cntr = mcam->entry2cntr_map[old_entry]; @@ -3284,7 +3438,8 @@ int rvu_mbox_handler_npc_mcam_shift_entry(struct rvu *rvu, /* If shift has failed then report the failed index */ if (index != req->shift_count) { - rc = NPC_MCAM_PERM_DENIED; + if (!rc) + rc = NPC_MCAM_PERM_DENIED; rsp->failed_entry_idx = index; } @@ -3851,6 +4006,12 @@ int rvu_mbox_handler_npc_read_base_steer_rule(struct rvu *rvu, /* Read the default ucast entry if there is no pkt steering rule */ index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); + if (index < 0) { + mutex_unlock(&mcam->lock); + rc = NIX_AF_ERR_AF_LF_INVALID; + goto out; + } + read_entry: /* Read the mcam entry */ npc_read_mcam_entry(rvu, mcam, blkaddr, index, &rsp->entry, &intf, @@ -3924,6 +4085,12 @@ void rvu_npc_clear_ucast_entry(struct rvu *rvu, int pcifunc, int nixlf) ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); + if (ucast_idx < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } npc_enable_mcam_entry(rvu, mcam, blkaddr, ucast_idx, false);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index b45798d..6ae9cdc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -1444,7 +1444,7 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, struct msg_rsp write_rsp; struct mcam_entry *entry; bool new = false; - u16 entry_index; + int entry_index; int err; installed_features = req->features; @@ -1477,6 +1477,14 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, if (req->default_rule) { entry_index = npc_get_nixlf_mcam_index(mcam, target, nixlf, NIXLF_UCAST_ENTRY); + + if (entry_index < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast entry for target=%#x\n", + __func__, target); + return -EINVAL; + } + enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, entry_index); } @@ -1980,13 +1988,15 @@ static int npc_update_dmac_value(struct rvu *rvu, int npcblkaddr, ether_addr_copy(rule->packet.dmac, pfvf->mac_addr); - if (is_cn20k(rvu->pdev)) - npc_cn20k_read_mcam_entry(rvu, npcblkaddr, rule->entry, - cn20k_entry, &intf, - &enable, &hw_prio); - else + if (is_cn20k(rvu->pdev)) { + if (npc_cn20k_read_mcam_entry(rvu, npcblkaddr, rule->entry, + cn20k_entry, &intf, + &enable, &hw_prio)) + return -EINVAL; + } else { npc_read_mcam_entry(rvu, mcam, npcblkaddr, rule->entry, entry, &intf, &enable); + } npc_update_entry(rvu, NPC_DMAC, &mdata, ether_addr_to_u64(pfvf->mac_addr), 0, @@ -2038,8 +2048,12 @@ void npc_mcam_enable_flows(struct rvu *rvu, u16 target) continue; } - if (rule->vfvlan_cfg) - npc_update_dmac_value(rvu, blkaddr, rule, pfvf); + if (rule->vfvlan_cfg) { + if (npc_update_dmac_value(rvu, blkaddr, rule, pfvf)) + dev_err(rvu->dev, + "Update dmac failed for %u, target=%#x\n", + rule->entry, target); + } if (rule->rx_action.op == NIX_RX_ACTION_DEFAULT) { if (!def_ucast_rule)
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index dd890f5..8711689 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -44,13 +44,14 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_srq *srq; + unsigned long flags; - rcu_read_lock(); + spin_lock_irqsave(&srq_table->lock, flags); srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); - rcu_read_unlock(); - if (srq) - refcount_inc(&srq->refcount); - else { + if (!srq || !refcount_inc_not_zero(&srq->refcount)) + srq = NULL; + spin_unlock_irqrestore(&srq_table->lock, flags); + if (!srq) { mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn); return; } @@ -203,8 +204,8 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd, if (err) goto err_radix; - refcount_set(&srq->refcount, 1); init_completion(&srq->free); + refcount_set_release(&srq->refcount, 1); return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c index 6a50b6d..d9adb99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c
@@ -1070,29 +1070,37 @@ static struct psp_dev_ops mlx5_psp_ops = { void mlx5e_psp_unregister(struct mlx5e_priv *priv) { - if (!priv->psp || !priv->psp->psp) + struct mlx5e_psp *psp = priv->psp; + + if (!psp || !psp->psp) return; - psp_dev_unregister(priv->psp->psp); + psp_dev_unregister(psp->psp); + psp->psp = NULL; } void mlx5e_psp_register(struct mlx5e_priv *priv) { + struct mlx5e_psp *psp = priv->psp; + struct psp_dev *psd; + /* FW Caps missing */ if (!priv->psp) return; - priv->psp->caps.assoc_drv_spc = sizeof(u32); - priv->psp->caps.versions = 1 << PSP_VERSION_HDR0_AES_GCM_128; + psp->caps.assoc_drv_spc = sizeof(u32); + psp->caps.versions = 1 << PSP_VERSION_HDR0_AES_GCM_128; if (MLX5_CAP_PSP(priv->mdev, psp_crypto_esp_aes_gcm_256_encrypt) && MLX5_CAP_PSP(priv->mdev, psp_crypto_esp_aes_gcm_256_decrypt)) - priv->psp->caps.versions |= 1 << PSP_VERSION_HDR0_AES_GCM_256; + psp->caps.versions |= 1 << PSP_VERSION_HDR0_AES_GCM_256; - priv->psp->psp = psp_dev_create(priv->netdev, &mlx5_psp_ops, - &priv->psp->caps, NULL); - if (IS_ERR(priv->psp->psp)) + psd = psp_dev_create(priv->netdev, &mlx5_psp_ops, &psp->caps, NULL); + if (IS_ERR(psd)) { mlx5_core_err(priv->mdev, "PSP failed to register due to %pe\n", - priv->psp->psp); + psd); + return; + } + psp->psp = psd; } int mlx5e_psp_init(struct mlx5e_priv *priv) @@ -1131,22 +1139,18 @@ int mlx5e_psp_init(struct mlx5e_priv *priv) if (!psp) return -ENOMEM; - priv->psp = psp; fs = mlx5e_accel_psp_fs_init(priv); if (IS_ERR(fs)) { err = PTR_ERR(fs); - goto out_err; + kfree(psp); + return err; } psp->fs = fs; + priv->psp = psp; mlx5_core_dbg(priv->mdev, "PSP attached to netdevice\n"); return 0; - -out_err: - priv->psp = NULL; - kfree(psp); - return err; } void mlx5e_psp_cleanup(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5a46870..8f2b3ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -6023,7 +6023,6 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (take_rtnl) rtnl_lock(); - mlx5e_psp_register(priv); /* update XDP supported features */ mlx5e_set_xdp_feature(priv); @@ -6036,7 +6035,6 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { mlx5e_health_destroy_reporters(priv); - mlx5e_psp_unregister(priv); mlx5e_ktls_cleanup(priv); mlx5e_psp_cleanup(priv); mlx5e_fs_cleanup(priv->fs); @@ -6160,6 +6158,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_fs_init_l2_addr(priv->fs, netdev); mlx5e_ipsec_init(priv); + mlx5e_psp_register(priv); err = mlx5e_macsec_init(priv); if (err) @@ -6230,6 +6229,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5_lag_remove_netdev(mdev, priv->netdev); mlx5_vxlan_reset_to_default(mdev->vxlan); mlx5e_macsec_cleanup(priv); + mlx5e_psp_unregister(priv); mlx5e_ipsec_cleanup(priv); } @@ -6774,9 +6774,11 @@ static int mlx5e_resume(struct auxiliary_device *adev) return err; actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); - if (actual_adev) - return _mlx5e_resume(actual_adev); - return 0; + if (actual_adev) { + err = _mlx5e_resume(actual_adev); + mlx5_sd_put_adev(actual_adev, adev); + } + return err; } static int _mlx5e_suspend(struct auxiliary_device *adev, bool pre_netdev_reg) @@ -6815,6 +6817,8 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state) err = _mlx5e_suspend(actual_adev, false); mlx5_sd_cleanup(mdev); + if (actual_adev) + mlx5_sd_put_adev(actual_adev, adev); return err; } @@ -6912,9 +6916,19 @@ static int mlx5e_probe(struct auxiliary_device *adev, return err; actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); - if (actual_adev) - return _mlx5e_probe(actual_adev); + if (actual_adev) { + err = _mlx5e_probe(actual_adev); + if (err) + goto sd_cleanup; + mlx5_sd_put_adev(actual_adev, adev); + } return 0; + +sd_cleanup: + mlx5_sd_cleanup(mdev); + if (actual_adev) + 
mlx5_sd_put_adev(actual_adev, adev); + return err; } static void _mlx5e_remove(struct auxiliary_device *adev) @@ -6966,6 +6980,8 @@ static void mlx5e_remove(struct auxiliary_device *adev) _mlx5e_remove(actual_adev); mlx5_sd_cleanup(mdev); + if (actual_adev) + mlx5_sd_put_adev(actual_adev, adev); } static const struct auxiliary_device_id mlx5e_id_table[] = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index 762c783..6e19916 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -18,6 +18,7 @@ struct mlx5_sd { u8 host_buses; struct mlx5_devcom_comp_dev *devcom; struct dentry *dfs; + u8 state; bool primary; union { struct { /* primary */ @@ -31,6 +32,11 @@ struct mlx5_sd { }; }; +enum mlx5_sd_state { + MLX5_SD_STATE_DOWN = 0, + MLX5_SD_STATE_UP, +}; + static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev) { struct mlx5_sd *sd = mlx5_get_sd(dev); @@ -270,9 +276,6 @@ static void sd_unregister(struct mlx5_core_dev *dev) { struct mlx5_sd *sd = mlx5_get_sd(dev); - mlx5_devcom_comp_lock(sd->devcom); - mlx5_devcom_comp_set_ready(sd->devcom, false); - mlx5_devcom_comp_unlock(sd->devcom); mlx5_devcom_unregister_component(sd->devcom); } @@ -426,6 +429,7 @@ int mlx5_sd_init(struct mlx5_core_dev *dev) struct mlx5_core_dev *primary, *pos, *to; struct mlx5_sd *sd = mlx5_get_sd(dev); u8 alias_key[ACCESS_KEY_LEN]; + struct mlx5_sd *primary_sd; int err, i; err = sd_init(dev); @@ -440,10 +444,17 @@ int mlx5_sd_init(struct mlx5_core_dev *dev) if (err) goto err_sd_cleanup; + mlx5_devcom_comp_lock(sd->devcom); if (!mlx5_devcom_comp_is_ready(sd->devcom)) - return 0; + goto out; primary = mlx5_sd_get_primary(dev); + if (!primary) + goto out; + + primary_sd = mlx5_get_sd(primary); + if (primary_sd->state != MLX5_SD_STATE_DOWN) + goto out; for (i = 0; i < ACCESS_KEY_LEN; i++) alias_key[i] = get_random_u8(); @@ -452,9 +463,13 @@ int mlx5_sd_init(struct mlx5_core_dev *dev) if (err) goto err_sd_unregister; - sd->dfs = debugfs_create_dir("multi-pf", mlx5_debugfs_get_dev_root(primary)); - debugfs_create_x32("group_id", 0400, sd->dfs, &sd->group_id); - debugfs_create_file("primary", 0400, sd->dfs, primary, &dev_fops); + primary_sd->dfs = + debugfs_create_dir("multi-pf", + mlx5_debugfs_get_dev_root(primary)); + debugfs_create_x32("group_id", 0400, primary_sd->dfs, + &primary_sd->group_id); + debugfs_create_file("primary", 0400, primary_sd->dfs, primary, + &dev_fops); mlx5_sd_for_each_secondary(i, primary, pos) { char name[32]; @@ -464,7 +479,8 @@ int 
mlx5_sd_init(struct mlx5_core_dev *dev) goto err_unset_secondaries; snprintf(name, sizeof(name), "secondary_%d", i - 1); - debugfs_create_file(name, 0400, sd->dfs, pos, &dev_fops); + debugfs_create_file(name, 0400, primary_sd->dfs, pos, + &dev_fops); } @@ -472,6 +488,9 @@ int mlx5_sd_init(struct mlx5_core_dev *dev) sd->group_id, mlx5_devcom_comp_get_size(sd->devcom)); sd_print_group(primary); + primary_sd->state = MLX5_SD_STATE_UP; +out: + mlx5_devcom_comp_unlock(sd->devcom); return 0; err_unset_secondaries: @@ -479,8 +498,18 @@ int mlx5_sd_init(struct mlx5_core_dev *dev) mlx5_sd_for_each_secondary_to(i, primary, to, pos) sd_cmd_unset_secondary(pos); sd_cmd_unset_primary(primary); - debugfs_remove_recursive(sd->dfs); + debugfs_remove_recursive(primary_sd->dfs); + primary_sd->dfs = NULL; err_sd_unregister: + mlx5_sd_for_each_secondary(i, primary, pos) { + struct mlx5_sd *peer_sd = mlx5_get_sd(pos); + + primary_sd->secondaries[i - 1] = NULL; + peer_sd->primary_dev = NULL; + } + primary_sd->primary = false; + mlx5_devcom_comp_set_ready(sd->devcom, false); + mlx5_devcom_comp_unlock(sd->devcom); sd_unregister(dev); err_sd_cleanup: sd_cleanup(dev); @@ -491,42 +520,97 @@ void mlx5_sd_cleanup(struct mlx5_core_dev *dev) { struct mlx5_sd *sd = mlx5_get_sd(dev); struct mlx5_core_dev *primary, *pos; + struct mlx5_sd *primary_sd; int i; if (!sd) return; + mlx5_devcom_comp_lock(sd->devcom); if (!mlx5_devcom_comp_is_ready(sd->devcom)) - goto out; + goto out_unlock; primary = mlx5_sd_get_primary(dev); + if (!primary) + goto out_ready_false; + + primary_sd = mlx5_get_sd(primary); + if (primary_sd->state != MLX5_SD_STATE_UP) + goto out_clear_peers; + mlx5_sd_for_each_secondary(i, primary, pos) sd_cmd_unset_secondary(pos); sd_cmd_unset_primary(primary); - debugfs_remove_recursive(sd->dfs); + debugfs_remove_recursive(primary_sd->dfs); + primary_sd->dfs = NULL; sd_info(primary, "group id %#x, uncombined\n", sd->group_id); -out: + primary_sd->state = MLX5_SD_STATE_DOWN; 
+out_clear_peers: + mlx5_sd_for_each_secondary(i, primary, pos) { + struct mlx5_sd *peer_sd = mlx5_get_sd(pos); + + primary_sd->secondaries[i - 1] = NULL; + peer_sd->primary_dev = NULL; + } + primary_sd->primary = false; +out_ready_false: + mlx5_devcom_comp_set_ready(sd->devcom, false); +out_unlock: + mlx5_devcom_comp_unlock(sd->devcom); sd_unregister(dev); sd_cleanup(dev); } +/* Lock order: + * primary: actual_adev_lock -> SD devcom comp lock + * secondary: SD devcom comp lock -> (drop) -> actual_adev_lock + * The two locks are never held together, so no ABBA. + */ struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev, struct auxiliary_device *adev, int idx) { struct mlx5_sd *sd = mlx5_get_sd(dev); struct mlx5_core_dev *primary; + struct mlx5_adev *primary_adev; if (!sd) return adev; - if (!mlx5_devcom_comp_is_ready(sd->devcom)) + mlx5_devcom_comp_lock(sd->devcom); + if (!mlx5_devcom_comp_is_ready(sd->devcom)) { + mlx5_devcom_comp_unlock(sd->devcom); return NULL; + } primary = mlx5_sd_get_primary(dev); - if (dev == primary) + if (!primary || dev == primary) { + mlx5_devcom_comp_unlock(sd->devcom); return adev; + } - return &primary->priv.adev[idx]->adev; + primary_adev = primary->priv.adev[idx]; + get_device(&primary_adev->adev.dev); + mlx5_devcom_comp_unlock(sd->devcom); + + device_lock(&primary_adev->adev.dev); + /* Primary may have completed remove between dropping devcom and + * acquiring device_lock; recheck. + */ + if (!mlx5_devcom_comp_is_ready(sd->devcom)) { + device_unlock(&primary_adev->adev.dev); + put_device(&primary_adev->adev.dev); + return NULL; + } + return &primary_adev->adev; +} + +void mlx5_sd_put_adev(struct auxiliary_device *actual_adev, + struct auxiliary_device *adev) +{ + if (actual_adev != adev) { + device_unlock(&actual_adev->dev); + put_device(&actual_adev->dev); + } }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h index 137efaf..9bfd5b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
@@ -15,6 +15,8 @@ struct mlx5_core_dev *mlx5_sd_ch_ix_get_dev(struct mlx5_core_dev *primary, int c struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev, struct auxiliary_device *adev, int idx); +void mlx5_sd_put_adev(struct auxiliary_device *actual_adev, + struct auxiliary_device *adev); int mlx5_sd_init(struct mlx5_core_dev *dev); void mlx5_sd_cleanup(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index c406a3b..4dea2bb 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
@@ -826,7 +826,8 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd) netif_tx_stop_all_queues(netdev); if (fbnic_phylink_create(netdev)) { - fbnic_netdev_free(fbd); + free_netdev(netdev); + fbd->netdev = NULL; return NULL; }
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 6a745bb..eb57b86 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
@@ -31,11 +31,11 @@ enum spx5_target_chiptype { SPX5_TARGET_CT_7552 = 0x7552, /* SparX-5-128 Enterprise */ SPX5_TARGET_CT_7556 = 0x7556, /* SparX-5-160 Enterprise */ SPX5_TARGET_CT_7558 = 0x7558, /* SparX-5-200 Enterprise */ - SPX5_TARGET_CT_7546TSN = 0x47546, /* SparX-5-64i Industrial */ - SPX5_TARGET_CT_7549TSN = 0x47549, /* SparX-5-90i Industrial */ - SPX5_TARGET_CT_7552TSN = 0x47552, /* SparX-5-128i Industrial */ - SPX5_TARGET_CT_7556TSN = 0x47556, /* SparX-5-160i Industrial */ - SPX5_TARGET_CT_7558TSN = 0x47558, /* SparX-5-200i Industrial */ + SPX5_TARGET_CT_7546TSN = 0x0546, /* SparX-5-64i Industrial */ + SPX5_TARGET_CT_7549TSN = 0x0549, /* SparX-5-90i Industrial */ + SPX5_TARGET_CT_7552TSN = 0x0552, /* SparX-5-128i Industrial */ + SPX5_TARGET_CT_7556TSN = 0x0556, /* SparX-5-160i Industrial */ + SPX5_TARGET_CT_7558TSN = 0x0558, /* SparX-5-200i Industrial */ SPX5_TARGET_CT_LAN9694 = 0x9694, /* lan969x-40 */ SPX5_TARGET_CT_LAN9691VAO = 0x9691, /* lan969x-40-VAO */ SPX5_TARGET_CT_LAN9694TSN = 0x9695, /* lan969x-40-TSN */
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c index 04bc8ff..62c4989 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
@@ -1128,7 +1128,8 @@ int sparx5_port_init(struct sparx5 *sparx5, DEV2G5_PCS1G_SD_CFG(port->portno)); if (conf->portmode == PHY_INTERFACE_MODE_QSGMII || - conf->portmode == PHY_INTERFACE_MODE_SGMII) { + conf->portmode == PHY_INTERFACE_MODE_SGMII || + conf->portmode == PHY_INTERFACE_MODE_1000BASEX) { err = sparx5_serdes_set(sparx5, port, conf); if (err) return err;
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 098fbda..d8e8168 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -43,8 +43,9 @@ static u64 mana_gd_r64(struct gdma_context *g, u64 offset) static int mana_gd_init_pf_regs(struct pci_dev *pdev) { struct gdma_context *gc = pci_get_drvdata(pdev); - void __iomem *sriov_base_va; + u64 remaining_barsize; u64 sriov_base_off; + u64 sriov_shm_off; gc->db_page_size = mana_gd_r32(gc, GDMA_PF_REG_DB_PAGE_SIZE) & 0xFFFF; @@ -73,10 +74,28 @@ static int mana_gd_init_pf_regs(struct pci_dev *pdev) gc->phys_db_page_base = gc->bar0_pa + gc->db_page_off; sriov_base_off = mana_gd_r64(gc, GDMA_SRIOV_REG_CFG_BASE_OFF); + if (sriov_base_off >= gc->bar0_size || + gc->bar0_size - sriov_base_off < + GDMA_PF_REG_SHM_OFF + sizeof(u64) || + !IS_ALIGNED(sriov_base_off, sizeof(u64))) { + dev_err(gc->dev, + "SRIOV base offset 0x%llx out of range or unaligned (BAR0 size 0x%llx)\n", + sriov_base_off, (u64)gc->bar0_size); + return -EPROTO; + } - sriov_base_va = gc->bar0_va + sriov_base_off; - gc->shm_base = sriov_base_va + - mana_gd_r64(gc, sriov_base_off + GDMA_PF_REG_SHM_OFF); + remaining_barsize = gc->bar0_size - sriov_base_off; + sriov_shm_off = mana_gd_r64(gc, sriov_base_off + GDMA_PF_REG_SHM_OFF); + if (sriov_shm_off >= remaining_barsize || + remaining_barsize - sriov_shm_off < SMC_APERTURE_SIZE || + !IS_ALIGNED(sriov_shm_off, sizeof(u32))) { + dev_err(gc->dev, + "SRIOV SHM offset 0x%llx out of range or unaligned (BAR0 size 0x%llx)\n", + sriov_shm_off, (u64)gc->bar0_size); + return -EPROTO; + } + + gc->shm_base = gc->bar0_va + sriov_base_off + sriov_shm_off; return 0; } @@ -84,6 +103,7 @@ static int mana_gd_init_pf_regs(struct pci_dev *pdev) static int mana_gd_init_vf_regs(struct pci_dev *pdev) { struct gdma_context *gc = pci_get_drvdata(pdev); + u64 shm_off; gc->db_page_size = mana_gd_r32(gc, GDMA_REG_DB_PAGE_SIZE) & 0xFFFF; @@ -111,7 +131,17 @@ static int mana_gd_init_vf_regs(struct pci_dev *pdev) gc->db_page_base = gc->bar0_va + gc->db_page_off; gc->phys_db_page_base = gc->bar0_pa + gc->db_page_off; - gc->shm_base = gc->bar0_va + mana_gd_r64(gc, 
GDMA_REG_SHM_OFFSET); + shm_off = mana_gd_r64(gc, GDMA_REG_SHM_OFFSET); + if (shm_off >= gc->bar0_size || + gc->bar0_size - shm_off < SMC_APERTURE_SIZE || + !IS_ALIGNED(shm_off, sizeof(u32))) { + dev_err(gc->dev, + "SHM offset 0x%llx out of range or unaligned (BAR0 size 0x%llx)\n", + shm_off, (u64)gc->bar0_size); + return -EPROTO; + } + + gc->shm_base = gc->bar0_va + shm_off; return 0; }
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index a654b36..9afc786 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -2520,9 +2520,12 @@ static void mana_destroy_rxq(struct mana_port_context *apc, napi_disable_locked(napi); netif_napi_del_locked(napi); } - xdp_rxq_info_unreg(&rxq->xdp_rxq); - mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); + if (xdp_rxq_info_is_reg(&rxq->xdp_rxq)) + xdp_rxq_info_unreg(&rxq->xdp_rxq); + + if (rxq->rxobj != INVALID_MANA_HANDLE) + mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); mana_deinit_cq(apc, &rxq->rx_cq); @@ -2796,9 +2799,6 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, mana_destroy_rxq(apc, rxq, false); - if (cq) - mana_deinit_cq(apc, cq); - return NULL; }
diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.c b/drivers/net/ethernet/microsoft/mana/shm_channel.c index 0f1679e..d21b5db 100644 --- a/drivers/net/ethernet/microsoft/mana/shm_channel.c +++ b/drivers/net/ethernet/microsoft/mana/shm_channel.c
@@ -61,11 +61,6 @@ union smc_proto_hdr { }; }; /* HW DATA */ -#define SMC_APERTURE_BITS 256 -#define SMC_BASIC_UNIT (sizeof(u32)) -#define SMC_APERTURE_DWORDS (SMC_APERTURE_BITS / (SMC_BASIC_UNIT * 8)) -#define SMC_LAST_DWORD (SMC_APERTURE_DWORDS - 1) - static int mana_smc_poll_register(void __iomem *base, bool reset) { void __iomem *ptr = base + SMC_LAST_DWORD * SMC_BASIC_UNIT;
diff --git a/drivers/net/ethernet/renesas/rtsn.c b/drivers/net/ethernet/renesas/rtsn.c index 03a2669..ee8381b 100644 --- a/drivers/net/ethernet/renesas/rtsn.c +++ b/drivers/net/ethernet/renesas/rtsn.c
@@ -797,11 +797,11 @@ static int rtsn_mdio_alloc(struct rtsn_private *priv) /* Enter config mode before registering the MDIO bus */ ret = rtsn_reset(priv); if (ret) - goto out_free_bus; + goto out_put_node; ret = rtsn_change_mode(priv, OCR_OPC_CONFIG); if (ret) - goto out_free_bus; + goto out_put_node; rtsn_modify(priv, MPIC, MPIC_PSMCS_MASK | MPIC_PSMHT_MASK, MPIC_PSMCS_DEFAULT | MPIC_PSMHT_DEFAULT); @@ -824,6 +824,8 @@ static int rtsn_mdio_alloc(struct rtsn_private *priv) return 0; +out_put_node: + of_node_put(mdio_node); out_free_bus: mdiobus_free(mii); return ret;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c index e2240b6..2ab6eca 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c
@@ -100,6 +100,8 @@ static int nvt_gmac_probe(struct platform_device *pdev) if (!priv) return dev_err_probe(dev, -ENOMEM, "Failed to allocate private data\n"); + priv->dev = dev; + priv->regmap = syscon_regmap_lookup_by_phandle_args(dev->of_node, "nuvoton,sys", 1, &priv->macid); if (IS_ERR(priv->regmap))
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index d3772d0..2451f6b 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -2480,8 +2480,11 @@ int wx_sw_init(struct wx *wx) wx->oem_svid = pdev->subsystem_vendor; wx->oem_ssid = pdev->subsystem_device; wx->bus.device = PCI_SLOT(pdev->devfn); - wx->bus.func = FIELD_GET(WX_CFG_PORT_ST_LANID, - rd32(wx, WX_CFG_PORT_ST)); + if (pdev->is_virtfn) + wx->bus.func = PCI_FUNC(pdev->devfn); + else + wx->bus.func = FIELD_GET(WX_CFG_PORT_ST_LANID, + rd32(wx, WX_CFG_PORT_ST)); if (wx->oem_svid == PCI_VENDOR_ID_WANGXUN || pdev->is_virtfn) {
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c index 29cdbed..94ff8f5 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c
@@ -99,8 +99,8 @@ int wx_request_msix_irqs_vf(struct wx *wx) } } - err = request_threaded_irq(wx->msix_entry->vector, wx_msix_misc_vf, - NULL, IRQF_ONESHOT, netdev->name, wx); + err = request_irq(wx->msix_entry->vector, wx_msix_misc_vf, + 0, netdev->name, wx); if (err) { wx_err(wx, "request_irq for msix_other failed: %d\n", err); goto free_queue_irqs;
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index a05af19..a750768 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c
@@ -1182,7 +1182,8 @@ void nsim_destroy(struct netdevsim *ns) unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb, &ns->nn); - nsim_psp_uninit(ns); + if (nsim_dev_port_is_pf(ns->nsim_dev_port)) + nsim_psp_uninit(ns); rtnl_lock(); peer = rtnl_dereference(ns->peer);
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 7e129dd..d909c41 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h
@@ -120,7 +120,9 @@ struct netdevsim { u64_stats_t tx_packets; u64_stats_t tx_bytes; struct u64_stats_sync syncp; - struct psp_dev *dev; + struct psp_dev __rcu *dev; + struct dentry *rereg; + struct mutex rereg_lock; u32 spi; u32 assoc_cnt; } psp;
diff --git a/drivers/net/netdevsim/psp.c b/drivers/net/netdevsim/psp.c index 0b4d717..6936ecb 100644 --- a/drivers/net/netdevsim/psp.c +++ b/drivers/net/netdevsim/psp.c
@@ -19,6 +19,7 @@ nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns, struct netdevsim *peer_ns, struct skb_ext **psp_ext) { enum skb_drop_reason rc = 0; + struct psp_dev *peer_psd; struct psp_assoc *pas; struct net *net; void **ptr; @@ -48,7 +49,8 @@ nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns, } /* Now pretend we just received this frame */ - if (peer_ns->psp.dev->config.versions & (1 << pas->version)) { + peer_psd = rcu_dereference(peer_ns->psp.dev); + if (peer_psd && peer_psd->config.versions & (1 << pas->version)) { bool strip_icv = false; u8 generation; @@ -61,8 +63,7 @@ nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns, skb_ext_reset(skb); skb->mac_len = ETH_HLEN; - if (psp_dev_rcv(skb, peer_ns->psp.dev->id, generation, - strip_icv)) { + if (psp_dev_rcv(skb, peer_psd->id, generation, strip_icv)) { rc = SKB_DROP_REASON_PSP_OUTPUT; goto out_unlock; } @@ -209,11 +210,26 @@ static struct psp_dev_caps nsim_psp_caps = { .assoc_drv_spc = sizeof(void *), }; +static void __nsim_psp_uninit(struct netdevsim *ns, bool teardown) +{ + struct psp_dev *psd; + + psd = rcu_dereference_protected(ns->psp.dev, + teardown || + lockdep_is_held(&ns->psp.rereg_lock)); + if (psd) { + rcu_assign_pointer(ns->psp.dev, NULL); + synchronize_rcu(); + psp_dev_unregister(psd); + } + WARN_ON(ns->psp.assoc_cnt); +} + void nsim_psp_uninit(struct netdevsim *ns) { - if (!IS_ERR(ns->psp.dev)) - psp_dev_unregister(ns->psp.dev); - WARN_ON(ns->psp.assoc_cnt); + debugfs_remove(ns->psp.rereg); + mutex_destroy(&ns->psp.rereg_lock); + __nsim_psp_uninit(ns, true); } static ssize_t @@ -221,14 +237,23 @@ nsim_psp_rereg_write(struct file *file, const char __user *data, size_t count, loff_t *ppos) { struct netdevsim *ns = file->private_data; - int err; + struct psp_dev *psd; + ssize_t ret; - nsim_psp_uninit(ns); + mutex_lock(&ns->psp.rereg_lock); + __nsim_psp_uninit(ns, false); - ns->psp.dev = psp_dev_create(ns->netdev, &nsim_psp_ops, - &nsim_psp_caps, ns); - err = 
PTR_ERR_OR_ZERO(ns->psp.dev); - return err ?: count; + psd = psp_dev_create(ns->netdev, &nsim_psp_ops, &nsim_psp_caps, ns); + if (IS_ERR(psd)) { + ret = PTR_ERR(psd); + goto out; + } + + rcu_assign_pointer(ns->psp.dev, psd); + ret = count; +out: + mutex_unlock(&ns->psp.rereg_lock); + return ret; } static const struct file_operations nsim_psp_rereg_fops = { @@ -241,14 +266,16 @@ static const struct file_operations nsim_psp_rereg_fops = { int nsim_psp_init(struct netdevsim *ns) { struct dentry *ddir = ns->nsim_dev_port->ddir; - int err; + struct psp_dev *psd; - ns->psp.dev = psp_dev_create(ns->netdev, &nsim_psp_ops, - &nsim_psp_caps, ns); - err = PTR_ERR_OR_ZERO(ns->psp.dev); - if (err) - return err; + psd = psp_dev_create(ns->netdev, &nsim_psp_ops, &nsim_psp_caps, ns); + if (IS_ERR(psd)) + return PTR_ERR(psd); - debugfs_create_file("psp_rereg", 0200, ddir, ns, &nsim_psp_rereg_fops); + rcu_assign_pointer(ns->psp.dev, psd); + + mutex_init(&ns->psp.rereg_lock); + ns->psp.rereg = debugfs_create_file("psp_rereg", 0200, ddir, ns, + &nsim_psp_rereg_fops); return 0; }
diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c index db43a1f..22c555d 100644 --- a/drivers/net/ovpn/io.c +++ b/drivers/net/ovpn/io.c
@@ -85,17 +85,24 @@ static void ovpn_netdev_write(struct ovpn_peer *peer, struct sk_buff *skb) skb_scrub_packet(skb, true); /* network header reset in ovpn_decrypt_post() */ + skb_reset_mac_header(skb); skb_reset_transport_header(skb); skb_reset_inner_headers(skb); /* cause packet to be "received" by the interface */ pkt_len = skb->len; + /* we may get here in process context in case of TCP connections, + * therefore we have to disable BHs to ensure gro_cells_receive() + * and dev_dstats_rx_add() do not get corrupted or enter deadlock + */ + local_bh_disable(); ret = gro_cells_receive(&peer->ovpn->gro_cells, skb); if (likely(ret == NET_RX_SUCCESS)) { /* update RX stats with the size of decrypted packet */ ovpn_peer_stats_increment_rx(&peer->vpn_stats, pkt_len); dev_dstats_rx_add(peer->ovpn->dev, pkt_len); } + local_bh_enable(); } void ovpn_decrypt_post(void *data, int ret)
diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c index 5198d66..b64bead 100644 --- a/drivers/net/phy/bcm-phy-lib.c +++ b/drivers/net/phy/bcm-phy-lib.c
@@ -563,6 +563,15 @@ void bcm_phy_get_stats(struct phy_device *phydev, u64 *shadow, } EXPORT_SYMBOL_GPL(bcm_phy_get_stats); +void bcm_phy_update_stats_shadow(struct phy_device *phydev, u64 *shadow) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(bcm_phy_hw_stats); i++) + bcm_phy_get_stat(phydev, shadow, i); +} +EXPORT_SYMBOL_GPL(bcm_phy_update_stats_shadow); + void bcm_phy_r_rc_cal_reset(struct phy_device *phydev) { /* Reset R_CAL/RC_CAL Engine */
diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h index bceddbc..bba94ce 100644 --- a/drivers/net/phy/bcm-phy-lib.h +++ b/drivers/net/phy/bcm-phy-lib.h
@@ -85,6 +85,7 @@ int bcm_phy_get_sset_count(struct phy_device *phydev); void bcm_phy_get_strings(struct phy_device *phydev, u8 *data); void bcm_phy_get_stats(struct phy_device *phydev, u64 *shadow, struct ethtool_stats *stats, u64 *data); +void bcm_phy_update_stats_shadow(struct phy_device *phydev, u64 *shadow); void bcm_phy_r_rc_cal_reset(struct phy_device *phydev); int bcm_phy_28nm_a0b0_afe_config_init(struct phy_device *phydev); int bcm_phy_enable_jumbo(struct phy_device *phydev);
diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 00e8fa1..71a163f 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c
@@ -807,6 +807,17 @@ static void bcm7xxx_28nm_get_phy_stats(struct phy_device *phydev, bcm_phy_get_stats(phydev, priv->stats, stats, data); } +static int bcm7xxx_28nm_suspend(struct phy_device *phydev) +{ + struct bcm7xxx_phy_priv *priv = phydev->priv; + + mutex_lock(&phydev->lock); + bcm_phy_update_stats_shadow(phydev, priv->stats); + mutex_unlock(&phydev->lock); + + return genphy_suspend(phydev); +} + static int bcm7xxx_28nm_probe(struct phy_device *phydev) { struct bcm7xxx_phy_priv *priv; @@ -849,6 +860,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) .flags = PHY_IS_INTERNAL, \ .config_init = bcm7xxx_28nm_config_init, \ .resume = bcm7xxx_28nm_resume, \ + .suspend = bcm7xxx_28nm_suspend, \ .get_tunable = bcm7xxx_28nm_get_tunable, \ .set_tunable = bcm7xxx_28nm_set_tunable, \ .get_sset_count = bcm_phy_get_sset_count, \ @@ -866,6 +878,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) .flags = PHY_IS_INTERNAL, \ .config_init = bcm7xxx_28nm_ephy_config_init, \ .resume = bcm7xxx_28nm_ephy_resume, \ + .suspend = bcm7xxx_28nm_suspend, \ .get_sset_count = bcm_phy_get_sset_count, \ .get_strings = bcm_phy_get_strings, \ .get_stats = bcm7xxx_28nm_get_phy_stats, \ @@ -902,6 +915,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) .config_aneg = genphy_config_aneg, \ .read_status = genphy_read_status, \ .resume = bcm7xxx_16nm_ephy_resume, \ + .suspend = bcm7xxx_28nm_suspend, \ } static struct phy_driver bcm7xxx_driver[] = {
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index bf0c6a0..d1a4edb 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c
@@ -592,8 +592,13 @@ static int bcm54xx_set_wakeup_irq(struct phy_device *phydev, bool state) static int bcm54xx_suspend(struct phy_device *phydev) { + struct bcm54xx_phy_priv *priv = phydev->priv; int ret = 0; + mutex_lock(&phydev->lock); + bcm_phy_update_stats_shadow(phydev, priv->stats); + mutex_unlock(&phydev->lock); + bcm54xx_ptp_stop(phydev); /* Acknowledge any Wake-on-LAN interrupt prior to suspend */
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 2aa1dedd..e211a52 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c
@@ -4548,6 +4548,13 @@ static int lan8814_config_init(struct phy_device *phydev) struct kszphy_priv *lan8814 = phydev->priv; int ret; + if (phy_package_init_once(phydev)) + /* Reset the PHY */ + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_QSGMII_SOFT_RESET, + LAN8814_QSGMII_SOFT_RESET_BIT, + LAN8814_QSGMII_SOFT_RESET_BIT); + /* Based on the interface type select how the advertise ability is * encoded, to set as SGMII or as USGMII. */ @@ -4655,13 +4662,7 @@ static int lan8814_probe(struct phy_device *phydev) priv->is_ptp_available = err == LAN8814_REV_LAN8814 || err == LAN8814_REV_LAN8818; - if (phy_package_init_once(phydev)) { - /* Reset the PHY */ - lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, - LAN8814_QSGMII_SOFT_RESET, - LAN8814_QSGMII_SOFT_RESET_BIT, - LAN8814_QSGMII_SOFT_RESET_BIT); - + if (phy_package_probe_once(phydev)) { err = lan8814_release_coma_mode(phydev); if (err) return err;
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index df0bcfed..293ef80 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c
@@ -756,6 +756,7 @@ static void ax88772_mac_link_down(struct phylink_config *config, struct usbnet *dev = netdev_priv(to_net_dev(config->dev)); asix_write_medium_mode(dev, 0, 0); + usbnet_link_change(dev, false, false); } static void ax88772_mac_link_up(struct phylink_config *config, @@ -786,6 +787,7 @@ static void ax88772_mac_link_up(struct phylink_config *config, m |= AX_MEDIUM_RFC; asix_write_medium_mode(dev, m, 0); + usbnet_link_change(dev, true, false); } static const struct phylink_mac_ops ax88772_phylink_mac_ops = {
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index bb99297..0223a17 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c
@@ -2012,6 +2012,14 @@ static const struct usb_device_id cdc_devs[] = { .driver_info = (unsigned long)&apple_private_interface_info, }, + /* Mac */ + { USB_DEVICE_INTERFACE_NUMBER(0x05ac, 0x1905, 0), + .driver_info = (unsigned long)&apple_private_interface_info, + }, + { USB_DEVICE_INTERFACE_NUMBER(0x05ac, 0x1905, 2), + .driver_info = (unsigned long)&apple_private_interface_info, + }, + /* Ericsson MBM devices like F5521gw */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_VENDOR,
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 7337bf1..1ace1d2 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c
@@ -10138,6 +10138,7 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_DELL, 0xb097) }, { USB_DEVICE(VENDOR_ID_ASUS, 0x1976) }, { USB_DEVICE(VENDOR_ID_TRENDNET, 0xe02b) }, + { USB_DEVICE(VENDOR_ID_TRENDNET, 0xe02c) }, {} };
diff --git a/drivers/net/veth.c b/drivers/net/veth.c index e35df71..0cfb19b 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c
@@ -972,7 +972,8 @@ static int veth_poll(struct napi_struct *napi, int budget) /* NAPI functions as RCU section */ peer_dev = rcu_dereference_check(priv->peer, rcu_read_lock_bh_held()); - peer_txq = peer_dev ? netdev_get_tx_queue(peer_dev, queue_idx) : NULL; + peer_txq = (peer_dev && queue_idx < peer_dev->real_num_tx_queues) ? + netdev_get_tx_queue(peer_dev, queue_idx) : NULL; xdp_set_return_frame_no_direct(); done = veth_xdp_rcv(rq, budget, &bq, &stats);
diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 3bd57527..15bfb78 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -773,11 +773,6 @@ static void uhdlc_memclean(struct ucc_hdlc_private *priv) kfree(priv->tx_skbuff); priv->tx_skbuff = NULL; - if (priv->uf_regs) { - iounmap(priv->uf_regs); - priv->uf_regs = NULL; - } - if (priv->uccf) { ucc_fast_free(priv->uccf); priv->uccf = NULL; @@ -1255,12 +1250,12 @@ static void ucc_hdlc_remove(struct platform_device *pdev) uhdlc_memclean(priv); - if (priv->utdm->si_regs) { + if (priv->utdm && priv->utdm->si_regs) { iounmap(priv->utdm->si_regs); priv->utdm->si_regs = NULL; } - if (priv->utdm->siram) { + if (priv->utdm && priv->utdm->siram) { iounmap(priv->utdm->siram); priv->utdm->siram = NULL; }
diff --git a/drivers/net/wireless/ath/ath10k/Kconfig b/drivers/net/wireless/ath/ath10k/Kconfig index 876aed7..efb9f02 100644 --- a/drivers/net/wireless/ath/ath10k/Kconfig +++ b/drivers/net/wireless/ath/ath10k/Kconfig
@@ -46,6 +46,7 @@ depends on ARCH_QCOM || COMPILE_TEST depends on QCOM_SMEM depends on QCOM_RPROC_COMMON || QCOM_RPROC_COMMON=n + select POWER_SEQUENCING select QCOM_SCM select QCOM_QMI_HELPERS help
diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 2519e24..980a12fb 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c
@@ -1838,10 +1838,22 @@ static struct ath12k_hw_group *ath12k_core_hw_group_alloc(struct ath12k_base *ab return ag; } +static void ath12k_core_free_wsi_info(struct ath12k_hw_group *ag) +{ + int i; + + for (i = 0; i < ag->num_devices; i++) { + of_node_put(ag->wsi_node[i]); + ag->wsi_node[i] = NULL; + } + ag->num_devices = 0; +} + static void ath12k_core_hw_group_free(struct ath12k_hw_group *ag) { mutex_lock(&ath12k_hw_group_mutex); + ath12k_core_free_wsi_info(ag); list_del(&ag->list); kfree(ag); @@ -1867,52 +1879,59 @@ static struct ath12k_hw_group *ath12k_core_hw_group_find_by_dt(struct ath12k_bas static int ath12k_core_get_wsi_info(struct ath12k_hw_group *ag, struct ath12k_base *ab) { - struct device_node *wsi_dev = ab->dev->of_node, *next_wsi_dev; - struct device_node *tx_endpoint, *next_rx_endpoint; - int device_count = 0; + struct device_node *next_wsi_dev; + int device_count = 0, ret = 0; + struct device_node *wsi_dev; - next_wsi_dev = wsi_dev; - - if (!next_wsi_dev) + wsi_dev = of_node_get(ab->dev->of_node); + if (!wsi_dev) return -ENODEV; do { - ag->wsi_node[device_count] = next_wsi_dev; + if (device_count >= ATH12K_MAX_DEVICES) { + ath12k_warn(ab, "device count in DT %d is more than limit %d\n", + device_count, ATH12K_MAX_DEVICES); + ret = -EINVAL; + break; + } - tx_endpoint = of_graph_get_endpoint_by_regs(next_wsi_dev, 0, -1); + ag->wsi_node[device_count++] = of_node_get(wsi_dev); + + struct device_node *tx_endpoint __free(device_node) = + of_graph_get_endpoint_by_regs(wsi_dev, 0, -1); if (!tx_endpoint) { - of_node_put(next_wsi_dev); - return -ENODEV; + ret = -ENODEV; + break; } - next_rx_endpoint = of_graph_get_remote_endpoint(tx_endpoint); + struct device_node *next_rx_endpoint __free(device_node) = + of_graph_get_remote_endpoint(tx_endpoint); if (!next_rx_endpoint) { - of_node_put(next_wsi_dev); - of_node_put(tx_endpoint); - return -ENODEV; + ret = -ENODEV; + break; } - of_node_put(tx_endpoint); - of_node_put(next_wsi_dev); - next_wsi_dev = 
of_graph_get_port_parent(next_rx_endpoint); if (!next_wsi_dev) { - of_node_put(next_rx_endpoint); - return -ENODEV; + ret = -ENODEV; + break; } - of_node_put(next_rx_endpoint); + of_node_put(wsi_dev); + wsi_dev = next_wsi_dev; + } while (ab->dev->of_node != wsi_dev); - device_count++; - if (device_count > ATH12K_MAX_DEVICES) { - ath12k_warn(ab, "device count in DT %d is more than limit %d\n", - device_count, ATH12K_MAX_DEVICES); - of_node_put(next_wsi_dev); - return -EINVAL; + if (ret) { + while (--device_count >= 0) { + of_node_put(ag->wsi_node[device_count]); + ag->wsi_node[device_count] = NULL; } - } while (wsi_dev != next_wsi_dev); - of_node_put(next_wsi_dev); + of_node_put(wsi_dev); + return ret; + } + + of_node_put(wsi_dev); ag->num_devices = device_count; return 0; @@ -1983,9 +2002,9 @@ static struct ath12k_hw_group *ath12k_core_hw_group_assign(struct ath12k_base *a ath12k_core_get_wsi_index(ag, ab)) { ath12k_dbg(ab, ATH12K_DBG_BOOT, "unable to get wsi info from dt, grouping single device"); + ath12k_core_free_wsi_info(ag); ag->id = ATH12K_INVALID_GROUP_ID; ag->num_devices = 1; - memset(ag->wsi_node, 0, sizeof(ag->wsi_node)); wsi->index = 0; }
diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 250459f..b108ccd 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c
@@ -565,6 +565,9 @@ static int ath12k_dp_prepare_reo_update_elem(struct ath12k_dp *dp, lockdep_assert_held(&dp->dp_lock); + if (!peer->primary_link) + return 0; + elem = kzalloc_obj(*elem, GFP_ATOMIC); if (!elem) return -ENOMEM; @@ -1337,7 +1340,7 @@ void ath12k_dp_rx_deliver_msdu(struct ath12k_pdev_dp *dp_pdev, struct napi_struc bool is_mcbc = rxcb->is_mcbc; bool is_eapol = rxcb->is_eapol; - peer = ath12k_dp_peer_find_by_peerid(dp_pdev, rx_info->peer_id); + peer = ath12k_dp_peer_find_by_peerid(dp_pdev, rxcb->peer_id); pubsta = peer ? peer->sta : NULL;
diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index fbdfe64..df2334f 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c
@@ -788,7 +788,7 @@ struct ath12k_link_vif *ath12k_mac_get_arvif(struct ath12k *ar, u32 vdev_id) /* To use the arvif returned, caller must have held rcu read lock. */ - WARN_ON(!rcu_read_lock_any_held()); + lockdep_assert_in_rcu_read_lock(); arvif_iter.vdev_id = vdev_id; arvif_iter.ar = ar;
diff --git a/drivers/net/wireless/ath/ath12k/p2p.c b/drivers/net/wireless/ath/ath12k/p2p.c index 5958974..19ebcd1 100644 --- a/drivers/net/wireless/ath/ath12k/p2p.c +++ b/drivers/net/wireless/ath/ath12k/p2p.c
@@ -123,7 +123,7 @@ static void ath12k_p2p_noa_update_vdev_iter(void *data, u8 *mac, struct ath12k_p2p_noa_arg *arg = data; struct ath12k_link_vif *arvif; - WARN_ON(!rcu_read_lock_any_held()); + lockdep_assert_in_rcu_read_lock(); arvif = &ahvif->deflink; if (!arvif->is_created || arvif->ar != arg->ar || arvif->vdev_id != arg->vdev_id) return;
diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 65a05a95..b5e904a 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c
@@ -9778,7 +9778,7 @@ static void ath12k_wmi_rssi_dbm_conversion_params_info_event(struct ath12k_base *ab, struct sk_buff *skb) { - struct ath12k_wmi_rssi_dbm_conv_info_arg rssi_info; + struct ath12k_wmi_rssi_dbm_conv_info_arg rssi_info = {}; struct ath12k *ar; s32 noise_floor; u32 pdev_id; @@ -10251,7 +10251,7 @@ int ath12k_wmi_hw_data_filter_cmd(struct ath12k *ar, struct wmi_hw_data_filter_a { struct wmi_hw_data_filter_cmd *cmd; struct sk_buff *skb; - int len; + int ret, len; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10275,7 +10275,13 @@ int ath12k_wmi_hw_data_filter_cmd(struct ath12k *ar, struct wmi_hw_data_filter_a "wmi hw data filter enable %d filter_bitmap 0x%x\n", arg->enable, arg->hw_filter_bitmap); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_HW_DATA_FILTER_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_HW_DATA_FILTER_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_HW_DATA_FILTER_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_wow_host_wakeup_ind(struct ath12k *ar) @@ -10283,6 +10289,7 @@ int ath12k_wmi_wow_host_wakeup_ind(struct ath12k *ar) struct wmi_wow_host_wakeup_cmd *cmd; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10295,14 +10302,20 @@ int ath12k_wmi_wow_host_wakeup_ind(struct ath12k *ar) ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi tlv wow host wakeup ind\n"); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_wow_enable(struct ath12k *ar) { struct wmi_wow_enable_cmd *cmd; struct sk_buff *skb; - int len; + int ret, len; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10317,7 +10330,13 @@ int ath12k_wmi_wow_enable(struct 
ath12k *ar) cmd->pause_iface_config = cpu_to_le32(WOW_IFACE_PAUSE_ENABLED); ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi tlv wow enable\n"); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_WOW_ENABLE_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_wow_add_wakeup_event(struct ath12k *ar, u32 vdev_id, @@ -10327,6 +10346,7 @@ int ath12k_wmi_wow_add_wakeup_event(struct ath12k *ar, u32 vdev_id, struct wmi_wow_add_del_event_cmd *cmd; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10343,7 +10363,13 @@ int ath12k_wmi_wow_add_wakeup_event(struct ath12k *ar, u32 vdev_id, ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi tlv wow add wakeup event %s enable %d vdev_id %d\n", wow_wakeup_event(event), enable, vdev_id); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_wow_add_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id, @@ -10356,6 +10382,7 @@ int ath12k_wmi_wow_add_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id, struct sk_buff *skb; void *ptr; size_t len; + int ret; len = sizeof(*cmd) + sizeof(*tlv) + /* array struct */ @@ -10435,7 +10462,13 @@ int ath12k_wmi_wow_add_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id, ath12k_dbg_dump(ar->ab, ATH12K_DBG_WMI, NULL, "wow bitmask: ", bitmap->bitmaskbuf, pattern_len); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ADD_WAKE_PATTERN_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ADD_WAKE_PATTERN_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_WOW_ADD_WAKE_PATTERN_CMDID\n"); + dev_kfree_skb(skb); + } + + 
return ret; } int ath12k_wmi_wow_del_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id) @@ -10443,6 +10476,7 @@ int ath12k_wmi_wow_del_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id) struct wmi_wow_del_pattern_cmd *cmd; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10459,7 +10493,13 @@ int ath12k_wmi_wow_del_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id) ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi tlv wow del pattern vdev_id %d pattern_id %d\n", vdev_id, pattern_id); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_DEL_WAKE_PATTERN_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_DEL_WAKE_PATTERN_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_WOW_DEL_WAKE_PATTERN_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } static struct sk_buff * @@ -10595,6 +10635,7 @@ int ath12k_wmi_wow_config_pno(struct ath12k *ar, u32 vdev_id, struct wmi_pno_scan_req_arg *pno_scan) { struct sk_buff *skb; + int ret; if (pno_scan->enable) skb = ath12k_wmi_op_gen_config_pno_start(ar, vdev_id, pno_scan); @@ -10604,7 +10645,13 @@ int ath12k_wmi_wow_config_pno(struct ath12k *ar, u32 vdev_id, if (IS_ERR_OR_NULL(skb)) return -ENOMEM; - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } static void ath12k_wmi_fill_ns_offload(struct ath12k *ar, @@ -10717,6 +10764,7 @@ int ath12k_wmi_arp_ns_offload(struct ath12k *ar, void *buf_ptr; size_t len; u8 ns_cnt, ns_ext_tuples = 0; + int ret; ns_cnt = offload->ipv6_count; @@ -10752,7 +10800,13 @@ int ath12k_wmi_arp_ns_offload(struct ath12k *ar, if (ns_ext_tuples) ath12k_wmi_fill_ns_offload(ar, offload, &buf_ptr, enable, 1); - return ath12k_wmi_cmd_send(ar->wmi, skb, 
WMI_SET_ARP_NS_OFFLOAD_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_SET_ARP_NS_OFFLOAD_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_SET_ARP_NS_OFFLOAD_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_gtk_rekey_offload(struct ath12k *ar, @@ -10762,7 +10816,7 @@ int ath12k_wmi_gtk_rekey_offload(struct ath12k *ar, struct wmi_gtk_rekey_offload_cmd *cmd; struct sk_buff *skb; __le64 replay_ctr; - int len; + int ret, len; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10789,7 +10843,13 @@ int ath12k_wmi_gtk_rekey_offload(struct ath12k *ar, ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "offload gtk rekey vdev: %d %d\n", arvif->vdev_id, enable); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_GTK_OFFLOAD_CMDID offload\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_gtk_rekey_getinfo(struct ath12k *ar, @@ -10797,7 +10857,7 @@ int ath12k_wmi_gtk_rekey_getinfo(struct ath12k *ar, { struct wmi_gtk_rekey_offload_cmd *cmd; struct sk_buff *skb; - int len; + int ret, len; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10811,7 +10871,13 @@ int ath12k_wmi_gtk_rekey_getinfo(struct ath12k *ar, ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "get gtk rekey vdev_id: %d\n", arvif->vdev_id); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_GTK_OFFLOAD_CMDID getinfo\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_sta_keepalive(struct ath12k *ar, @@ -10822,6 +10888,7 @@ int ath12k_wmi_sta_keepalive(struct ath12k *ar, struct wmi_sta_keepalive_cmd *cmd; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd) + sizeof(*arp); skb = ath12k_wmi_alloc_skb(wmi->wmi_ab, len); @@ -10849,7 +10916,13 @@ int 
ath12k_wmi_sta_keepalive(struct ath12k *ar, "wmi sta keepalive vdev %d enabled %d method %d interval %d\n", arg->vdev_id, arg->enabled, arg->method, arg->interval); - return ath12k_wmi_cmd_send(wmi, skb, WMI_STA_KEEPALIVE_CMDID); + ret = ath12k_wmi_cmd_send(wmi, skb, WMI_STA_KEEPALIVE_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_STA_KEEPALIVE_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_mlo_setup(struct ath12k *ar, struct wmi_mlo_setup_arg *mlo_params)
diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 05c9c07..6ca31d4 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c
@@ -1738,7 +1738,8 @@ ath5k_tx_frame_completed(struct ath5k_hw *ah, struct sk_buff *skb, } info->status.rates[ts->ts_final_idx].count = ts->ts_final_retry; - info->status.rates[ts->ts_final_idx + 1].idx = -1; + if (ts->ts_final_idx + 1 < IEEE80211_TX_MAX_RATES) + info->status.rates[ts->ts_final_idx + 1].idx = -1; if (unlikely(ts->ts_status)) { ah->stats.ack_fail++;
diff --git a/drivers/net/wireless/broadcom/b43/xmit.c b/drivers/net/wireless/broadcom/b43/xmit.c index 7651b1b..f0b0825 100644 --- a/drivers/net/wireless/broadcom/b43/xmit.c +++ b/drivers/net/wireless/broadcom/b43/xmit.c
@@ -702,7 +702,8 @@ void b43_rx(struct b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr) * key index, but the ucode passed it slightly different. */ keyidx = b43_kidx_to_raw(dev, keyidx); - B43_WARN_ON(keyidx >= ARRAY_SIZE(dev->key)); + if (B43_WARN_ON(keyidx >= ARRAY_SIZE(dev->key))) + goto drop; if (dev->key[keyidx].algorithm != B43_SEC_ALGO_NONE) { wlhdr_len = ieee80211_hdrlen(fctl);
diff --git a/drivers/net/wireless/broadcom/b43legacy/xmit.c b/drivers/net/wireless/broadcom/b43legacy/xmit.c index efd63f4..ee199d4e 100644 --- a/drivers/net/wireless/broadcom/b43legacy/xmit.c +++ b/drivers/net/wireless/broadcom/b43legacy/xmit.c
@@ -476,7 +476,8 @@ void b43legacy_rx(struct b43legacy_wldev *dev, * key index, but the ucode passed it slightly different. */ keyidx = b43legacy_kidx_to_raw(dev, keyidx); - B43legacy_WARN_ON(keyidx >= dev->max_nr_keys); + if (B43legacy_WARN_ON(keyidx >= dev->max_nr_keys)) + goto drop; if (dev->key[keyidx].algorithm != B43legacy_SEC_ALGO_NONE) { /* Remove PROTECTED flag to mark it as decrypted. */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 30f6fcb..8fb5957 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -2476,8 +2476,9 @@ static void brcmf_sdio_bus_stop(struct device *dev) brcmf_dbg(TRACE, "Enter\n"); if (bus->watchdog_tsk) { + get_task_struct(bus->watchdog_tsk); send_sig(SIGTERM, bus->watchdog_tsk, 1); - kthread_stop(bus->watchdog_tsk); + kthread_stop_put(bus->watchdog_tsk); bus->watchdog_tsk = NULL; } @@ -4567,8 +4568,9 @@ void brcmf_sdio_remove(struct brcmf_sdio *bus) if (bus) { /* Stop watchdog task */ if (bus->watchdog_tsk) { + get_task_struct(bus->watchdog_tsk); send_sig(SIGTERM, bus->watchdog_tsk, 1); - kthread_stop(bus->watchdog_tsk); + kthread_stop_put(bus->watchdog_tsk); bus->watchdog_tsk = NULL; }
diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c index 4fae0e3..5cc0c5c 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.c +++ b/drivers/net/wireless/marvell/libertas/if_usb.c
@@ -310,6 +310,7 @@ static void if_usb_disconnect(struct usb_interface *intf) struct lbs_private *priv = cardp->priv; cardp->surprise_removed = 1; + wake_up(&cardp->fw_wq); if (priv) { lbs_stop_card(priv); @@ -633,9 +634,10 @@ static inline void process_cmdrequest(int recvlength, uint8_t *recvbuff, unsigned long flags; u8 i; - if (recvlength > LBS_CMD_BUFFER_SIZE) { + if (recvlength < MESSAGE_HEADER_LEN || + recvlength > LBS_CMD_BUFFER_SIZE) { lbs_deb_usbd(&cardp->udev->dev, - "The receive buffer is too large\n"); + "The receive buffer is invalid: %d\n", recvlength); kfree_skb(skb); return; }
diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h index 591602b..3cdf9de 100644 --- a/drivers/net/wireless/rsi/rsi_common.h +++ b/drivers/net/wireless/rsi/rsi_common.h
@@ -70,12 +70,11 @@ static inline int rsi_create_kthread(struct rsi_common *common, return 0; } -static inline int rsi_kill_thread(struct rsi_thread *handle) +static inline void rsi_kill_thread(struct rsi_thread *handle) { atomic_inc(&handle->thread_done); rsi_set_event(&handle->event); - - return kthread_stop(handle->task); + wait_for_completion(&handle->completion); } void rsi_mac80211_detach(struct rsi_hw *hw);
diff --git a/drivers/net/wireless/st/cw1200/pm.c b/drivers/net/wireless/st/cw1200/pm.c index 84eb15d..120f037 100644 --- a/drivers/net/wireless/st/cw1200/pm.c +++ b/drivers/net/wireless/st/cw1200/pm.c
@@ -264,14 +264,12 @@ int cw1200_wow_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) wiphy_err(priv->hw->wiphy, "PM request failed: %d. WoW is disabled.\n", ret); cw1200_wow_resume(hw); - mutex_unlock(&priv->conf_mutex); return -EBUSY; } /* Force resume if event is coming from the device. */ if (atomic_read(&priv->bh_rx)) { cw1200_wow_resume(hw); - mutex_unlock(&priv->conf_mutex); return -EAGAIN; }
diff --git a/drivers/net/wwan/t7xx/t7xx_modem_ops.c b/drivers/net/wwan/t7xx/t7xx_modem_ops.c index 7968e20..adb29d3 100644 --- a/drivers/net/wwan/t7xx/t7xx_modem_ops.c +++ b/drivers/net/wwan/t7xx/t7xx_modem_ops.c
@@ -457,8 +457,20 @@ static int t7xx_parse_host_rt_data(struct t7xx_fsm_ctl *ctl, struct t7xx_sys_inf offset = sizeof(struct feature_query); for (i = 0; i < FEATURE_COUNT && offset < data_length; i++) { + size_t remaining = data_length - offset; + size_t feat_data_len, feat_total; + + if (remaining < sizeof(*rt_feature)) + break; + rt_feature = data + offset; - offset += sizeof(*rt_feature) + le32_to_cpu(rt_feature->data_len); + feat_data_len = le32_to_cpu(rt_feature->data_len); + + if (feat_data_len > remaining - sizeof(*rt_feature)) + break; + + feat_total = sizeof(*rt_feature) + feat_data_len; + offset += feat_total; ft_spt_cfg = FIELD_GET(FEATURE_MSK, core->feature_set[i]); if (ft_spt_cfg != MTK_FEATURE_MUST_BE_SUPPORTED) @@ -468,8 +480,10 @@ static int t7xx_parse_host_rt_data(struct t7xx_fsm_ctl *ctl, struct t7xx_sys_inf if (ft_spt_st != MTK_FEATURE_MUST_BE_SUPPORTED) return -EINVAL; - if (i == RT_ID_MD_PORT_ENUM || i == RT_ID_AP_PORT_ENUM) - t7xx_port_enum_msg_handler(ctl->md, rt_feature->data); + if (i == RT_ID_MD_PORT_ENUM || i == RT_ID_AP_PORT_ENUM) { + t7xx_port_enum_msg_handler(ctl->md, rt_feature->data, + feat_data_len); + } } return 0;
diff --git a/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c b/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c index ae632ef..f869e4e 100644 --- a/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c +++ b/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c
@@ -117,6 +117,7 @@ static int fsm_ee_message_handler(struct t7xx_port *port, struct t7xx_fsm_ctl *c * t7xx_port_enum_msg_handler() - Parse the port enumeration message to create/remove nodes. * @md: Modem context. * @msg: Message. + * @msg_len: Length of @msg in bytes. * * Used to control create/remove device node. * @@ -124,12 +125,18 @@ static int fsm_ee_message_handler(struct t7xx_port *port, struct t7xx_fsm_ctl *c * * 0 - Success. * * -EFAULT - Message check failure. */ -int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg) +int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg, size_t msg_len) { struct device *dev = &md->t7xx_dev->pdev->dev; unsigned int version, port_count, i; struct port_msg *port_msg = msg; + if (msg_len < sizeof(*port_msg)) { + dev_err(dev, "Port enum msg too short for header: need %zu, have %zu\n", + sizeof(*port_msg), msg_len); + return -EINVAL; + } + version = FIELD_GET(PORT_MSG_VERSION, le32_to_cpu(port_msg->info)); if (version != PORT_ENUM_VER || le32_to_cpu(port_msg->head_pattern) != PORT_ENUM_HEAD_PATTERN || @@ -141,6 +148,13 @@ int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg) } port_count = FIELD_GET(PORT_MSG_PRT_CNT, le32_to_cpu(port_msg->info)); + + if (msg_len < struct_size(port_msg, data, port_count)) { + dev_err(dev, "Port enum msg too short: need %zu, have %zu\n", + struct_size(port_msg, data, port_count), msg_len); + return -EINVAL; + } + for (i = 0; i < port_count; i++) { u32 port_info = le32_to_cpu(port_msg->data[i]); unsigned int ch_id; @@ -191,7 +205,7 @@ static int control_msg_handler(struct t7xx_port *port, struct sk_buff *skb) case CTL_ID_PORT_ENUM: skb_pull(skb, sizeof(*ctrl_msg_h)); - ret = t7xx_port_enum_msg_handler(ctl->md, (struct port_msg *)skb->data); + ret = t7xx_port_enum_msg_handler(ctl->md, (struct port_msg *)skb->data, skb->len); if (!ret) ret = port_ctl_send_msg_to_md(port, CTL_ID_PORT_ENUM, 0); else
diff --git a/drivers/net/wwan/t7xx/t7xx_port_proxy.h b/drivers/net/wwan/t7xx/t7xx_port_proxy.h index f0918b3..7c3190b 100644 --- a/drivers/net/wwan/t7xx/t7xx_port_proxy.h +++ b/drivers/net/wwan/t7xx/t7xx_port_proxy.h
@@ -103,7 +103,7 @@ void t7xx_port_proxy_reset(struct port_proxy *port_prox); void t7xx_port_proxy_uninit(struct port_proxy *port_prox); int t7xx_port_proxy_init(struct t7xx_modem *md); void t7xx_port_proxy_md_status_notify(struct port_proxy *port_prox, unsigned int state); -int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg); +int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg, size_t msg_len); int t7xx_port_proxy_chl_enable_disable(struct port_proxy *port_prox, unsigned int ch_id, bool en_flag); void t7xx_port_proxy_set_cfg(struct t7xx_modem *md, enum port_cfg_id cfg_id);
diff --git a/drivers/parisc/lasi.c b/drivers/parisc/lasi.c index ef6125d..a5b80cd 100644 --- a/drivers/parisc/lasi.c +++ b/drivers/parisc/lasi.c
@@ -193,8 +193,7 @@ static int __init lasi_init_chip(struct parisc_device *dev) ret = request_irq(lasi->gsc_irq.irq, gsc_asic_intr, 0, "lasi", lasi); if (ret < 0) { - kfree(lasi); - return ret; + goto err_free; } /* enable IRQ's for devices below LASI */ @@ -203,8 +202,7 @@ static int __init lasi_init_chip(struct parisc_device *dev) /* Done init'ing, register this driver */ ret = gsc_common_setup(dev, lasi); if (ret) { - kfree(lasi); - return ret; + goto err_irq; } gsc_fixup_irqs(dev, lasi, lasi_choose_irq); @@ -214,6 +212,12 @@ static int __init lasi_init_chip(struct parisc_device *dev) SYS_OFF_PRIO_DEFAULT, lasi_power_off, lasi); return ret; + +err_irq: + free_irq(lasi->gsc_irq.irq, lasi); +err_free: + kfree(lasi); + return ret; } static struct parisc_device_id lasi_tbl[] __initdata = {
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index b299fcc..016c9d5 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c
@@ -543,10 +543,8 @@ static void __init register_led_regions(void) static int __init startup_leds(void) { - if (platform_device_register(&platform_leds)) { - pr_info("LED: failed to register LEDs\n"); - platform_device_put(&platform_leds); - } + if (platform_device_register(&platform_leds)) + printk(KERN_INFO "LED: failed to register LEDs\n"); register_led_regions(); return 0; }
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index d10ece0..e3f5900 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c
@@ -179,6 +179,11 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv, return NULL; } +static void _pci_free_device(struct device *dev) +{ + kfree(to_pci_dev(dev)); +} + /** * new_id_store - sysfs frontend to pci_add_dynid() * @driver: target device driver @@ -214,11 +219,13 @@ static ssize_t new_id_store(struct device_driver *driver, const char *buf, pdev->subsystem_vendor = subvendor; pdev->subsystem_device = subdevice; pdev->class = class; + pdev->dev.release = _pci_free_device; + device_initialize(&pdev->dev); if (pci_match_device(pdrv, pdev)) retval = -EEXIST; - kfree(pdev); + put_device(&pdev->dev); if (retval) return retval;
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 8f7cfcc..d342666 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c
@@ -5607,13 +5607,14 @@ static int pci_try_reset_bus(struct pci_bus *bus) * reset for affected devices * * This function will first try to reset the slots on this bus if the method is - * available. If slot reset fails or is not available, this will fall back to a + * available. If slot reset is not available, this will fall back to a * secondary bus reset. */ static int pci_reset_bridge(struct pci_dev *bridge, bool restore) { struct pci_bus *bus = bridge->subordinate; struct pci_slot *slot; + int ret = 0; if (!bus) return -ENOTTY; @@ -5627,19 +5628,17 @@ static int pci_reset_bridge(struct pci_dev *bridge, bool restore) goto bus_reset; list_for_each_entry(slot, &bus->slots, list) { - int ret; - if (restore) ret = pci_try_reset_slot(slot); else ret = pci_slot_reset(slot, PCI_RESET_DO_RESET); if (ret) - goto bus_reset; + break; } mutex_unlock(&pci_slot_mutex); - return 0; + return ret; bus_reset: mutex_unlock(&pci_slot_mutex);
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index fbc05cd..991d3ed 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c
@@ -102,6 +102,7 @@ static void pci_std_update_resource(struct pci_dev *dev, int resno) } pci_write_config_dword(dev, reg, new); + dev->saved_config_space[reg / 4] = new; pci_read_config_dword(dev, reg, &check); if ((new ^ check) & mask) { @@ -112,6 +113,7 @@ static void pci_std_update_resource(struct pci_dev *dev, int resno) if (res->flags & IORESOURCE_MEM_64) { new = region.start >> 16 >> 16; pci_write_config_dword(dev, reg + 4, new); + dev->saved_config_space[(reg + 4) / 4] = new; pci_read_config_dword(dev, reg + 4, &check); if (check != new) { pci_err(dev, "%s: error updating (high %#010x != %#010x)\n",
diff --git a/drivers/platform/chrome/cros_typec_altmode.c b/drivers/platform/chrome/cros_typec_altmode.c index 557340b..66c546b 100644 --- a/drivers/platform/chrome/cros_typec_altmode.c +++ b/drivers/platform/chrome/cros_typec_altmode.c
@@ -359,6 +359,7 @@ cros_typec_register_thunderbolt(struct cros_typec_port *port, } INIT_WORK(&adata->work, cros_typec_altmode_work); + mutex_init(&adata->lock); adata->alt = alt; adata->port = port; adata->ap_mode_entry = true;
diff --git a/drivers/platform/wmi/core.c b/drivers/platform/wmi/core.c index 7aa40da..5a2ffcb 100644 --- a/drivers/platform/wmi/core.c +++ b/drivers/platform/wmi/core.c
@@ -411,6 +411,9 @@ int wmidev_invoke_method(struct wmi_device *wdev, u8 instance, u32 method_id, obj = aout.pointer; if (!obj) { + if (min_size != 0) + return -ENOMSG; + out->length = 0; out->data = ZERO_SIZE_PTR;
diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index d1cc6e7..24c1512 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c
@@ -243,7 +243,7 @@ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8D41") }, - .driver_data = (void *)&victus_s_thermal_params, + .driver_data = (void *)&omen_v1_no_ec_thermal_params, }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8D87") },
diff --git a/drivers/platform/x86/lenovo/wmi-other.c b/drivers/platform/x86/lenovo/wmi-other.c index 6040f45..6c2febe 100644 --- a/drivers/platform/x86/lenovo/wmi-other.c +++ b/drivers/platform/x86/lenovo/wmi-other.c
@@ -349,6 +349,8 @@ static int lwmi_om_hwmon_write(struct device *dev, enum hwmon_sensor_types type, */ if (!relax_fan_constraint) raw = val / LWMI_FAN_DIV * LWMI_FAN_DIV; + else + raw = val; err = lwmi_om_fan_get_set(priv, channel, &raw, true); if (err)
diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c index 4d32fc6..71e930e 100644 --- a/drivers/pmdomain/core.c +++ b/drivers/pmdomain/core.c
@@ -3089,6 +3089,7 @@ static const struct bus_type genpd_bus_type = { static void genpd_dev_pm_detach(struct device *dev, bool power_off) { struct generic_pm_domain *pd; + bool is_virt_dev; unsigned int i; int ret = 0; @@ -3098,6 +3099,13 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off) dev_dbg(dev, "removing from PM domain %s\n", pd->name); + /* Check if the device was created by genpd at attach. */ + is_virt_dev = dev->bus == &genpd_bus_type; + + /* Disable runtime PM if we enabled it at attach. */ + if (is_virt_dev) + pm_runtime_disable(dev); + /* Drop the default performance state */ if (dev_gpd_data(dev)->default_pstate) { dev_pm_genpd_set_performance_state(dev, 0); @@ -3123,7 +3131,7 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off) genpd_queue_power_off_work(pd); /* Unregister the device if it was created by genpd. */ - if (dev->bus == &genpd_bus_type) + if (is_virt_dev) device_unregister(dev); }
diff --git a/drivers/pmdomain/mediatek/mtk-pm-domains.c b/drivers/pmdomain/mediatek/mtk-pm-domains.c index d2b8d03..e1cfd42 100644 --- a/drivers/pmdomain/mediatek/mtk-pm-domains.c +++ b/drivers/pmdomain/mediatek/mtk-pm-domains.c
@@ -1015,6 +1015,7 @@ static int scpsys_get_bus_protection_legacy(struct device *dev, struct scpsys *s struct device_node *node, *smi_np; int num_regmaps = 0, i, j; struct regmap *regmap[3]; + int ret = 0; /* * Legacy code retrieves a maximum of three bus protection handles: @@ -1065,11 +1066,14 @@ static int scpsys_get_bus_protection_legacy(struct device *dev, struct scpsys *s if (node) { regmap[2] = syscon_regmap_lookup_by_phandle(node, "mediatek,infracfg-nao"); num_regmaps++; - of_node_put(node); - if (IS_ERR(regmap[2])) - return dev_err_probe(dev, PTR_ERR(regmap[2]), + if (IS_ERR(regmap[2])) { + ret = dev_err_probe(dev, PTR_ERR(regmap[2]), "%pOF: failed to get infracfg regmap\n", node); + of_node_put(node); + return ret; + } + of_node_put(node); } else { regmap[2] = NULL; }
diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c index 6e4cb28..0dcb50b 100644 --- a/drivers/regulator/qcom-rpmh-regulator.c +++ b/drivers/regulator/qcom-rpmh-regulator.c
@@ -1512,7 +1512,7 @@ static const struct rpmh_vreg_init_data pmh0101_vreg_data[] = { RPMH_VREG("ldo13", LDO, 13, &pmic5_pldo530_mvp150, "vdd-l2-l13-l14"), RPMH_VREG("ldo14", LDO, 14, &pmic5_pldo530_mvp150, "vdd-l2-l13-l14"), RPMH_VREG("ldo15", LDO, 15, &pmic5_nldo530, "vdd-l15"), - RPMH_VREG("ldo16", LDO, 15, &pmic5_pldo530_mvp600, "vdd-l5-l16"), + RPMH_VREG("ldo16", LDO, 16, &pmic5_pldo530_mvp600, "vdd-l5-l16"), RPMH_VREG("ldo17", LDO, 17, &pmic5_pldo515_mv, "vdd-l17"), RPMH_VREG("ldo18", LDO, 18, &pmic5_nldo530, "vdd-l18"), RPMH_VREG("bob1", BOB, 1, &pmic5_bob, "vdd-bob1"),
diff --git a/drivers/reset/reset-eyeq.c b/drivers/reset/reset-eyeq.c index 791b728..1a38579 100644 --- a/drivers/reset/reset-eyeq.c +++ b/drivers/reset/reset-eyeq.c
@@ -422,13 +422,6 @@ static int eqr_of_xlate_twocells(struct reset_controller_dev *rcdev, return eqr_of_xlate_internal(rcdev, reset_spec->args[0], reset_spec->args[1]); } -static void eqr_of_node_put(void *_dev) -{ - struct device *dev = _dev; - - of_node_put(dev->of_node); -} - static int eqr_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { @@ -439,21 +432,8 @@ static int eqr_probe(struct auxiliary_device *adev, int ret; /* - * We are an auxiliary device of clk-eyeq. We do not have an OF node by - * default; let's reuse our parent's OF node. - */ - WARN_ON(dev->of_node); - device_set_of_node_from_dev(dev, dev->parent); - if (!dev->of_node) - return -ENODEV; - - ret = devm_add_action_or_reset(dev, eqr_of_node_put, dev); - if (ret) - return ret; - - /* - * Using our newfound OF node, we can get match data. We cannot use - * device_get_match_data() because it does not match reused OF nodes. + * Get match data. We cannot use device_get_match_data() because it does + * not accept reused OF nodes; see device_set_of_node_from_dev(). */ match = of_match_node(dev->driver->of_match_table, dev->of_node); if (!match || !match->data)
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index efb08b9..80ab0ff 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -37,7 +37,7 @@ #define TPGS_MODE_EXPLICIT 0x2 #define ALUA_RTPG_SIZE 128 -#define ALUA_FAILOVER_TIMEOUT 60 +#define ALUA_FAILOVER_TIMEOUT 255 /* max 255 (8-bit value) */ #define ALUA_FAILOVER_RETRIES 5 #define ALUA_RTPG_DELAY_MSECS 5 #define ALUA_RTPG_RETRY_DELAY 2
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index fda07b1..14d563e 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -1491,7 +1491,7 @@ static void prep_ata_v3_hw(struct hisi_hba *hisi_hba, phy_id = device->phy->identify.phy_identifier; hdr->dw0 |= cpu_to_le32((1U << phy_id) << CMD_HDR_PHY_ID_OFF); - hdr->dw0 |= CMD_HDR_FORCE_PHY_MSK; + hdr->dw0 |= cpu_to_le32(CMD_HDR_FORCE_PHY_MSK); hdr->dw0 |= cpu_to_le32(4U << CMD_HDR_CMD_OFF); }
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 6ff7885..12caffe 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -2738,8 +2738,20 @@ scsih_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim) pcie_device->enclosure_level, pcie_device->connector_name); + /* + * The HBA firmware passes the NVMe drive's MDTS + * (Maximum Data Transfer Size) up to the driver. However, + * the driver hardcodes a 4K buffer size for the PRP list, + * accommodating at most 512 entries. This strictly limits + * the maximum supported NVMe I/O transfer to 2 MiB. + * + * Cap max_hw_sectors to the smaller of the drive's reported + * MDTS or the 2 MiB driver limit to prevent kernel oopses. + */ + lim->max_hw_sectors = SZ_2M >> SECTOR_SHIFT; if (pcie_device->nvme_mdts) - lim->max_hw_sectors = pcie_device->nvme_mdts / 512; + lim->max_hw_sectors = min(lim->max_hw_sectors, + pcie_device->nvme_mdts >> SECTOR_SHIFT); pcie_device_put(pcie_device); spin_unlock_irqrestore(&ioc->pcie_device_lock, flags);
diff --git a/drivers/scsi/pmcraid.h b/drivers/scsi/pmcraid.h index 9f59930..cd059b7 100644 --- a/drivers/scsi/pmcraid.h +++ b/drivers/scsi/pmcraid.h
@@ -657,7 +657,7 @@ struct pmcraid_hostrcb { */ struct pmcraid_instance { /* Array of allowed-to-be-exposed resources, initialized from - * Configutation Table, later updated with CCNs + * Configuration Table, later updated with CCNs */ struct pmcraid_resource_entry *res_entries;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 2b4b2a1..74cd4e8 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c
@@ -1801,7 +1801,7 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) } res = blk_rq_map_user_io(rq, md, hp->dxferp, hp->dxfer_len, - GFP_ATOMIC, iov_count, iov_count, 1, rw); + GFP_KERNEL, iov_count, iov_count, 1, rw); if (!res) { srp->bio = rq->bio;
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index b4ed991..2026ac6 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -9427,6 +9427,7 @@ static void pqi_shutdown(struct pci_dev *pci_dev) pqi_crash_if_pending_command(ctrl_info); pqi_reset(ctrl_info); + pqi_ctrl_unblock_device_reset(ctrl_info); } static void pqi_process_lockup_action_param(void)
diff --git a/drivers/spi/spi-ch341.c b/drivers/spi/spi-ch341.c index 3eaa8f1..6448a44 100644 --- a/drivers/spi/spi-ch341.c +++ b/drivers/spi/spi-ch341.c
@@ -250,5 +250,5 @@ static struct usb_driver ch341a_usb_driver = { module_usb_driver(ch341a_usb_driver); MODULE_AUTHOR("Johannes Thumshirn <jth@kernel.org>"); -MODULE_DESCRIPTION("QiHeng Electronics ch341 USB2SPI"); +MODULE_DESCRIPTION("Nanjing Qinheng Microelectronics CH341 USB2SPI driver"); MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index e5c907c..480d1e8 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c
@@ -1382,9 +1382,7 @@ static int spi_imx_setupxfer(struct spi_device *spi, spi_imx->target_burst = t->len; } - spi_imx->devtype_data->prepare_transfer(spi_imx, spi, t); - - return 0; + return spi_imx->devtype_data->prepare_transfer(spi_imx, spi, t); } static void spi_imx_sdma_exit(struct spi_imx_data *spi_imx) @@ -1709,6 +1707,7 @@ static int spi_imx_dma_data_prepare(struct spi_imx_data *spi_imx, kfree(spi_imx->dma_data[0].dma_tx_buf); kfree(spi_imx->dma_data[0].dma_rx_buf); kfree(spi_imx->dma_data); + return ret; } } @@ -1836,7 +1835,7 @@ static void spi_imx_dma_max_wml_find(struct spi_imx_data *spi_imx, unsigned int i; for (i = spi_imx->devtype_data->fifo_size / 2; i > 0; i--) { - if (!dma_data->dma_len % (i * bytes_per_word)) + if (!(dma_data->dma_len % (i * bytes_per_word))) break; } /* Use 1 as wml in case no available burst length got */
diff --git a/drivers/spi/spi-microchip-core-qspi.c b/drivers/spi/spi-microchip-core-qspi.c index eab059f..4dee0fe 100644 --- a/drivers/spi/spi-microchip-core-qspi.c +++ b/drivers/spi/spi-microchip-core-qspi.c
@@ -74,6 +74,13 @@ #define STATUS_FLAGSX4 BIT(8) #define STATUS_MASK GENMASK(8, 0) +/* + * QSPI Direct Access register defines + */ +#define DIRECT_ACCESS_EN_SSEL BIT(0) +#define DIRECT_ACCESS_OP_SSEL BIT(1) +#define DIRECT_ACCESS_OP_SSEL_SHIFT 1 + #define BYTESUPPER_MASK GENMASK(31, 16) #define BYTESLOWER_MASK GENMASK(15, 0) @@ -158,7 +165,39 @@ static int mchp_coreqspi_set_mode(struct mchp_coreqspi *qspi, const struct spi_m return 0; } -static inline void mchp_coreqspi_read_op(struct mchp_coreqspi *qspi) +static void mchp_coreqspi_set_cs(struct spi_device *spi, bool enable) +{ + struct mchp_coreqspi *qspi = spi_controller_get_devdata(spi->controller); + u32 val; + + val = readl(qspi->regs + REG_DIRECT_ACCESS); + + val &= ~DIRECT_ACCESS_OP_SSEL; + val |= !enable << DIRECT_ACCESS_OP_SSEL_SHIFT; + + writel(val, qspi->regs + REG_DIRECT_ACCESS); +} + +static int mchp_coreqspi_setup(struct spi_device *spi) +{ + struct mchp_coreqspi *qspi = spi_controller_get_devdata(spi->controller); + u32 val; + + /* + * Active low devices need to be specifically set to their inactive + * states during probe. 
+ */ + if (spi->mode & SPI_CS_HIGH) + return 0; + + val = readl(qspi->regs + REG_DIRECT_ACCESS); + val |= DIRECT_ACCESS_OP_SSEL; + writel(val, qspi->regs + REG_DIRECT_ACCESS); + + return 0; +} + +static void mchp_coreqspi_read_op(struct mchp_coreqspi *qspi) { u32 control, data; @@ -194,7 +233,7 @@ static inline void mchp_coreqspi_read_op(struct mchp_coreqspi *qspi) } } -static inline void mchp_coreqspi_write_op(struct mchp_coreqspi *qspi) +static void mchp_coreqspi_write_op(struct mchp_coreqspi *qspi) { u32 control, data; @@ -222,7 +261,7 @@ static inline void mchp_coreqspi_write_op(struct mchp_coreqspi *qspi) } } -static inline void mchp_coreqspi_write_read_op(struct mchp_coreqspi *qspi) +static void mchp_coreqspi_write_read_op(struct mchp_coreqspi *qspi) { u32 control, data; @@ -380,20 +419,7 @@ static int mchp_coreqspi_setup_clock(struct mchp_coreqspi *qspi, struct spi_devi return 0; } -static int mchp_coreqspi_setup_op(struct spi_device *spi_dev) -{ - struct spi_controller *ctlr = spi_dev->controller; - struct mchp_coreqspi *qspi = spi_controller_get_devdata(ctlr); - u32 control = readl_relaxed(qspi->regs + REG_CONTROL); - - control |= (CONTROL_MASTER | CONTROL_ENABLE); - control &= ~CONTROL_CLKIDLE; - writel_relaxed(control, qspi->regs + REG_CONTROL); - - return 0; -} - -static inline void mchp_coreqspi_config_op(struct mchp_coreqspi *qspi, const struct spi_mem_op *op) +static void mchp_coreqspi_config_op(struct mchp_coreqspi *qspi, const struct spi_mem_op *op) { u32 idle_cycles = 0; int total_bytes, cmd_bytes, frames, ctrl; @@ -483,6 +509,7 @@ static int mchp_coreqspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *o reinit_completion(&qspi->data_completion); mchp_coreqspi_config_op(qspi, op); + mchp_coreqspi_set_cs(mem->spi, true); if (op->cmd.opcode) { qspi->txbuf = &opcode; qspi->rxbuf = NULL; @@ -523,6 +550,7 @@ static int mchp_coreqspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *o err = -ETIMEDOUT; error: + mchp_coreqspi_set_cs(mem->spi, 
false); mutex_unlock(&qspi->op_lock); mchp_coreqspi_disable_ints(qspi); @@ -662,18 +690,28 @@ static int mchp_coreqspi_transfer_one(struct spi_controller *ctlr, struct spi_de struct spi_transfer *t) { struct mchp_coreqspi *qspi = spi_controller_get_devdata(ctlr); + bool dual_quad = false; qspi->tx_len = t->len; + if (t->tx_nbits == SPI_NBITS_QUAD || t->rx_nbits == SPI_NBITS_QUAD || + t->tx_nbits == SPI_NBITS_DUAL || + t->rx_nbits == SPI_NBITS_DUAL) + dual_quad = true; + if (t->tx_buf) qspi->txbuf = (u8 *)t->tx_buf; if (!t->rx_buf) { mchp_coreqspi_write_op(qspi); - } else { + } else if (!dual_quad) { qspi->rxbuf = (u8 *)t->rx_buf; qspi->rx_len = t->len; mchp_coreqspi_write_read_op(qspi); + } else { + qspi->rxbuf = (u8 *)t->rx_buf; + qspi->rx_len = t->len; + mchp_coreqspi_read_op(qspi); } return 0; @@ -686,6 +724,7 @@ static int mchp_coreqspi_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; int ret; + u32 num_cs, val; ctlr = devm_spi_alloc_host(&pdev->dev, sizeof(*qspi)); if (!ctlr) @@ -718,10 +757,18 @@ static int mchp_coreqspi_probe(struct platform_device *pdev) return ret; } + /* + * The IP core only has a single CS, any more have to be provided via + * gpios + */ + if (of_property_read_u32(pdev->dev.of_node, "num-cs", &num_cs)) + num_cs = 1; + + ctlr->num_chipselect = num_cs; + ctlr->bits_per_word_mask = SPI_BPW_MASK(8); ctlr->mem_ops = &mchp_coreqspi_mem_ops; ctlr->mem_caps = &mchp_coreqspi_mem_caps; - ctlr->setup = mchp_coreqspi_setup_op; ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_RX_DUAL | SPI_RX_QUAD | SPI_TX_DUAL | SPI_TX_QUAD; ctlr->dev.of_node = np; @@ -729,9 +776,21 @@ static int mchp_coreqspi_probe(struct platform_device *pdev) ctlr->prepare_message = mchp_coreqspi_prepare_message; ctlr->unprepare_message = mchp_coreqspi_unprepare_message; ctlr->transfer_one = mchp_coreqspi_transfer_one; - ctlr->num_chipselect = 2; + ctlr->setup = mchp_coreqspi_setup; + ctlr->set_cs = mchp_coreqspi_set_cs; 
ctlr->use_gpio_descriptors = true; + val = readl_relaxed(qspi->regs + REG_CONTROL); + val |= (CONTROL_MASTER | CONTROL_ENABLE); + writel_relaxed(val, qspi->regs + REG_CONTROL); + + /* + * Put cs into software controlled mode + */ + val = readl_relaxed(qspi->regs + REG_DIRECT_ACCESS); + val |= DIRECT_ACCESS_EN_SSEL; + writel(val, qspi->regs + REG_DIRECT_ACCESS); + ret = spi_register_controller(ctlr); if (ret) return dev_err_probe(&pdev->dev, ret,
diff --git a/drivers/staging/rtl8723bs/os_dep/osdep_service.c b/drivers/staging/rtl8723bs/os_dep/osdep_service.c index 7959dae..4cfdf7c 100644 --- a/drivers/staging/rtl8723bs/os_dep/osdep_service.c +++ b/drivers/staging/rtl8723bs/os_dep/osdep_service.c
@@ -194,7 +194,8 @@ struct rtw_cbuf *rtw_cbuf_alloc(u32 size) struct rtw_cbuf *cbuf; cbuf = kzalloc_flex(*cbuf, bufs, size); - cbuf->size = size; + if (cbuf) + cbuf->size = size; return cbuf; }
diff --git a/drivers/staging/vme_user/vme_fake.c b/drivers/staging/vme_user/vme_fake.c index be4ad47..8abaa31 100644 --- a/drivers/staging/vme_user/vme_fake.c +++ b/drivers/staging/vme_user/vme_fake.c
@@ -1230,6 +1230,8 @@ static int __init fake_init(void) err_driver: kfree(fake_bridge); err_struct: + root_device_unregister(vme_root); + return retval; }
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index e80449f..cb832fd 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c
@@ -995,6 +995,7 @@ int iscsit_setup_scsi_cmd(struct iscsit_conn *conn, struct iscsit_cmd *cmd, int data_direction, payload_length; struct iscsi_ecdb_ahdr *ecdb_ahdr; struct iscsi_scsi_req *hdr; + u16 ahslength, cdb_length; int iscsi_task_attr; unsigned char *cdb; int sam_task_attr; @@ -1108,14 +1109,27 @@ int iscsit_setup_scsi_cmd(struct iscsit_conn *conn, struct iscsit_cmd *cmd, ISCSI_REASON_CMD_NOT_SUPPORTED, buf); } - cdb = kmalloc(be16_to_cpu(ecdb_ahdr->ahslength) + 15, - GFP_KERNEL); + ahslength = be16_to_cpu(ecdb_ahdr->ahslength); + if (!ahslength) { + pr_err("Extended CDB AHS with zero length, protocol error.\n"); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_PROTOCOL_ERROR, buf); + } + if (ahslength > (hdr->hlength * 4) - 3) { + pr_err("Extended CDB AHS length %u exceeds available PDU buffer.\n", + ahslength); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_PROTOCOL_ERROR, buf); + } + + cdb_length = ahslength - 1 + ISCSI_CDB_SIZE; + + cdb = kmalloc(cdb_length, GFP_KERNEL); if (cdb == NULL) return iscsit_add_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); memcpy(cdb, hdr->cdb, ISCSI_CDB_SIZE); - memcpy(cdb + ISCSI_CDB_SIZE, ecdb_ahdr->ecdb, - be16_to_cpu(ecdb_ahdr->ahslength) - 1); + memcpy(cdb + ISCSI_CDB_SIZE, ecdb_ahdr->ecdb, cdb_length - ISCSI_CDB_SIZE); } data_direction = (hdr->flags & ISCSI_FLAG_CMD_WRITE) ? DMA_TO_DEVICE :
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index d93773b..2b19a95 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c
@@ -3249,7 +3249,7 @@ static ssize_t target_tg_pt_gp_members_show(struct config_item *item, config_item_name(&lun->lun_group.cg_item)); cur_len++; /* Extra byte for NULL terminator */ - if ((cur_len + len) > PAGE_SIZE) { + if (cur_len > TG_PT_GROUP_NAME_BUF || (cur_len + len) > PAGE_SIZE) { pr_warn("Ran out of lu_gp_show_attr" "_members buffer\n"); break;
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 4805e40..c3f0895 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c
@@ -9259,6 +9259,30 @@ static void ufshcd_config_mcq(struct ufs_hba *hba) hba->nutrs); } +/** + * ufshcd_get_op_mode - get UFS operating mode. + * @hba: per-adapter instance + * + * Use the PA_PWRMODE value to represent the operating mode of UFS. + * + */ +static enum ufs_op_mode ufshcd_get_op_mode(struct ufs_hba *hba) +{ + u32 mode; + u8 rx_mode; + u8 tx_mode; + + ufshcd_dme_get(hba, UIC_ARG_MIB(PA_PWRMODE), &mode); + rx_mode = (mode >> PWRMODE_RX_OFFSET) & PWRMODE_MASK; + tx_mode = mode & PWRMODE_MASK; + + if ((rx_mode == SLOW_MODE || rx_mode == SLOWAUTO_MODE) && + (tx_mode == SLOW_MODE || tx_mode == SLOWAUTO_MODE)) + return LS_MODE; + + return HS_MODE; +} + static int ufshcd_post_device_init(struct ufs_hba *hba) { int ret; @@ -9281,11 +9305,13 @@ static int ufshcd_post_device_init(struct ufs_hba *hba) return 0; /* - * Set the right value to bRefClkFreq before attempting to + * Set the right value to bRefClkFreq in LS_MODE before attempting to * switch to HS gears. */ - if (hba->dev_ref_clk_freq != REF_CLK_FREQ_INVAL) + if (ufshcd_get_op_mode(hba) == LS_MODE && + hba->dev_ref_clk_freq != REF_CLK_FREQ_INVAL) ufshcd_set_dev_ref_clk(hba); + /* Gear up to HS gear. */ ret = ufshcd_config_pwr_mode(hba, &hba->max_pwr_info.info, UFSHCD_PMC_POLICY_DONT_FORCE);
diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 669b9e6..7464147 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c
@@ -1178,7 +1178,7 @@ static int usblp_probe(struct usb_interface *intf, } /* Allocate buffer for printer status */ - usblp->statusbuf = kmalloc(STATUS_BUF_SIZE, GFP_KERNEL); + usblp->statusbuf = kzalloc(STATUS_BUF_SIZE, GFP_KERNEL); if (!usblp->statusbuf) { retval = -ENOMEM; goto abort; @@ -1377,6 +1377,7 @@ static int usblp_cache_device_id_string(struct usblp *usblp) { int err, length; + memset(usblp->device_id_string, 0, USBLP_DEVICE_ID_SIZE); err = usblp_get_id(usblp, 0, usblp->device_id_string, USBLP_DEVICE_ID_SIZE - 1); if (err < 0) { dev_dbg(&usblp->intf->dev,
diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index b34fb65..9b69148 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c
@@ -286,12 +286,15 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi) ACPI_COMPANION_SET(&ulpi->dev, ACPI_COMPANION(dev)); ret = ulpi_of_register(ulpi); - if (ret) + if (ret) { + kfree(ulpi); return ret; + } ret = ulpi_read_id(ulpi); if (ret) { of_node_put(ulpi->dev.of_node); + kfree(ulpi); return ret; }
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 58899b1..6521389 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c
@@ -1359,12 +1359,6 @@ int dwc3_core_init(struct dwc3 *dwc) hw_mode = DWC3_GHWPARAMS0_MODE(dwc->hwparams.hwparams0); - /* - * Write Linux Version Code to our GUID register so it's easy to figure - * out which kernel version a bug was found. - */ - dwc3_writel(dwc, DWC3_GUID, LINUX_VERSION_CODE); - ret = dwc3_phy_setup(dwc); if (ret) return ret; @@ -1398,6 +1392,12 @@ int dwc3_core_init(struct dwc3 *dwc) if (ret) goto err_exit_phy; + /* + * Write Linux Version Code to our GUID register so it's easy to figure + * out which kernel version a bug was found. + */ + dwc3_writel(dwc, DWC3_GUID, LINUX_VERSION_CODE); + dwc3_core_setup_global_control(dwc); dwc3_core_num_eps(dwc);
diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index 91139ae..f3ca79c 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c
@@ -733,8 +733,6 @@ static void dma_channel_claim(struct omap_ep *ep, unsigned channel) if (status == 0) { omap_writew(reg, UDC_TXDMA_CFG); /* EMIFF or SDRC */ - omap_set_dma_src_burst_mode(ep->lch, - OMAP_DMA_DATA_BURST_4); omap_set_dma_src_data_pack(ep->lch, 1); /* TIPB */ omap_set_dma_dest_params(ep->lch, @@ -756,8 +754,6 @@ static void dma_channel_claim(struct omap_ep *ep, unsigned channel) UDC_DATA_DMA, 0, 0); /* EMIFF or SDRC */ - omap_set_dma_dest_burst_mode(ep->lch, - OMAP_DMA_DATA_BURST_4); omap_set_dma_dest_data_pack(ep->lch, 1); } }
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index c714618..42e4cec 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c
@@ -1513,7 +1513,11 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1231, 0xff), /* Telit LE910Cx (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x1250, 0xff, 0x00, 0x00) }, /* Telit LE910Cx (rmnet) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1251, 0xff) }, /* Telit LE910Cx (RNDIS) */ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1252, 0xff) }, /* Telit LE910Cx (MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1253, 0xff) }, /* Telit LE910Cx (ECM) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1254, 0xff) }, /* Telit LE910Cx */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1255, 0xff) }, /* Telit LE910Cx */ { USB_DEVICE(TELIT_VENDOR_ID, 0x1260), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x1261),
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index dfbb94d..55fee96 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -732,9 +732,14 @@ static const char * const pd_rev[] = { (tcpm_cc_is_source((port)->cc2) && \ !tcpm_cc_is_source((port)->cc1))) +#define tcpm_port_is_debug_source(port) \ + (tcpm_cc_is_source((port)->cc1) && tcpm_cc_is_source((port)->cc2)) + +#define tcpm_port_is_debug_sink(port) \ + (tcpm_cc_is_sink((port)->cc1) && tcpm_cc_is_sink((port)->cc2)) + #define tcpm_port_is_debug(port) \ - ((tcpm_cc_is_source((port)->cc1) && tcpm_cc_is_source((port)->cc2)) || \ - (tcpm_cc_is_sink((port)->cc1) && tcpm_cc_is_sink((port)->cc2))) + (tcpm_port_is_debug_source(port) || tcpm_port_is_debug_sink(port)) #define tcpm_port_is_audio(port) \ (tcpm_cc_is_audio((port)->cc1) && tcpm_cc_is_audio((port)->cc2)) @@ -5176,7 +5181,7 @@ static void run_state_machine(struct tcpm_port *port) tcpm_set_state(port, SNK_UNATTACHED, PD_T_DRP_SNK); break; case SRC_ATTACH_WAIT: - if (tcpm_port_is_debug(port)) + if (tcpm_port_is_debug_source(port)) tcpm_set_state(port, DEBUG_ACC_ATTACHED, port->timings.cc_debounce_time); else if (tcpm_port_is_audio(port)) @@ -5434,7 +5439,7 @@ static void run_state_machine(struct tcpm_port *port) tcpm_set_state(port, SRC_UNATTACHED, PD_T_DRP_SRC); break; case SNK_ATTACH_WAIT: - if (tcpm_port_is_debug(port)) + if (tcpm_port_is_debug_sink(port)) tcpm_set_state(port, DEBUG_ACC_ATTACHED, PD_T_CC_DEBOUNCE); else if (tcpm_port_is_audio(port)) @@ -5454,7 +5459,7 @@ static void run_state_machine(struct tcpm_port *port) if (tcpm_port_is_disconnected(port)) tcpm_set_state(port, SNK_UNATTACHED, PD_T_PD_DEBOUNCE); - else if (tcpm_port_is_debug(port)) + else if (tcpm_port_is_debug_sink(port)) tcpm_set_state(port, DEBUG_ACC_ATTACHED, PD_T_CC_DEBOUNCE); else if (tcpm_port_is_audio(port)) @@ -5935,6 +5940,8 @@ static void run_state_machine(struct tcpm_port *port) /* remove existing capabilities */ tcpm_partner_source_caps_reset(port); tcpm_pd_send_control(port, PD_CTRL_ACCEPT, TCPC_TX_SOP); + port->vdm_sm_running = false; + port->explicit_contract = false; tcpm_ams_finish(port); if 
(port->pwr_role == TYPEC_SOURCE) { port->upcoming_state = SRC_SEND_CAPABILITIES; @@ -6360,10 +6367,10 @@ static void _tcpm_cc_change(struct tcpm_port *port, enum typec_cc_status cc1, switch (port->state) { case TOGGLING: - if (tcpm_port_is_debug(port) || tcpm_port_is_audio(port) || + if (tcpm_port_is_debug_source(port) || tcpm_port_is_audio(port) || tcpm_port_is_source(port)) tcpm_set_state(port, SRC_ATTACH_WAIT, 0); - else if (tcpm_port_is_sink(port)) + else if (tcpm_port_is_debug_sink(port) || tcpm_port_is_sink(port)) tcpm_set_state(port, SNK_ATTACH_WAIT, 0); break; case CHECK_CONTAMINANT: @@ -6371,9 +6378,11 @@ static void _tcpm_cc_change(struct tcpm_port *port, enum typec_cc_status cc1, break; case SRC_UNATTACHED: case ACC_UNATTACHED: - if (tcpm_port_is_debug(port) || tcpm_port_is_audio(port) || + if (tcpm_port_is_debug_source(port) || tcpm_port_is_audio(port) || tcpm_port_is_source(port)) tcpm_set_state(port, SRC_ATTACH_WAIT, 0); + else if (tcpm_port_is_debug_sink(port)) + tcpm_set_state(port, SNK_ATTACH_WAIT, 0); break; case SRC_ATTACH_WAIT: if (tcpm_port_is_disconnected(port) || @@ -6395,7 +6404,7 @@ static void _tcpm_cc_change(struct tcpm_port *port, enum typec_cc_status cc1, } break; case SNK_UNATTACHED: - if (tcpm_port_is_debug(port) || tcpm_port_is_audio(port) || + if (tcpm_port_is_debug_sink(port) || tcpm_port_is_audio(port) || tcpm_port_is_sink(port)) tcpm_set_state(port, SNK_ATTACH_WAIT, 0); break;
diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index a12dd25a..fd00b86 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c
@@ -14,7 +14,6 @@ #include <linux/export.h> #include <linux/string.h> #include <linux/mm.h> -#include <linux/module.h> #include <linux/vmalloc.h> #include <linux/delay.h> #include <linux/interrupt.h>
diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c index c341d76..fdbb867 100644 --- a/drivers/video/fbdev/udlfb.c +++ b/drivers/video/fbdev/udlfb.c
@@ -321,12 +321,32 @@ static int dlfb_set_video_mode(struct dlfb_data *dlfb, return retval; } +static void dlfb_vm_open(struct vm_area_struct *vma) +{ + struct dlfb_data *dlfb = vma->vm_private_data; + + atomic_inc(&dlfb->mmap_count); +} + +static void dlfb_vm_close(struct vm_area_struct *vma) +{ + struct dlfb_data *dlfb = vma->vm_private_data; + + atomic_dec(&dlfb->mmap_count); +} + +static const struct vm_operations_struct dlfb_vm_ops = { + .open = dlfb_vm_open, + .close = dlfb_vm_close, +}; + static int dlfb_ops_mmap(struct fb_info *info, struct vm_area_struct *vma) { unsigned long start = vma->vm_start; unsigned long size = vma->vm_end - vma->vm_start; unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; unsigned long page, pos; + struct dlfb_data *dlfb = info->par; if (info->fbdefio) return fb_deferred_io_mmap(info, vma); @@ -358,6 +378,9 @@ static int dlfb_ops_mmap(struct fb_info *info, struct vm_area_struct *vma) size = 0; } + vma->vm_ops = &dlfb_vm_ops; + vma->vm_private_data = dlfb; + atomic_inc(&dlfb->mmap_count); return 0; } @@ -1176,7 +1199,6 @@ static void dlfb_deferred_vfree(struct dlfb_data *dlfb, void *mem) /* * Assumes &info->lock held by caller - * Assumes no active clients have framebuffer open */ static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info, u32 new_len) { @@ -1188,6 +1210,13 @@ static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info new_len = PAGE_ALIGN(new_len); if (new_len > old_len) { + if (atomic_read(&dlfb->mmap_count) > 0) { + dev_warn(info->dev, + "refusing realloc: %d active mmaps\n", + atomic_read(&dlfb->mmap_count)); + return -EBUSY; + } + /* * Alloc system memory for virtual framebuffer */
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 1c5224c..733c195 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c
@@ -191,13 +191,10 @@ static const struct dentry_operations efivarfs_d_ops = { static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) { + struct qstr q = QSTR(name); struct dentry *d; - struct qstr q; int err; - q.name = name; - q.len = strlen(name); - err = efivarfs_d_hash(parent, &q); if (err) return ERR_PTR(err);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 7b86a6ba..b41f478 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c
@@ -1354,7 +1354,7 @@ int ovl_ensure_verity_loaded(const struct path *datapath) struct inode *inode = d_inode(datapath->dentry); struct file *filp; - if (!fsverity_active(inode) && IS_VERITY(inode)) { + if (IS_VERITY(inode) && fsverity_get_info(inode) == NULL) { /* * If this inode was not yet opened, the verity info hasn't been * loaded yet, so we need to do that here to force it into memory.
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 02791ec..88d5e9a 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c
@@ -286,6 +286,14 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, &rqst[0], &oplock, &oparms, utf16_path); if (rc) goto oshr_free; + + if (oplock != SMB2_OPLOCK_LEVEL_II) { + rc = -EINVAL; + cifs_dbg(FYI, "%s: Oplock level %d not suitable for cached directory\n", + __func__, oplock); + goto oshr_free; + } + smb2_set_next_command(tcon, &rqst[0]); memset(&qi_iov, 0, sizeof(qi_iov));
diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index ec5d477..786dbbc 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c
@@ -1264,6 +1264,17 @@ static int parse_sid(struct smb_sid *psid, char *end_of_acl) return 0; } +static bool dacl_offset_valid(unsigned int acl_len, __u32 dacloffset) +{ + if (acl_len < sizeof(struct smb_acl)) + return false; + + if (dacloffset < sizeof(struct smb_ntsd)) + return false; + + return dacloffset <= acl_len - sizeof(struct smb_acl); +} + /* Convert CIFS ACL to POSIX form */ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, @@ -1284,7 +1295,6 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, group_sid_ptr = (struct smb_sid *)((char *)pntsd + le32_to_cpu(pntsd->gsidoffset)); dacloffset = le32_to_cpu(pntsd->dacloffset); - dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); cifs_dbg(NOISY, "revision %d type 0x%x ooffset 0x%x goffset 0x%x sacloffset 0x%x dacloffset 0x%x\n", pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset), le32_to_cpu(pntsd->gsidoffset), @@ -1315,11 +1325,18 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, return rc; } - if (dacloffset) + if (dacloffset) { + if (!dacl_offset_valid(acl_len, dacloffset)) { + cifs_dbg(VFS, "Server returned illegal DACL offset\n"); + return -EINVAL; + } + + dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, group_sid_ptr, fattr, get_mode_from_special_sid); - else + } else { cifs_dbg(FYI, "no ACL\n"); /* BB grant all or default perms? 
*/ + } return rc; } @@ -1342,6 +1359,11 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, dacloffset = le32_to_cpu(pntsd->dacloffset); if (dacloffset) { + if (!dacl_offset_valid(secdesclen, dacloffset)) { + cifs_dbg(VFS, "Server returned illegal DACL offset\n"); + return -EINVAL; + } + dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); rc = validate_dacl(dacl_ptr, end_of_acl); if (rc) @@ -1710,6 +1732,12 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, nsecdesclen = sizeof(struct smb_ntsd) + (sizeof(struct smb_sid) * 2); dacloffset = le32_to_cpu(pntsd->dacloffset); if (dacloffset) { + if (!dacl_offset_valid(secdesclen, dacloffset)) { + cifs_dbg(VFS, "Server returned illegal DACL offset\n"); + rc = -EINVAL; + goto id_mode_to_cifs_acl_exit; + } + dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); rc = validate_dacl(dacl_ptr, (char *)pntsd + secdesclen); if (rc) { @@ -1732,7 +1760,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, * descriptor parameters, and security descriptor itself */ nsecdesclen = max_t(u32, nsecdesclen, DEFAULT_SEC_DESC_LEN); - pnntsd = kmalloc(nsecdesclen, GFP_KERNEL); + pnntsd = kzalloc(nsecdesclen, GFP_KERNEL); if (!pnntsd) { kfree(pntsd); cifs_put_tlink(tlink); @@ -1752,6 +1780,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, rc = ops->set_acl(pnntsd, nsecdesclen, inode, path, aclflag); cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc); } +id_mode_to_cifs_acl_exit: cifs_put_tlink(tlink); kfree(pnntsd);
diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index 9afab32..17408bb 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c
@@ -296,7 +296,7 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug break; case SMB2_ENCRYPTION_AES256_CCM: case SMB2_ENCRYPTION_AES256_GCM: - out.session_key_length = CIFS_SESS_KEY_SIZE; + out.session_key_length = ses->auth_key.len; out.server_in_key_length = out.server_out_key_length = SMB3_GCM256_CRYPTKEY_SIZE; break; default:
diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 2869126..6c9c229 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c
@@ -111,7 +111,7 @@ static int check_wsl_eas(struct kvec *rsp_iov) u32 outlen, next; u16 vlen; u8 nlen; - u8 *end; + u8 *ea_end, *iov_end; outlen = le32_to_cpu(rsp->OutputBufferLength); if (outlen < SMB2_WSL_MIN_QUERY_EA_RESP_SIZE || @@ -120,15 +120,19 @@ static int check_wsl_eas(struct kvec *rsp_iov) ea = (void *)((u8 *)rsp_iov->iov_base + le16_to_cpu(rsp->OutputBufferOffset)); - end = (u8 *)rsp_iov->iov_base + rsp_iov->iov_len; + ea_end = (u8 *)ea + outlen; + iov_end = (u8 *)rsp_iov->iov_base + rsp_iov->iov_len; + if (ea_end > iov_end) + return -EINVAL; + for (;;) { - if ((u8 *)ea > end - sizeof(*ea)) + if ((u8 *)ea > ea_end - sizeof(*ea)) return -EINVAL; nlen = ea->ea_name_length; vlen = le16_to_cpu(ea->ea_value_length); if (nlen != SMB2_WSL_XATTR_NAME_LEN || - (u8 *)ea->ea_data + nlen + 1 + vlen > end) + (u8 *)ea->ea_data + nlen + 1 + vlen > ea_end) return -EINVAL; switch (vlen) {
diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 973fce3..2a7355c 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c
@@ -241,7 +241,8 @@ smb2_check_message(char *buf, unsigned int pdu_len, unsigned int len, if (len != calc_len) { /* create failed on symlink */ if (command == SMB2_CREATE_HE && - shdr->Status == STATUS_STOPPED_ON_SYMLINK) + shdr->Status == STATUS_STOPPED_ON_SYMLINK && + len > calc_len) return 0; /* Windows 7 server returns 24 bytes more */ if (calc_len + 24 == len && command == SMB2_OPLOCK_BREAK_HE)
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index cb61051..995fcdd 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c
@@ -1713,17 +1713,30 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) is_binding = (ses->ses_status == SES_GOOD); spin_unlock(&ses->ses_lock); + /* + * Per MS-SMB2 3.2.5.3, Session.SessionKey is the first 16 bytes of the + * GSS cryptographic key, right-padded with zero bytes if shorter. + * Allocate at least SMB2_NTLMV2_SESSKEY_SIZE bytes (zeroed) so the KDF + * input buffer is always valid for HMAC-SHA256 even with deprecated + * Kerberos enctypes that return a short session key. + */ + if (unlikely(msg->sesskey_len < SMB2_NTLMV2_SESSKEY_SIZE)) + cifs_dbg(VFS, + "short GSS session key (%u bytes); zero-padding per MS-SMB2 3.2.5.3\n", + msg->sesskey_len); + kfree_sensitive(ses->auth_key.response); - ses->auth_key.response = kmemdup(msg->data, - msg->sesskey_len, - GFP_KERNEL); + ses->auth_key.len = max_t(unsigned int, msg->sesskey_len, + SMB2_NTLMV2_SESSKEY_SIZE); + ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL); if (!ses->auth_key.response) { cifs_dbg(VFS, "%s: can't allocate (%u bytes) memory\n", - __func__, msg->sesskey_len); + __func__, ses->auth_key.len); + ses->auth_key.len = 0; rc = -ENOMEM; goto out_put_spnego_key; } - ses->auth_key.len = msg->sesskey_len; + memcpy(ses->auth_key.response, msg->data, msg->sesskey_len); sess_data->iov[1].iov_base = msg->data + msg->sesskey_len; sess_data->iov[1].iov_len = msg->secblob_len;
diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 4100903..e8eeff9 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c
@@ -251,7 +251,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) } static void generate_key(struct cifs_ses *ses, struct kvec label, - struct kvec context, __u8 *key, unsigned int key_size) + struct kvec context, __u8 *key, unsigned int key_size, + unsigned int full_key_size) { unsigned char zero = 0x0; __u8 i[4] = {0, 0, 0, 1}; @@ -265,7 +266,7 @@ static void generate_key(struct cifs_ses *ses, struct kvec label, memset(key, 0x0, key_size); hmac_sha256_init_usingrawkey(&hmac_ctx, ses->auth_key.response, - SMB2_NTLMV2_SESSKEY_SIZE); + full_key_size); hmac_sha256_update(&hmac_ctx, i, 4); hmac_sha256_update(&hmac_ctx, label.iov_base, label.iov_len); hmac_sha256_update(&hmac_ctx, &zero, 1); @@ -298,6 +299,7 @@ generate_smb3signingkey(struct cifs_ses *ses, struct TCP_Server_Info *server, const struct derivation_triplet *ptriplet) { + unsigned int full_key_size = SMB2_NTLMV2_SESSKEY_SIZE; bool is_binding = false; int chan_index = 0; @@ -330,12 +332,24 @@ generate_smb3signingkey(struct cifs_ses *ses, if (is_binding) { generate_key(ses, ptriplet->signing.label, ptriplet->signing.context, - ses->chans[chan_index].signkey, - SMB3_SIGN_KEY_SIZE); + ses->chans[chan_index].signkey, SMB3_SIGN_KEY_SIZE, + SMB2_NTLMV2_SESSKEY_SIZE); } else { generate_key(ses, ptriplet->signing.label, - ptriplet->signing.context, - ses->smb3signingkey, SMB3_SIGN_KEY_SIZE); + ptriplet->signing.context, ses->smb3signingkey, + SMB3_SIGN_KEY_SIZE, SMB2_NTLMV2_SESSKEY_SIZE); + + /* + * Per MS-SMB2 3.2.5.3.1, signing key always uses Session.SessionKey + * (first 16 bytes). Encryption/decryption keys use + * Session.FullSessionKey when dialect is 3.1.1 and cipher is + * AES-256-CCM or AES-256-GCM, otherwise Session.SessionKey. 
+ */ + + if (server->dialect == SMB311_PROT_ID && + (server->cipher_type == SMB2_ENCRYPTION_AES256_CCM || + server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) + full_key_size = ses->auth_key.len; /* safe to access primary channel, since it will never go away */ spin_lock(&ses->chan_lock); @@ -345,10 +359,13 @@ generate_smb3signingkey(struct cifs_ses *ses, generate_key(ses, ptriplet->encryption.label, ptriplet->encryption.context, - ses->smb3encryptionkey, SMB3_ENC_DEC_KEY_SIZE); + ses->smb3encryptionkey, SMB3_ENC_DEC_KEY_SIZE, + full_key_size); + generate_key(ses, ptriplet->decryption.label, ptriplet->decryption.context, - ses->smb3decryptionkey, SMB3_ENC_DEC_KEY_SIZE); + ses->smb3decryptionkey, SMB3_ENC_DEC_KEY_SIZE, + full_key_size); } #ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS @@ -361,7 +378,7 @@ generate_smb3signingkey(struct cifs_ses *ses, &ses->Suid); cifs_dbg(VFS, "Cipher type %d\n", server->cipher_type); cifs_dbg(VFS, "Session Key %*ph\n", - SMB2_NTLMV2_SESSKEY_SIZE, ses->auth_key.response); + (int)ses->auth_key.len, ses->auth_key.response); cifs_dbg(VFS, "Signing Key %*ph\n", SMB3_SIGN_KEY_SIZE, ses->smb3signingkey); if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) ||
diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 75f9f91..563ef488 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c
@@ -9,7 +9,6 @@ #include "cifs_debug.h" #include "cifsproto.h" #include "smb2proto.h" -#include "../smbdirect/public.h" /* Port numbers for SMBD transport */ #define SMB_PORT 445 @@ -558,3 +557,5 @@ void smbd_debug_proc_show(struct TCP_Server_Info *server, struct seq_file *m) server->rdma_readwrite_threshold, m); } + +MODULE_IMPORT_NS("SMBDIRECT");
diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 287ac84..be205ec 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h
@@ -12,7 +12,7 @@ #include "cifsglob.h" -#include "../smbdirect/smbdirect.h" +#include <linux/smbdirect.h> extern int rdma_readwrite_threshold; extern int smbd_max_frmr_depth;
diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index c5aac49..8347495 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c
@@ -79,6 +79,85 @@ static int create_proc_clients(void) { return 0; } static void delete_proc_clients(void) {} #endif +static struct workqueue_struct *ksmbd_conn_wq; + +int ksmbd_conn_wq_init(void) +{ + ksmbd_conn_wq = alloc_workqueue("ksmbd-conn-release", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0); + if (!ksmbd_conn_wq) + return -ENOMEM; + return 0; +} + +void ksmbd_conn_wq_destroy(void) +{ + if (ksmbd_conn_wq) { + destroy_workqueue(ksmbd_conn_wq); + ksmbd_conn_wq = NULL; + } +} + +/* + * __ksmbd_conn_release_work() - perform the final, once-per-struct cleanup + * of a ksmbd_conn whose refcount has just dropped to zero. + * + * This is the common release path used by ksmbd_conn_put() for the embedded + * state that outlives the connection thread: async_ida and the attached + * transport (which owns the socket and iov for TCP). Called from a workqueue + * so that sleep-allowed teardown (sock_release -> tcp_close -> + * lock_sock_nested) never runs from an RCU softirq callback (free_opinfo_rcu) + * or any other non-sleeping putter context. + */ +static void __ksmbd_conn_release_work(struct work_struct *work) +{ + struct ksmbd_conn *conn = + container_of(work, struct ksmbd_conn, release_work); + + ida_destroy(&conn->async_ida); + conn->transport->ops->free_transport(conn->transport); + kfree(conn); +} + +/** + * ksmbd_conn_get() - take a reference on @conn and return it. + * + * @conn: connection instance to get a reference to + * + * Returns @conn unchanged so callers can write + * "fp->conn = ksmbd_conn_get(work->conn);" in one expression. Returns NULL + * if @conn is NULL. + */ +struct ksmbd_conn *ksmbd_conn_get(struct ksmbd_conn *conn) +{ + if (!conn) + return NULL; + + atomic_inc(&conn->refcnt); + return conn; +} + +/** + * ksmbd_conn_put() - drop a reference and, if it was the last, queue the + * release onto ksmbd_conn_wq so it runs from process context. 
+ * + * @conn: connection instance to put a reference to + * + * Callable from any context including RCU softirq callbacks and non-sleeping + * locks; the actual release is deferred to the workqueue. ksmbd_conn_wq is + * created in ksmbd_server_init() before any conn can be allocated and is + * destroyed in ksmbd_server_exit() after rcu_barrier(), so it is always + * non-NULL while a conn reference is held. + */ +void ksmbd_conn_put(struct ksmbd_conn *conn) +{ + if (!conn) + return; + + if (atomic_dec_and_test(&conn->refcnt)) + queue_work(ksmbd_conn_wq, &conn->release_work); +} + /** * ksmbd_conn_free() - free resources of the connection instance * @@ -93,23 +172,19 @@ void ksmbd_conn_free(struct ksmbd_conn *conn) hash_del(&conn->hlist); up_write(&conn_list_lock); + /* + * request_buf / preauth_info / mechToken are only ever accessed by the + * connection handler thread that owns @conn. ksmbd_conn_free() is + * called from the transport free_transport() path when that thread is + * exiting, so it is safe to release them unconditionally even when + * ksmbd_conn_put() below is not the final putter (oplock / ksmbd_file + * holders only retain the conn pointer, not these per-thread buffers). + */ xa_destroy(&conn->sessions); kvfree(conn->request_buf); kfree(conn->preauth_info); kfree(conn->mechToken); - if (atomic_dec_and_test(&conn->refcnt)) { - /* - * async_ida is embedded in struct ksmbd_conn, so pair - * ida_destroy() with the final kfree() rather than with - * the unconditional field teardown above. This keeps - * the IDA valid for the entire lifetime of the struct, - * even while other refcount holders (oplock / vfs - * durable handles) still reference the connection. 
- */ - ida_destroy(&conn->async_ida); - conn->transport->ops->free_transport(conn->transport); - kfree(conn); - } + ksmbd_conn_put(conn); } /** @@ -136,6 +211,7 @@ struct ksmbd_conn *ksmbd_conn_alloc(void) conn->um = ERR_PTR(-EOPNOTSUPP); if (IS_ERR(conn->um)) conn->um = NULL; + INIT_WORK(&conn->release_work, __ksmbd_conn_release_work); atomic_set(&conn->req_running, 0); atomic_set(&conn->r_count, 0); atomic_set(&conn->refcnt, 1); @@ -512,8 +588,7 @@ void ksmbd_conn_r_count_dec(struct ksmbd_conn *conn) if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q)) wake_up(&conn->r_count_q); - if (atomic_dec_and_test(&conn->refcnt)) - kfree(conn); + ksmbd_conn_put(conn); } int ksmbd_conn_transport_init(void)
diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index de2d469..e074be9 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h
@@ -16,6 +16,7 @@ #include <linux/kthread.h> #include <linux/nls.h> #include <linux/unicode.h> +#include <linux/workqueue.h> #include "smb_common.h" #include "ksmbd_work.h" @@ -120,6 +121,7 @@ struct ksmbd_conn { bool binding; atomic_t refcnt; bool is_aapl; + struct work_struct release_work; }; struct ksmbd_conn_ops { @@ -164,6 +166,10 @@ void ksmbd_conn_wait_idle(struct ksmbd_conn *conn); int ksmbd_conn_wait_idle_sess_id(struct ksmbd_conn *curr_conn, u64 sess_id); struct ksmbd_conn *ksmbd_conn_alloc(void); void ksmbd_conn_free(struct ksmbd_conn *conn); +struct ksmbd_conn *ksmbd_conn_get(struct ksmbd_conn *conn); +void ksmbd_conn_put(struct ksmbd_conn *conn); +int ksmbd_conn_wq_init(void); +void ksmbd_conn_wq_destroy(void); bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c); int ksmbd_conn_write(struct ksmbd_work *work); int ksmbd_conn_rdma_read(struct ksmbd_conn *conn,
diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c index 53f44ff..6f97f8d 100644 --- a/fs/smb/server/mgmt/share_config.c +++ b/fs/smb/server/mgmt/share_config.c
@@ -167,7 +167,10 @@ static struct ksmbd_share_config *share_config_request(struct ksmbd_work *work, share->path = kstrndup(ksmbd_share_config_path(resp), path_len, KSMBD_DEFAULT_GFP); - if (share->path) { + if (!share->path) { + ret = -ENOMEM; + } else { + ret = 0; share->path_sz = strlen(share->path); while (share->path_sz > 1 && share->path[share->path_sz - 1] == '/') @@ -179,9 +182,10 @@ static struct ksmbd_share_config *share_config_request(struct ksmbd_work *work, share->force_directory_mode = resp->force_directory_mode; share->force_uid = resp->force_uid; share->force_gid = resp->force_gid; - ret = parse_veto_list(share, - KSMBD_SHARE_CONFIG_VETO_LIST(resp), - resp->veto_list_sz); + if (!ret) + ret = parse_veto_list(share, + KSMBD_SHARE_CONFIG_VETO_LIST(resp), + resp->veto_list_sz); if (!ret && share->path) { if (__ksmbd_override_fsids(work, share)) { kill_share(share);
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index cd3f28b..8feca02 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c
@@ -30,7 +30,6 @@ static DEFINE_RWLOCK(lease_list_lock); static struct oplock_info *alloc_opinfo(struct ksmbd_work *work, u64 id, __u16 Tid) { - struct ksmbd_conn *conn = work->conn; struct ksmbd_session *sess = work->sess; struct oplock_info *opinfo; @@ -39,7 +38,7 @@ static struct oplock_info *alloc_opinfo(struct ksmbd_work *work, return NULL; opinfo->sess = sess; - opinfo->conn = conn; + opinfo->conn = ksmbd_conn_get(work->conn); opinfo->level = SMB2_OPLOCK_LEVEL_NONE; opinfo->op_state = OPLOCK_STATE_NONE; opinfo->pending_break = 0; @@ -50,7 +49,6 @@ static struct oplock_info *alloc_opinfo(struct ksmbd_work *work, init_waitqueue_head(&opinfo->oplock_brk); atomic_set(&opinfo->refcount, 1); atomic_set(&opinfo->breaking_cnt, 0); - atomic_inc(&opinfo->conn->refcnt); return opinfo; } @@ -132,8 +130,7 @@ static void __free_opinfo(struct oplock_info *opinfo) { if (opinfo->is_lease) free_lease(opinfo); - if (opinfo->conn && atomic_dec_and_test(&opinfo->conn->refcnt)) - kfree(opinfo->conn); + ksmbd_conn_put(opinfo->conn); kfree(opinfo); }
diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index 58ef02c..5d799b2 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c
@@ -596,8 +596,14 @@ static int __init ksmbd_server_init(void) if (ret) goto err_crypto_destroy; + ret = ksmbd_conn_wq_init(); + if (ret) + goto err_workqueue_destroy; + return 0; +err_workqueue_destroy: + ksmbd_workqueue_destroy(); err_crypto_destroy: ksmbd_crypto_destroy(); err_release_inode_hash: @@ -623,6 +629,12 @@ static void __exit ksmbd_server_exit(void) { ksmbd_server_shutdown(); rcu_barrier(); + /* + * ksmbd_conn_put() defers the final release onto ksmbd_conn_wq, + * so drain it after rcu_barrier() has fired any pending RCU + * callbacks that may have queued a release. + */ + ksmbd_conn_wq_destroy(); ksmbd_release_inode_hash(); }
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 47b7af6..62d4399 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c
@@ -3767,8 +3767,10 @@ int smb2_open(struct ksmbd_work *work) err_out2: if (!rc) { - ksmbd_update_fstate(&work->sess->file_table, fp, FP_INITED); - rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len); + rc = ksmbd_update_fstate(&work->sess->file_table, fp, + FP_INITED); + if (!rc) + rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len); } if (rc) { if (rc == -EINVAL)
diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 4bbc2c2..c1d1f34 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c
@@ -1068,7 +1068,26 @@ static void smb_set_ace(struct smb_ace *ace, const struct smb_sid *sid, u8 type, ace->flags = flags; ace->access_req = access_req; smb_copy_sid(&ace->sid, sid); - ace->size = cpu_to_le16(1 + 1 + 2 + 4 + 1 + 1 + 6 + (sid->num_subauth * 4)); + ace->size = cpu_to_le16(1 + 1 + 2 + 4 + 1 + 1 + 6 + + (ace->sid.num_subauth * 4)); +} + +static int smb_append_inherited_ace(struct smb_ace **ace, int *nt_size, + u16 *ace_cnt, const struct smb_sid *sid, + u8 type, u8 flags, __le32 access_req) +{ + int ace_size; + + smb_set_ace(*ace, sid, type, flags, access_req); + ace_size = le16_to_cpu((*ace)->size); + /* pdacl->size is __le16 and includes struct smb_acl. */ + if (check_add_overflow(*nt_size, ace_size, nt_size) || + *nt_size > U16_MAX - (int)sizeof(struct smb_acl)) + return -EINVAL; + + (*ace_cnt)++; + *ace = (struct smb_ace *)((char *)*ace + ace_size); + return 0; } int smb_inherit_dacl(struct ksmbd_conn *conn, @@ -1157,6 +1176,12 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, CIFS_SID_BASE_SIZE) break; + if (parent_aces->sid.num_subauth > SID_MAX_SUB_AUTHORITIES || + pace_size < offsetof(struct smb_ace, sid) + + CIFS_SID_BASE_SIZE + + sizeof(__le32) * parent_aces->sid.num_subauth) + break; + aces_size -= pace_size; flags = parent_aces->flags; @@ -1186,22 +1211,24 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, } if (is_dir && creator && flags & CONTAINER_INHERIT_ACE) { - smb_set_ace(aces, psid, parent_aces->type, inherited_flags, - parent_aces->access_req); - nt_size += le16_to_cpu(aces->size); - ace_cnt++; - aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size)); + rc = smb_append_inherited_ace(&aces, &nt_size, &ace_cnt, + psid, parent_aces->type, + inherited_flags, + parent_aces->access_req); + if (rc) + goto free_aces_base; flags |= INHERIT_ONLY_ACE; psid = creator; } else if (is_dir && !(parent_aces->flags & NO_PROPAGATE_INHERIT_ACE)) { psid = &parent_aces->sid; } - smb_set_ace(aces, psid, parent_aces->type, flags | inherited_flags, 
- parent_aces->access_req); - nt_size += le16_to_cpu(aces->size); - aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size)); - ace_cnt++; + rc = smb_append_inherited_ace(&aces, &nt_size, &ace_cnt, psid, + parent_aces->type, + flags | inherited_flags, + parent_aces->access_req); + if (rc) + goto free_aces_base; pass: parent_aces = (struct smb_ace *)((char *)parent_aces + pace_size); } @@ -1211,7 +1238,7 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, struct smb_acl *pdacl; struct smb_sid *powner_sid = NULL, *pgroup_sid = NULL; int powner_sid_size = 0, pgroup_sid_size = 0, pntsd_size; - int pntsd_alloc_size; + size_t pntsd_alloc_size; if (parent_pntsd->osidoffset) { powner_sid = (struct smb_sid *)((char *)parent_pntsd + @@ -1224,8 +1251,19 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, pgroup_sid_size = 1 + 1 + 6 + (pgroup_sid->num_subauth * 4); } - pntsd_alloc_size = sizeof(struct smb_ntsd) + powner_sid_size + - pgroup_sid_size + sizeof(struct smb_acl) + nt_size; + if (check_add_overflow(sizeof(struct smb_ntsd), + (size_t)powner_sid_size, + &pntsd_alloc_size) || + check_add_overflow(pntsd_alloc_size, + (size_t)pgroup_sid_size, + &pntsd_alloc_size) || + check_add_overflow(pntsd_alloc_size, sizeof(struct smb_acl), + &pntsd_alloc_size) || + check_add_overflow(pntsd_alloc_size, (size_t)nt_size, + &pntsd_alloc_size)) { + rc = -EINVAL; + goto free_aces_base; + } pntsd = kzalloc(pntsd_alloc_size, KSMBD_DEFAULT_GFP); if (!pntsd) {
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index a8242c0..b6d63ff 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c
@@ -18,7 +18,6 @@ #include "smb_common.h" #include "../common/smb2status.h" #include "transport_rdma.h" -#include "../smbdirect/public.h" #define SMB_DIRECT_PORT_IWARP 5445 @@ -540,3 +539,5 @@ static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { .rdma_write = smb_direct_rdma_write, .free_transport = smb_direct_free_transport, }; + +MODULE_IMPORT_NS("SMBDIRECT");
diff --git a/fs/smb/server/transport_rdma.h b/fs/smb/server/transport_rdma.h index bde3d88..8b78917 100644 --- a/fs/smb/server/transport_rdma.h +++ b/fs/smb/server/transport_rdma.h
@@ -25,6 +25,6 @@ static inline void init_smbd_max_io_size(unsigned int sz) { } static inline unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt) { return 0; } #endif -#include "../smbdirect/smbdirect.h" +#include <linux/smbdirect.h> #endif /* __KSMBD_TRANSPORT_RDMA_H__ */
diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 3551f01..354c4d8 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c
@@ -418,6 +418,14 @@ static void __ksmbd_remove_durable_fd(struct ksmbd_file *fp) return; idr_remove(global_ft.idr, fp->persistent_id); + /* + * Clear persistent_id so a later __ksmbd_close_fd() that runs from a + * delayed putter (e.g. when a concurrent ksmbd_lookup_fd_inode() + * walker held the final reference) does not re-issue idr_remove() on + * an id that idr_alloc_cyclic() may have already handed out to a new + * durable handle. + */ + fp->persistent_id = KSMBD_NO_FID; } static void ksmbd_remove_durable_fd(struct ksmbd_file *fp) @@ -431,13 +439,13 @@ static void ksmbd_remove_durable_fd(struct ksmbd_file *fp) static void __ksmbd_remove_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp) { - if (!has_file_id(fp->volatile_id)) - return; - down_write(&fp->f_ci->m_lock); list_del_init(&fp->node); up_write(&fp->f_ci->m_lock); + if (!has_file_id(fp->volatile_id)) + return; + write_lock(&ft->lock); idr_remove(ft->idr, fp->volatile_id); write_unlock(&ft->lock); @@ -475,6 +483,17 @@ static void __ksmbd_close_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp) kfree(smb_lock); } + /* + * Drop fp's strong reference on conn (taken in ksmbd_open_fd() / + * ksmbd_reopen_durable_fd()). Durable fps that reached the + * scavenger have already had fp->conn cleared by session_fd_check(), + * in which case there is nothing to drop here. + */ + if (fp->conn) { + ksmbd_conn_put(fp->conn); + fp->conn = NULL; + } + if (ksmbd_stream_fd(fp)) kfree(fp->stream.name); kfree(fp->owner.name); @@ -510,6 +529,20 @@ static struct ksmbd_file *__ksmbd_lookup_fd(struct ksmbd_file_table *ft, static void __put_fd_final(struct ksmbd_work *work, struct ksmbd_file *fp) { + /* + * Detached durable fp -- session_fd_check() cleared fp->conn at + * preserve, so this fp is no longer tracked by any conn's + * stats.open_files_count. This happens when + * ksmbd_scavenger_dispose_dh() hands the final close off to an + * m_fp_list walker (e.g. 
ksmbd_lookup_fd_inode()) whose work->conn + * is unrelated to the conn that originally opened the handle; close + * via the NULL-ft path so we do not underflow that unrelated + * counter. + */ + if (!fp->conn) { + __ksmbd_close_fd(NULL, fp); + return; + } __ksmbd_close_fd(&work->sess->file_table, fp); atomic_dec(&work->conn->stats.open_files_count); } @@ -752,7 +785,14 @@ struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp) atomic_set(&fp->refcount, 1); fp->filp = filp; - fp->conn = work->conn; + /* + * fp owns a strong reference on fp->conn for as long as fp->conn is + * non-NULL, so session_fd_check() and __ksmbd_close_fd() never + * dereference a dangling pointer. Paired with ksmbd_conn_put() in + * session_fd_check() (durable preserve), in __ksmbd_close_fd() + * (final close), and on the error paths below. + */ + fp->conn = ksmbd_conn_get(work->conn); fp->tcon = work->tcon; fp->volatile_id = KSMBD_NO_FID; fp->persistent_id = KSMBD_NO_FID; @@ -774,19 +814,64 @@ struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp) return fp; err_out: + /* fp->conn was set and refcounted before every branch here. */ + ksmbd_conn_put(fp->conn); kmem_cache_free(filp_cache, fp); return ERR_PTR(ret); } -void ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp, - unsigned int state) +/** + * ksmbd_update_fstate() - update an fp state under the file-table lock + * @ft: file table that publishes @fp's volatile id + * @fp: file pointer to update + * @state: new state + * + * Return: 0 on success. The FP_NEW -> FP_INITED transition is special: + * -ENOENT if teardown already unpublished @fp by advancing the state or + * clearing the volatile id. Other state updates preserve the historical + * fire-and-forget behavior. 
+ */ +int ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp, + unsigned int state) { + int ret; + if (!fp) - return; + return -ENOENT; write_lock(&ft->lock); - fp->f_state = state; + if (state == FP_INITED && + (fp->f_state != FP_NEW || !has_file_id(fp->volatile_id))) { + ret = -ENOENT; + } else { + fp->f_state = state; + ret = 0; + } write_unlock(&ft->lock); + + return ret; +} + +/* + * ksmbd_mark_fp_closed() - mark fp closed under ft->lock and return how many + * refs the teardown path owns. + * + * FP_INITED has a normal idr-owned reference, so teardown owns both that + * reference and the transient lookup reference. FP_NEW is still owned by the + * in-flight opener/reopener, which will drop the original reference after + * ksmbd_update_fstate(..., FP_INITED) observes the cleared volatile id. + * FP_CLOSED on entry means an earlier ksmbd_close_fd() already consumed the + * idr-owned ref. + */ +static int ksmbd_mark_fp_closed(struct ksmbd_file *fp) +{ + if (fp->f_state == FP_INITED) { + set_close_state_blocked_works(fp); + fp->f_state = FP_CLOSED; + return 2; + } + + return 1; } static int @@ -794,7 +879,8 @@ __close_file_table_ids(struct ksmbd_session *sess, struct ksmbd_tree_connect *tcon, bool (*skip)(struct ksmbd_tree_connect *tcon, struct ksmbd_file *fp, - struct ksmbd_user *user)) + struct ksmbd_user *user), + bool skip_preserves_fp) { struct ksmbd_file_table *ft = &sess->file_table; struct ksmbd_file *fp; @@ -802,32 +888,120 @@ __close_file_table_ids(struct ksmbd_session *sess, int num = 0; while (1) { + int n_to_drop; + write_lock(&ft->lock); fp = idr_get_next(ft->idr, &id); if (!fp) { write_unlock(&ft->lock); break; } - - if (skip(tcon, fp, sess->user) || - !atomic_dec_and_test(&fp->refcount)) { + if (!atomic_inc_not_zero(&fp->refcount)) { id++; write_unlock(&ft->lock); continue; } - set_close_state_blocked_works(fp); - idr_remove(ft->idr, fp->volatile_id); - fp->volatile_id = KSMBD_NO_FID; - write_unlock(&ft->lock); + if 
(skip_preserves_fp) { + /* + * Session teardown: skip() is session_fd_check(), + * which may sleep and mutates fp->conn / fp->tcon / + * fp->volatile_id when it chooses to preserve fp + * for durable reconnect. Unpublish fp from the + * session idr here, under ft->lock, so that + * __ksmbd_lookup_fd() through this session cannot + * grant a new ksmbd_fp_get() reference to an fp + * whose fields are about to be rewritten outside + * the lock. Durable reconnect still reaches fp via + * global_ft. + */ + idr_remove(ft->idr, id); + fp->volatile_id = KSMBD_NO_FID; + write_unlock(&ft->lock); + if (skip(tcon, fp, sess->user)) { + /* + * session_fd_check() has converted fp to + * durable-preserve state and cleared its + * per-conn fields. fp is already unpublished + * above; the original idr-owned ref keeps it + * alive for the durable scavenger. Drop only + * the transient ref. atomic_dec() is safe -- + * atomic_inc_not_zero() succeeded on a + * positive value and we added one more, so + * refcount cannot be zero here. + */ + atomic_dec(&fp->refcount); + id++; + continue; + } + + /* + * Keep the close-state decision under the same lock + * observed by ksmbd_update_fstate(), which is how an + * in-flight FP_NEW opener learns that teardown has + * cleared its volatile id. + */ + write_lock(&ft->lock); + n_to_drop = ksmbd_mark_fp_closed(fp); + write_unlock(&ft->lock); + } else { + /* + * Tree teardown: skip() is tree_conn_fd_check(), a + * cheap pointer compare that doesn't sleep and has + * no side effects, so keep the skip decision plus + * the unpublish-and-mark-closed sequence atomic + * under ft->lock. fps belonging to other tree + * connects (skip() == true) stay fully published in + * the session idr with no lock window. 
+ */ + if (skip(tcon, fp, sess->user)) { + atomic_dec(&fp->refcount); + write_unlock(&ft->lock); + id++; + continue; + } + idr_remove(ft->idr, id); + fp->volatile_id = KSMBD_NO_FID; + n_to_drop = ksmbd_mark_fp_closed(fp); + write_unlock(&ft->lock); + } + + /* + * fp->volatile_id is already cleared to prevent stale idr + * removal from a deferred final close. Remove fp from + * m_fp_list here because __ksmbd_remove_fd() will skip the + * list unlink when volatile_id is KSMBD_NO_FID. + */ down_write(&fp->f_ci->m_lock); list_del_init(&fp->node); up_write(&fp->f_ci->m_lock); - __ksmbd_close_fd(ft, fp); - - num++; + /* + * Drop the references this iteration owns: + * + * n_to_drop == 2: we observed FP_INITED and committed + * the FP_CLOSED transition ourselves, so we own the + * transient (+1) and the still-intact idr-owned ref. + * + * n_to_drop == 1: either a prior ksmbd_close_fd() + * already consumed the idr-owned ref, or fp was still + * FP_NEW and the in-flight opener/reopener must keep + * the original reference until ksmbd_update_fstate() + * observes the cleared volatile id. + * + * If we end up as the final putter, finalize fp and + * account the open_files_count decrement via the caller's + * atomic_sub(num, ...). Otherwise the remaining user's + * ksmbd_fd_put() reaches __put_fd_final(), which does its + * own atomic_dec(&open_files_count), so we must not count + * this fp here -- doing so would double-decrement the + * connection-wide counter. + */ + if (atomic_sub_and_test(n_to_drop, &fp->refcount)) { + __ksmbd_close_fd(NULL, fp); + num++; + } id++; } @@ -881,24 +1055,37 @@ static bool ksmbd_durable_scavenger_alive(void) return true; } -static void ksmbd_scavenger_dispose_dh(struct list_head *head) +static void ksmbd_scavenger_dispose_dh(struct ksmbd_file *fp) { - while (!list_empty(head)) { - struct ksmbd_file *fp; + /* + * Durable-preserved fp can remain linked on f_ci->m_fp_list for + * share-mode checks. 
Unlink it before final close; fp->node is not + * available as a scavenger-private list node because re-adding it to + * another list corrupts m_fp_list. + */ + down_write(&fp->f_ci->m_lock); + list_del_init(&fp->node); + up_write(&fp->f_ci->m_lock); - fp = list_first_entry(head, struct ksmbd_file, node); - list_del_init(&fp->node); + /* + * Drop both the durable lifetime reference and the transient reference + * taken by the scavenger under global_ft.lock. If a concurrent + * ksmbd_lookup_fd_inode() (or any other m_fp_list walker) snatched fp + * before the unlink above, that holder owns the final close via + * ksmbd_fd_put() -> __ksmbd_close_fd(). Otherwise the scavenger is + * the last putter and finalises fp here. + */ + if (atomic_sub_and_test(2, &fp->refcount)) __ksmbd_close_fd(NULL, fp); - } } static int ksmbd_durable_scavenger(void *dummy) { struct ksmbd_file *fp = NULL; + struct ksmbd_file *expired_fp; unsigned int id; unsigned int min_timeout = 1; bool found_fp_timeout; - LIST_HEAD(scavenger_list); unsigned long remaining_jiffies; __module_get(THIS_MODULE); @@ -908,8 +1095,6 @@ static int ksmbd_durable_scavenger(void *dummy) if (try_to_freeze()) continue; - found_fp_timeout = false; - remaining_jiffies = wait_event_timeout(dh_wq, ksmbd_durable_scavenger_alive() == false, __msecs_to_jiffies(min_timeout)); @@ -918,23 +1103,39 @@ static int ksmbd_durable_scavenger(void *dummy) else min_timeout = DURABLE_HANDLE_MAX_TIMEOUT; - write_lock(&global_ft.lock); - idr_for_each_entry(global_ft.idr, fp, id) { - if (!fp->durable_timeout) - continue; + do { + expired_fp = NULL; + found_fp_timeout = false; - if (atomic_read(&fp->refcount) > 1 || - fp->conn) - continue; - - found_fp_timeout = true; - if (fp->durable_scavenger_timeout <= - jiffies_to_msecs(jiffies)) { - __ksmbd_remove_durable_fd(fp); - list_add(&fp->node, &scavenger_list); - } else { + write_lock(&global_ft.lock); + idr_for_each_entry(global_ft.idr, fp, id) { unsigned long durable_timeout; + if 
(!fp->durable_timeout) + continue; + + if (atomic_read(&fp->refcount) > 1 || + fp->conn) + continue; + + found_fp_timeout = true; + if (fp->durable_scavenger_timeout <= + jiffies_to_msecs(jiffies)) { + __ksmbd_remove_durable_fd(fp); + /* + * Take a transient reference so fp + * cannot be freed by an in-flight + * ksmbd_lookup_fd_inode() that found + * it through f_ci->m_fp_list while we + * drop global_ft.lock and reach the + * m_fp_list unlink in + * ksmbd_scavenger_dispose_dh(). + */ + atomic_inc(&fp->refcount); + expired_fp = fp; + break; + } + durable_timeout = fp->durable_scavenger_timeout - jiffies_to_msecs(jiffies); @@ -942,10 +1143,11 @@ static int ksmbd_durable_scavenger(void *dummy) if (min_timeout > durable_timeout) min_timeout = durable_timeout; } - } - write_unlock(&global_ft.lock); + write_unlock(&global_ft.lock); - ksmbd_scavenger_dispose_dh(&scavenger_list); + if (expired_fp) + ksmbd_scavenger_dispose_dh(expired_fp); + } while (expired_fp); if (found_fp_timeout == false) break; @@ -1062,25 +1264,35 @@ static bool session_fd_check(struct ksmbd_tree_connect *tcon, if (!is_reconnectable(fp)) return false; + if (fp->f_state != FP_INITED) + return false; + + if (WARN_ON_ONCE(!fp->conn)) + return false; + if (ksmbd_vfs_copy_durable_owner(fp, user)) return false; + /* + * fp owns a strong reference on fp->conn (taken in ksmbd_open_fd() + * / ksmbd_reopen_durable_fd()), so conn stays valid for the whole + * body of this function regardless of any op->conn puts below. 
+ */ conn = fp->conn; ci = fp->f_ci; down_write(&ci->m_lock); list_for_each_entry_rcu(op, &ci->m_op_list, op_entry) { if (op->conn != conn) continue; - if (op->conn && atomic_dec_and_test(&op->conn->refcnt)) - kfree(op->conn); + ksmbd_conn_put(op->conn); op->conn = NULL; } up_write(&ci->m_lock); list_for_each_entry_safe(smb_lock, tmp_lock, &fp->lock_list, flist) { - spin_lock(&fp->conn->llist_lock); + spin_lock(&conn->llist_lock); list_del_init(&smb_lock->clist); - spin_unlock(&fp->conn->llist_lock); + spin_unlock(&conn->llist_lock); } fp->conn = NULL; @@ -1091,6 +1303,8 @@ static bool session_fd_check(struct ksmbd_tree_connect *tcon, fp->durable_scavenger_timeout = jiffies_to_msecs(jiffies) + fp->durable_timeout; + /* Drop fp's own reference on conn. */ + ksmbd_conn_put(conn); return true; } @@ -1098,7 +1312,8 @@ void ksmbd_close_tree_conn_fds(struct ksmbd_work *work) { int num = __close_file_table_ids(work->sess, work->tcon, - tree_conn_fd_check); + tree_conn_fd_check, + false); atomic_sub(num, &work->conn->stats.open_files_count); } @@ -1107,7 +1322,8 @@ void ksmbd_close_session_fds(struct ksmbd_work *work) { int num = __close_file_table_ids(work->sess, work->tcon, - session_fd_check); + session_fd_check, + true); atomic_sub(num, &work->conn->stats.open_files_count); } @@ -1178,15 +1394,27 @@ int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp) old_f_state = fp->f_state; fp->f_state = FP_NEW; + + /* + * Initialize fp's connection binding before publishing fp into the + * session's file table. If __open_id() is ordered first, a + * concurrent teardown that iterates the table can observe a valid + * volatile_id with fp->conn == NULL and preserve a + * partially-initialized fp. fp owns a strong reference on the new + * conn (see ksmbd_open_fd()); undo it on __open_id() failure. 
+ */ + fp->conn = ksmbd_conn_get(conn); + fp->tcon = work->tcon; + __open_id(&work->sess->file_table, fp, OPEN_ID_TYPE_VOLATILE_ID); if (!has_file_id(fp->volatile_id)) { + fp->conn = NULL; + fp->tcon = NULL; + ksmbd_conn_put(conn); fp->f_state = old_f_state; return -EBADF; } - fp->conn = conn; - fp->tcon = work->tcon; - list_for_each_entry(smb_lock, &fp->lock_list, flist) { spin_lock(&conn->llist_lock); list_add_tail(&smb_lock->clist, &conn->lock_list); @@ -1198,8 +1426,7 @@ int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp) list_for_each_entry_rcu(op, &ci->m_op_list, op_entry) { if (op->conn) continue; - op->conn = fp->conn; - atomic_inc(&op->conn->refcnt); + op->conn = ksmbd_conn_get(fp->conn); } up_write(&ci->m_lock); @@ -1228,7 +1455,7 @@ void ksmbd_destroy_file_table(struct ksmbd_session *sess) if (!ft->idr) return; - __close_file_table_ids(sess, NULL, session_fd_check); + __close_file_table_ids(sess, NULL, session_fd_check, true); idr_destroy(ft->idr); kfree(ft->idr); ft->idr = NULL;
diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index 866f32c1..e687126 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h
@@ -172,8 +172,8 @@ int ksmbd_close_inode_fds(struct ksmbd_work *work, struct inode *inode); int ksmbd_init_global_file_table(void); void ksmbd_free_global_file_table(void); void ksmbd_set_fd_limit(unsigned long limit); -void ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp, - unsigned int state); +int ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp, + unsigned int state); bool ksmbd_vfs_compare_durable_owner(struct ksmbd_file *fp, struct ksmbd_user *user);
diff --git a/fs/smb/smbdirect/accept.c b/fs/smb/smbdirect/accept.c index 704b271..5297400 100644 --- a/fs/smb/smbdirect/accept.c +++ b/fs/smb/smbdirect/accept.c
@@ -854,4 +854,4 @@ struct smbdirect_socket *smbdirect_socket_accept(struct smbdirect_socket *lsc, return nsc; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_accept); +EXPORT_SYMBOL_GPL(smbdirect_socket_accept);
diff --git a/fs/smb/smbdirect/connect.c b/fs/smb/smbdirect/connect.c index 8addee4..cd726b3 100644 --- a/fs/smb/smbdirect/connect.c +++ b/fs/smb/smbdirect/connect.c
@@ -60,7 +60,7 @@ int smbdirect_connect(struct smbdirect_socket *sc, const struct sockaddr *dst) */ return 0; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connect); +EXPORT_SYMBOL_GPL(smbdirect_connect); static int smbdirect_connect_setup_connection(struct smbdirect_socket *sc) { @@ -922,4 +922,4 @@ int smbdirect_connect_sync(struct smbdirect_socket *sc, return 0; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connect_sync); +EXPORT_SYMBOL_GPL(smbdirect_connect_sync);
diff --git a/fs/smb/smbdirect/connection.c b/fs/smb/smbdirect/connection.c index 8223667..fe9912e 100644 --- a/fs/smb/smbdirect/connection.c +++ b/fs/smb/smbdirect/connection.c
@@ -706,7 +706,7 @@ bool smbdirect_connection_is_connected(struct smbdirect_socket *sc) return false; return true; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_is_connected); +EXPORT_SYMBOL_GPL(smbdirect_connection_is_connected); int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc) { @@ -779,7 +779,7 @@ int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc) return 0; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_wait_for_connected); +EXPORT_SYMBOL_GPL(smbdirect_connection_wait_for_connected); void smbdirect_connection_idle_timer_work(struct work_struct *work) { @@ -958,7 +958,7 @@ int smbdirect_connection_send_batch_flush(struct smbdirect_socket *sc, return ret; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_batch_flush); +EXPORT_SYMBOL_GPL(smbdirect_connection_send_batch_flush); struct smbdirect_send_batch * smbdirect_init_send_batch_storage(struct smbdirect_send_batch_storage *storage, @@ -976,7 +976,7 @@ smbdirect_init_send_batch_storage(struct smbdirect_send_batch_storage *storage, return batch; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_init_send_batch_storage); +EXPORT_SYMBOL_GPL(smbdirect_init_send_batch_storage); static int smbdirect_connection_wait_for_send_bcredit(struct smbdirect_socket *sc, struct smbdirect_send_batch *batch) @@ -1263,7 +1263,7 @@ int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, bcredit_failed: return ret; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_single_iter); +EXPORT_SYMBOL_GPL(smbdirect_connection_send_single_iter); int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc) { @@ -1288,7 +1288,7 @@ int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc) return 0; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_wait_zero_pending); +EXPORT_SYMBOL_GPL(smbdirect_connection_send_wait_zero_pending); int smbdirect_connection_send_iter(struct smbdirect_socket *sc, struct iov_iter *iter, @@ -1373,7 
+1373,7 @@ int smbdirect_connection_send_iter(struct smbdirect_socket *sc, return total_count; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_iter); +EXPORT_SYMBOL_GPL(smbdirect_connection_send_iter); static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc) { @@ -1937,7 +1937,7 @@ int smbdirect_connection_recvmsg(struct smbdirect_socket *sc, goto again; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_recvmsg); +EXPORT_SYMBOL_GPL(smbdirect_connection_recvmsg); static bool smbdirect_map_sges_single_page(struct smbdirect_map_sges *state, struct page *page, size_t off, size_t len)
diff --git a/fs/smb/smbdirect/debug.c b/fs/smb/smbdirect/debug.c index a66a19d..05ba7c8 100644 --- a/fs/smb/smbdirect/debug.c +++ b/fs/smb/smbdirect/debug.c
@@ -85,4 +85,4 @@ void smbdirect_connection_legacy_debug_proc_show(struct smbdirect_socket *sc, atomic_read(&sc->mr_io.ready.count), atomic_read(&sc->mr_io.used.count)); } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_legacy_debug_proc_show); +EXPORT_SYMBOL_GPL(smbdirect_connection_legacy_debug_proc_show);
diff --git a/fs/smb/smbdirect/devices.c b/fs/smb/smbdirect/devices.c index 44962f2..7adacbd 100644 --- a/fs/smb/smbdirect/devices.c +++ b/fs/smb/smbdirect/devices.c
@@ -238,7 +238,7 @@ u8 smbdirect_netdev_rdma_capable_node_type(struct net_device *netdev) return RDMA_NODE_UNSPECIFIED; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_netdev_rdma_capable_node_type); +EXPORT_SYMBOL_GPL(smbdirect_netdev_rdma_capable_node_type); __init int smbdirect_devices_init(void) {
diff --git a/fs/smb/smbdirect/internal.h b/fs/smb/smbdirect/internal.h index 2d5acf2..e9959e6 100644 --- a/fs/smb/smbdirect/internal.h +++ b/fs/smb/smbdirect/internal.h
@@ -6,11 +6,11 @@ #ifndef __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ #define __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ +#define DEFAULT_SYMBOL_NAMESPACE "SMBDIRECT" #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include "smbdirect.h" +#include <linux/smbdirect.h> #include "pdu.h" -#include "public.h" #include <linux/mutex.h>
diff --git a/fs/smb/smbdirect/listen.c b/fs/smb/smbdirect/listen.c index 143a761..2f78bca 100644 --- a/fs/smb/smbdirect/listen.c +++ b/fs/smb/smbdirect/listen.c
@@ -90,7 +90,7 @@ int smbdirect_socket_listen(struct smbdirect_socket *sc, int backlog) */ return 0; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_listen); +EXPORT_SYMBOL_GPL(smbdirect_socket_listen); static int smbdirect_new_rdma_event_handler(struct rdma_cm_id *new_id, struct rdma_cm_event *event)
diff --git a/fs/smb/smbdirect/mr.c b/fs/smb/smbdirect/mr.c index 5228e69..15c6363 100644 --- a/fs/smb/smbdirect/mr.c +++ b/fs/smb/smbdirect/mr.c
@@ -269,7 +269,7 @@ smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, { const struct smbdirect_socket_parameters *sp = &sc->parameters; struct smbdirect_mr_io *mr; - int ret, num_pages; + int ret, num_pages, num_mapped; struct ib_reg_wr *reg_wr; num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1); @@ -300,19 +300,22 @@ smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, num_pages, iov_iter_count(iter), sp->max_frmr_depth); smbdirect_iter_to_sgt(iter, &mr->sgt, sp->max_frmr_depth); - ret = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); - if (!ret) { + num_mapped = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); + if (!num_mapped) { smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, - "ib_dma_map_sg num_pages=%u dir=%x ret=%d (%1pe)\n", - num_pages, mr->dir, ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); + "ib_dma_map_sg num_pages=%u dir=%x num_mapped=%d\n", + num_pages, mr->dir, num_mapped); + ret = -EIO; goto dma_map_error; } - ret = ib_map_mr_sg(mr->mr, mr->sgt.sgl, mr->sgt.nents, NULL, PAGE_SIZE); - if (ret != mr->sgt.nents) { + ret = ib_map_mr_sg(mr->mr, mr->sgt.sgl, num_mapped, NULL, PAGE_SIZE); + if (ret != num_mapped) { smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, - "ib_map_mr_sg failed ret = %d nents = %u\n", - ret, mr->sgt.nents); + "ib_map_mr_sg failed ret = %d num_mapped = %u\n", + ret, num_mapped); + if (ret >= 0) + ret = -EIO; goto map_mr_error; } @@ -380,7 +383,7 @@ smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, mutex_unlock(&mr->mutex); return NULL; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_register_mr_io); +EXPORT_SYMBOL_GPL(smbdirect_connection_register_mr_io); void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, struct smbdirect_buffer_descriptor_v1 *v1) @@ -397,7 +400,7 @@ void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, } mutex_unlock(&mr->mutex); } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_mr_io_fill_buffer_descriptor); 
+EXPORT_SYMBOL_GPL(smbdirect_mr_io_fill_buffer_descriptor); /* * Deregister a MR after I/O is done @@ -490,4 +493,4 @@ void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr) if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked)) mutex_unlock(&mr->mutex); } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_deregister_mr_io); +EXPORT_SYMBOL_GPL(smbdirect_connection_deregister_mr_io);
diff --git a/fs/smb/smbdirect/rw.c b/fs/smb/smbdirect/rw.c index c2f46b1..6fe3804 100644 --- a/fs/smb/smbdirect/rw.c +++ b/fs/smb/smbdirect/rw.c
@@ -252,4 +252,4 @@ int smbdirect_connection_rdma_xmit(struct smbdirect_socket *sc, kfree(msg); goto out; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_rdma_xmit); +EXPORT_SYMBOL_GPL(smbdirect_connection_rdma_xmit);
diff --git a/fs/smb/smbdirect/smbdirect.h b/fs/smb/smbdirect/smbdirect.h deleted file mode 100644 index bbab5f7..0000000 --- a/fs/smb/smbdirect/smbdirect.h +++ /dev/null
@@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2025 Stefan Metzmacher - */ - -#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ -#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ - -#include <linux/types.h> - -/* SMB-DIRECT buffer descriptor V1 structure [MS-SMBD] 2.2.3.1 */ -struct smbdirect_buffer_descriptor_v1 { - __le64 offset; - __le32 token; - __le32 length; -} __packed; - -/* - * Connection parameters mostly from [MS-SMBD] 3.1.1.1 - * - * These are setup and negotiated at the beginning of a - * connection and remain constant unless explicitly changed. - * - * Some values are important for the upper layer. - */ -struct smbdirect_socket_parameters { - __u64 flags; -#define SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB ((__u64)0x1) -#define SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW ((__u64)0x2) - __u32 resolve_addr_timeout_msec; - __u32 resolve_route_timeout_msec; - __u32 rdma_connect_timeout_msec; - __u32 negotiate_timeout_msec; - __u16 initiator_depth; /* limited to U8_MAX */ - __u16 responder_resources; /* limited to U8_MAX */ - __u16 recv_credit_max; - __u16 send_credit_target; - __u32 max_send_size; - __u32 max_fragmented_send_size; - __u32 max_recv_size; - __u32 max_fragmented_recv_size; - __u32 max_read_write_size; - __u32 max_frmr_depth; - __u32 keepalive_interval_msec; - __u32 keepalive_timeout_msec; -} __packed; - -#define SMBDIRECT_FLAG_PORT_RANGE_MASK ( \ - SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB | \ - SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW) - -#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ */
diff --git a/fs/smb/smbdirect/socket.c b/fs/smb/smbdirect/socket.c index 1b4ab01..39cca72 100644 --- a/fs/smb/smbdirect/socket.c +++ b/fs/smb/smbdirect/socket.c
@@ -20,7 +20,7 @@ bool smbdirect_frwr_is_supported(const struct ib_device_attr *attrs) return false; return true; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_frwr_is_supported); +EXPORT_SYMBOL_GPL(smbdirect_frwr_is_supported); static void smbdirect_socket_cleanup_work(struct work_struct *work); @@ -107,7 +107,7 @@ int smbdirect_socket_create_kern(struct net *net, struct smbdirect_socket **_sc) alloc_failed: return ret; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_create_kern); +EXPORT_SYMBOL_GPL(smbdirect_socket_create_kern); int smbdirect_socket_init_accepting(struct rdma_cm_id *id, struct smbdirect_socket *sc) { @@ -148,7 +148,7 @@ int smbdirect_socket_create_accepting(struct rdma_cm_id *id, struct smbdirect_so alloc_failed: return ret; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_create_accepting); +EXPORT_SYMBOL_GPL(smbdirect_socket_create_accepting); int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, const struct smbdirect_socket_parameters *sp) @@ -189,14 +189,14 @@ int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, return 0; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_set_initial_parameters); +EXPORT_SYMBOL_GPL(smbdirect_socket_set_initial_parameters); const struct smbdirect_socket_parameters * smbdirect_socket_get_current_parameters(struct smbdirect_socket *sc) { return &sc->parameters; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_get_current_parameters); +EXPORT_SYMBOL_GPL(smbdirect_socket_get_current_parameters); int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, enum ib_poll_context poll_ctx, @@ -220,7 +220,7 @@ int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, return 0; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_set_kernel_settings); +EXPORT_SYMBOL_GPL(smbdirect_socket_set_kernel_settings); void smbdirect_socket_set_logging(struct smbdirect_socket *sc, void *private_ptr, @@ -240,7 +240,7 @@ void smbdirect_socket_set_logging(struct smbdirect_socket *sc, 
sc->logging.needed = needed; sc->logging.vaprintf = vaprintf; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_set_logging); +EXPORT_SYMBOL_GPL(smbdirect_socket_set_logging); static void smbdirect_socket_wake_up_all(struct smbdirect_socket *sc) { @@ -663,13 +663,13 @@ int smbdirect_socket_bind(struct smbdirect_socket *sc, struct sockaddr *addr) return 0; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_bind); +EXPORT_SYMBOL_GPL(smbdirect_socket_bind); void smbdirect_socket_shutdown(struct smbdirect_socket *sc) { smbdirect_socket_schedule_cleanup_lvl(sc, SMBDIRECT_LOG_INFO, -ESHUTDOWN); } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_shutdown); +EXPORT_SYMBOL_GPL(smbdirect_socket_shutdown); static void smbdirect_socket_release_disconnect(struct kref *kref) { @@ -712,7 +712,7 @@ void smbdirect_socket_release(struct smbdirect_socket *sc) */ kref_put(&sc->refs.destroy, smbdirect_socket_release_destroy); } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_release); +EXPORT_SYMBOL_GPL(smbdirect_socket_release); int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, enum smbdirect_socket_status expected_status,
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b4b703c..01e2039 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h
@@ -3725,6 +3725,7 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; +extern const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto; extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_find_vma_proto;
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index f425637..50a784d 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h
@@ -611,8 +611,8 @@ struct cgroup { /* used to wait for offlining of csses */ wait_queue_head_t offline_waitq; - /* used by cgroup_rmdir() to wait for dying tasks to leave */ - wait_queue_head_t dying_populated_waitq; + /* defers killing csses after removal until cgroup is depopulated */ + struct work_struct finish_destroy_work; /* used to schedule release agent */ struct work_struct release_agent_work;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e52160e..f6d037a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h
@@ -53,6 +53,7 @@ struct kernel_clone_args; enum css_task_iter_flags { CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */ CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */ + CSS_TASK_ITER_WITH_DEAD = (1U << 2), /* include exiting tasks */ CSS_TASK_ITER_SKIPPED = (1U << 16), /* internal flags */ };
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 77c778d..a81b46a 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h
@@ -146,6 +146,9 @@ struct xt_match { /* Called when user tries to insert an entry of this type. */ int (*checkentry)(const struct xt_mtchk_param *); + /* Called to validate hooks based on the match configuration. */ + int (*check_hooks)(const struct xt_mtchk_param *); + /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_mtdtor_param *); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT @@ -187,6 +190,9 @@ struct xt_target { /* Should return 0 on success or an error code otherwise (-Exxxx). */ int (*checkentry)(const struct xt_tgchk_param *); + /* Called to validate hooks based on the target configuration. */ + int (*check_hooks)(const struct xt_tgchk_param *); + /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_tgdtor_param *); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT @@ -279,8 +285,10 @@ bool xt_find_jump_offset(const unsigned int *offsets, int xt_check_proc_name(const char *name, unsigned int size); +int xt_check_hooks_match(struct xt_mtchk_param *par); int xt_check_match(struct xt_mtchk_param *, unsigned int size, u16 proto, bool inv_proto); +int xt_check_hooks_target(struct xt_tgchk_param *par); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u16 proto, bool inv_proto);
diff --git a/include/linux/rseq.h b/include/linux/rseq.h index b9d62fc..7ef79b2 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h
@@ -9,6 +9,11 @@ void __rseq_handle_slowpath(struct pt_regs *regs); +static __always_inline bool rseq_v2(struct task_struct *t) +{ + return IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY) && likely(t->rseq.event.has_rseq > 1); +} + /* Invoked from resume_user_mode_work() */ static inline void rseq_handle_slowpath(struct pt_regs *regs) { @@ -16,8 +21,7 @@ static inline void rseq_handle_slowpath(struct pt_regs *regs) if (current->rseq.event.slowpath) __rseq_handle_slowpath(regs); } else { - /* '&' is intentional to spare one conditional branch */ - if (current->rseq.event.sched_switch & current->rseq.event.has_rseq) + if (current->rseq.event.sched_switch && current->rseq.event.has_rseq) __rseq_handle_slowpath(regs); } } @@ -30,9 +34,9 @@ void __rseq_signal_deliver(int sig, struct pt_regs *regs); */ static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { - if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) { - /* '&' is intentional to spare one conditional branch */ - if (current->rseq.event.has_rseq & current->rseq.event.user_irq) + if (rseq_v2(current)) { + /* has_rseq is implied in rseq_v2() */ + if (current->rseq.event.user_irq) __rseq_signal_deliver(ksig->sig, regs); } else { if (current->rseq.event.has_rseq) @@ -50,15 +54,22 @@ static __always_inline void rseq_sched_switch_event(struct task_struct *t) { struct rseq_event *ev = &t->rseq.event; - if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) { + /* + * Only apply the user_irq optimization for RSEQ ABI V2 registrations. + * Legacy users like TCMalloc rely on the original ABI V1 behaviour + * which updates IDs on every context switch. + */ + if (rseq_v2(t)) { /* - * Avoid a boat load of conditionals by using simple logic - * to determine whether NOTIFY_RESUME needs to be raised. + * Avoid a boat load of conditionals by using simple logic to + * determine whether TIF_NOTIFY_RESUME or TIF_RSEQ needs to be + * raised. * - * It's required when the CPU or MM CID has changed or - * the entry was from user space. 
+ * It's required when the CPU or MM CID has changed or the entry + * was via interrupt from user space. ev->has_rseq does not have + * to be evaluated here because rseq_v2() implies has_rseq. */ - bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq; + bool raise = ev->user_irq | ev->ids_changed; if (raise) { ev->sched_switch = true; @@ -66,6 +77,7 @@ static __always_inline void rseq_sched_switch_event(struct task_struct *t) } } else { if (ev->has_rseq) { + t->rseq.event.ids_changed = true; t->rseq.event.sched_switch = true; rseq_raise_notify_resume(t); } @@ -119,6 +131,8 @@ static inline void rseq_virt_userspace_exit(void) static inline void rseq_reset(struct task_struct *t) { + /* Protect against preemption and membarrier IPI */ + guard(irqsave)(); memset(&t->rseq, 0, sizeof(t->rseq)); t->rseq.ids.cpu_id = RSEQ_CPU_ID_UNINITIALIZED; } @@ -159,6 +173,7 @@ static inline unsigned int rseq_alloc_align(void) } #else /* CONFIG_RSEQ */ +static inline bool rseq_v2(struct task_struct *t) { return false; } static inline void rseq_handle_slowpath(struct pt_regs *regs) { } static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { } static inline void rseq_sched_switch_event(struct task_struct *t) { }
diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h index f11ebd3..2d0295d 100644 --- a/include/linux/rseq_entry.h +++ b/include/linux/rseq_entry.h
@@ -111,6 +111,20 @@ static __always_inline void rseq_slice_clear_grant(struct task_struct *t) t->rseq.slice.state.granted = false; } +/* + * Open coded, so it can be invoked within a user access region. + * + * This clears the user space state of the time slice extensions field only when + * the task has registered the optimized RSEQ_ABI V2. Some legacy registrations, + * e.g. TCMalloc, have conflicting non-ABI fields in struct RSEQ, which would be + * overwritten by an unconditional write. + */ +#define rseq_slice_clear_user(rseq, efault) \ +do { \ + if (rseq_slice_extension_enabled()) \ + unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); \ +} while (0) + static __always_inline bool __rseq_grant_slice_extension(bool work_pending) { struct task_struct *curr = current; @@ -230,10 +244,10 @@ static __always_inline bool rseq_slice_extension_enabled(void) { return false; } static __always_inline bool rseq_arm_slice_extension_timer(void) { return false; } static __always_inline void rseq_slice_clear_grant(struct task_struct *t) { } static __always_inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask) { return false; } +#define rseq_slice_clear_user(rseq, efault) do { } while (0) #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */ bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr); -bool rseq_debug_validate_ids(struct task_struct *t); static __always_inline void rseq_note_user_irq_entry(void) { @@ -353,43 +367,6 @@ bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, return false; } -/* - * On debug kernels validate that user space did not mess with it if the - * debug branch is enabled. - */ -bool rseq_debug_validate_ids(struct task_struct *t) -{ - struct rseq __user *rseq = t->rseq.usrptr; - u32 cpu_id, uval, node_id; - - /* - * On the first exit after registering the rseq region CPU ID is - * RSEQ_CPU_ID_UNINITIALIZED and node_id in user space is 0! 
- */ - node_id = t->rseq.ids.cpu_id != RSEQ_CPU_ID_UNINITIALIZED ? - cpu_to_node(t->rseq.ids.cpu_id) : 0; - - scoped_user_read_access(rseq, efault) { - unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault); - if (cpu_id != t->rseq.ids.cpu_id) - goto die; - unsafe_get_user(uval, &rseq->cpu_id, efault); - if (uval != cpu_id) - goto die; - unsafe_get_user(uval, &rseq->node_id, efault); - if (uval != node_id) - goto die; - unsafe_get_user(uval, &rseq->mm_cid, efault); - if (uval != t->rseq.ids.mm_cid) - goto die; - } - return true; -die: - t->rseq.event.fatal = true; -efault: - return false; -} - #endif /* RSEQ_BUILD_SLOW_PATH */ /* @@ -499,37 +476,50 @@ rseq_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long c * faults in task context are fatal too. */ static rseq_inline -bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids, - u32 node_id, u64 *csaddr) +bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids, u64 *csaddr) { struct rseq __user *rseq = t->rseq.usrptr; - if (static_branch_unlikely(&rseq_debug_enabled)) { - if (!rseq_debug_validate_ids(t)) - return false; - } - scoped_user_rw_access(rseq, efault) { + /* Validate the R/O fields for debug and optimized mode */ + if (static_branch_unlikely(&rseq_debug_enabled) || rseq_v2(t)) { + u32 cpu_id, uval; + + unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault); + if (cpu_id != t->rseq.ids.cpu_id) + goto die; + unsafe_get_user(uval, &rseq->cpu_id, efault); + if (uval != cpu_id) + goto die; + unsafe_get_user(uval, &rseq->node_id, efault); + if (uval != t->rseq.ids.node_id) + goto die; + unsafe_get_user(uval, &rseq->mm_cid, efault); + if (uval != t->rseq.ids.mm_cid) + goto die; + } + unsafe_put_user(ids->cpu_id, &rseq->cpu_id_start, efault); unsafe_put_user(ids->cpu_id, &rseq->cpu_id, efault); - unsafe_put_user(node_id, &rseq->node_id, efault); + unsafe_put_user(ids->node_id, &rseq->node_id, efault); unsafe_put_user(ids->mm_cid, &rseq->mm_cid, efault); if 
(csaddr) unsafe_get_user(*csaddr, &rseq->rseq_cs, efault); - /* Open coded, so it's in the same user access region */ - if (rseq_slice_extension_enabled()) { - /* Unconditionally clear it, no point in conditionals */ - unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); - } + /* RSEQ ABI V2 only operations */ + if (rseq_v2(t)) + rseq_slice_clear_user(rseq, efault); } rseq_slice_clear_grant(t); /* Cache the new values */ - t->rseq.ids.cpu_cid = ids->cpu_cid; + t->rseq.ids = *ids; rseq_stat_inc(rseq_stats.ids); rseq_trace_update(t, ids); return true; + +die: + t->rseq.event.fatal = true; efault: return false; } @@ -539,11 +529,11 @@ bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids, * is in a critical section. */ static rseq_inline bool rseq_update_usr(struct task_struct *t, struct pt_regs *regs, - struct rseq_ids *ids, u32 node_id) + struct rseq_ids *ids) { u64 csaddr; - if (!rseq_set_ids_get_csaddr(t, ids, node_id, &csaddr)) + if (!rseq_set_ids_get_csaddr(t, ids, &csaddr)) return false; /* @@ -612,6 +602,14 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t * interrupts disabled */ guard(pagefault)(); + /* + * This optimization is only valid when the task registered for the + * optimized RSEQ_ABI_V2 variant. Some legacy users rely on the original + * RSEQ implementation behaviour which unconditionally updated the IDs. + * rseq_sched_switch_event() ensures that legacy registrations always + * have both sched_switch and ids_changed set, which is compatible with + * the historical TIF_NOTIFY_RESUME behaviour. 
+ */ if (likely(!t->rseq.event.ids_changed)) { struct rseq __user *rseq = t->rseq.usrptr; /* @@ -623,11 +621,9 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t scoped_user_rw_access(rseq, efault) { unsafe_get_user(csaddr, &rseq->rseq_cs, efault); - /* Open coded, so it's in the same user access region */ - if (rseq_slice_extension_enabled()) { - /* Unconditionally clear it, no point in conditionals */ - unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); - } + /* RSEQ ABI V2 only operations */ + if (rseq_v2(t)) + rseq_slice_clear_user(rseq, efault); } rseq_slice_clear_grant(t); @@ -640,12 +636,12 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t } struct rseq_ids ids = { - .cpu_id = task_cpu(t), - .mm_cid = task_mm_cid(t), + .cpu_id = task_cpu(t), + .mm_cid = task_mm_cid(t), + .node_id = cpu_to_node(ids.cpu_id), }; - u32 node_id = cpu_to_node(ids.cpu_id); - return rseq_update_usr(t, regs, &ids, node_id); + return rseq_update_usr(t, regs, &ids); efault: return false; }
diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h index 0b42045..85739a6 100644 --- a/include/linux/rseq_types.h +++ b/include/linux/rseq_types.h
@@ -9,6 +9,12 @@ #ifdef CONFIG_RSEQ struct rseq; +/* + * rseq_event::has_rseq contains the ABI version number so preserving it + * in AND operations requires a mask. + */ +#define RSEQ_HAS_RSEQ_VERSION_MASK 0xff + /** * struct rseq_event - Storage for rseq related event management * @all: Compound to initialize and clear the data efficiently @@ -17,7 +23,8 @@ struct rseq; * exit to user * @ids_changed: Indicator that IDs need to be updated * @user_irq: True on interrupt entry from user mode - * @has_rseq: True if the task has a rseq pointer installed + * @has_rseq: Greater than 0 if the task has a rseq pointer installed. + * Contains the RSEQ version number * @error: Compound error code for the slow path to analyze * @fatal: User space data corrupted or invalid * @slowpath: Indicator that slow path processing via TIF_NOTIFY_RESUME @@ -59,8 +66,9 @@ struct rseq_event { * compiler emit a single compare on 64-bit * @cpu_id: The CPU ID which was written last to user space * @mm_cid: The MM CID which was written last to user space + * @node_id: The node ID which was written last to user space * - * @cpu_id and @mm_cid are updated when the data is written to user space. + * @cpu_id, @mm_cid and @node_id are updated when the data is written to user space. */ struct rseq_ids { union { @@ -70,6 +78,7 @@ struct rseq_ids { u32 mm_cid; }; }; + u32 node_id; }; /**
diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 1a3af2e..adb9a4d 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h
@@ -101,6 +101,7 @@ enum scx_ent_flags { SCX_TASK_DEQD_FOR_SLEEP = 1 << 3, /* last dequeue was for SLEEP */ SCX_TASK_SUB_INIT = 1 << 4, /* task being initialized for a sub sched */ SCX_TASK_IMMED = 1 << 5, /* task is on local DSQ with %SCX_ENQ_IMMED */ + SCX_TASK_OFF_TASKS = 1 << 6, /* removed from scx_tasks by sched_ext_dead() */ /* * Bits 8 and 9 are used to carry task state:
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index dc3975f..cf0fd03 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h
@@ -21,6 +21,11 @@ enum hk_type { HK_TYPE_MAX, /* + * HK_TYPE_KTHREAD is now an alias of HK_TYPE_DOMAIN + */ + HK_TYPE_KTHREAD = HK_TYPE_DOMAIN, + + /* * The following housekeeping types are only set by the nohz_full * boot commandline option. So they can share the same value. */ @@ -29,7 +34,6 @@ enum hk_type { HK_TYPE_RCU = HK_TYPE_KERNEL_NOISE, HK_TYPE_MISC = HK_TYPE_KERNEL_NOISE, HK_TYPE_WQ = HK_TYPE_KERNEL_NOISE, - HK_TYPE_KTHREAD = HK_TYPE_KERNEL_NOISE }; #ifdef CONFIG_CPU_ISOLATION
diff --git a/fs/smb/smbdirect/public.h b/include/linux/smbdirect.h similarity index 76% rename from fs/smb/smbdirect/public.h rename to include/linux/smbdirect.h index 5008815..97f5ba7 100644 --- a/fs/smb/smbdirect/public.h +++ b/include/linux/smbdirect.h
@@ -3,18 +3,56 @@ * Copyright (C) 2025, Stefan Metzmacher */ -#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PUBLIC_H__ -#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PUBLIC_H__ +#ifndef __LINUX_SMBDIRECT_H__ +#define __LINUX_SMBDIRECT_H__ -struct smbdirect_buffer_descriptor_v1; -struct smbdirect_socket_parameters; +#include <linux/types.h> + +/* SMB-DIRECT buffer descriptor V1 structure [MS-SMBD] 2.2.3.1 */ +struct smbdirect_buffer_descriptor_v1 { + __le64 offset; + __le32 token; + __le32 length; +} __packed; + +/* + * Connection parameters mostly from [MS-SMBD] 3.1.1.1 + * + * These are setup and negotiated at the beginning of a + * connection and remain constant unless explicitly changed. + * + * Some values are important for the upper layer. + */ +struct smbdirect_socket_parameters { + __u64 flags; +#define SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB ((__u64)0x1) +#define SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW ((__u64)0x2) + __u32 resolve_addr_timeout_msec; + __u32 resolve_route_timeout_msec; + __u32 rdma_connect_timeout_msec; + __u32 negotiate_timeout_msec; + __u16 initiator_depth; /* limited to U8_MAX */ + __u16 responder_resources; /* limited to U8_MAX */ + __u16 recv_credit_max; + __u16 send_credit_target; + __u32 max_send_size; + __u32 max_fragmented_send_size; + __u32 max_recv_size; + __u32 max_fragmented_recv_size; + __u32 max_read_write_size; + __u32 max_frmr_depth; + __u32 keepalive_interval_msec; + __u32 keepalive_timeout_msec; +} __packed; + +#define SMBDIRECT_FLAG_PORT_RANGE_MASK ( \ + SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB | \ + SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW) struct smbdirect_socket; struct smbdirect_send_batch; struct smbdirect_mr_io; -#define __SMBDIRECT_EXPORT_SYMBOL__(__sym) EXPORT_SYMBOL_FOR_MODULES(__sym, "cifs,ksmbd") - #include <rdma/rw.h> u8 smbdirect_netdev_rdma_capable_node_type(struct net_device *netdev); @@ -145,4 +183,4 @@ void smbdirect_connection_legacy_debug_proc_show(struct smbdirect_socket *sc, unsigned int rdma_readwrite_threshold, struct seq_file 
*m); -#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PUBLIC_H__ */ +#endif /* __LINUX_SMBDIRECT_H__ */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ab6cb70..61776245 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h
@@ -534,8 +534,10 @@ alloc_workqueue_noprof(const char *fmt, unsigned int flags, int max_active, ...) * Pointer to the allocated workqueue on success, %NULL on failure. */ __printf(2, 5) struct workqueue_struct * -devm_alloc_workqueue(struct device *dev, const char *fmt, unsigned int flags, - int max_active, ...); +devm_alloc_workqueue_noprof(struct device *dev, const char *fmt, + unsigned int flags, int max_active, ...); +#define devm_alloc_workqueue(...) \ + alloc_hooks(devm_alloc_workqueue_noprof(__VA_ARGS__)) #ifdef CONFIG_LOCKDEP /**
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a7bffb9..aa600fb 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h
@@ -2495,7 +2495,7 @@ void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, bdaddr_t *bdaddr, u8 addr_type); int hci_abort_conn(struct hci_conn *conn, u8 reason); -u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, +void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, __u8 ltk[16], __u8 key_size);
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index e0ca390..2f312d1 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h
@@ -99,6 +99,7 @@ FN(FRAG_TOO_FAR) \ FN(TCP_MINTTL) \ FN(IPV6_BAD_EXTHDR) \ + FN(IPV6_TOO_MANY_EXTHDRS) \ FN(IPV6_NDISC_FRAG) \ FN(IPV6_NDISC_HOP_LIMIT) \ FN(IPV6_NDISC_BAD_CODE) \ @@ -494,6 +495,11 @@ enum skb_drop_reason { SKB_DROP_REASON_TCP_MINTTL, /** @SKB_DROP_REASON_IPV6_BAD_EXTHDR: Bad IPv6 extension header. */ SKB_DROP_REASON_IPV6_BAD_EXTHDR, + /** + * @SKB_DROP_REASON_IPV6_TOO_MANY_EXTHDRS: Number of IPv6 extension + * headers in the packet exceeds IP6_MAX_EXT_HDRS_CNT. + */ + SKB_DROP_REASON_IPV6_TOO_MANY_EXTHDRS, /** @SKB_DROP_REASON_IPV6_NDISC_FRAG: invalid frag (suppress_frag_ndisc). */ SKB_DROP_REASON_IPV6_NDISC_FRAG, /** @SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT: invalid hop limit. */
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 72d325c..02762ce 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h
@@ -491,6 +491,7 @@ struct ip_vs_est_kt_data { DECLARE_BITMAP(avail, IPVS_EST_NTICKS); /* tick has space for ests */ unsigned long est_timer; /* estimation timer (jiffies) */ struct ip_vs_stats *calc_stats; /* Used for calculation */ + int needed; /* task is needed */ int tick_len[IPVS_EST_NTICKS]; /* est count */ int id; /* ktid per netns */ int chain_max; /* max ests per tick chain */ @@ -1411,7 +1412,7 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) return ipvs->sysctl_run_estimation; } -static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) +static inline const struct cpumask *__sysctl_est_cpulist(struct netns_ipvs *ipvs) { if (ipvs->est_cpulist_valid) return ipvs->sysctl_est_cpulist; @@ -1529,7 +1530,7 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) return 1; } -static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) +static inline const struct cpumask *__sysctl_est_cpulist(struct netns_ipvs *ipvs) { return housekeeping_cpumask(HK_TYPE_KTHREAD); } @@ -1564,6 +1565,18 @@ static inline int sysctl_svc_lfactor(struct netns_ipvs *ipvs) return READ_ONCE(ipvs->sysctl_svc_lfactor); } +static inline bool sysctl_est_cpulist_empty(struct netns_ipvs *ipvs) +{ + guard(rcu)(); + return cpumask_empty(__sysctl_est_cpulist(ipvs)); +} + +static inline unsigned int sysctl_est_cpulist_weight(struct netns_ipvs *ipvs) +{ + guard(rcu)(); + return cpumask_weight(__sysctl_est_cpulist(ipvs)); +} + /* IPVS core functions * (from ip_vs_core.c) */ @@ -1884,18 +1897,26 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats); void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats); void ip_vs_zero_estimator(struct ip_vs_stats *stats); void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats); -void ip_vs_est_reload_start(struct netns_ipvs *ipvs); +void ip_vs_est_reload_start(struct netns_ipvs *ipvs, bool restart); int 
ip_vs_est_kthread_start(struct netns_ipvs *ipvs, struct ip_vs_est_kt_data *kd); void ip_vs_est_kthread_stop(struct ip_vs_est_kt_data *kd); +static inline void ip_vs_stop_estimator_tot_stats(struct netns_ipvs *ipvs) +{ +#ifdef CONFIG_SYSCTL + ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s); + ipvs->tot_stats->s.est.ktid = -2; +#endif +} + static inline void ip_vs_est_stopped_recalc(struct netns_ipvs *ipvs) { #ifdef CONFIG_SYSCTL /* Stop tasks while cpulist is empty or if disabled with flag */ ipvs->est_stopped = !sysctl_run_estimation(ipvs) || (ipvs->est_cpulist_valid && - cpumask_empty(sysctl_est_cpulist(ipvs))); + sysctl_est_cpulist_empty(ipvs)); #endif } @@ -1911,7 +1932,7 @@ static inline bool ip_vs_est_stopped(struct netns_ipvs *ipvs) static inline int ip_vs_est_max_threads(struct netns_ipvs *ipvs) { unsigned int limit = IPVS_EST_CPU_KTHREADS * - cpumask_weight(sysctl_est_cpulist(ipvs)); + sysctl_est_cpulist_weight(ipvs); return max(1U, limit); }
diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d042afe..1dec81f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h
@@ -90,6 +90,9 @@ struct ip_tunnel_info; #define IP6_DEFAULT_MAX_DST_OPTS_LEN INT_MAX /* No limit */ #define IP6_DEFAULT_MAX_HBH_OPTS_LEN INT_MAX /* No limit */ +/* Hard limit on traversed IPv6 extension headers */ +#define IP6_MAX_EXT_HDRS_CNT 12 + /* * Addr type *
diff --git a/include/net/mana/shm_channel.h b/include/net/mana/shm_channel.h index 5199b41..dbabcfb 100644 --- a/include/net/mana/shm_channel.h +++ b/include/net/mana/shm_channel.h
@@ -4,6 +4,12 @@ #ifndef _SHM_CHANNEL_H #define _SHM_CHANNEL_H +#define SMC_APERTURE_BITS 256 +#define SMC_BASIC_UNIT (sizeof(u32)) +#define SMC_APERTURE_DWORDS (SMC_APERTURE_BITS / (SMC_BASIC_UNIT * 8)) +#define SMC_LAST_DWORD (SMC_APERTURE_DWORDS - 1) +#define SMC_APERTURE_SIZE (SMC_APERTURE_BITS / 8) + struct shm_channel { struct device *dev; void __iomem *base;
diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h index b175d271..609bcf4 100644 --- a/include/net/netfilter/nf_dup_netdev.h +++ b/include/net/netfilter/nf_dup_netdev.h
@@ -3,10 +3,23 @@ #define _NF_DUP_NETDEV_H_ #include <net/netfilter/nf_tables.h> +#include <linux/netdevice.h> +#include <linux/sched.h> void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif); void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif); +#define NF_RECURSION_LIMIT 2 + +static inline u8 *nf_get_nf_dup_skb_recursion(void) +{ +#ifndef CONFIG_PREEMPT_RT + return this_cpu_ptr(&softnet_data.xmit.nf_dup_skb_recursion); +#else + return &current->net_xmit.nf_dup_skb_recursion; +#endif +} + struct nft_offload_ctx; struct nft_flow_rule;
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index b09c11c..7b23b24 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h
@@ -148,9 +148,10 @@ struct flow_offload_tuple { /* All members above are keys for lookups, see flow_offload_hash(). */ struct { } __hash; - u8 dir:2, + u16 dir:2, xmit_type:3, encap_num:2, + needs_gso_segment:1, tun_num:2, in_vlan_ingress:2; u16 mtu; @@ -232,6 +233,7 @@ struct nf_flow_route { u32 hw_ifindex; u8 h_source[ETH_ALEN]; u8 h_dest[ETH_ALEN]; + u8 needs_gso_segment:1; } out; enum flow_offload_xmit_type xmit_type; } tuple[FLOW_OFFLOAD_DIR_MAX];
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 80ccd4d..6e27c56 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h
@@ -275,7 +275,7 @@ struct netns_ipv4 { #ifdef CONFIG_IP_MROUTE #ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES - struct mr_table *mrt; + struct mr_table __rcu *mrt; #else struct list_head mr_tables; struct fib_rules_ops *mr_rules_ops;
diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index f69344f..ca6fe1f 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h
@@ -28,7 +28,7 @@ enum rseq_cs_flags_bit { RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, - /* (3) Intentional gap to put new bits into a separate byte */ + /* (3) Intentional gap to keep new bits separate */ /* User read only feature flags */ RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE_BIT = 4, @@ -161,6 +161,9 @@ struct rseq { * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE + * + * It is now used for feature status advertisement by the kernel. + * See: enum rseq_cs_flags_bit for further information. */ __u32 flags;
diff --git a/include/ufs/unipro.h b/include/ufs/unipro.h index f849a2a..9c16870 100644 --- a/include/ufs/unipro.h +++ b/include/ufs/unipro.h
@@ -333,6 +333,11 @@ enum ufs_eom_eye_mask { #define DME_LocalTC0ReplayTimeOutVal 0xD042 #define DME_LocalAFC0ReqTimeOutVal 0xD043 +enum ufs_op_mode { + LS_MODE = 1, + HS_MODE = 2, +}; + /* PA power modes */ enum ufs_pa_pwr_mode { FAST_MODE = 1,
diff --git a/include/video/imx-ipu-image-convert.h b/include/video/imx-ipu-image-convert.h index 003b392..6b77968 100644 --- a/include/video/imx-ipu-image-convert.h +++ b/include/video/imx-ipu-image-convert.h
@@ -27,12 +27,13 @@ struct ipu_image_convert_run { int status; + /* private: */ /* internal to image converter, callers don't touch */ struct list_head list; }; /** - * ipu_image_convert_cb_t - conversion callback function prototype + * typedef ipu_image_convert_cb_t - conversion callback function prototype * * @run: the completed conversion run pointer * @ctx: a private context pointer for the callback @@ -60,7 +61,7 @@ void ipu_image_convert_adjust(struct ipu_image *in, struct ipu_image *out, * @out: output image format * @rot_mode: rotation mode * - * Returns 0 if the formats and rotation mode meet IPU restrictions, + * Returns: 0 if the formats and rotation mode meet IPU restrictions, * -EINVAL otherwise. */ int ipu_image_convert_verify(struct ipu_image *in, struct ipu_image *out, @@ -77,11 +78,11 @@ int ipu_image_convert_verify(struct ipu_image *in, struct ipu_image *out, * @complete: run completion callback * @complete_context: a context pointer for the completion callback * - * Returns an opaque conversion context pointer on success, error pointer + * In V4L2, drivers should call ipu_image_convert_prepare() at streamon. + * + * Returns: an opaque conversion context pointer on success, error pointer * on failure. The input/output formats and rotation mode must already meet * IPU retrictions. - * - * In V4L2, drivers should call ipu_image_convert_prepare() at streamon. */ struct ipu_image_convert_ctx * ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task, @@ -122,6 +123,8 @@ void ipu_image_convert_unprepare(struct ipu_image_convert_ctx *ctx); * In V4L2, drivers should call ipu_image_convert_queue() while * streaming to queue the conversion of a received input buffer. * For example mem2mem devices this would be called in .device_run. + * + * Returns: 0 on success or -errno on error. 
*/ int ipu_image_convert_queue(struct ipu_image_convert_run *run); @@ -155,6 +158,9 @@ void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx); * On successful return the caller can queue more run requests if needed, using * the prepared context in run->ctx. The caller is responsible for unpreparing * the context when no more conversion requests are needed. + * + * Returns: pointer to the created &struct ipu_image_convert_run that has + * been queued on success; an ERR_PTR(errno) on error. */ struct ipu_image_convert_run * ipu_image_convert(struct ipu_soc *ipu, enum ipu_ic_task ic_task,
diff --git a/include/video/udlfb.h b/include/video/udlfb.h index 58fb573..ab34790 100644 --- a/include/video/udlfb.h +++ b/include/video/udlfb.h
@@ -56,6 +56,7 @@ struct dlfb_data { spinlock_t damage_lock; struct work_struct damage_work; struct fb_ops ops; + atomic_t mmap_count; /* blit-only rendering path metrics, exposed through sysfs */ atomic_t bytes_rendered; /* raw pixel-bytes driver asked to render */ atomic_t bytes_identical; /* saved effort with backbuffer comparison */
diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c index 3da0285..d656cc2 100644 --- a/io_uring/eventfd.c +++ b/io_uring/eventfd.c
@@ -43,6 +43,7 @@ static void io_eventfd_do_signal(struct rcu_head *rcu) { struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu); + atomic_andnot(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops); eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE); io_eventfd_put(ev_fd); }
diff --git a/io_uring/napi.c b/io_uring/napi.c index 8d68366..bfc7714 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c
@@ -38,7 +38,8 @@ static inline ktime_t net_to_ktime(unsigned long t) return ns_to_ktime(t << 10); } -int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id) +int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id, + unsigned int mode) { struct hlist_head *hash_list; struct io_napi_entry *e; @@ -69,6 +70,11 @@ int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id) * kfree() */ spin_lock(&ctx->napi_lock); + if (unlikely(READ_ONCE(ctx->napi_track_mode) != mode)) { + spin_unlock(&ctx->napi_lock); + kfree(e); + return -EINVAL; + } if (unlikely(io_napi_hash_find(hash_list, napi_id))) { spin_unlock(&ctx->napi_lock); kfree(e); @@ -196,9 +202,14 @@ __io_napi_do_busy_loop(struct io_ring_ctx *ctx, bool (*loop_end)(void *, unsigned long), void *loop_end_arg) { - if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC) + switch (READ_ONCE(ctx->napi_track_mode)) { + case IO_URING_NAPI_TRACKING_STATIC: return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg); - return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg); + case IO_URING_NAPI_TRACKING_DYNAMIC: + return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg); + default: + return false; + } } static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx, @@ -273,13 +284,13 @@ static int io_napi_register_napi(struct io_ring_ctx *ctx, default: return -EINVAL; } - /* clean the napi list for new settings */ + WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE); io_napi_free(ctx); - WRITE_ONCE(ctx->napi_track_mode, napi->op_param); /* cap NAPI at 10 msec of spin time */ napi->busy_poll_to = min(10000, napi->busy_poll_to); WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC); WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll); + WRITE_ONCE(ctx->napi_track_mode, napi->op_param); return 0; } @@ -315,7 +326,8 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) case IO_URING_NAPI_STATIC_ADD_ID: if 
(curr.op_param != IO_URING_NAPI_TRACKING_STATIC) return -EINVAL; - return __io_napi_add_id(ctx, napi.op_param); + return __io_napi_add_id(ctx, napi.op_param, + IO_URING_NAPI_TRACKING_STATIC); case IO_URING_NAPI_STATIC_DEL_ID: if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC) return -EINVAL; @@ -343,9 +355,10 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) if (arg && copy_to_user(arg, &curr, sizeof(curr))) return -EFAULT; + WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE); WRITE_ONCE(ctx->napi_busy_poll_dt, 0); WRITE_ONCE(ctx->napi_prefer_busy_poll, false); - WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE); + io_napi_free(ctx); return 0; }
diff --git a/io_uring/napi.h b/io_uring/napi.h index fa742f4..e0aeccc 100644 --- a/io_uring/napi.h +++ b/io_uring/napi.h
@@ -15,7 +15,8 @@ void io_napi_free(struct io_ring_ctx *ctx); int io_register_napi(struct io_ring_ctx *ctx, void __user *arg); int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg); -int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id); +int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id, + unsigned int mode); void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq); int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx); @@ -43,13 +44,14 @@ static inline void io_napi_add(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; struct socket *sock; + unsigned int mode = IO_URING_NAPI_TRACKING_DYNAMIC; - if (READ_ONCE(ctx->napi_track_mode) != IO_URING_NAPI_TRACKING_DYNAMIC) + if (READ_ONCE(ctx->napi_track_mode) != mode) return; sock = sock_from_file(req->file); if (sock && sock->sk) - __io_napi_add_id(ctx, READ_ONCE(sock->sk->sk_napi_id)); + __io_napi_add_id(ctx, READ_ONCE(sock->sk->sk_napi_id), mode); } #else
diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 4cfdfc51..e2595ca 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c
@@ -3,6 +3,7 @@ #include <linux/errno.h> #include <linux/file.h> #include <linux/io_uring.h> +#include <linux/time_namespace.h> #include <trace/events/io_uring.h> @@ -35,6 +36,22 @@ struct io_timeout_rem { bool ltimeout; }; +static clockid_t io_flags_to_clock(unsigned flags) +{ + switch (flags & IORING_TIMEOUT_CLOCK_MASK) { + case IORING_TIMEOUT_BOOTTIME: + return CLOCK_BOOTTIME; + case IORING_TIMEOUT_REALTIME: + return CLOCK_REALTIME; + default: + /* can't happen, vetted at prep time */ + WARN_ON_ONCE(1); + fallthrough; + case 0: + return CLOCK_MONOTONIC; + } +} + static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags) { struct timespec64 ts; @@ -43,7 +60,7 @@ static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags) *time = ns_to_ktime(arg); if (*time < 0) return -EINVAL; - return 0; + goto out; } if (get_timespec64(&ts, u64_to_user_ptr(arg))) @@ -51,6 +68,9 @@ static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags) if (ts.tv_sec < 0 || ts.tv_nsec < 0) return -EINVAL; *time = timespec64_to_ktime(ts); +out: + if (flags & IORING_TIMEOUT_ABS) + *time = timens_ktime_to_host(io_flags_to_clock(flags), *time); return 0; } @@ -399,18 +419,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) static clockid_t io_timeout_get_clock(struct io_timeout_data *data) { - switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) { - case IORING_TIMEOUT_BOOTTIME: - return CLOCK_BOOTTIME; - case IORING_TIMEOUT_REALTIME: - return CLOCK_REALTIME; - default: - /* can't happen, vetted at prep time */ - WARN_ON_ONCE(1); - fallthrough; - case 0: - return CLOCK_MONOTONIC; - } + return io_flags_to_clock(data->flags); } static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
diff --git a/io_uring/wait.c b/io_uring/wait.c index 91df86c..ec01e78 100644 --- a/io_uring/wait.c +++ b/io_uring/wait.c
@@ -5,6 +5,7 @@ #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/io_uring.h> +#include <linux/time_namespace.h> #include <trace/events/io_uring.h> @@ -229,7 +230,10 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, if (ext_arg->ts_set) { iowq.timeout = timespec64_to_ktime(ext_arg->ts); - if (!(flags & IORING_ENTER_ABS_TIMER)) + if (flags & IORING_ENTER_ABS_TIMER) + iowq.timeout = timens_ktime_to_host(ctx->clockid, + iowq.timeout); + else iowq.timeout = ktime_add(iowq.timeout, start_time); }
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 802656c6..49a8f7b 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c
@@ -511,7 +511,7 @@ static int arena_map_direct_value_addr(const struct bpf_map *map, u64 *imm, u32 { struct bpf_arena *arena = container_of(map, struct bpf_arena, map); - if ((u64)off > arena->user_vm_end - arena->user_vm_start) + if ((u64)off >= arena->user_vm_end - arena->user_vm_start) return -ERANGE; *imm = (unsigned long)arena->user_vm_start; return 0;
diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c index 332e6e0..58197d7 100644 --- a/kernel/bpf/liveness.c +++ b/kernel/bpf/liveness.c
@@ -1914,26 +1914,15 @@ int bpf_compute_subprog_arg_access(struct bpf_verifier_env *env) return -ENOMEM; } - instance = call_instance(env, NULL, 0, 0); - if (IS_ERR(instance)) { - err = PTR_ERR(instance); - goto out; - } - err = analyze_subprog(env, NULL, info, instance, callsites); - if (err) - goto out; - /* - * Subprogs and callbacks that don't receive FP-derived arguments - * cannot access ancestor stack frames, so they were skipped during - * the recursive walk above. Async callbacks (timer, workqueue) are - * also not reachable from the main program's call graph. Analyze - * all unvisited subprogs as independent roots at depth 0. + * Analyze every subprog in reverse topological order (callers + * before callees) so that each subprog is analyzed before its + * callees, allowing the recursive walk inside analyze_subprog() + * to naturally reach callees that receive FP-derived args. * - * Use reverse topological order (callers before callees) so that - * each subprog is analyzed before its callees, allowing the - * recursive walk inside analyze_subprog() to naturally - * reach nested callees that also lack FP-derived args. + * Subprogs and callbacks that don't receive FP-derived arguments + * cannot access ancestor stack frames, and so are analyzed independently. + * Async callbacks (timer, workqueue) are handled the same way. */ for (k = env->subprog_cnt - 1; k >= 0; k--) { int sub = env->subprog_topo_order[k];
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 45c0b1e..6152add 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c
@@ -264,10 +264,12 @@ static void cgroup_finalize_control(struct cgroup *cgrp, int ret); static void css_task_iter_skip(struct css_task_iter *it, struct task_struct *task); static int cgroup_destroy_locked(struct cgroup *cgrp); +static void cgroup_finish_destroy(struct cgroup *cgrp); +static void kill_css_sync(struct cgroup_subsys_state *css); +static void kill_css_finish(struct cgroup_subsys_state *css); static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, struct cgroup_subsys *ss); static void css_release(struct percpu_ref *ref); -static void kill_css(struct cgroup_subsys_state *css); static int cgroup_addrm_files(struct cgroup_subsys_state *css, struct cgroup *cgrp, struct cftype cfts[], bool is_add); @@ -797,6 +799,16 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated) if (was_populated == cgroup_is_populated(cgrp)) break; + /* + * Subtree just emptied below an offlined cgrp. Fire deferred + * destroy. The transition is one-shot. + */ + if (was_populated && !css_is_online(&cgrp->self)) { + cgroup_get(cgrp); + WARN_ON_ONCE(!queue_work(cgroup_offline_wq, + &cgrp->finish_destroy_work)); + } + cgroup1_check_for_release(cgrp); TRACE_CGROUP_PATH(notify_populated, cgrp, cgroup_is_populated(cgrp)); @@ -2039,6 +2051,16 @@ static int cgroup_reconfigure(struct fs_context *fc) return 0; } +static void cgroup_finish_destroy_work_fn(struct work_struct *work) +{ + struct cgroup *cgrp = container_of(work, struct cgroup, finish_destroy_work); + + cgroup_lock(); + cgroup_finish_destroy(cgrp); + cgroup_unlock(); + cgroup_put(cgrp); +} + static void init_cgroup_housekeeping(struct cgroup *cgrp) { struct cgroup_subsys *ss; @@ -2065,7 +2087,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) #endif init_waitqueue_head(&cgrp->offline_waitq); - init_waitqueue_head(&cgrp->dying_populated_waitq); + INIT_WORK(&cgrp->finish_destroy_work, cgroup_finish_destroy_work_fn); INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); } @@ 
-3375,7 +3397,8 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp) if (css->parent && !(cgroup_ss_mask(dsct) & (1 << ss->id))) { - kill_css(css); + kill_css_sync(css); + kill_css_finish(css); } else if (!css_visible(css)) { css_clear_dir(css); if (ss->css_reset) @@ -5067,10 +5090,12 @@ static void css_task_iter_advance(struct css_task_iter *it) task = list_entry(it->task_pos, struct task_struct, cg_list); /* - * Hide tasks that are exiting but not yet removed. Keep zombie - * leaders with live threads visible. + * Hide tasks that are exiting but not yet removed by default. Keep + * zombie leaders with live threads visible. Usages that need to walk + * every existing task can opt out via CSS_TASK_ITER_WITH_DEAD. */ - if ((task->flags & PF_EXITING) && !atomic_read(&task->signal->live)) + if (!(it->flags & CSS_TASK_ITER_WITH_DEAD) && + (task->flags & PF_EXITING) && !atomic_read(&task->signal->live)) goto repeat; if (it->flags & CSS_TASK_ITER_PROCS) { @@ -5514,7 +5539,7 @@ static struct cftype cgroup_psi_files[] = { * css destruction is four-stage process. * * 1. Destruction starts. Killing of the percpu_ref is initiated. - * Implemented in kill_css(). + * Implemented in kill_css_finish(). * * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs * and thus css_tryget_online() is guaranteed to fail, the css can be @@ -5993,7 +6018,7 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) /* * This is called when the refcnt of a css is confirmed to be killed. * css_tryget_online() is now guaranteed to fail. Tell the subsystem to - * initiate destruction and put the css ref from kill_css(). + * initiate destruction and put the css ref from kill_css_finish(). 
*/ static void css_killed_work_fn(struct work_struct *work) { @@ -6026,15 +6051,12 @@ static void css_killed_ref_fn(struct percpu_ref *ref) } /** - * kill_css - destroy a css - * @css: css to destroy + * kill_css_sync - synchronous half of css teardown + * @css: css being killed * - * This function initiates destruction of @css by removing cgroup interface - * files and putting its base reference. ->css_offline() will be invoked - * asynchronously once css_tryget_online() is guaranteed to fail and when - * the reference count reaches zero, @css will be released. + * See cgroup_destroy_locked(). */ -static void kill_css(struct cgroup_subsys_state *css) +static void kill_css_sync(struct cgroup_subsys_state *css) { struct cgroup_subsys *ss = css->ss; @@ -6057,24 +6079,6 @@ static void kill_css(struct cgroup_subsys_state *css) */ css_clear_dir(css); - /* - * Killing would put the base ref, but we need to keep it alive - * until after ->css_offline(). - */ - css_get(css); - - /* - * cgroup core guarantees that, by the time ->css_offline() is - * invoked, no new css reference will be given out via - * css_tryget_online(). We can't simply call percpu_ref_kill() and - * proceed to offlining css's because percpu_ref_kill() doesn't - * guarantee that the ref is seen as killed on all CPUs on return. - * - * Use percpu_ref_kill_and_confirm() to get notifications as each - * css is confirmed to be seen as killed on all CPUs. - */ - percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn); - css->cgroup->nr_dying_subsys[ss->id]++; /* * Parent css and cgroup cannot be freed until after the freeing @@ -6087,44 +6091,88 @@ static void kill_css(struct cgroup_subsys_state *css) } /** - * cgroup_destroy_locked - the first stage of cgroup destruction + * kill_css_finish - deferred half of css teardown + * @css: css being killed + * + * See cgroup_destroy_locked(). 
+ */ +static void kill_css_finish(struct cgroup_subsys_state *css) +{ + lockdep_assert_held(&cgroup_mutex); + + /* + * Skip on re-entry: cgroup_apply_control_disable() may have killed @css + * earlier. cgroup_destroy_locked() can still walk it because + * offline_css() (which NULLs cgrp->subsys[ssid]) runs async. + */ + if (percpu_ref_is_dying(&css->refcnt)) + return; + + /* + * Killing would put the base ref, but we need to keep it alive until + * after ->css_offline(). + */ + css_get(css); + + /* + * cgroup core guarantees that, by the time ->css_offline() is invoked, + * no new css reference will be given out via css_tryget_online(). We + * can't simply call percpu_ref_kill() and proceed to offlining css's + * because percpu_ref_kill() doesn't guarantee that the ref is seen as + * killed on all CPUs on return. + * + * Use percpu_ref_kill_and_confirm() to get notifications as each css is + * confirmed to be seen as killed on all CPUs. + */ + percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn); +} + +/** + * cgroup_destroy_locked - destroy @cgrp (called on rmdir) * @cgrp: cgroup to be destroyed * - * css's make use of percpu refcnts whose killing latency shouldn't be - * exposed to userland and are RCU protected. Also, cgroup core needs to - * guarantee that css_tryget_online() won't succeed by the time - * ->css_offline() is invoked. To satisfy all the requirements, - * destruction is implemented in the following two steps. + * Tear down @cgrp on behalf of rmdir. Constraints: * - * s1. Verify @cgrp can be destroyed and mark it dying. Remove all - * userland visible parts and start killing the percpu refcnts of - * css's. Set up so that the next stage will be kicked off once all - * the percpu refcnts are confirmed to be killed. + * - Userspace: rmdir must succeed when cgroup.procs and friends are empty. * - * s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the - * rest of destruction. 
Once all cgroup references are gone, the - * cgroup is RCU-freed. + * - Kernel: subsystem ->css_offline() must not run while any task in @cgrp's + * subtree is still doing kernel work. A task hidden from cgroup.procs (past + * exit_signals() with signal->live cleared) can still schedule, allocate, and + * consume resources until its final context switch. Dying descendants in the + * subtree can host such tasks too. * - * This function implements s1. After this step, @cgrp is gone as far as - * the userland is concerned and a new cgroup with the same name may be - * created. As cgroup doesn't care about the names internally, this - * doesn't cause any problem. + * - Kernel: css_tryget_online() must fail by the time ->css_offline() runs. + * + * The destruction runs in three parts: + * + * - This function: synchronous user-visible state teardown plus kill_css_sync() + * on each subsystem css. + * + * - cgroup_finish_destroy(): kicks the percpu_ref kill via kill_css_finish() on + * each subsystem css. Fires once @cgrp's subtree is fully drained, either + * inline here or from cgroup_update_populated(). + * + * - The percpu_ref kill chain: css_killed_ref_fn -> css_killed_work_fn -> + * ->css_offline() -> release/free. + * + * Return 0 on success, -EBUSY if a userspace-visible task or an online child + * remains. */ static int cgroup_destroy_locked(struct cgroup *cgrp) - __releases(&cgroup_mutex) __acquires(&cgroup_mutex) { struct cgroup *tcgrp, *parent = cgroup_parent(cgrp); struct cgroup_subsys_state *css; struct cgrp_cset_link *link; + struct css_task_iter it; + struct task_struct *task; int ssid, ret; lockdep_assert_held(&cgroup_mutex); - /* - * Only migration can raise populated from zero and we're already - * holding cgroup_mutex. 
- */ - if (cgroup_is_populated(cgrp)) + css_task_iter_start(&cgrp->self, 0, &it); + task = css_task_iter_next(&it); + css_task_iter_end(&it); + if (task) return -EBUSY; /* @@ -6148,9 +6196,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) link->cset->dead = true; spin_unlock_irq(&css_set_lock); - /* initiate massacre of all css's */ for_each_css(css, ssid, cgrp) - kill_css(css); + kill_css_sync(css); /* clear and remove @cgrp dir, @cgrp has an extra ref on its kn */ css_clear_dir(&cgrp->self); @@ -6181,79 +6228,27 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) /* put the base reference */ percpu_ref_kill(&cgrp->self.refcnt); + if (!cgroup_is_populated(cgrp)) + cgroup_finish_destroy(cgrp); + return 0; }; /** - * cgroup_drain_dying - wait for dying tasks to leave before rmdir - * @cgrp: the cgroup being removed + * cgroup_finish_destroy - deferred half of @cgrp destruction + * @cgrp: cgroup whose subtree just became empty * - * cgroup.procs and cgroup.threads use css_task_iter which filters out - * PF_EXITING tasks so that userspace doesn't see tasks that have already been - * reaped via waitpid(). However, cgroup_has_tasks() - which tests whether the - * cgroup has non-empty css_sets - is only updated when dying tasks pass through - * cgroup_task_dead() in finish_task_switch(). This creates a window where - * cgroup.procs reads empty but cgroup_has_tasks() is still true, making rmdir - * fail with -EBUSY from cgroup_destroy_locked() even though userspace sees no - * tasks. - * - * This function aligns cgroup_has_tasks() with what userspace can observe. If - * cgroup_has_tasks() but the task iterator sees nothing (all remaining tasks are - * PF_EXITING), we wait for cgroup_task_dead() to finish processing them. As the - * window between PF_EXITING and cgroup_task_dead() is short, the wait is brief. - * - * This function only concerns itself with this cgroup's own dying tasks. - * Whether the cgroup has children is cgroup_destroy_locked()'s problem. 
- * - * Each cgroup_task_dead() kicks the waitqueue via cset->cgrp_links, and we - * retry the full check from scratch. - * - * Must be called with cgroup_mutex held. + * See cgroup_destroy_locked() for the rationale. */ -static int cgroup_drain_dying(struct cgroup *cgrp) - __releases(&cgroup_mutex) __acquires(&cgroup_mutex) +static void cgroup_finish_destroy(struct cgroup *cgrp) { - struct css_task_iter it; - struct task_struct *task; - DEFINE_WAIT(wait); + struct cgroup_subsys_state *css; + int ssid; lockdep_assert_held(&cgroup_mutex); -retry: - if (!cgroup_has_tasks(cgrp)) - return 0; - /* Same iterator as cgroup.threads - if any task is visible, it's busy */ - css_task_iter_start(&cgrp->self, 0, &it); - task = css_task_iter_next(&it); - css_task_iter_end(&it); - - if (task) - return -EBUSY; - - /* - * All remaining tasks are PF_EXITING and will pass through - * cgroup_task_dead() shortly. Wait for a kick and retry. - * - * cgroup_has_tasks() can't transition from false to true while we're - * holding cgroup_mutex, but the true to false transition happens - * under css_set_lock (via cgroup_task_dead()). We must retest and - * prepare_to_wait() under css_set_lock. Otherwise, the transition - * can happen between our first test and prepare_to_wait(), and we - * sleep with no one to wake us. 
- */ - spin_lock_irq(&css_set_lock); - if (!cgroup_has_tasks(cgrp)) { - spin_unlock_irq(&css_set_lock); - return 0; - } - prepare_to_wait(&cgrp->dying_populated_waitq, &wait, - TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&css_set_lock); - mutex_unlock(&cgroup_mutex); - schedule(); - finish_wait(&cgrp->dying_populated_waitq, &wait); - mutex_lock(&cgroup_mutex); - goto retry; + for_each_css(css, ssid, cgrp) + kill_css_finish(css); } int cgroup_rmdir(struct kernfs_node *kn) @@ -6265,12 +6260,9 @@ int cgroup_rmdir(struct kernfs_node *kn) if (!cgrp) return 0; - ret = cgroup_drain_dying(cgrp); - if (!ret) { - ret = cgroup_destroy_locked(cgrp); - if (!ret) - TRACE_CGROUP_PATH(rmdir, cgrp); - } + ret = cgroup_destroy_locked(cgrp); + if (!ret) + TRACE_CGROUP_PATH(rmdir, cgrp); cgroup_kn_unlock(kn); return ret; @@ -7030,7 +7022,6 @@ void cgroup_task_exit(struct task_struct *tsk) static void do_cgroup_task_dead(struct task_struct *tsk) { - struct cgrp_cset_link *link; struct css_set *cset; unsigned long flags; @@ -7044,11 +7035,6 @@ static void do_cgroup_task_dead(struct task_struct *tsk) if (thread_group_leader(tsk) && atomic_read(&tsk->signal->live)) list_add_tail(&tsk->cg_list, &cset->dying_tasks); - /* kick cgroup_drain_dying() waiters, see cgroup_rmdir() */ - list_for_each_entry(link, &cset->cgrp_links, cgrp_link) - if (waitqueue_active(&link->cgrp->dying_populated_waitq)) - wake_up(&link->cgrp->dying_populated_waitq); - if (dl_task(tsk)) dec_dl_tasks_cs(tsk);
diff --git a/kernel/events/core.c b/kernel/events/core.c index 6d1f8ba..7935d56 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c
@@ -7006,6 +7006,7 @@ static void perf_mmap_open(struct vm_area_struct *vma) } static void perf_pmu_output_stop(struct perf_event *event); +static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb); /* * A buffer can be mmap()ed multiple times; either directly through the same @@ -7021,8 +7022,6 @@ static void perf_mmap_close(struct vm_area_struct *vma) mapped_f unmapped = get_mapped(event, event_unmapped); struct perf_buffer *rb = ring_buffer_get(event); struct user_struct *mmap_user = rb->mmap_user; - int mmap_locked = rb->mmap_locked; - unsigned long size = perf_data_size(rb); bool detach_rest = false; /* FIXIES vs perf_pmu_unregister() */ @@ -7117,11 +7116,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) * Aside from that, this buffer is 'fully' detached and unmapped, * undo the VM accounting. */ - - atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked, - &mmap_user->locked_vm); - atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm); - free_uid(mmap_user); + perf_mmap_unaccount(vma, rb); out_put: ring_buffer_put(rb); /* could be last */ @@ -7261,6 +7256,15 @@ static void perf_mmap_account(struct vm_area_struct *vma, long user_extra, long atomic64_add(extra, &vma->vm_mm->pinned_vm); } +static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb) +{ + struct user_struct *user = rb->mmap_user; + + atomic_long_sub((perf_data_size(rb) >> PAGE_SHIFT) + 1 - rb->mmap_locked, + &user->locked_vm); + atomic64_sub(rb->mmap_locked, &vma->vm_mm->pinned_vm); +} + static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event, unsigned long nr_pages) { @@ -7323,8 +7327,6 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event, if (!rb) return -ENOMEM; - refcount_set(&rb->mmap_count, 1); - rb->mmap_user = get_current_user(); rb->mmap_locked = extra; ring_buffer_attach(event, rb); @@ -7474,16 +7476,54 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) 
mapped(event, vma->vm_mm); /* - * Try to map it into the page table. On fail, invoke - * perf_mmap_close() to undo the above, as the callsite expects - * full cleanup in this case and therefore does not invoke - * vmops::close(). + * Try to map it into the page table. On fail undo the above, + * as the callsite expects full cleanup in this case and + * therefore does not invoke vmops::close(). */ ret = map_range(event->rb, vma); - if (ret) - perf_mmap_close(vma); + if (likely(!ret)) + return 0; + + /* Error path */ + + /* + * If this is the first mmap(), then event->mmap_count should + * be stable at 1. It is only modified by: + * perf_mmap_{open,close}() and perf_mmap(). + * + * The former are not possible because this mmap() hasn't been + * successful yet, and the latter is serialized by + * event->mmap_mutex which we still hold (note that mmap_lock + * is not strictly sufficient here, because the event fd can + * be passed to another process through trivial means like + * fork(), leading to concurrent mmap() from different mm). + * + * Make sure to remove event->rb before releasing + * event->mmap_mutex, such that any concurrent mmap() will not + * attempt use this failed buffer. + */ + if (refcount_read(&event->mmap_count) == 1) { + /* + * Minimal perf_mmap_close(); there can't be AUX or + * other events on account of this being the first. + */ + mapped = get_mapped(event, event_unmapped); + if (mapped) + mapped(event, vma->vm_mm); + perf_mmap_unaccount(vma, event->rb); + ring_buffer_attach(event, NULL); /* drops last rb->refcount */ + refcount_set(&event->mmap_count, 0); + return ret; + } + + /* + * Otherwise this is an already existing buffer, and there is + * no race vs first exposure, so fall-through and call + * perf_mmap_close(). + */ } + perf_mmap_close(vma); return ret; }
diff --git a/kernel/events/internal.h b/kernel/events/internal.h index d9cc570..c03c4f2 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h
@@ -67,6 +67,7 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head) struct perf_buffer *rb; rb = container_of(rcu_head, struct perf_buffer, rcu_head); + free_uid(rb->mmap_user); rb_free(rb); }
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 3e7de26..9fe9216 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c
@@ -340,6 +340,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags) rb->paused = 1; mutex_init(&rb->aux_mutex); + rb->mmap_user = get_current_user(); + refcount_set(&rb->mmap_count, 1); } void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)
diff --git a/kernel/exit.c b/kernel/exit.c index 25e9cb6..9a90999 100644 --- a/kernel/exit.c +++ b/kernel/exit.c
@@ -1073,6 +1073,7 @@ void __noreturn make_task_dead(int signr) futex_exit_recursive(tsk); tsk->exit_state = EXIT_DEAD; refcount_inc(&tsk->rcu_users); + preempt_disable(); do_task_dead(); }
diff --git a/kernel/rseq.c b/kernel/rseq.c index 38d3ef5..e75e3a5e 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c
@@ -236,11 +236,6 @@ static int __init rseq_debugfs_init(void) } __initcall(rseq_debugfs_init); -static bool rseq_set_ids(struct task_struct *t, struct rseq_ids *ids, u32 node_id) -{ - return rseq_set_ids_get_csaddr(t, ids, node_id, NULL); -} - static bool rseq_handle_cs(struct task_struct *t, struct pt_regs *regs) { struct rseq __user *urseq = t->rseq.usrptr; @@ -258,14 +253,16 @@ static bool rseq_handle_cs(struct task_struct *t, struct pt_regs *regs) static void rseq_slowpath_update_usr(struct pt_regs *regs) { /* - * Preserve rseq state and user_irq state. The generic entry code - * clears user_irq on the way out, the non-generic entry - * architectures are not having user_irq. + * Preserve has_rseq and user_irq state. The generic entry code clears + * user_irq on the way out, the non-generic entry architectures are not + * setting user_irq. */ - const struct rseq_event evt_mask = { .has_rseq = true, .user_irq = true, }; + const struct rseq_event evt_mask = { + .has_rseq = RSEQ_HAS_RSEQ_VERSION_MASK, + .user_irq = true, + }; struct task_struct *t = current; struct rseq_ids ids; - u32 node_id; bool event; if (unlikely(t->flags & PF_EXITING)) @@ -301,9 +298,9 @@ static void rseq_slowpath_update_usr(struct pt_regs *regs) if (!event) return; - node_id = cpu_to_node(ids.cpu_id); + ids.node_id = cpu_to_node(ids.cpu_id); - if (unlikely(!rseq_update_usr(t, regs, &ids, node_id))) { + if (unlikely(!rseq_update_usr(t, regs, &ids))) { /* * Clear the errors just in case this might survive magically, but * leave the rest intact. @@ -335,8 +332,9 @@ void __rseq_handle_slowpath(struct pt_regs *regs) void __rseq_signal_deliver(int sig, struct pt_regs *regs) { rseq_stat_inc(rseq_stats.signal); + /* - * Don't update IDs, they are handled on exit to user if + * Don't update IDs yet, they are handled on exit to user if * necessary. 
The important thing is to abort a critical section of * the interrupted context as after this point the instruction * pointer in @regs points to the signal handler. @@ -349,6 +347,13 @@ void __rseq_signal_deliver(int sig, struct pt_regs *regs) current->rseq.event.error = 0; force_sigsegv(sig); } + + /* + * In legacy mode, force the update of IDs before returning to user + * space to stay compatible. + */ + if (!rseq_v2(current)) + rseq_force_update(); } /* @@ -384,19 +389,22 @@ void rseq_syscall(struct pt_regs *regs) static bool rseq_reset_ids(void) { - struct rseq_ids ids = { - .cpu_id = RSEQ_CPU_ID_UNINITIALIZED, - .mm_cid = 0, - }; + struct rseq __user *rseq = current->rseq.usrptr; /* * If this fails, terminate it because this leaves the kernel in * stupid state as exit to user space will try to fixup the ids * again. */ - if (rseq_set_ids(current, &ids, 0)) - return true; + scoped_user_rw_access(rseq, efault) { + unsafe_put_user(0, &rseq->cpu_id_start, efault); + unsafe_put_user(RSEQ_CPU_ID_UNINITIALIZED, &rseq->cpu_id, efault); + unsafe_put_user(0, &rseq->node_id, efault); + unsafe_put_user(0, &rseq->mm_cid, efault); + } + return true; +efault: force_sig(SIGSEGV); return false; } @@ -404,70 +412,29 @@ static bool rseq_reset_ids(void) /* The original rseq structure size (including padding) is 32 bytes. */ #define ORIG_RSEQ_SIZE 32 -/* - * sys_rseq - setup restartable sequences for caller thread. - */ -SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig) +static long rseq_register(struct rseq __user * rseq, u32 rseq_len, int flags, u32 sig) { u32 rseqfl = 0; + u8 version = 1; - if (flags & RSEQ_FLAG_UNREGISTER) { - if (flags & ~RSEQ_FLAG_UNREGISTER) - return -EINVAL; - /* Unregister rseq for current thread. 
*/ - if (current->rseq.usrptr != rseq || !current->rseq.usrptr) - return -EINVAL; - if (rseq_len != current->rseq.len) - return -EINVAL; - if (current->rseq.sig != sig) - return -EPERM; - if (!rseq_reset_ids()) - return -EFAULT; - rseq_reset(current); - return 0; - } - - if (unlikely(flags & ~(RSEQ_FLAG_SLICE_EXT_DEFAULT_ON))) - return -EINVAL; - - if (current->rseq.usrptr) { - /* - * If rseq is already registered, check whether - * the provided address differs from the prior - * one. - */ - if (current->rseq.usrptr != rseq || rseq_len != current->rseq.len) - return -EINVAL; - if (current->rseq.sig != sig) - return -EPERM; - /* Already registered. */ - return -EBUSY; - } - - /* - * If there was no rseq previously registered, ensure the provided rseq - * is properly aligned, as communcated to user-space through the ELF - * auxiliary vector AT_RSEQ_ALIGN. If rseq_len is the original rseq - * size, the required alignment is the original struct rseq alignment. - * - * The rseq_len is required to be greater or equal to the original rseq - * size. In order to be valid, rseq_len is either the original rseq size, - * or large enough to contain all supported fields, as communicated to - * user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE. 
- */ - if (rseq_len < ORIG_RSEQ_SIZE || - (rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) || - (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, rseq_alloc_align()) || - rseq_len < offsetof(struct rseq, end)))) - return -EINVAL; if (!access_ok(rseq, rseq_len)) return -EFAULT; - if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION)) { - rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE; - if (rseq_slice_extension_enabled() && - (flags & RSEQ_FLAG_SLICE_EXT_DEFAULT_ON)) - rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED; + /* + * Architectures, which use the generic IRQ entry code (at least) enable + * registrations with a size greater than the original v1 fixed sized + * @rseq_len, which has been validated already to utilize the optimized + * v2 ABI mode which also enables extended RSEQ features beyond MMCID. + */ + if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY) && rseq_len > ORIG_RSEQ_SIZE) + version = 2; + + if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION) && version > 1) { + if (rseq_slice_extension_enabled()) { + rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE; + if (flags & RSEQ_FLAG_SLICE_EXT_DEFAULT_ON) + rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED; + } } scoped_user_write_access(rseq, efault) { @@ -485,7 +452,15 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 unsafe_put_user(RSEQ_CPU_ID_UNINITIALIZED, &rseq->cpu_id, efault); unsafe_put_user(0U, &rseq->node_id, efault); unsafe_put_user(0U, &rseq->mm_cid, efault); - unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); + + /* + * All fields past mm_cid are only valid for non-legacy v2 + * registrations. 
+ */ + if (version > 1) { + if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION)) + unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); + } } /* @@ -501,11 +476,10 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 #endif /* - * If rseq was previously inactive, and has just been - * registered, ensure the cpu_id_start and cpu_id fields - * are updated before returning to user-space. + * Ensure the cpu_id_start and cpu_id fields are updated before + * returning to user-space. */ - current->rseq.event.has_rseq = true; + current->rseq.event.has_rseq = version; rseq_force_update(); return 0; @@ -513,6 +487,80 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 return -EFAULT; } +static long rseq_unregister(struct rseq __user * rseq, u32 rseq_len, int flags, u32 sig) +{ + if (flags & ~RSEQ_FLAG_UNREGISTER) + return -EINVAL; + if (current->rseq.usrptr != rseq || !current->rseq.usrptr) + return -EINVAL; + if (rseq_len != current->rseq.len) + return -EINVAL; + if (current->rseq.sig != sig) + return -EPERM; + if (!rseq_reset_ids()) + return -EFAULT; + rseq_reset(current); + return 0; +} + +static long rseq_reregister(struct rseq __user * rseq, u32 rseq_len, u32 sig) +{ + /* + * If rseq is already registered, check whether the provided address + * differs from the prior one. + */ + if (current->rseq.usrptr != rseq || rseq_len != current->rseq.len) + return -EINVAL; + if (current->rseq.sig != sig) + return -EPERM; + /* Already registered. */ + return -EBUSY; +} + +static bool rseq_length_valid(struct rseq __user *rseq, unsigned int rseq_len) +{ + /* + * Ensure the provided rseq is properly aligned, as communicated to + * user-space through the ELF auxiliary vector AT_RSEQ_ALIGN. If + * rseq_len is the original rseq size, the required alignment is the + * original struct rseq alignment. 
+ * + * In order to be valid, rseq_len is either the original rseq size, or + * large enough to contain all supported fields, as communicated to + * user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE. + */ + if (rseq_len < ORIG_RSEQ_SIZE) + return false; + + if (rseq_len == ORIG_RSEQ_SIZE) + return IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE); + + return IS_ALIGNED((unsigned long)rseq, rseq_alloc_align()) && + rseq_len >= offsetof(struct rseq, end); +} + +#define RSEQ_FLAGS_SUPPORTED (RSEQ_FLAG_SLICE_EXT_DEFAULT_ON) + +/* + * sys_rseq - Register or unregister restartable sequences for the caller thread. + */ +SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig) +{ + if (flags & RSEQ_FLAG_UNREGISTER) + return rseq_unregister(rseq, rseq_len, flags, sig); + + if (unlikely(flags & ~RSEQ_FLAGS_SUPPORTED)) + return -EINVAL; + + if (current->rseq.usrptr) + return rseq_reregister(rseq, rseq_len, sig); + + if (!rseq_length_valid(rseq, rseq_len)) + return -EINVAL; + + return rseq_register(rseq, rseq_len, flags, sig); +} + #ifdef CONFIG_RSEQ_SLICE_EXTENSION struct slice_timer { struct hrtimer timer; @@ -713,6 +761,8 @@ int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3) return -ENOTSUPP; if (!current->rseq.usrptr) return -ENXIO; + if (!rseq_v2(current)) + return -ENOTSUPP; /* No change? */ if (enable == !!current->rseq.slice.state.enabled)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 345aa11..38d90ba 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c
@@ -766,7 +766,8 @@ static void scx_task_iter_start(struct scx_task_iter *iter, struct cgroup *cgrp) lockdep_assert_held(&cgroup_mutex); iter->cgrp = cgrp; iter->css_pos = css_next_descendant_pre(NULL, &iter->cgrp->self); - css_task_iter_start(iter->css_pos, 0, &iter->css_iter); + css_task_iter_start(iter->css_pos, CSS_TASK_ITER_WITH_DEAD, + &iter->css_iter); return; } #endif @@ -866,7 +867,8 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter) iter->css_pos = css_next_descendant_pre(iter->css_pos, &iter->cgrp->self); if (iter->css_pos) - css_task_iter_start(iter->css_pos, 0, &iter->css_iter); + css_task_iter_start(iter->css_pos, CSS_TASK_ITER_WITH_DEAD, + &iter->css_iter); } return NULL; } @@ -926,16 +928,27 @@ static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter) * * Test for idle_sched_class as only init_tasks are on it. */ - if (p->sched_class != &idle_sched_class) - break; + if (p->sched_class == &idle_sched_class) + continue; + + iter->rq = task_rq_lock(p, &iter->rf); + iter->locked_task = p; + + /* + * cgroup_task_dead() removes the dead tasks from cset->tasks + * after sched_ext_dead() and cgroup iteration may see tasks + * which already finished sched_ext_dead(). %SCX_TASK_OFF_TASKS + * is set by sched_ext_dead() under @p's rq lock. Test it to + * avoid visiting tasks which are already dead from SCX POV. + */ + if (p->scx.flags & SCX_TASK_OFF_TASKS) { + __scx_task_iter_rq_unlock(iter); + continue; + } + + return p; } - if (!p) - return NULL; - - iter->rq = task_rq_lock(p, &iter->rf); - iter->locked_task = p; - - return p; + return NULL; } /** @@ -3848,6 +3861,11 @@ void sched_ext_dead(struct task_struct *p) /* * @p is off scx_tasks and wholly ours. scx_root_enable()'s READY -> * ENABLED transitions can't race us. Disable ops for @p. + * + * %SCX_TASK_OFF_TASKS synchronizes against cgroup task iteration - see + * scx_task_iter_next_locked(). 
NONE tasks need no marking: cgroup + * iteration is only used from sub-sched paths, which require root + * enabled. Root enable transitions every live task to at least READY. */ if (scx_get_task_state(p) != SCX_TASK_NONE) { struct rq_flags rf; @@ -3855,6 +3873,7 @@ void sched_ext_dead(struct task_struct *p) rq = task_rq_lock(p, &rf); scx_disable_and_exit_task(scx_task_sched(p), p); + p->scx.flags |= SCX_TASK_OFF_TASKS; task_rq_unlock(rq, p, &rf); } }
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index 7468560..6e19807 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c
@@ -466,12 +466,6 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, preempt_disable(); /* - * Check whether @prev_cpu is still within the allowed set. If not, - * we can still try selecting a nearby CPU. - */ - is_prev_allowed = cpumask_test_cpu(prev_cpu, allowed); - - /* * Determine the subset of CPUs usable by @p within @cpus_allowed. */ if (allowed != p->cpus_ptr) { @@ -488,6 +482,12 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, } /* + * Check whether @prev_cpu is still within the allowed set. If not, + * we can still try selecting a nearby CPU. + */ + is_prev_allowed = cpumask_test_cpu(prev_cpu, allowed); + + /* * This is necessary to protect llc_cpus. */ rcu_read_lock();
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7289658..3ebec18 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c
@@ -882,11 +882,11 @@ bool update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) * * lag_i >= 0 -> V >= v_i * - * \Sum (v_i - v)*w_i - * V = ------------------ + v + * \Sum (v_i - v0)*w_i + * V = ------------------- + v0 * \Sum w_i * - * lag_i >= 0 -> \Sum (v_i - v)*w_i >= (v_i - v)*(\Sum w_i) + * lag_i >= 0 -> \Sum (v_i - v0)*w_i >= (v_i - v0)*(\Sum w_i) * * Note: using 'avg_vruntime() > se->vruntime' is inaccurate due * to the loss in precision caused by the division. @@ -894,7 +894,7 @@ bool update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime) { struct sched_entity *curr = cfs_rq->curr; - s64 avg = cfs_rq->sum_w_vruntime; + s64 key, avg = cfs_rq->sum_w_vruntime; long load = cfs_rq->sum_weight; if (curr && curr->on_rq) { @@ -904,7 +904,36 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime) load += weight; } - return avg >= vruntime_op(vruntime, "-", cfs_rq->zero_vruntime) * load; + key = vruntime_op(vruntime, "-", cfs_rq->zero_vruntime); + + /* + * The worst case term for @key includes 'NSEC_TICK * NICE_0_LOAD' + * and @load obviously includes NICE_0_LOAD. NSEC_TICK is around 24 + * bits, while NICE_0_LOAD is 20 on 64bit and 10 otherwise. + * + * This gives that on 64bit the product will be at least 64bit which + * overflows s64, while on 32bit it will only be 44bits and should fit + * comfortably. + */ +#ifdef CONFIG_64BIT +#ifdef CONFIG_ARCH_SUPPORTS_INT128 + /* This often results in simpler code than __builtin_mul_overflow(). */ + return avg >= (__int128)key * load; +#else + s64 rhs; + /* + * On overflow, the sign of key tells us the correct answer: a large + * positive key means vruntime >> V, so not eligible; a large negative + * key means vruntime << V, so eligible. 
+ */ + if (check_mul_overflow(key, load, &rhs)) + return key <= 0; + + return avg >= rhs; +#endif +#else /* 32bit */ + return avg >= key * load; +#endif } int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -9145,9 +9174,10 @@ static void wakeup_preempt_fair(struct rq *rq, struct task_struct *p, int wake_f /* * Because p is enqueued, nse being null can only mean that we - * dequeued a delayed task. + * dequeued a delayed task. If there are still entities queued in + * cfs, check if the next one will be p. */ - if (!nse) + if (!nse && cfs_rq->nr_queued) goto pick; if (sched_feat(RUN_TO_PARITY))
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 6234456..226a632 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c
@@ -199,7 +199,16 @@ static void ipi_rseq(void *info) * is negligible. */ smp_mb(); - rseq_sched_switch_event(current); + /* + * Legacy mode requires that IDs are written and the critical section is + * evaluated. V2 optimized mode handles the critical section and IDs are + * only updated if they change as a consequence of preemption after + * return from this IPI. + */ + if (rseq_v2(current)) + rseq_sched_switch_event(current); + else + rseq_force_update(); } static void ipi_sync_rq_state(void *info)
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c index 155eeae..1d0d3a4 100644 --- a/kernel/time/timer_migration.c +++ b/kernel/time/timer_migration.c
@@ -1860,19 +1860,37 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node, * child to the new parents. So tmigr_active_up() activates the * new parents while walking up from the old root to the new. * - * * It is ensured that @start is active, as this setup path is - * executed in hotplug prepare callback. This is executed by an - * already connected and !idle CPU. Even if all other CPUs go idle, - * the CPU executing the setup will be responsible up to current top - * level group. And the next time it goes inactive, it will release - * the new childmask and parent to subsequent walkers through this - * @child. Therefore propagate active state unconditionally. + * * It is ensured that @start is active, (or on the way to be activated + * by another CPU that woke up before the current one) as this setup path + * is executed in hotplug prepare callback. This is executed by an already + * connected and !idle CPU in the hierarchy. + * + * * The below RmW atomic operation ensures that: + * + * 1) If the old root has been completely activated, the latest state is + * acquired (the below implicit acquire pairs with the implicit release + * from cmpxchg() in tmigr_active_up()). + * + * 2) If the old root is still on the way to be activated, the lagging behind + * CPU performing the activation will acquire the links up to the new root. + * (The below implicit release pairs with the implicit acquire from cmpxchg() + * in tmigr_active_up()). + * + * 3) Every subsequent CPU below the old root will acquire the new links while + * walking through the old root (The below implicit release pairs with the + * implicit acquire from cmpxchg() in either tmigr_active_up()) or + * tmigr_inactive_up(). 
*/ - state.state = atomic_read(&start->migr_state); - WARN_ON_ONCE(!state.active); + state.state = atomic_fetch_or(0, &start->migr_state); WARN_ON_ONCE(!start->parent); - data.childmask = start->groupmask; - __walk_groups_from(tmigr_active_up, &data, start, start->parent); + /* + * If the state of the old root is inactive, another CPU is on its way to activate + * it and propagate to the new root. + */ + if (state.active) { + data.childmask = start->groupmask; + __walk_groups_from(tmigr_active_up, &data, start, start->parent); + } } /* Root update */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5f747f2..3d2e3b2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c
@@ -5906,6 +5906,21 @@ static struct workqueue_struct *__alloc_workqueue(const char *fmt, return NULL; } +__printf(1, 0) +static struct workqueue_struct *alloc_workqueue_va(const char *fmt, + unsigned int flags, + int max_active, + va_list args) +{ + struct workqueue_struct *wq; + + wq = __alloc_workqueue(fmt, flags, max_active, args); + if (wq) + wq_init_lockdep(wq); + + return wq; +} + __printf(1, 4) struct workqueue_struct *alloc_workqueue_noprof(const char *fmt, unsigned int flags, @@ -5915,12 +5930,8 @@ struct workqueue_struct *alloc_workqueue_noprof(const char *fmt, va_list args; va_start(args, max_active); - wq = __alloc_workqueue(fmt, flags, max_active, args); + wq = alloc_workqueue_va(fmt, flags, max_active, args); va_end(args); - if (!wq) - return NULL; - - wq_init_lockdep(wq); return wq; } @@ -5932,15 +5943,15 @@ static void devm_workqueue_release(void *res) } __printf(2, 5) struct workqueue_struct * -devm_alloc_workqueue(struct device *dev, const char *fmt, unsigned int flags, - int max_active, ...) +devm_alloc_workqueue_noprof(struct device *dev, const char *fmt, + unsigned int flags, int max_active, ...) { struct workqueue_struct *wq; va_list args; int ret; va_start(args, max_active); - wq = alloc_workqueue(fmt, flags, max_active, args); + wq = alloc_workqueue_va(fmt, flags, max_active, args); va_end(args); if (!wq) return NULL; @@ -5951,7 +5962,7 @@ devm_alloc_workqueue(struct device *dev, const char *fmt, unsigned int flags, return wq; } -EXPORT_SYMBOL_GPL(devm_alloc_workqueue); +EXPORT_SYMBOL_GPL(devm_alloc_workqueue_noprof); #ifdef CONFIG_LOCKDEP __printf(1, 5)
diff --git a/lib/fonts/font_rotate.c b/lib/fonts/font_rotate.c index 065e0fc..27540600 100644 --- a/lib/fonts/font_rotate.c +++ b/lib/fonts/font_rotate.c
@@ -106,7 +106,7 @@ static void __font_glyph_rotate_180(const unsigned char *glyph, for (y = 0; y < height; y++) { for (x = 0; x < width; x++) { if (font_glyph_test_bit(glyph, x, y, bit_pitch)) { - font_glyph_set_bit(out, width - (1 + x + shift), height - (1 + y), + font_glyph_set_bit(out, bit_pitch - 1 - x - shift, height - 1 - y, bit_pitch); } }
diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig index 498cc51..94ff8e4 100644 --- a/lib/kunit/Kconfig +++ b/lib/kunit/Kconfig
@@ -16,8 +16,9 @@ if KUNIT config KUNIT_DEBUGFS - bool "KUnit - Enable /sys/kernel/debug/kunit debugfs representation" if !KUNIT_ALL_TESTS - default KUNIT_ALL_TESTS + bool "KUnit - Enable /sys/kernel/debug/kunit debugfs representation" + depends on DEBUG_FS + default y help Enable debugfs representation for kunit. Currently this consists of /sys/kernel/debug/kunit/<test_suite>/results files for each
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index d44987d..853c8d7 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c
@@ -330,11 +330,18 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) goto badframe; break; case BNEP_FILTER_MULTI_ADDR_SET: - case BNEP_FILTER_NET_TYPE_SET: - /* Pull: ctrl type (1 b), len (2 b), data (len bytes) */ - if (!skb_pull(skb, 3 + *(u16 *)(skb->data + 1) * 2)) + case BNEP_FILTER_NET_TYPE_SET: { + u8 *hdr; + + /* Pull ctrl type (1 b) + len (2 b) */ + hdr = skb_pull_data(skb, 3); + if (!hdr) + goto badframe; + /* Pull data (len bytes); length is big-endian */ + if (!skb_pull(skb, get_unaligned_be16(&hdr[1]))) goto badframe; break; + } default: kfree_skb(skb); return 0;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 3a05925..17b46ad 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c
@@ -480,40 +480,107 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle) return hci_setup_sync_conn(conn, handle); } -u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, - u16 to_multiplier) +struct le_conn_update_data { + struct hci_conn *conn; + u16 min; + u16 max; + u16 latency; + u16 to_multiplier; +}; + +static int le_conn_update_sync(struct hci_dev *hdev, void *data) { - struct hci_dev *hdev = conn->hdev; + struct le_conn_update_data *d = data; + struct hci_conn *conn = d->conn; struct hci_conn_params *params; struct hci_cp_le_conn_update cp; + u16 timeout; + u8 store_hint; + int err; + /* Verify connection is still alive and read conn fields under + * the same lock to prevent a concurrent disconnect from freeing + * or reusing the connection while we build the HCI command. + */ + hci_dev_lock(hdev); + + if (!hci_conn_valid(hdev, conn)) { + hci_dev_unlock(hdev); + return -ECANCELED; + } + + memset(&cp, 0, sizeof(cp)); + cp.handle = cpu_to_le16(conn->handle); + cp.conn_interval_min = cpu_to_le16(d->min); + cp.conn_interval_max = cpu_to_le16(d->max); + cp.conn_latency = cpu_to_le16(d->latency); + cp.supervision_timeout = cpu_to_le16(d->to_multiplier); + cp.min_ce_len = cpu_to_le16(0x0000); + cp.max_ce_len = cpu_to_le16(0x0000); + timeout = conn->conn_timeout; + + hci_dev_unlock(hdev); + + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CONN_UPDATE, + sizeof(cp), &cp, + HCI_EV_LE_CONN_UPDATE_COMPLETE, + timeout, NULL); + if (err) + return err; + + /* Update stored connection parameters after the controller has + * confirmed the update via the LE Connection Update Complete event. 
+ */ hci_dev_lock(hdev); params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); if (params) { - params->conn_min_interval = min; - params->conn_max_interval = max; - params->conn_latency = latency; - params->supervision_timeout = to_multiplier; + params->conn_min_interval = d->min; + params->conn_max_interval = d->max; + params->conn_latency = d->latency; + params->supervision_timeout = d->to_multiplier; + store_hint = 0x01; + } else { + store_hint = 0x00; } hci_dev_unlock(hdev); - memset(&cp, 0, sizeof(cp)); - cp.handle = cpu_to_le16(conn->handle); - cp.conn_interval_min = cpu_to_le16(min); - cp.conn_interval_max = cpu_to_le16(max); - cp.conn_latency = cpu_to_le16(latency); - cp.supervision_timeout = cpu_to_le16(to_multiplier); - cp.min_ce_len = cpu_to_le16(0x0000); - cp.max_ce_len = cpu_to_le16(0x0000); + mgmt_new_conn_param(hdev, &conn->dst, conn->dst_type, store_hint, + d->min, d->max, d->latency, d->to_multiplier); - hci_send_cmd(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp); + return 0; +} - if (params) - return 0x01; +static void le_conn_update_complete(struct hci_dev *hdev, void *data, int err) +{ + struct le_conn_update_data *d = data; - return 0x00; + hci_conn_put(d->conn); + kfree(d); +} + +void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, + u16 to_multiplier) +{ + struct le_conn_update_data *d; + + d = kzalloc_obj(*d); + if (!d) + return; + + hci_conn_get(conn); + d->conn = conn; + d->min = min; + d->max = max; + d->latency = latency; + d->to_multiplier = to_multiplier; + + if (hci_cmd_sync_queue(conn->hdev, le_conn_update_sync, d, + le_conn_update_complete) < 0) { + hci_conn_put(conn); + kfree(d); + } } void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, @@ -2130,6 +2197,9 @@ static int create_big_sync(struct hci_dev *hdev, void *data) u32 flags = 0; int err; + if (!hci_conn_valid(hdev, conn)) + return -ECANCELED; + if (qos->bcast.out.phys == BIT(1)) flags |= MGMT_ADV_FLAG_SEC_2M; @@ -2204,11 
+2274,24 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "conn %p", conn); + if (err == -ECANCELED) + goto done; + + hci_dev_lock(hdev); + + if (!hci_conn_valid(hdev, conn)) + goto unlock; + if (err) { bt_dev_err(hdev, "Unable to create BIG: %d", err); hci_connect_cfm(conn, err); hci_conn_del(conn); } + +unlock: + hci_dev_unlock(hdev); +done: + hci_conn_put(conn); } struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid, @@ -2336,10 +2419,11 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, BT_BOUND, &data); /* Queue start periodic advertising and create BIG */ - err = hci_cmd_sync_queue(hdev, create_big_sync, conn, + err = hci_cmd_sync_queue(hdev, create_big_sync, hci_conn_get(conn), create_big_complete); if (err < 0) { hci_conn_drop(conn); + hci_conn_put(conn); return ERR_PTR(err); }
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b2ee6b6..eea2f81 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c
@@ -7118,9 +7118,29 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, continue; } - if (hci_conn_set_handle(conn, - __le16_to_cpu(ev->bis_handle[i++]))) + if (ev->num_bis <= i) { + bt_dev_err(hdev, + "Not enough BIS handles for BIG 0x%2.2x", + ev->handle); + ev->status = HCI_ERROR_UNSPECIFIED; + hci_connect_cfm(conn, ev->status); + hci_conn_del(conn); continue; + } + + if (hci_conn_set_handle(conn, + __le16_to_cpu(ev->bis_handle[i++]))) { + bt_dev_err(hdev, + "Failed to set BIS handle for BIG 0x%2.2x", + ev->handle); + /* Force error so BIG gets terminated as not all BIS + * could be connected. + */ + ev->status = HCI_ERROR_UNSPECIFIED; + hci_connect_cfm(conn, ev->status); + hci_conn_del(conn); + continue; + } conn->state = BT_CONNECTED; set_bit(HCI_CONN_BIG_CREATED, &conn->flags); @@ -7129,7 +7149,10 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, hci_iso_setup_path(conn); } - if (!ev->status && !i) + /* If there is an unexpected error or if no BISes have been connected + * for the BIG, terminate it. + */ + if (ev->status == HCI_ERROR_UNSPECIFIED || (!ev->status && !i)) /* If no BISes have been connected for the BIG, * terminate. This is in case all bound connections * have been closed before the BIG creation @@ -7168,7 +7191,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, clear_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags); conn->num_bis = 0; - memset(conn->bis, 0, sizeof(conn->num_bis)); + memset(conn->bis, 0, sizeof(conn->bis)); for (i = 0; i < ev->num_bis; i++) { u16 handle = le16_to_cpu(ev->bis[i]);
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 7bcf8c5..976f91e 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c
@@ -1036,6 +1036,28 @@ static struct hidp_session *hidp_session_find(const bdaddr_t *bdaddr) } /* + * Consume session->conn: clear the member under hidp_session_sem, then + * l2cap_unregister_user() and l2cap_conn_put() the snapshot outside the + * sem. At most one caller wins; later callers see NULL and skip. The + * reference is the one hidp_session_new() took via l2cap_conn_get(). + */ +static void hidp_session_unregister_conn(struct hidp_session *session) +{ + struct l2cap_conn *conn; + + down_write(&hidp_session_sem); + conn = session->conn; + if (conn) + session->conn = NULL; + up_write(&hidp_session_sem); + + if (conn) { + l2cap_unregister_user(conn, &session->user); + l2cap_conn_put(conn); + } +} + +/* * Start session synchronously * This starts a session thread and waits until initialization * is done or returns an error if it couldn't be started. @@ -1311,8 +1333,7 @@ static int hidp_session_thread(void *arg) * Instead, this call has the same semantics as if user-space tried to * delete the session. */ - if (session->conn) - l2cap_unregister_user(session->conn, &session->user); + hidp_session_unregister_conn(session); hidp_session_put(session); @@ -1418,7 +1439,7 @@ int hidp_connection_del(struct hidp_conndel_req *req) HIDP_CTRL_VIRTUAL_CABLE_UNPLUG, NULL, 0); else - l2cap_unregister_user(session->conn, &session->user); + hidp_session_unregister_conn(session); hidp_session_put(session);
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index be145e2..7cb2864 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c
@@ -347,6 +347,7 @@ static int iso_connect_bis(struct sock *sk) return -EHOSTUNREACH; hci_dev_lock(hdev); + lock_sock(sk); if (!bis_capable(hdev)) { err = -EOPNOTSUPP; @@ -399,13 +400,9 @@ static int iso_connect_bis(struct sock *sk) goto unlock; } - lock_sock(sk); - err = iso_chan_add(conn, sk, NULL); - if (err) { - release_sock(sk); + if (err) goto unlock; - } /* Update source addr of the socket */ bacpy(&iso_pi(sk)->src, &hcon->src); @@ -421,9 +418,8 @@ static int iso_connect_bis(struct sock *sk) iso_sock_set_timer(sk, READ_ONCE(sk->sk_sndtimeo)); } - release_sock(sk); - unlock: + release_sock(sk); hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -444,6 +440,7 @@ static int iso_connect_cis(struct sock *sk) return -EHOSTUNREACH; hci_dev_lock(hdev); + lock_sock(sk); if (!cis_central_capable(hdev)) { err = -EOPNOTSUPP; @@ -498,13 +495,9 @@ static int iso_connect_cis(struct sock *sk) goto unlock; } - lock_sock(sk); - err = iso_chan_add(conn, sk, NULL); - if (err) { - release_sock(sk); + if (err) goto unlock; - } /* Update source addr of the socket */ bacpy(&iso_pi(sk)->src, &hcon->src); @@ -520,9 +513,8 @@ static int iso_connect_cis(struct sock *sk) iso_sock_set_timer(sk, READ_ONCE(sk->sk_sndtimeo)); } - release_sock(sk); - unlock: + release_sock(sk); hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -1193,7 +1185,7 @@ static int iso_sock_connect(struct socket *sock, struct sockaddr_unsized *addr, release_sock(sk); - if (bacmp(&iso_pi(sk)->dst, BDADDR_ANY)) + if (bacmp(&sa->iso_bdaddr, BDADDR_ANY)) err = iso_connect_cis(sk); else err = iso_connect_bis(sk); @@ -2256,8 +2248,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) sk = iso_get_sock(hdev, &hdev->bdaddr, bdaddr, BT_LISTEN, iso_match_sid, ev1); if (sk && !ev1->status) { + lock_sock(sk); iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle); iso_pi(sk)->bc_sid = ev1->sid; + release_sock(sk); } goto done; @@ -2268,8 +2262,10 @@ int iso_connect_ind(struct hci_dev *hdev, 
bdaddr_t *bdaddr, __u8 *flags) sk = iso_get_sock(hdev, &hdev->bdaddr, bdaddr, BT_LISTEN, iso_match_sid_past, ev1a); if (sk && !ev1a->status) { + lock_sock(sk); iso_pi(sk)->sync_handle = le16_to_cpu(ev1a->sync_handle); iso_pi(sk)->bc_sid = ev1a->sid; + release_sock(sk); } goto done; @@ -2296,27 +2292,35 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) ev2); if (sk) { - int err; - struct hci_conn *hcon = iso_pi(sk)->conn->hcon; + int err = 0; + bool big_sync; + struct hci_conn *hcon; + lock_sock(sk); + + hcon = iso_pi(sk)->conn->hcon; iso_pi(sk)->qos.bcast.encryption = ev2->encryption; if (ev2->num_bis < iso_pi(sk)->bc_num_bis) iso_pi(sk)->bc_num_bis = ev2->num_bis; - if (!test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) && - !test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) { + big_sync = !test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) && + !test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags); + + if (big_sync) err = hci_conn_big_create_sync(hdev, hcon, &iso_pi(sk)->qos, iso_pi(sk)->sync_handle, iso_pi(sk)->bc_num_bis, iso_pi(sk)->bc_bis); - if (err) { - bt_dev_err(hdev, "hci_le_big_create_sync: %d", - err); - sock_put(sk); - sk = NULL; - } + + release_sock(sk); + + if (big_sync && err) { + bt_dev_err(hdev, "hci_le_big_create_sync: %d", + err); + sock_put(sk); + sk = NULL; } } @@ -2370,8 +2374,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) if (!base || base_len > BASE_MAX_LENGTH) goto done; + lock_sock(sk); memcpy(iso_pi(sk)->base, base, base_len); iso_pi(sk)->base_len = base_len; + release_sock(sk); } else { /* This is a PA data fragment. Keep pa_data_len set to 0 * until all data has been reassembled.
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 77dec10..7701528 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c
@@ -4706,16 +4706,8 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_PARAM_UPDATE_RSP, sizeof(rsp), &rsp); - if (!err) { - u8 store_hint; - - store_hint = hci_le_conn_update(hcon, min, max, latency, - to_multiplier); - mgmt_new_conn_param(hcon->hdev, &hcon->dst, hcon->dst_type, - store_hint, min, max, latency, - to_multiplier); - - } + if (!err) + hci_le_conn_update(hcon, min, max, latency, to_multiplier); return 0; } @@ -5428,7 +5420,7 @@ static inline int l2cap_ecred_reconf_req(struct l2cap_conn *conn, * configured, the MPS field may be less than the current MPS * of that channel. */ - if (chan[i]->remote_mps >= mps && i) { + if (chan[i]->remote_mps > mps && num_scid > 1) { BT_ERR("chan %p decreased MPS %u -> %u", chan[i], chan[i]->remote_mps, mps); result = L2CAP_RECONF_INVALID_MPS;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 71e8c1b..cf590a6 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c
@@ -1498,6 +1498,9 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) { struct sock *sk, *parent = chan->data; + if (!parent) + return NULL; + lock_sock(parent); /* Check for backlog size */ @@ -1657,6 +1660,9 @@ static void l2cap_sock_state_change_cb(struct l2cap_chan *chan, int state, { struct sock *sk = chan->data; + if (!sk) + return; + sk->sk_state = state; if (err) @@ -1758,6 +1764,9 @@ static long l2cap_sock_get_sndtimeo_cb(struct l2cap_chan *chan) { struct sock *sk = chan->data; + if (!sk) + return 0; + return READ_ONCE(sk->sk_sndtimeo); }
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 611a9a9..d11bd53 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c
@@ -1715,9 +1715,12 @@ static int rfcomm_recv_data(struct rfcomm_session *s, u8 dlci, int pf, struct sk } if (pf && d->cfc) { - u8 credits = *(u8 *) skb->data; skb_pull(skb, 1); + u8 *credits = skb_pull_data(skb, 1); - d->tx_credits += credits; + if (!credits) + goto drop; + + d->tx_credits += *credits; if (d->tx_credits) clear_bit(RFCOMM_TX_THROTTLED, &d->flags); }
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 18826d4..eba4452 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c
@@ -472,9 +472,13 @@ static struct sock *sco_get_sock_listen(bdaddr_t *src) sk1 = sk; } + sk = sk ? sk : sk1; + if (sk) + sock_hold(sk); + read_unlock(&sco_sk_list.lock); - return sk ? sk : sk1; + return sk; } static void sco_sock_destruct(struct sock *sk) @@ -515,11 +519,13 @@ static void sco_sock_kill(struct sock *sk) BT_DBG("sk %p state %d", sk, sk->sk_state); /* Sock is dead, so set conn->sk to NULL to avoid possible UAF */ + lock_sock(sk); if (sco_pi(sk)->conn) { sco_conn_lock(sco_pi(sk)->conn); sco_pi(sk)->conn->sk = NULL; sco_conn_unlock(sco_pi(sk)->conn); } + release_sock(sk); /* Kill poor orphan */ bt_sock_unlink(&sco_sk_list, sk); @@ -1365,40 +1371,51 @@ static int sco_sock_release(struct socket *sock) static void sco_conn_ready(struct sco_conn *conn) { - struct sock *parent; - struct sock *sk = conn->sk; + struct sock *parent, *sk; + + sco_conn_lock(conn); + sk = sco_sock_hold(conn); + sco_conn_unlock(conn); BT_DBG("conn %p", conn); if (sk) { lock_sock(sk); - sco_sock_clear_timer(sk); - sk->sk_state = BT_CONNECTED; - sk->sk_state_change(sk); - release_sock(sk); - } else { - sco_conn_lock(conn); - if (!conn->hcon) { - sco_conn_unlock(conn); - return; + /* conn->sk may have become NULL if racing with sk close, but + * due to held hdev->lock, it can't become different sk. 
+ */ + if (conn->sk) { + sco_sock_clear_timer(sk); + sk->sk_state = BT_CONNECTED; + sk->sk_state_change(sk); } + release_sock(sk); + sock_put(sk); + } else { + if (!conn->hcon) + return; + + lockdep_assert_held(&conn->hcon->hdev->lock); + parent = sco_get_sock_listen(&conn->hcon->src); - if (!parent) { - sco_conn_unlock(conn); + if (!parent) return; - } lock_sock(parent); + sco_conn_lock(conn); + + /* hdev->lock guarantees conn->sk == NULL still here */ + + if (parent->sk_state != BT_LISTEN) + goto release; + sk = sco_sock_alloc(sock_net(parent), NULL, BTPROTO_SCO, GFP_ATOMIC, 0); - if (!sk) { - release_sock(parent); - sco_conn_unlock(conn); - return; - } + if (!sk) + goto release; sco_sock_init(sk, parent); @@ -1417,9 +1434,10 @@ static void sco_conn_ready(struct sco_conn *conn) /* Wake up parent */ parent->sk_data_ready(parent); - release_sock(parent); - +release: sco_conn_unlock(conn); + release_sock(parent); + sock_put(parent); } }
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 14eb781..ecd659f 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c
@@ -172,7 +172,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) struct bpf_map *map; smap = rcu_dereference(SDATA(selem)->smap); - if (!(smap->map.map_flags & BPF_F_CLONE)) + if (!smap || !(smap->map.map_flags & BPF_F_CLONE)) continue; /* Note that for lockless listeners adding new element @@ -531,10 +531,10 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs) } EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc); -static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb) +static int diag_get(struct bpf_local_storage_map *smap, + struct bpf_local_storage_data *sdata, struct sk_buff *skb) { struct nlattr *nla_stg, *nla_value; - struct bpf_local_storage_map *smap; /* It cannot exceed max nlattr's payload */ BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE); @@ -543,7 +543,6 @@ static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb) if (!nla_stg) return -EMSGSIZE; - smap = rcu_dereference(sdata->smap); if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id)) goto errout; @@ -558,6 +557,7 @@ static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb) sdata->data, true); else copy_map_value(&smap->map, nla_data(nla_value), sdata->data); + check_and_init_map_value(&smap->map, nla_data(nla_value)); nla_nest_end(skb, nla_stg); return 0; @@ -596,9 +596,11 @@ static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb, saved_len = skb->len; hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { smap = rcu_dereference(SDATA(selem)->smap); + if (!smap) + continue; diag_size += nla_value_size(smap->map.value_size); - if (nla_stgs && diag_get(SDATA(selem), skb)) + if (nla_stgs && diag_get(smap, SDATA(selem), skb)) /* Continue to learn diag_size */ err = -EMSGSIZE; } @@ -665,7 +667,7 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, diag_size += nla_value_size(diag->maps[i]->value_size); - if (nla_stgs && diag_get(sdata, skb)) + if 
(nla_stgs && diag_get((struct bpf_local_storage_map *)diag->maps[i], sdata, skb)) /* Continue to learn diag_size */ err = -EMSGSIZE; }
diff --git a/net/core/dev.c b/net/core/dev.c index 06c1959..8bfa831 100644 --- a/net/core/dev.c +++ b/net/core/dev.c
@@ -371,7 +371,7 @@ static void netdev_name_node_alt_free(struct rcu_head *head) static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node) { netdev_name_node_del(name_node); - list_del(&name_node->list); + list_del_rcu(&name_node->list); call_rcu(&name_node->rcu, netdev_name_node_alt_free); }
diff --git a/net/core/filter.c b/net/core/filter.c index 80a3b70..9590877 100644 --- a/net/core/filter.c +++ b/net/core/filter.c
@@ -1654,15 +1654,24 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) return err; } +static void sk_reuseport_prog_free_rcu(struct rcu_head *rcu) +{ + struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); + struct bpf_prog *prog = aux->prog; + + bpf_release_orig_filter(prog); + bpf_prog_free(prog); +} + void sk_reuseport_prog_free(struct bpf_prog *prog) { if (!prog) return; - if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) - bpf_prog_put(prog); + if (bpf_prog_was_classic(prog)) + call_rcu(&prog->aux->rcu, sk_reuseport_prog_free_rcu); else - bpf_prog_destroy(prog); + bpf_prog_put(prog); } static inline int __bpf_try_make_writable(struct sk_buff *skb, @@ -5481,7 +5490,7 @@ static int sol_tcp_sockopt(struct sock *sk, int optname, char *optval, int *optlen, bool getopt) { - if (sk->sk_protocol != IPPROTO_TCP) + if (!sk_is_tcp(sk)) return -EINVAL; switch (optname) { @@ -5688,6 +5697,30 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_5(bpf_sk_setsockopt_nodelay, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + /* + * TCP_NODELAY triggers tcp_push_pending_frames() and re-enters + * CA_EVENT_TX_START in bpf_tcp_cc. 
+ */ + if (level == SOL_TCP && optname == TCP_NODELAY) + return -EOPNOTSUPP; + + return _bpf_setsockopt(sk, level, optname, optval, optlen); +} + +const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto = { + .func = bpf_sk_setsockopt_nodelay, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE, +}; + BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level, int, optname, char *, optval, int, optlen) { @@ -5833,6 +5866,12 @@ BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, if (!is_locked_tcp_sock_ops(bpf_sock)) return -EOPNOTSUPP; + /* TCP_NODELAY triggers tcp_push_pending_frames() and re-enters these callbacks. */ + if ((bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB || + bpf_sock->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB) && + level == SOL_TCP && optname == TCP_NODELAY) + return -EOPNOTSUPP; + return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen); } @@ -6443,6 +6482,8 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, * against MTU of FIB lookup resulting net_device */ dev = dev_get_by_index_rcu(net, params->ifindex); + if (unlikely(!dev)) + return -ENODEV; if (!is_skb_forwardable(dev, skb)) rc = BPF_FIB_LKUP_RET_FRAG_NEEDED; @@ -7443,7 +7484,7 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) { - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + if (sk_fullsock(sk) && sk_is_tcp(sk)) return (unsigned long)sk; return (unsigned long)NULL; @@ -11915,7 +11956,7 @@ BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk) */ BTF_TYPE_EMIT(struct tcp6_sock); if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && - sk->sk_family == AF_INET6) + sk->sk_type == SOCK_STREAM && sk->sk_family == AF_INET6) return (unsigned long)sk; return (unsigned long)NULL; @@ -11931,7 +11972,7 @@ 
const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = { BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk) { - if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + if (sk && sk_fullsock(sk) && sk_is_tcp(sk)) return (unsigned long)sk; return (unsigned long)NULL;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 4381e0f..84faace 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c
@@ -608,14 +608,16 @@ EXPORT_SYMBOL_GPL(__netpoll_setup); /* * Returns a pointer to a string representation of the identifier used * to select the egress interface for the given netpoll instance. buf - * must be a buffer of length at least MAC_ADDR_STR_LEN + 1. + * is used to format np->dev_mac when np->dev_name is empty; bufsz must + * be at least MAC_ADDR_STR_LEN + 1 to fit the formatted MAC address + * and its NUL terminator. */ -static char *egress_dev(struct netpoll *np, char *buf) +static char *egress_dev(struct netpoll *np, char *buf, size_t bufsz) { if (np->dev_name[0]) return np->dev_name; - snprintf(buf, MAC_ADDR_STR_LEN, "%pM", np->dev_mac); + snprintf(buf, bufsz, "%pM", np->dev_mac); return buf; } @@ -645,7 +647,7 @@ static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev) if (!IS_ENABLED(CONFIG_IPV6)) { np_err(np, "IPv6 is not supported %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return -EINVAL; } @@ -667,7 +669,7 @@ static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev) } if (err) { np_err(np, "no IPv6 address for %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return err; } @@ -687,14 +689,14 @@ static int netpoll_take_ipv4(struct netpoll *np, struct net_device *ndev) in_dev = __in_dev_get_rtnl(ndev); if (!in_dev) { np_err(np, "no IP address for %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return -EDESTADDRREQ; } ifa = rtnl_dereference(in_dev->ifa_list); if (!ifa) { np_err(np, "no IP address for %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return -EDESTADDRREQ; } @@ -736,7 +738,8 @@ int netpoll_setup(struct netpoll *np) ndev = dev_getbyhwaddr(net, ARPHRD_ETHER, np->dev_mac); if (!ndev) { - np_err(np, "%s doesn't exist, aborting\n", egress_dev(np, buf)); + np_err(np, "%s doesn't exist, aborting\n", + egress_dev(np, buf, sizeof(buf))); err = -ENODEV; goto unlock; } @@ -744,14 +747,14 @@ 
int netpoll_setup(struct netpoll *np) if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); err = -EBUSY; goto put; } if (!netif_running(ndev)) { np_info(np, "device %s not up yet, forcing it\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); err = dev_open(ndev, NULL); if (err) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b613bb6..df042da 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c
@@ -1572,6 +1572,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, port_guid.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); + memset(&vf_broadcast, 0, sizeof(vf_broadcast)); memcpy(vf_broadcast.broadcast, dev->broadcast, dev->addr_len); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 02a68be..99e3789 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c
@@ -1630,18 +1630,23 @@ void sock_map_unhash(struct sock *sk) void (*saved_unhash)(struct sock *sk); struct sk_psock *psock; +retry: rcu_read_lock(); psock = sk_psock(sk); if (unlikely(!psock)) { rcu_read_unlock(); saved_unhash = READ_ONCE(sk->sk_prot)->unhash; + if (unlikely(saved_unhash == sock_map_unhash)) + goto retry; } else { saved_unhash = psock->saved_unhash; sock_map_remove_links(sk, psock); rcu_read_unlock(); + + if (WARN_ON_ONCE(saved_unhash == sock_map_unhash)) + return; } - if (WARN_ON_ONCE(saved_unhash == sock_map_unhash)) - return; + if (saved_unhash) saved_unhash(sk); } @@ -1652,20 +1657,25 @@ void sock_map_destroy(struct sock *sk) void (*saved_destroy)(struct sock *sk); struct sk_psock *psock; +retry: rcu_read_lock(); psock = sk_psock_get(sk); if (unlikely(!psock)) { rcu_read_unlock(); saved_destroy = READ_ONCE(sk->sk_prot)->destroy; + if (unlikely(saved_destroy == sock_map_destroy)) + goto retry; } else { saved_destroy = psock->saved_destroy; sock_map_remove_links(sk, psock); rcu_read_unlock(); sk_psock_stop(psock); sk_psock_put(sk, psock); + + if (WARN_ON_ONCE(saved_destroy == sock_map_destroy)) + return; } - if (WARN_ON_ONCE(saved_destroy == sock_map_destroy)) - return; + if (saved_destroy) saved_destroy(sk); } @@ -1676,32 +1686,33 @@ void sock_map_close(struct sock *sk, long timeout) void (*saved_close)(struct sock *sk, long timeout); struct sk_psock *psock; +retry: lock_sock(sk); rcu_read_lock(); - psock = sk_psock(sk); + psock = sk_psock_get(sk); if (likely(psock)) { saved_close = psock->saved_close; sock_map_remove_links(sk, psock); - psock = sk_psock_get(sk); - if (unlikely(!psock)) - goto no_psock; rcu_read_unlock(); sk_psock_stop(psock); release_sock(sk); cancel_delayed_work_sync(&psock->work); sk_psock_put(sk, psock); + + /* Make sure we do not recurse. This is a bug. + * Leak the socket instead of crashing on a stack overflow. 
+ */ + if (WARN_ON_ONCE(saved_close == sock_map_close)) + return; } else { saved_close = READ_ONCE(sk->sk_prot)->close; -no_psock: rcu_read_unlock(); release_sock(sk); + + if (unlikely(saved_close == sock_map_close)) + goto retry; } - /* Make sure we do not recurse. This is a bug. - * Leak the socket instead of crashing on a stack overflow. - */ - if (WARN_ON_ONCE(saved_close == sock_map_close)) - return; saved_close(sk, timeout); } EXPORT_SYMBOL_GPL(sock_map_close);
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 5fb81244..4366cba 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c
@@ -124,9 +124,14 @@ static void ah_output_done(void *data, int err) struct iphdr *top_iph = ip_hdr(skb); struct ip_auth_hdr *ah = ip_auth_hdr(skb); int ihl = ip_hdrlen(skb); + int seqhi_len = 0; + __be32 *seqhi; + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); iph = AH_SKB_CB(skb)->tmp; - icv = ah_tmp_icv(iph, ihl); + seqhi = (__be32 *)((char *)iph + ihl); + icv = ah_tmp_icv(seqhi, seqhi_len); memcpy(ah->auth_data, icv, ahp->icv_trunc_len); top_iph->tos = iph->tos; @@ -270,12 +275,17 @@ static void ah_input_done(void *data, int err) struct ip_auth_hdr *ah = ip_auth_hdr(skb); int ihl = ip_hdrlen(skb); int ah_hlen = (ah->hdrlen + 2) << 2; + int seqhi_len = 0; + __be32 *seqhi; if (err) goto out; + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); work_iph = AH_SKB_CB(skb)->tmp; - auth_data = ah_tmp_auth(work_iph, ihl); + seqhi = (__be32 *)((char *)work_iph + ihl); + auth_data = ah_tmp_auth(seqhi, seqhi_len); icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len); err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0;
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 008edc7..791e150 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c
@@ -168,7 +168,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, */ if (prog_ops_moff(prog) != offsetof(struct tcp_congestion_ops, release)) - return &bpf_sk_setsockopt_proto; + return &bpf_sk_setsockopt_nodelay_proto; return NULL; case BPF_FUNC_getsockopt: /* Since get/setsockopt is usually expected to
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 6dfc0bc..6a5febb 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c
@@ -873,7 +873,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) nfrags = 1; goto skip_cow; - } else if (!skb_has_frag_list(skb)) { + } else if (!skb_has_frag_list(skb) && + !skb_has_shared_frag(skb)) { nfrags = skb_shinfo(skb)->nr_frags; nfrags++;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a674fb4..a9ad390 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c
@@ -122,16 +122,29 @@ * contradict to specs provided this delay is small enough. */ -#define IGMP_V1_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \ - IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ - ((in_dev)->mr_v1_seen && \ - time_before(jiffies, (in_dev)->mr_v1_seen))) -#define IGMP_V2_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \ - IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ - ((in_dev)->mr_v2_seen && \ - time_before(jiffies, (in_dev)->mr_v2_seen))) +static bool IGMP_V1_SEEN(const struct in_device *in_dev) +{ + unsigned long seen; + + if (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1) + return true; + if (IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1) + return true; + seen = READ_ONCE(in_dev->mr_v1_seen); + return seen && time_before(jiffies, seen); +} + +static bool IGMP_V2_SEEN(const struct in_device *in_dev) +{ + unsigned long seen; + + if (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2) + return true; + if (IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2) + return true; + seen = READ_ONCE(in_dev->mr_v2_seen); + return seen && time_before(jiffies, seen); +} static int unsolicited_report_interval(struct in_device *in_dev) { @@ -954,23 +967,21 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, int max_delay; int mark = 0; struct net *net = dev_net(in_dev->dev); - + unsigned long seen; if (len == 8) { + seen = jiffies + READ_ONCE(in_dev->mr_qrv) * READ_ONCE(in_dev->mr_qi) + + READ_ONCE(in_dev->mr_qri); if (ih->code == 0) { /* Alas, old v1 router presents here. 
*/ max_delay = IGMP_QUERY_RESPONSE_INTERVAL; - in_dev->mr_v1_seen = jiffies + - (in_dev->mr_qrv * in_dev->mr_qi) + - in_dev->mr_qri; + WRITE_ONCE(in_dev->mr_v1_seen, seen); group = 0; } else { /* v2 router present */ max_delay = ih->code*(HZ/IGMP_TIMER_SCALE); - in_dev->mr_v2_seen = jiffies + - (in_dev->mr_qrv * in_dev->mr_qi) + - in_dev->mr_qri; + WRITE_ONCE(in_dev->mr_v2_seen, seen); } /* cancel the interface change timer */ WRITE_ONCE(in_dev->mr_ifc_count, 0); @@ -995,6 +1006,8 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (!max_delay) max_delay = 1; /* can't mod w/ 0 */ } else { /* v3 */ + unsigned long mr_qi; + if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) return true; @@ -1015,15 +1028,16 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * received value was zero, use the default or statically * configured value. */ - in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); - in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; - + WRITE_ONCE(in_dev->mr_qrv, + ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv)); + mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; + WRITE_ONCE(in_dev->mr_qi, mr_qi); /* RFC3376, 8.3. Query Response Interval: * The number of seconds represented by the [Query Response * Interval] must be less than the [Query Interval]. */ - if (in_dev->mr_qri >= in_dev->mr_qi) - in_dev->mr_qri = (in_dev->mr_qi/HZ - 1)*HZ; + if (READ_ONCE(in_dev->mr_qri) >= mr_qi) + WRITE_ONCE(in_dev->mr_qri, (mr_qi/HZ - 1) * HZ); if (!group) { /* general query */ if (ih3->nsrcs)
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d8083b9..5b957a8 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c
@@ -179,7 +179,8 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, seq = read_seqbegin(&base->lock); p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp); - if (p) + /* Make sure tree was not modified during our lookup. */ + if (p && !read_seqretry(&base->lock, seq)) return p; /* retry an exact lookup, taking the lock before.
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e4790cc..5bcd73c 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c
@@ -1233,6 +1233,8 @@ static int __ip_append_data(struct sock *sk, if (err < 0) goto error; copy = err; + if (!(flags & MSG_NO_SHARED_FRAGS)) + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; wmem_alloc_delta += copy; } else if (!zc) { int i = skb_shinfo(skb)->nr_frags;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2058ca8..2628cd3a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c
@@ -537,15 +537,16 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) }; int err; + rcu_read_lock(); err = ipmr_fib_lookup(net, &fl4, &mrt); if (err < 0) { + rcu_read_unlock(); kfree_skb(skb); return err; } DEV_STATS_ADD(dev, tx_bytes, skb->len); DEV_STATS_INC(dev, tx_packets); - rcu_read_lock(); /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ ipmr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), @@ -1112,11 +1113,12 @@ static int ipmr_cache_report(const struct mr_table *mrt, msg->im_vif_hi = vifi >> 8; ipv4_pktinfo_prepare(mroute_sk, pkt, false); memcpy(skb->cb, pkt->cb, sizeof(skb->cb)); - /* Add our header */ - igmp = skb_put(skb, sizeof(struct igmphdr)); + /* Add our header. + * Note that code, csum and group fields are cleared. + */ + igmp = skb_put_zero(skb, sizeof(struct igmphdr)); igmp->type = assert; msg->im_msgtype = assert; - igmp->code = 0; ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ skb->transport_header = skb->network_header; }
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c index 5080fa5..f9c6755 100644 --- a/net/ipv4/netfilter/nf_socket_ipv4.c +++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -94,6 +94,9 @@ struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, #endif int doff = 0; + if (ntohs(iph->frag_off) & IP_OFFSET) + return NULL; + if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { struct tcphdr _hdr; struct udphdr *hp;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8fc24c3..c0526cc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c
@@ -1827,7 +1827,6 @@ INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason reason; - struct sock *rsk; reason = psp_sk_rx_policy_check(sk, skb); if (reason) @@ -1863,24 +1862,21 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; if (nsk != sk) { reason = tcp_child_process(sk, nsk, skb); - if (reason) { - rsk = nsk; + sock_put(nsk); + if (reason) goto reset; - } return 0; } } else sock_rps_save_rxhash(sk, skb); reason = tcp_rcv_state_process(sk, skb); - if (reason) { - rsk = sk; + if (reason) goto reset; - } return 0; reset: - tcp_v4_send_reset(rsk, skb, sk_rst_convert_drop_reason(reason)); + tcp_v4_send_reset(sk, skb, sk_rst_convert_drop_reason(reason)); discard: sk_skb_reason_drop(sk, skb, reason); /* Be careful here. If this function gets more complicated and @@ -2193,8 +2189,10 @@ int tcp_v4_rcv(struct sk_buff *skb) rst_reason = sk_rst_convert_drop_reason(drop_reason); tcp_v4_send_reset(nsk, skb, rst_reason); + sock_put(nsk); goto discard_and_relse; } + sock_put(nsk); sock_put(sk); return 0; }
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 199f0b5..e6092c3 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c
@@ -1012,6 +1012,6 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, } bh_unlock_sock(child); - sock_put(child); + return reason; }
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index c024aa7..c3806c6a 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig
@@ -164,7 +164,7 @@ select INET_TUNNEL select NET_IP_TUNNEL select IPV6_NDISC_NODETYPE - default y + default m help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -172,7 +172,7 @@ into IPv4 packets. This is useful if you want to connect two IPv6 networks over an IPv4-only path. - Saying M here will produce a module called sit. If unsure, say Y. + Saying M here will produce a module called sit. If unsure, say M. config IPV6_SIT_6RD bool "IPv6: IPv6 Rapid Deployment (6RD)"
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index cb26bee..de1e681 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c
@@ -317,14 +317,19 @@ static void ah6_output_done(void *data, int err) struct ipv6hdr *top_iph = ipv6_hdr(skb); struct ip_auth_hdr *ah = ip_auth_hdr(skb); struct tmp_ext *iph_ext; + int seqhi_len = 0; + __be32 *seqhi; extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr); if (extlen) extlen += sizeof(*iph_ext); + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); iph_base = AH_SKB_CB(skb)->tmp; iph_ext = ah_tmp_ext(iph_base); - icv = ah_tmp_icv(iph_ext, extlen); + seqhi = (__be32 *)((char *)iph_ext + extlen); + icv = ah_tmp_icv(seqhi, seqhi_len); memcpy(ah->auth_data, icv, ahp->icv_trunc_len); memcpy(top_iph, iph_base, IPV6HDR_BASELEN); @@ -471,13 +476,18 @@ static void ah6_input_done(void *data, int err) struct ip_auth_hdr *ah = ip_auth_hdr(skb); int hdr_len = skb_network_header_len(skb); int ah_hlen = ipv6_authlen(ah); + int seqhi_len = 0; + __be32 *seqhi; if (err) goto out; + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); work_iph = AH_SKB_CB(skb)->tmp; auth_data = ah_tmp_auth(work_iph, hdr_len); - icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len); + seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len); + icv = ah_tmp_icv(seqhi, seqhi_len); err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; if (err)
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9f75313..9c06c5a 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c
@@ -915,7 +915,8 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) nfrags = 1; goto skip_cow; - } else if (!skb_has_frag_list(skb)) { + } else if (!skb_has_frag_list(skb) && + !skb_has_shared_frag(skb)) { nfrags = skb_shinfo(skb)->nr_frags; nfrags++;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 49e31e4..9d06d48 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c
@@ -73,6 +73,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, __be16 *frag_offp) { u8 nexthdr = *nexthdrp; + int exthdr_cnt = 0; *frag_offp = 0; @@ -82,6 +83,8 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, if (nexthdr == NEXTHDR_NONE) return -1; + if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) + return -1; hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); if (!hp) return -1; @@ -190,6 +193,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, { unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); u8 nexthdr = ipv6_hdr(skb)->nexthdr; + int exthdr_cnt = 0; bool found; if (fragoff) @@ -216,6 +220,9 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, return -ENOENT; } + if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) + return -EBADMSG; + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); if (!hp) return -EBADMSG;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 63fc855..365b405 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c
@@ -2262,10 +2262,11 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); + struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm p; - struct ip6_tnl *t; + struct ip6gre_net *ign; + ign = net_generic(t->net, ip6gre_net_id); t = ip6gre_changelink_common(dev, tb, data, &p, extack); if (IS_ERR(t)) return PTR_ERR(t);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 967b07a..8972863 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c
@@ -403,6 +403,7 @@ INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *)); void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, bool have_final) { + int exthdr_cnt = IP6CB(skb)->flags & IP6SKB_HOPBYHOP ? 1 : 0; const struct inet6_protocol *ipprot; struct inet6_dev *idev; unsigned int nhoff; @@ -487,6 +488,10 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, nexthdr = ret; goto resubmit_final; } else { + if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) { + SKB_DR_SET(reason, IPV6_TOO_MANY_EXTHDRS); + goto discard; + } goto resubmit; } } else if (ret == 0) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7e92909..c14adcd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c
@@ -468,6 +468,7 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) default: break; } + hdr = ipv6_hdr(skb); } /* @@ -582,6 +583,8 @@ int ip6_forward(struct sk_buff *skb) if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev)) { int proxied = ip6_forward_proxy_check(skb); + + hdr = ipv6_hdr(skb); if (proxied > 0) { /* It's tempting to decrease the hop limit * here by 1, as we do at the end of the @@ -1794,6 +1797,8 @@ static int __ip6_append_data(struct sock *sk, if (err < 0) goto error; copy = err; + if (!(flags & MSG_NO_SHARED_FRAGS)) + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; wmem_alloc_delta += copy; } else if (!zc) { int i = skb_shinfo(skb)->nr_frags;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index c468c83..9d1037a 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c
@@ -399,11 +399,15 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) unsigned int nhoff = raw - skb->data; unsigned int off = nhoff + sizeof(*ipv6h); u8 nexthdr = ipv6h->nexthdr; + int exthdr_cnt = 0; while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { struct ipv6_opt_hdr *hdr; u16 optlen; + if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) + break; + if (!pskb_may_pull(skb, off + sizeof(*hdr))) break;
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c index ced8bd4..893f2ae 100644 --- a/net/ipv6/netfilter/nf_socket_ipv6.c +++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -100,6 +100,7 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, const struct in6_addr *daddr = NULL, *saddr = NULL; struct ipv6hdr *iph = ipv6_hdr(skb), ipv6_var; struct sk_buff *data_skb = NULL; + unsigned short fragoff = 0; int doff = 0; int thoff = 0, tproto; #if IS_ENABLED(CONFIG_NF_CONNTRACK) @@ -107,8 +108,8 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, struct nf_conn const *ct; #endif - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); - if (tproto < 0) { + tproto = ipv6_find_hdr(skb, &thoff, -1, &fragoff, NULL); + if (tproto < 0 || fragoff) { pr_debug("unable to find transport header in IPv6 packet, dropping\n"); return NULL; }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 19eb6b7..e3d355d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c
@@ -1645,6 +1645,10 @@ static unsigned int fib6_mtu(const struct fib6_result *res) rcu_read_lock(); idev = __in6_dev_get(dev); + if (!idev) { + rcu_read_unlock(); + return 0; + } mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); } @@ -4995,6 +4999,7 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg) rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) break; rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN; + fib6_update_sernum(net, rt); rt6_multipath_rebalance(rt); break; }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2c3f7a7..d13d49b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c
@@ -288,8 +288,10 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, saddr = &fl6->saddr; err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); - if (err) + if (err) { + dst_release(dst); goto failure; + } } /* set the source address */ @@ -1617,12 +1619,13 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_LISTEN) { struct sock *nsk = tcp_v6_cookie_check(sk, skb); + if (!nsk) + return 0; if (nsk != sk) { - if (nsk) { - reason = tcp_child_process(sk, nsk, skb); - if (reason) - goto reset; - } + reason = tcp_child_process(sk, nsk, skb); + sock_put(nsk); + if (reason) + goto reset; return 0; } } else @@ -1827,8 +1830,10 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) rst_reason = sk_rst_convert_drop_reason(drop_reason); tcp_v6_send_reset(nsk, skb, rst_reason); + sock_put(nsk); goto discard_and_relse; } + sock_put(nsk); sock_put(sk); return 0; }
diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index ea2f805..9b586fc 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c
@@ -88,8 +88,10 @@ int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, dst = ip6_route_input_lookup(dev_net(skb->dev), skb->dev, &fl6, skb, flags); - if (dst->error) + if (dst->error) { + dst_release(dst); goto drop; + } skb_dst_set(skb, dst); }
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 160ae65..0a0f278 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c
@@ -438,6 +438,15 @@ ieee80211_verify_sta_ht_mcs_support(struct ieee80211_sub_if_data *sdata, ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); /* + * Some Xfinity XB8 firmware advertises >1 spatial stream MCS indexes in + * their basic HT-MCS set. On cards with lower spatial streams, the check + * would fail, and we'd be stuck with no HT when it in fact work fine with + * its own supported rate. So check it only in strict mode. + */ + if (!ieee80211_hw_check(&sdata->local->hw, STRICT)) + return true; + + /* * P802.11REVme/D7.0 - 6.5.4.2.4 * ... * If the MLME of an HT STA receives an MLME-JOIN.request primitive @@ -9140,7 +9149,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss *bss = (void *)cbss->priv; struct sta_info *new_sta = NULL; struct ieee80211_link_data *link; - bool have_sta = false; + struct sta_info *have_sta = NULL; bool mlo; int err; u16 new_links; @@ -9159,11 +9168,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, mlo = false; } - if (assoc) { - rcu_read_lock(); + if (assoc) have_sta = sta_info_get(sdata, ap_mld_addr); - rcu_read_unlock(); - } if (mlo && !have_sta && WARN_ON(sdata->vif.valid_links || sdata->vif.active_links)) @@ -9327,6 +9333,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, out_release_chan: ieee80211_link_release_channel(link); out_err: + if (mlo && have_sta) + WARN_ON(__sta_info_destroy(have_sta)); ieee80211_vif_set_links(sdata, 0, 0); return err; }
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3e5d1c4..d18e962 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c
@@ -4971,7 +4971,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, struct sk_buff *skb = rx->skb; struct ieee80211_hdr *hdr = (void *)skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - static ieee80211_rx_result res; + ieee80211_rx_result res; int orig_len = skb->len; int hdrlen = ieee80211_hdrlen(hdr->frame_control); int snap_offs = hdrlen; @@ -5380,7 +5380,9 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (!link_sta) goto out; - ieee80211_rx_data_set_link(&rx, link_sta->link_id); + if (!ieee80211_rx_data_set_link(&rx, + link_sta->link_id)) + goto out; } if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
diff --git a/net/mac80211/tests/chan-mode.c b/net/mac80211/tests/chan-mode.c index adc0690..fa37083 100644 --- a/net/mac80211/tests/chan-mode.c +++ b/net/mac80211/tests/chan-mode.c
@@ -65,6 +65,7 @@ static const struct determine_chan_mode_case { .ht_capa_mask = { .mcs.rx_mask[0] = 0xf7, }, + .strict = true, }, { .desc = "Masking out a RX rate in VHT capabilities", .conn_mode = IEEE80211_CONN_MODE_EHT,
diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b093bc2..2529b01 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c
@@ -3700,11 +3700,11 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy, struct ieee80211_local *local = container_of(work, struct ieee80211_local, radar_detected_work); struct cfg80211_chan_def chandef; - struct ieee80211_chanctx *ctx; + struct ieee80211_chanctx *ctx, *tmp; lockdep_assert_wiphy(local->hw.wiphy); - list_for_each_entry(ctx, &local->chanctx_list, list) { + list_for_each_entry_safe(ctx, tmp, &local->chanctx_list, list) { if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER) continue;
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index e103364..e4b230e 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c
@@ -920,9 +920,9 @@ static void mctp_test_route_input_cloned_frag(struct kunit *test) static void mctp_test_route_input_null_eid(struct kunit *test) { struct mctp_hdr hdr = RX_HDR(1, 10, 0, FL_S | FL_E | FL_TO); + struct sockaddr_mctp addr = { 0 }; struct sk_buff *skb_pkt, *skb_sk; struct mctp_test_dev *dev; - struct sockaddr_mctp addr; struct socket *sock; u8 type = 0; int rc;
diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c index c3987d5..6eef8d4 100644 --- a/net/mctp/test/utils.c +++ b/net/mctp/test/utils.c
@@ -116,7 +116,7 @@ void mctp_test_destroy_dev(struct mctp_test_dev *dev) static int mctp_test_dst_output(struct mctp_dst *dst, struct sk_buff *skb) { skb->dev = dst->dev->dev; - dev_queue_xmit(skb); + dev_direct_xmit(skb, 0); return 0; }
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index 8a16672..4cc16cb 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c
@@ -14,7 +14,7 @@ struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) { - if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) + if (sk && sk_fullsock(sk) && sk_is_tcp(sk) && sk_is_mptcp(sk)) return mptcp_sk(mptcp_subflow_ctx(sk)->conn); return NULL;
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index 82ec15b..082c46c 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c
@@ -12,6 +12,7 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf struct sock *sk, *ssk; struct sk_buff *skb; struct tcp_sock *tp; + bool has_rxtstamp; /* on early fallback the subflow context is deleted by * subflow_syn_recv_sock() @@ -40,12 +41,13 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf */ tp->copied_seq += skb->len; subflow->ssn_offset += skb->len; + has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; /* Only the sequence delta is relevant */ MPTCP_SKB_CB(skb)->map_seq = -skb->len; MPTCP_SKB_CB(skb)->end_seq = 0; MPTCP_SKB_CB(skb)->offset = 0; - MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp; MPTCP_SKB_CB(skb)->cant_coalesce = 1; mptcp_data_lock(sk);
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 57a4566..3c152bf 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c
@@ -16,6 +16,7 @@ struct mptcp_pm_add_entry { struct list_head list; struct mptcp_addr_info addr; u8 retrans_times; + bool timer_done; struct timer_list add_timer; struct mptcp_sock *sock; struct rcu_head rcu; @@ -283,6 +284,9 @@ int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct mptcp_addr_info local, remote; + if (!__mptcp_subflow_active(subflow)) + continue; + mptcp_local_address((struct sock_common *)ssk, &local); if (!mptcp_addresses_equal(&local, addr, addr->port)) continue; @@ -305,18 +309,31 @@ static unsigned int mptcp_adjust_add_addr_timeout(struct mptcp_sock *msk) const struct net *net = sock_net((struct sock *)msk); unsigned int rto = mptcp_get_add_addr_timeout(net); struct mptcp_subflow_context *subflow; - unsigned int max = 0; + unsigned int max = 0, max_stale = 0; + + if (!rto) + return 0; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct inet_connection_sock *icsk = inet_csk(ssk); - if (icsk->icsk_rto > max) + if (!__mptcp_subflow_active(subflow)) + continue; + + if (unlikely(subflow->stale)) { + if (icsk->icsk_rto > max_stale) + max_stale = icsk->icsk_rto; + } else if (icsk->icsk_rto > max) { max = icsk->icsk_rto; + } } - if (max && max < rto) - rto = max; + if (max) + return min(max, rto); + + if (max_stale) + return min(max_stale, rto); return rto; } @@ -327,26 +344,22 @@ static void mptcp_pm_add_timer(struct timer_list *timer) add_timer); struct mptcp_sock *msk = entry->sock; struct sock *sk = (struct sock *)msk; - unsigned int timeout; + unsigned int timeout = 0; pr_debug("msk=%p\n", msk); - if (!msk) - return; + bh_lock_sock(sk); + if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE)) + goto out; - if (inet_sk_state_load(sk) == TCP_CLOSE) - return; - - if (!entry->addr.id) - return; - - if (mptcp_pm_should_add_signal_addr(msk)) { - sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); + if (sock_owned_by_user(sk)) { + /* Try again later. 
*/ + timeout = HZ / 20; goto out; } timeout = mptcp_adjust_add_addr_timeout(msk); - if (!timeout) + if (!timeout || mptcp_pm_should_add_signal_addr(msk)) goto out; spin_lock_bh(&msk->pm.lock); @@ -359,8 +372,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer) } if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) - sk_reset_timer(sk, timer, - jiffies + (timeout << entry->retrans_times)); + timeout <<= entry->retrans_times; + else + timeout = 0; spin_unlock_bh(&msk->pm.lock); @@ -368,7 +382,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer) mptcp_pm_subflow_established(msk); out: - __sock_put(sk); + if (timeout) + sk_reset_timer(sk, timer, jiffies + timeout); + else + /* if sock_put calls sk_free: avoid waiting for this timer */ + entry->timer_done = true; + bh_unlock_sock(sk); + sock_put(sk); } struct mptcp_pm_add_entry * @@ -431,6 +451,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); reset_timer: + add_entry->timer_done = false; timeout = mptcp_adjust_add_addr_timeout(msk); if (timeout) sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout); @@ -451,7 +472,8 @@ static void mptcp_pm_free_anno_list(struct mptcp_sock *msk) spin_unlock_bh(&msk->pm.lock); list_for_each_entry_safe(entry, tmp, &free_list, list) { - sk_stop_timer_sync(sk, &entry->add_timer); + if (!entry->timer_done) + sk_stop_timer_sync(sk, &entry->add_timer); kfree_rcu(entry, rcu); } }
diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index c9f1e5a..fc818b6 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c
@@ -347,6 +347,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* check first for announce */ if (msk->pm.add_addr_signaled < endp_signal_max) { + u8 endp_id; + /* due to racing events on both ends we can reach here while * previous add address is still running: if we invoke now * mptcp_pm_announce_addr(), that will fail and the @@ -360,19 +362,20 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (!select_signal_address(pernet, msk, &local)) goto subflow; + /* Special case for ID0: set the correct ID */ + endp_id = local.addr.id; + if (endp_id == msk->mpc_endpoint_id) + local.addr.id = 0; + /* If the alloc fails, we are on memory pressure, not worth * continuing, and trying to create subflows. */ if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) return; - __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); + __clear_bit(endp_id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; - /* Special case for ID0: set the correct ID */ - if (local.addr.id == msk->mpc_endpoint_id) - local.addr.id = 0; - mptcp_pm_announce_addr(msk, &local.addr, false); mptcp_pm_addr_send_ack(msk);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 0efe40b..1cf608e 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c
@@ -812,6 +812,10 @@ static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level, if (ret) break; } + + if (!ret) + sockopt_seq_inc(msk); + return ret; }
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e2cb9d2..d562e14 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c
@@ -581,7 +581,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup); if (!subflow_thmac_valid(subflow)) { - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKMAC); subflow->reset_reason = MPTCP_RST_EMPTCP; goto do_reset; } @@ -908,7 +908,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, if (!subflow_hmac_valid(subflow_req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); - subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); + subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP); goto dispose_child; }
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 2082bfb..9ea6b4f 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -267,27 +267,20 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) hash_key2 = hash_key; use2 = false; } + conn_tab_lock(t, cp, hash_key, hash_key2, use2, true /* new_hash */, &head, &head2); - spin_lock(&cp->lock); - if (!(cp->flags & IP_VS_CONN_F_HASHED)) { - cp->flags |= IP_VS_CONN_F_HASHED; - WRITE_ONCE(cp->hn0.hash_key, hash_key); - WRITE_ONCE(cp->hn1.hash_key, hash_key2); - refcount_inc(&cp->refcnt); - hlist_bl_add_head_rcu(&cp->hn0.node, head); - if (use2) - hlist_bl_add_head_rcu(&cp->hn1.node, head2); - ret = 1; - } else { - pr_err("%s(): request for already hashed, called from %pS\n", - __func__, __builtin_return_address(0)); - ret = 0; - } + cp->flags |= IP_VS_CONN_F_HASHED; + WRITE_ONCE(cp->hn0.hash_key, hash_key); + WRITE_ONCE(cp->hn1.hash_key, hash_key2); + refcount_inc(&cp->refcnt); + hlist_bl_add_head_rcu(&cp->hn0.node, head); + if (use2) + hlist_bl_add_head_rcu(&cp->hn1.node, head2); - spin_unlock(&cp->lock); conn_tab_unlock(head, head2); + ret = 1; /* Schedule resizing if load increases */ if (atomic_read(&ipvs->conn_count) > t->u_thresh && @@ -321,7 +314,6 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) conn_tab_lock(t, cp, hash_key, hash_key2, use2, false /* new_hash */, &head, &head2); - spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { /* Decrease refcnt and unlink conn only if we are last user */ @@ -334,7 +326,6 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) } } - spin_unlock(&cp->lock); conn_tab_unlock(head, head2); rcu_read_unlock(); @@ -637,6 +628,7 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) struct ip_vs_conn_hnode *hn; u32 hash_key, hash_key_new; struct ip_vs_conn_param p; + bool by_me = false; int ntbl; int dir; @@ -664,8 +656,16 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) t = rcu_dereference(t->new_tbl); ntbl++; /* We are lost? 
*/ - if (ntbl >= 2) + if (ntbl >= 2) { + spin_lock_bh(&cp->lock); + if (cp->flags & IP_VS_CONN_F_NO_CPORT && by_me) + cp->cport = 0; + /* hn1 will be rehashed on next packet */ + spin_unlock_bh(&cp->lock); + IP_VS_ERR_RL("%s(): Too many ht changes for dir %d\n", + __func__, dir); return; + } } /* Rehashing during resize? Use the recent table for adds */ @@ -683,10 +683,13 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) if (head > head2 && t == t2) swap(head, head2); + /* Protect the cp->flags modification */ + spin_lock_bh(&cp->lock); + /* Lock seqcount only for the old bucket, even if we are on new table * because it affects the del operation, not the adding. */ - spin_lock_bh(&t->lock[hash_key & t->lock_mask].l); + spin_lock(&t->lock[hash_key & t->lock_mask].l); preempt_disable_nested(); write_seqcount_begin(&t->seqc[hash_key & t->seqc_mask]); @@ -704,14 +707,23 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) hlist_bl_unlock(head); write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]); preempt_enable_nested(); - spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l); + spin_unlock(&t->lock[hash_key & t->lock_mask].l); + spin_unlock_bh(&cp->lock); hash_key = hash_key_new; goto retry; } - spin_lock(&cp->lock); - if ((cp->flags & IP_VS_CONN_F_NO_CPORT) && - (cp->flags & IP_VS_CONN_F_HASHED)) { + /* Fill cport once, even if multiple packets try to do it */ + if (cp->flags & IP_VS_CONN_F_NO_CPORT && (!cp->cport || by_me)) { + /* If we race with resizing make sure cport is set for dir 1 */ + if (!cp->cport) { + cp->cport = cport; + by_me = true; + } + if (!dir) { + atomic_dec(&ipvs->no_cport_conns[af_id]); + cp->flags &= ~IP_VS_CONN_F_NO_CPORT; + } /* We do not recalc hash_key_r under lock, we assume the * parameters in cp do not change, i.e. cport is * the only possible change. 
@@ -726,21 +738,17 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) hlist_bl_del_rcu(&hn->node); hlist_bl_add_head_rcu(&hn->node, head_new); } - if (!dir) { - atomic_dec(&ipvs->no_cport_conns[af_id]); - cp->flags &= ~IP_VS_CONN_F_NO_CPORT; - cp->cport = cport; - } } - spin_unlock(&cp->lock); if (head != head2) hlist_bl_unlock(head2); hlist_bl_unlock(head); write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]); preempt_enable_nested(); - spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l); - if (dir--) + spin_unlock(&t->lock[hash_key & t->lock_mask].l); + + spin_unlock_bh(&cp->lock); + if (dir-- && by_me) goto next_dir; } @@ -1835,7 +1843,7 @@ static void ip_vs_conn_flush(struct netns_ipvs *ipvs) if (!rcu_dereference_protected(ipvs->conn_tab, 1)) return; - cancel_delayed_work_sync(&ipvs->conn_resize_work); + disable_delayed_work_sync(&ipvs->conn_resize_work); if (!atomic_read(&ipvs->conn_count)) goto unreg;
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index f5b7a20..d40b404 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -237,7 +237,7 @@ int ip_vs_rht_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, int n, { if (!t) return 1 << min_bits; - n = roundup_pow_of_two(n); + n = n > 0 ? roundup_pow_of_two(n) : 1; if (lfactor < 0) { int factor = min(-lfactor, max_bits);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 6632daa..c7c7f6a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -261,12 +261,28 @@ static void est_reload_work_handler(struct work_struct *work) if (!kd) continue; /* New config ? Stop kthread tasks */ - if (genid != genid_done) - ip_vs_est_kthread_stop(kd); + if (genid != genid_done) { + if (!id) { + /* Only we can stop kt 0 but not under mutex */ + mutex_unlock(&ipvs->est_mutex); + ip_vs_est_kthread_stop(kd); + mutex_lock(&ipvs->est_mutex); + if (!READ_ONCE(ipvs->enable)) + goto unlock; + /* kd for kt 0 is never destroyed */ + } else { + ip_vs_est_kthread_stop(kd); + } + } if (!kd->task && !ip_vs_est_stopped(ipvs)) { + bool start; + /* Do not start kthreads above 0 in calc phase */ - if ((!id || !ipvs->est_calc_phase) && - ip_vs_est_kthread_start(ipvs, kd) < 0) + if (id) + start = !ipvs->est_calc_phase; + else + start = kd->needed; + if (start && ip_vs_est_kthread_start(ipvs, kd) < 0) repeat = true; } } @@ -1102,6 +1118,24 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af, return dest; } +/* Put destination in trash */ +static void ip_vs_trash_put_dest(struct netns_ipvs *ipvs, + struct ip_vs_dest *dest, unsigned long istart, + bool cleanup) +{ + spin_lock_bh(&ipvs->dest_trash_lock); + IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), + refcount_read(&dest->refcnt)); + if (list_empty(&ipvs->dest_trash) && !cleanup) + mod_timer(&ipvs->dest_trash_timer, + jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); + /* dest lives in trash with reference */ + list_add(&dest->t_list, &ipvs->dest_trash); + dest->idle_start = istart; + spin_unlock_bh(&ipvs->dest_trash_lock); +} + static void ip_vs_dest_rcu_free(struct rcu_head *head) { struct ip_vs_dest *dest; @@ -1461,9 +1495,12 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) ntohs(dest->vport)); ret = ip_vs_start_estimator(svc->ipvs, &dest->stats); + /* On error put back dest into the trash */ if (ret < 0) - return ret; - __ip_vs_update_dest(svc, dest, udest, 1); + 
ip_vs_trash_put_dest(svc->ipvs, dest, dest->idle_start, + false); + else + __ip_vs_update_dest(svc, dest, udest, 1); } else { /* * Allocate and initialize the dest structure @@ -1533,17 +1570,7 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, */ ip_vs_rs_unhash(dest); - spin_lock_bh(&ipvs->dest_trash_lock); - IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", - IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), - refcount_read(&dest->refcnt)); - if (list_empty(&ipvs->dest_trash) && !cleanup) - mod_timer(&ipvs->dest_trash_timer, - jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); - /* dest lives in trash with reference */ - list_add(&dest->t_list, &ipvs->dest_trash); - dest->idle_start = 0; - spin_unlock_bh(&ipvs->dest_trash_lock); + ip_vs_trash_put_dest(ipvs, dest, 0, cleanup); /* Queue up delayed work to expire all no destination connections. * No-op when CONFIG_SYSCTL is disabled. @@ -1812,11 +1839,16 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, *svc_p = svc; if (!READ_ONCE(ipvs->enable)) { + mutex_lock(&ipvs->est_mutex); + /* Now there is a service - full throttle */ WRITE_ONCE(ipvs->enable, 1); + ipvs->est_max_threads = ip_vs_est_max_threads(ipvs); + /* Start estimation for first time */ - ip_vs_est_reload_start(ipvs); + ip_vs_est_reload_start(ipvs, true); + mutex_unlock(&ipvs->est_mutex); } return 0; @@ -2032,6 +2064,9 @@ static int ip_vs_del_service(struct ip_vs_service *svc) cancel_delayed_work_sync(&ipvs->svc_resize_work); if (t) { rcu_assign_pointer(ipvs->svc_table, NULL); + /* Inform readers that table is removed */ + smp_mb__before_atomic(); + atomic_inc(&ipvs->svc_table_changes); while (1) { p = rcu_dereference_protected(t->new_tbl, 1); call_rcu(&t->rcu_head, ip_vs_rht_rcu_free); @@ -2078,6 +2113,9 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) t = rcu_dereference_protected(ipvs->svc_table, 1); if (t) { rcu_assign_pointer(ipvs->svc_table, NULL); + /* 
Inform readers that table is removed */ + smp_mb__before_atomic(); + atomic_inc(&ipvs->svc_table_changes); while (1) { p = rcu_dereference_protected(t->new_tbl, 1); call_rcu(&t->rcu_head, ip_vs_rht_rcu_free); @@ -2086,6 +2124,11 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) t = p; } } + /* Stop the tot_stats estimator early under service_mutex + * to avoid locking it again later. + */ + if (cleanup) + ip_vs_stop_estimator_tot_stats(ipvs); return 0; } @@ -2331,7 +2374,7 @@ static int ipvs_proc_est_cpumask_set(const struct ctl_table *table, /* est_max_threads may depend on cpulist size */ ipvs->est_max_threads = ip_vs_est_max_threads(ipvs); ipvs->est_calc_phase = 1; - ip_vs_est_reload_start(ipvs); + ip_vs_est_reload_start(ipvs, true); unlock: mutex_unlock(&ipvs->est_mutex); @@ -2351,11 +2394,14 @@ static int ipvs_proc_est_cpumask_get(const struct ctl_table *table, mutex_lock(&ipvs->est_mutex); - if (ipvs->est_cpulist_valid) - mask = *valp; - else - mask = (struct cpumask *)housekeeping_cpumask(HK_TYPE_KTHREAD); - ret = scnprintf(buffer, size, "%*pbl\n", cpumask_pr_args(mask)); + /* HK_TYPE_KTHREAD cpumask needs RCU protection */ + scoped_guard(rcu) { + if (ipvs->est_cpulist_valid) + mask = *valp; + else + mask = (struct cpumask *)housekeeping_cpumask(HK_TYPE_KTHREAD); + ret = scnprintf(buffer, size, "%*pbl\n", cpumask_pr_args(mask)); + } mutex_unlock(&ipvs->est_mutex); @@ -2411,7 +2457,7 @@ static int ipvs_proc_est_nice(const struct ctl_table *table, int write, mutex_lock(&ipvs->est_mutex); if (*valp != val) { *valp = val; - ip_vs_est_reload_start(ipvs); + ip_vs_est_reload_start(ipvs, true); } mutex_unlock(&ipvs->est_mutex); } @@ -2438,7 +2484,7 @@ static int ipvs_proc_run_estimation(const struct ctl_table *table, int write, mutex_lock(&ipvs->est_mutex); if (*valp != val) { *valp = val; - ip_vs_est_reload_start(ipvs); + ip_vs_est_reload_start(ipvs, true); } mutex_unlock(&ipvs->est_mutex); } @@ -2463,7 +2509,7 @@ static int 
ipvs_proc_conn_lfactor(const struct ctl_table *table, int write, if (val < -8 || val > 8) { ret = -EINVAL; } else { - *valp = val; + WRITE_ONCE(*valp, val); if (rcu_access_pointer(ipvs->conn_tab)) mod_delayed_work(system_unbound_wq, &ipvs->conn_resize_work, 0); @@ -2490,10 +2536,16 @@ static int ipvs_proc_svc_lfactor(const struct ctl_table *table, int write, if (val < -8 || val > 8) { ret = -EINVAL; } else { - *valp = val; - if (rcu_access_pointer(ipvs->svc_table)) + mutex_lock(&ipvs->service_mutex); + WRITE_ONCE(*valp, val); + /* Make sure the services are present */ + if (rcu_access_pointer(ipvs->svc_table) && + READ_ONCE(ipvs->enable) && + !test_bit(IP_VS_WORK_SVC_NORESIZE, + &ipvs->work_flags)) mod_delayed_work(system_unbound_wq, &ipvs->svc_resize_work, 0); + mutex_unlock(&ipvs->service_mutex); } } return ret; @@ -3004,7 +3056,8 @@ static int ip_vs_status_show(struct seq_file *seq, void *v) int old_gen, new_gen; u32 counts[8]; u32 bucket; - int count; + u32 count; + int loops; u32 sum1; u32 sum; int i; @@ -3020,6 +3073,7 @@ static int ip_vs_status_show(struct seq_file *seq, void *v) if (!atomic_read(&ipvs->conn_count)) goto after_conns; old_gen = atomic_read(&ipvs->conn_tab_changes); + loops = 0; repeat_conn: smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */ @@ -3032,8 +3086,11 @@ static int ip_vs_status_show(struct seq_file *seq, void *v) resched_score++; ip_vs_rht_walk_bucket_rcu(t, bucket, head) { count = 0; - hlist_bl_for_each_entry_rcu(hn, e, head, node) + hlist_bl_for_each_entry_rcu(hn, e, head, node) { count++; + if (count >= ARRAY_SIZE(counts) - 1) + break; + } } resched_score += count; if (resched_score >= 100) { @@ -3042,37 +3099,41 @@ static int ip_vs_status_show(struct seq_file *seq, void *v) new_gen = atomic_read(&ipvs->conn_tab_changes); /* New table installed ? */ if (old_gen != new_gen) { + /* Too many changes? 
*/ + if (++loops >= 5) + goto after_conns; old_gen = new_gen; goto repeat_conn; } } - counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++; + counts[count]++; } } for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++) sum += counts[i]; sum1 = sum - counts[0]; - seq_printf(seq, "Conn buckets empty:\t%u (%lu%%)\n", - counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U)); + seq_printf(seq, "Conn buckets empty:\t%u (%llu%%)\n", + counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U))); for (i = 1; i < ARRAY_SIZE(counts); i++) { if (!counts[i]) continue; - seq_printf(seq, "Conn buckets len-%d:\t%u (%lu%%)\n", + seq_printf(seq, "Conn buckets len-%d:\t%u (%llu%%)\n", i, counts[i], - (unsigned long)counts[i] * 100 / max(sum1, 1U)); + div_u64((u64)counts[i] * 100U, max(sum1, 1U))); } after_conns: t = rcu_dereference(ipvs->svc_table); count = ip_vs_get_num_services(ipvs); - seq_printf(seq, "Services:\t%d\n", count); + seq_printf(seq, "Services:\t%u\n", count); seq_printf(seq, "Service buckets:\t%d (%d bits, lfactor %d)\n", t ? t->size : 0, t ? t->bits : 0, t ? t->lfactor : 0); if (!count) goto after_svc; old_gen = atomic_read(&ipvs->svc_table_changes); + loops = 0; repeat_svc: smp_rmb(); /* ipvs->svc_table and svc_table_changes */ @@ -3086,8 +3147,11 @@ static int ip_vs_status_show(struct seq_file *seq, void *v) ip_vs_rht_walk_bucket_rcu(t, bucket, head) { count = 0; hlist_bl_for_each_entry_rcu(svc, e, head, - s_list) + s_list) { count++; + if (count >= ARRAY_SIZE(counts) - 1) + break; + } } resched_score += count; if (resched_score >= 100) { @@ -3096,24 +3160,27 @@ static int ip_vs_status_show(struct seq_file *seq, void *v) new_gen = atomic_read(&ipvs->svc_table_changes); /* New table installed ? */ if (old_gen != new_gen) { + /* Too many changes? 
*/ + if (++loops >= 5) + goto after_svc; old_gen = new_gen; goto repeat_svc; } } - counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++; + counts[count]++; } } for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++) sum += counts[i]; sum1 = sum - counts[0]; - seq_printf(seq, "Service buckets empty:\t%u (%lu%%)\n", - counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U)); + seq_printf(seq, "Service buckets empty:\t%u (%llu%%)\n", + counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U))); for (i = 1; i < ARRAY_SIZE(counts); i++) { if (!counts[i]) continue; - seq_printf(seq, "Service buckets len-%d:\t%u (%lu%%)\n", + seq_printf(seq, "Service buckets len-%d:\t%u (%llu%%)\n", i, counts[i], - (unsigned long)counts[i] * 100 / max(sum1, 1U)); + div_u64((u64)counts[i] * 100U, max(sum1, 1U))); } after_svc: @@ -4967,7 +5034,14 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); unregister_net_sysctl_table(ipvs->sysctl_hdr); - ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s); + if (ipvs->tot_stats->s.est.ktid != -2) { + /* Not stopped yet? This happens only on netns init error and + * we even do not need to lock the service_mutex for this case. + */ + mutex_lock(&ipvs->service_mutex); + ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s); + mutex_unlock(&ipvs->service_mutex); + } if (ipvs->est_cpulist_valid) free_cpumask_var(ipvs->sysctl_est_cpulist); @@ -5039,7 +5113,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) ipvs->net->proc_net, ip_vs_stats_percpu_show, NULL)) goto err_percpu; - if (!proc_create_net_single("ip_vs_status", 0, ipvs->net->proc_net, + if (!proc_create_net_single("ip_vs_status", 0440, ipvs->net->proc_net, ip_vs_status_show, NULL)) goto err_status; #endif
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 433ba3c..ab09f51 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -68,6 +68,11 @@ and the limit of estimators per kthread - est_add_ktid: ktid where to add new ests, can point to empty slot where we should add kt data + - data protected by service_mutex: est_temp_list, est_add_ktid, + est_kt_count(R/W), est_kt_arr(R/W), est_genid_done, kd->needed(R/W) + - data protected by est_mutex: est_genid, est_max_threads, sysctl_est_cpulist, + est_cpulist_valid, sysctl_est_nice, est_stopped, sysctl_run_estimation, + est_kt_count(R), est_kt_arr(R), kd->needed(R), kd->task (id > 0) */ static struct lock_class_key __ipvs_est_key; @@ -227,14 +232,17 @@ static int ip_vs_estimation_kthread(void *data) } /* Schedule stop/start for kthread tasks */ -void ip_vs_est_reload_start(struct netns_ipvs *ipvs) +void ip_vs_est_reload_start(struct netns_ipvs *ipvs, bool restart) { + lockdep_assert_held(&ipvs->est_mutex); + /* Ignore reloads before first service is added */ if (!READ_ONCE(ipvs->enable)) return; ip_vs_est_stopped_recalc(ipvs); - /* Bump the kthread configuration genid */ - atomic_inc(&ipvs->est_genid); + /* Bump the kthread configuration genid if stopping is requested */ + if (restart) + atomic_inc(&ipvs->est_genid); queue_delayed_work(system_long_wq, &ipvs->est_reload_work, 0); } @@ -304,12 +312,17 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs) void *arr = NULL; int i; - if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads && - READ_ONCE(ipvs->enable) && ipvs->est_max_threads) - return -EINVAL; - mutex_lock(&ipvs->est_mutex); + /* Allow kt 0 data to be created before the services are added + * and limit the kthreads when services are present. 
+ */ + if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads && + READ_ONCE(ipvs->enable) && ipvs->est_max_threads) { + ret = -EINVAL; + goto out; + } + for (i = 0; i < id; i++) { if (!ipvs->est_kt_arr[i]) break; @@ -333,6 +346,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs) kd->est_timer = jiffies; kd->id = id; ip_vs_est_set_params(ipvs, kd); + kd->needed = 1; /* Pre-allocate stats used in calc phase */ if (!id && !kd->calc_stats) { @@ -341,12 +355,8 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs) goto out; } - /* Start kthread tasks only when services are present */ - if (READ_ONCE(ipvs->enable) && !ip_vs_est_stopped(ipvs)) { - ret = ip_vs_est_kthread_start(ipvs, kd); - if (ret < 0) - goto out; - } + /* Request kthread to be started */ + ip_vs_est_reload_start(ipvs, false); if (arr) ipvs->est_kt_count++; @@ -482,12 +492,11 @@ static int ip_vs_enqueue_estimator(struct netns_ipvs *ipvs, /* Start estimation for stats */ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) { + struct ip_vs_est_kt_data *kd = ipvs->est_kt_count > 0 ? + ipvs->est_kt_arr[0] : NULL; struct ip_vs_estimator *est = &stats->est; int ret; - if (!ipvs->est_max_threads && READ_ONCE(ipvs->enable)) - ipvs->est_max_threads = ip_vs_est_max_threads(ipvs); - est->ktid = -1; est->ktrow = IPVS_EST_NTICKS - 1; /* Initial delay */ @@ -496,8 +505,15 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) * will not allocate much memory, just for kt 0. 
*/ ret = 0; - if (!ipvs->est_kt_count || !ipvs->est_kt_arr[0]) + if (!kd) { ret = ip_vs_est_add_kthread(ipvs); + } else if (!kd->needed) { + mutex_lock(&ipvs->est_mutex); + /* We have job for the kt 0 task */ + kd->needed = 1; + ip_vs_est_reload_start(ipvs, true); + mutex_unlock(&ipvs->est_mutex); + } if (ret >= 0) hlist_add_head(&est->list, &ipvs->est_temp_list); else @@ -578,16 +594,14 @@ void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) } end_kt0: - /* kt 0 is freed after all other kthreads and chains are empty */ + /* kt 0 task is stopped after all other kt slots and chains are empty */ if (ipvs->est_kt_count == 1 && hlist_empty(&ipvs->est_temp_list)) { kd = ipvs->est_kt_arr[0]; - if (!kd || !kd->est_count) { + if (kd && !kd->est_count) { mutex_lock(&ipvs->est_mutex); - if (kd) { - ip_vs_est_kthread_destroy(kd); - ipvs->est_kt_arr[0] = NULL; - } - ipvs->est_kt_count--; + /* Keep the kt0 data but request kthread_stop */ + kd->needed = 0; + ip_vs_est_reload_start(ipvs, true); mutex_unlock(&ipvs->est_mutex); ipvs->est_add_ktid = 0; } @@ -647,9 +661,9 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max) u64 val; INIT_HLIST_HEAD(&chain); - mutex_lock(&ipvs->service_mutex); + mutex_lock(&ipvs->est_mutex); kd = ipvs->est_kt_arr[0]; - mutex_unlock(&ipvs->service_mutex); + mutex_unlock(&ipvs->est_mutex); s = kd ? 
kd->calc_stats : NULL; if (!s) goto out; @@ -748,16 +762,16 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs) if (!ip_vs_est_calc_limits(ipvs, &chain_max)) return; - mutex_lock(&ipvs->service_mutex); - /* Stop all other tasks, so that we can immediately move the * estimators to est_temp_list without RCU grace period */ mutex_lock(&ipvs->est_mutex); for (id = 1; id < ipvs->est_kt_count; id++) { /* netns clean up started, abort */ - if (!READ_ONCE(ipvs->enable)) - goto unlock2; + if (kthread_should_stop() || !READ_ONCE(ipvs->enable)) { + mutex_unlock(&ipvs->est_mutex); + return; + } kd = ipvs->est_kt_arr[id]; if (!kd) continue; @@ -765,9 +779,11 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs) } mutex_unlock(&ipvs->est_mutex); + mutex_lock(&ipvs->service_mutex); + /* Move all estimators to est_temp_list but carefully, * all estimators and kthread data can be released while - * we reschedule. Even for kthread 0. + * we reschedule. */ step = 0; @@ -849,9 +865,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs) ip_vs_stop_estimator(ipvs, stats); /* Tasks are stopped, move without RCU grace period */ est->ktid = -1; - est->ktrow = row - kd->est_row; - if (est->ktrow < 0) - est->ktrow += IPVS_EST_NTICKS; + est->ktrow = delay; hlist_add_head(&est->list, &ipvs->est_temp_list); /* kd freed ? */ if (last) @@ -889,7 +903,6 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs) if (genid == atomic_read(&ipvs->est_genid)) ipvs->est_calc_phase = 0; -unlock2: mutex_unlock(&ipvs->est_mutex); unlock:
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index e348fb9..3b0a70e1 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c
@@ -13,22 +13,6 @@ #include <net/netfilter/nf_tables_offload.h> #include <net/netfilter/nf_dup_netdev.h> -#define NF_RECURSION_LIMIT 2 - -#ifndef CONFIG_PREEMPT_RT -static u8 *nf_get_nf_dup_skb_recursion(void) -{ - return this_cpu_ptr(&softnet_data.xmit.nf_dup_skb_recursion); -} -#else - -static u8 *nf_get_nf_dup_skb_recursion(void) -{ - return ¤t->net_xmit.nf_dup_skb_recursion; -} - -#endif - static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev, enum nf_dev_hooks hook) {
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 2c4140e..785d8c2 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c
@@ -122,6 +122,7 @@ static int flow_offload_fill_route(struct flow_offload *flow, flow_tuple->tun = route->tuple[dir].in.tun; flow_tuple->encap_num = route->tuple[dir].in.num_encaps; + flow_tuple->needs_gso_segment = route->tuple[dir].out.needs_gso_segment; flow_tuple->tun_num = route->tuple[dir].in.num_tuns; switch (route->tuple[dir].xmit_type) {
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index fd56d66..9c05a50 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c
@@ -445,13 +445,13 @@ static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx, switch (skb->protocol) { case htons(ETH_P_8021Q): vlan_hdr = (struct vlan_hdr *)skb->data; - __skb_pull(skb, VLAN_HLEN); + skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vlan_hdr); skb_reset_network_header(skb); break; case htons(ETH_P_PPP_SES): skb->protocol = __nf_flow_pppoe_proto(skb); - skb_pull(skb, PPPOE_SES_HLEN); + skb_pull_rcsum(skb, PPPOE_SES_HLEN); skb_reset_network_header(skb); break; } @@ -462,23 +462,6 @@ static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx, nf_flow_ip_tunnel_pop(ctx, skb); } -struct nf_flow_xmit { - const void *dest; - const void *source; - struct net_device *outdev; -}; - -static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, - struct nf_flow_xmit *xmit) -{ - skb->dev = xmit->outdev; - dev_hard_header(skb, skb->dev, ntohs(skb->protocol), - xmit->dest, xmit->source, skb->len); - dev_queue_xmit(skb); - - return NF_STOLEN; -} - static struct flow_offload_tuple_rhash * nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx, struct nf_flowtable *flow_table, struct sk_buff *skb) @@ -524,7 +507,7 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx, return 0; } - if (skb_try_make_writable(skb, thoff + ctx->hdrsize)) + if (skb_ensure_writable(skb, thoff + ctx->hdrsize)) return -1; flow_offload_refresh(flow_table, flow, false); @@ -544,7 +527,34 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx, return 1; } -static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id) +/* Similar to skb_vlan_push. 
*/ +static int nf_flow_vlan_push(struct sk_buff *skb, __be16 proto, u16 id, + u32 needed_headroom) +{ + if (skb_vlan_tag_present(skb)) { + struct vlan_hdr *vhdr; + + if (skb_cow_head(skb, needed_headroom + VLAN_HLEN)) + return -1; + + __skb_push(skb, VLAN_HLEN); + if (skb_mac_header_was_set(skb)) + skb->mac_header -= VLAN_HLEN; + + vhdr = (struct vlan_hdr *)skb->data; + skb->network_header -= VLAN_HLEN; + vhdr->h_vlan_TCI = htons(skb_vlan_tag_get(skb)); + vhdr->h_vlan_encapsulated_proto = skb->protocol; + skb->protocol = skb->vlan_proto; + skb_postpush_rcsum(skb, skb->data, VLAN_HLEN); + } + __vlan_hwaccel_put_tag(skb, proto, id); + + return 0; +} + +static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id, + u32 needed_headroom) { int data_len = skb->len + sizeof(__be16); struct ppp_hdr { @@ -553,7 +563,7 @@ static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id) } *ph; __be16 proto; - if (skb_cow_head(skb, PPPOE_SES_HLEN)) + if (skb_cow_head(skb, needed_headroom + PPPOE_SES_HLEN)) return -1; switch (skb->protocol) { @@ -730,21 +740,24 @@ static int nf_flow_tunnel_v6_push(struct net *net, struct sk_buff *skb, } static int nf_flow_encap_push(struct sk_buff *skb, - struct flow_offload_tuple *tuple) + struct flow_offload_tuple *tuple, + struct net_device *outdev) { + u32 needed_headroom = LL_RESERVED_SPACE(outdev); int i; - for (i = 0; i < tuple->encap_num; i++) { + for (i = tuple->encap_num - 1; i >= 0; i--) { switch (tuple->encap[i].proto) { case htons(ETH_P_8021Q): case htons(ETH_P_8021AD): - skb_reset_mac_header(skb); - if (skb_vlan_push(skb, tuple->encap[i].proto, - tuple->encap[i].id) < 0) + if (nf_flow_vlan_push(skb, tuple->encap[i].proto, + tuple->encap[i].id, + needed_headroom) < 0) return -1; break; case htons(ETH_P_PPP_SES): - if (nf_flow_pppoe_push(skb, tuple->encap[i].id) < 0) + if (nf_flow_pppoe_push(skb, tuple->encap[i].id, + needed_headroom) < 0) return -1; break; } @@ -753,6 +766,76 @@ static int nf_flow_encap_push(struct sk_buff *skb, return 0; 
} +struct nf_flow_xmit { + const void *dest; + const void *source; + struct net_device *outdev; + struct flow_offload_tuple *tuple; + bool needs_gso_segment; +}; + +static void __nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, + struct nf_flow_xmit *xmit) +{ + struct net_device *dev = xmit->outdev; + unsigned int hh_len = LL_RESERVED_SPACE(dev); + + if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { + skb = skb_expand_head(skb, hh_len); + if (!skb) + return; + } + + skb->dev = dev; + dev_hard_header(skb, dev, ntohs(skb->protocol), + xmit->dest, xmit->source, skb->len); + dev_queue_xmit(skb); +} + +static unsigned int nf_flow_encap_gso_xmit(struct net *net, struct sk_buff *skb, + struct nf_flow_xmit *xmit) +{ + struct sk_buff *segs, *nskb; + + segs = skb_gso_segment(skb, 0); + if (IS_ERR(segs)) + return NF_DROP; + + if (segs) + consume_skb(skb); + else + segs = skb; + + skb_list_walk_safe(segs, segs, nskb) { + skb_mark_not_on_list(segs); + + if (nf_flow_encap_push(segs, xmit->tuple, xmit->outdev) < 0) { + kfree_skb(segs); + kfree_skb_list(nskb); + return NF_STOLEN; + } + __nf_flow_queue_xmit(net, segs, xmit); + } + + return NF_STOLEN; +} + +static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, + struct nf_flow_xmit *xmit) +{ + if (xmit->tuple->encap_num) { + if (skb_is_gso(skb) && xmit->needs_gso_segment) + return nf_flow_encap_gso_xmit(net, skb, xmit); + + if (nf_flow_encap_push(skb, xmit->tuple, xmit->outdev) < 0) + return NF_DROP; + } + + __nf_flow_queue_xmit(net, skb, xmit); + + return NF_STOLEN; +} + unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -797,9 +880,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (nf_flow_tunnel_v4_push(state->net, skb, other_tuple, &ip_daddr) < 0) return NF_DROP; - if (nf_flow_encap_push(skb, other_tuple) < 0) - return NF_DROP; - switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: rt = 
dst_rtable(tuplehash->tuple.dst_cache); @@ -829,6 +909,8 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, WARN_ON_ONCE(1); return NF_DROP; } + xmit.tuple = other_tuple; + xmit.needs_gso_segment = tuplehash->tuple.needs_gso_segment; return nf_flow_queue_xmit(state->net, skb, &xmit); } @@ -1037,7 +1119,7 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx, return 0; } - if (skb_try_make_writable(skb, thoff + ctx->hdrsize)) + if (skb_ensure_writable(skb, thoff + ctx->hdrsize)) return -1; flow_offload_refresh(flow_table, flow, false); @@ -1119,9 +1201,6 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, &ip6_daddr, encap_limit) < 0) return NF_DROP; - if (nf_flow_encap_push(skb, other_tuple) < 0) - return NF_DROP; - switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: rt = dst_rt6_info(tuplehash->tuple.dst_cache); @@ -1151,6 +1230,8 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, WARN_ON_ONCE(1); return NF_DROP; } + xmit.tuple = other_tuple; + xmit.needs_gso_segment = tuplehash->tuple.needs_gso_segment; return nf_flow_queue_xmit(state->net, skb, &xmit); }
diff --git a/net/netfilter/nf_flow_table_path.c b/net/netfilter/nf_flow_table_path.c index 6bb9579..9e88ea6 100644 --- a/net/netfilter/nf_flow_table_path.c +++ b/net/netfilter/nf_flow_table_path.c
@@ -86,6 +86,7 @@ struct nft_forward_info { u8 ingress_vlans; u8 h_source[ETH_ALEN]; u8 h_dest[ETH_ALEN]; + bool needs_gso_segment; enum flow_offload_xmit_type xmit_type; }; @@ -138,8 +139,11 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack, path->encap.proto; info->num_encaps++; } - if (path->type == DEV_PATH_PPPOE) + if (path->type == DEV_PATH_PPPOE) { memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN); + info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT; + info->needs_gso_segment = 1; + } break; case DEV_PATH_BRIDGE: if (is_zero_ether_addr(info->h_source)) @@ -279,6 +283,7 @@ static void nft_dev_forward_path(const struct nft_pktinfo *pkt, memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN); route->tuple[dir].xmit_type = info.xmit_type; } + route->tuple[dir].out.needs_gso_segment = info.needs_gso_segment; } int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d20ce5c..87387ad 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c
@@ -407,6 +407,7 @@ static void nft_netdev_unregister_trans_hook(struct net *net, } static void nft_netdev_unregister_hooks(struct net *net, + const struct nft_table *table, struct list_head *hook_list, bool release_netdev) { @@ -414,8 +415,10 @@ static void nft_netdev_unregister_hooks(struct net *net, struct nf_hook_ops *ops; list_for_each_entry_safe(hook, next, hook_list, list) { - list_for_each_entry(ops, &hook->ops_list, list) - nf_unregister_net_hook(net, ops); + if (!(table->flags & NFT_TABLE_F_DORMANT)) { + list_for_each_entry(ops, &hook->ops_list, list) + nf_unregister_net_hook(net, ops); + } if (release_netdev) nft_netdev_hook_unlink_free_rcu(hook); } @@ -452,20 +455,25 @@ static void __nf_tables_unregister_hook(struct net *net, struct nft_base_chain *basechain; const struct nf_hook_ops *ops; - if (table->flags & NFT_TABLE_F_DORMANT || - !nft_is_base_chain(chain)) + if (!nft_is_base_chain(chain)) return; basechain = nft_base_chain(chain); ops = &basechain->ops; + /* must also be called for dormant tables */ + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) { + nft_netdev_unregister_hooks(net, table, &basechain->hook_list, + release_netdev); + return; + } + + if (table->flags & NFT_TABLE_F_DORMANT) + return; + if (basechain->type->ops_unregister) return basechain->type->ops_unregister(net, ops); - if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) - nft_netdev_unregister_hooks(net, &basechain->hook_list, - release_netdev); - else - nf_unregister_net_hook(net, &basechain->ops); + nf_unregister_net_hook(net, &basechain->ops); } static void nf_tables_unregister_hook(struct net *net, @@ -4205,6 +4213,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) struct nft_chain *chain; struct nft_ctx ctx = { .net = net, + .table = (struct nft_table *)table, .family = table->family, }; int err = 0; @@ -11281,11 +11290,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; 
case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - if (!(table->flags & NFT_TABLE_F_DORMANT)) { - nft_netdev_unregister_hooks(net, - &nft_trans_chain_hooks(trans), - true); - } + nft_netdev_unregister_hooks(net, table, + &nft_trans_chain_hooks(trans), + true); free_percpu(nft_trans_chain_stats(trans)); kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 5ddd5b6..8ab186f 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c
@@ -153,7 +153,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) ptr = skb_network_header(skb) + pkt->nhoff; else { - if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) return false; ptr = skb->data + nft_thoff(pkt); }
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index decc725..0caa930 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c
@@ -261,10 +261,10 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return ret; } - nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); - nft_compat_wait_for_destructors(ctx->net); + nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); + ret = xt_check_target(&par, size, proto, inv); if (ret < 0) { if (ret == -ENOENT) { @@ -353,8 +353,6 @@ static int nft_target_dump(struct sk_buff *skb, static int nft_target_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { - struct xt_target *target = expr->ops->data; - unsigned int hook_mask = 0; int ret; if (ctx->family != NFPROTO_IPV4 && @@ -377,11 +375,21 @@ static int nft_target_validate(const struct nft_ctx *ctx, const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); const struct nf_hook_ops *ops = &basechain->ops; + unsigned int hook_mask = 1 << ops->hooknum; + struct xt_target *target = expr->ops->data; + void *info = nft_expr_priv(expr); + struct xt_tgchk_param par; + union nft_entry e = {}; - hook_mask = 1 << ops->hooknum; if (target->hooks && !(hook_mask & target->hooks)) return -EINVAL; + nft_target_set_tgchk_param(&par, ctx, target, info, &e, 0, false); + + ret = xt_check_hooks_target(&par); + if (ret < 0) + return ret; + ret = nft_compat_chain_validate_dependency(ctx, target->table); if (ret < 0) return ret; @@ -515,10 +523,10 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return ret; } - nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); - nft_compat_wait_for_destructors(ctx->net); + nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); + return xt_check_match(&par, size, proto, inv); } @@ -614,8 +622,6 @@ static int nft_match_large_dump(struct sk_buff *skb, static int nft_match_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { - struct xt_match *match = expr->ops->data; - unsigned int hook_mask = 0; int ret; if (ctx->family != NFPROTO_IPV4 && @@ -638,11 
+644,30 @@ static int nft_match_validate(const struct nft_ctx *ctx, const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); const struct nf_hook_ops *ops = &basechain->ops; + unsigned int hook_mask = 1 << ops->hooknum; + struct xt_match *match = expr->ops->data; + size_t size = XT_ALIGN(match->matchsize); + struct xt_mtchk_param par; + union nft_entry e = {}; + void *info; - hook_mask = 1 << ops->hooknum; if (match->hooks && !(hook_mask & match->hooks)) return -EINVAL; + if (NFT_EXPR_SIZE(size) > NFT_MATCH_LARGE_THRESH) { + struct nft_xt_match_priv *priv = nft_expr_priv(expr); + + info = priv->info; + } else { + info = nft_expr_priv(expr); + } + + nft_match_set_mtchk_param(&par, ctx, match, info, &e, 0, false); + + ret = xt_check_hooks_match(&par); + if (ret < 0) + return ret; + ret = nft_compat_chain_validate_dependency(ctx, match->table); if (ret < 0) return ret;
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 0407d6f..e6a07c0 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c
@@ -376,7 +376,7 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr, const struct sctp_chunkhdr *sch; struct sctp_chunkhdr _sch; - if (pkt->tprot != IPPROTO_SCTP) + if (pkt->tprot != IPPROTO_SCTP || pkt->fragoff) goto err; do {
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 4bce36c..b9e88d7 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c
@@ -95,12 +95,15 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { + u8 *nf_dup_skb_recursion = nf_get_nf_dup_skb_recursion(); struct nft_fwd_neigh *priv = nft_expr_priv(expr); void *addr = ®s->data[priv->sreg_addr]; int oif = regs->data[priv->sreg_dev]; unsigned int verdict = NF_STOLEN; struct sk_buff *skb = pkt->skb; + int nhoff = skb_network_offset(skb); struct net_device *dev; + unsigned int hh_len; int neigh_table; switch (priv->nfproto) { @@ -111,7 +114,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, verdict = NFT_BREAK; goto out; } - if (skb_try_make_writable(skb, sizeof(*iph))) { + if (skb_ensure_writable(skb, nhoff + sizeof(*iph))) { verdict = NF_DROP; goto out; } @@ -132,7 +135,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, verdict = NFT_BREAK; goto out; } - if (skb_try_make_writable(skb, sizeof(*ip6h))) { + if (skb_ensure_writable(skb, nhoff + sizeof(*ip6h))) { verdict = NF_DROP; goto out; } @@ -151,13 +154,31 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, goto out; } + if (*nf_dup_skb_recursion > NF_RECURSION_LIMIT) { + verdict = NF_DROP; + goto out; + } + dev = dev_get_by_index_rcu(nft_net(pkt), oif); - if (dev == NULL) - return; + if (dev == NULL) { + verdict = NF_DROP; + goto out; + } + + hh_len = LL_RESERVED_SPACE(dev); + if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { + skb = skb_expand_head(skb, hh_len); + if (!skb) { + verdict = NF_STOLEN; + goto out; + } + } skb->dev = dev; skb_clear_tstamp(skb); + (*nf_dup_skb_recursion)++; neigh_xmit(neigh_table, dev, addr, skb); + (*nf_dup_skb_recursion)--; out: regs->verdict.code = verdict; }
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index c02d5cb..45fe56d 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c
@@ -33,7 +33,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, return; } - if (pkt->tprot != IPPROTO_TCP) { + if (pkt->tprot != IPPROTO_TCP || pkt->fragoff) { regs->verdict.code = NFT_BREAK; return; }
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index f2101af..89be443 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c
@@ -30,8 +30,8 @@ static void nft_tproxy_eval_v4(const struct nft_expr *expr, __be16 tport = 0; struct sock *sk; - if (pkt->tprot != IPPROTO_TCP && - pkt->tprot != IPPROTO_UDP) { + if ((pkt->tprot != IPPROTO_TCP && + pkt->tprot != IPPROTO_UDP) || pkt->fragoff) { regs->verdict.code = NFT_BREAK; return; } @@ -97,8 +97,8 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr, memset(&taddr, 0, sizeof(taddr)); - if (pkt->tprot != IPPROTO_TCP && - pkt->tprot != IPPROTO_UDP) { + if ((pkt->tprot != IPPROTO_TCP && + pkt->tprot != IPPROTO_UDP) || pkt->fragoff) { regs->verdict.code = NFT_BREAK; return; }
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 9f837fb..2c67c2e 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c
@@ -477,11 +477,9 @@ int xt_check_proc_name(const char *name, unsigned int size) } EXPORT_SYMBOL(xt_check_proc_name); -int xt_check_match(struct xt_mtchk_param *par, - unsigned int size, u16 proto, bool inv_proto) +static int xt_check_match_common(struct xt_mtchk_param *par, + unsigned int size, u16 proto, bool inv_proto) { - int ret; - if (XT_ALIGN(par->match->matchsize) != size && par->match->matchsize != -1) { /* @@ -530,6 +528,14 @@ int xt_check_match(struct xt_mtchk_param *par, par->match->proto); return -EINVAL; } + + return 0; +} + +static int xt_checkentry_match(struct xt_mtchk_param *par) +{ + int ret; + if (par->match->checkentry != NULL) { ret = par->match->checkentry(par); if (ret < 0) @@ -538,8 +544,34 @@ int xt_check_match(struct xt_mtchk_param *par, /* Flag up potential errors. */ return -EIO; } + return 0; } + +int xt_check_hooks_match(struct xt_mtchk_param *par) +{ + if (par->match->check_hooks != NULL) + return par->match->check_hooks(par); + + return 0; +} +EXPORT_SYMBOL_GPL(xt_check_hooks_match); + +int xt_check_match(struct xt_mtchk_param *par, + unsigned int size, u16 proto, bool inv_proto) +{ + int ret; + + ret = xt_check_match_common(par, size, proto, inv_proto); + if (ret < 0) + return ret; + + ret = xt_check_hooks_match(par); + if (ret < 0) + return ret; + + return xt_checkentry_match(par); +} EXPORT_SYMBOL_GPL(xt_check_match); /** xt_check_entry_match - check that matches end before start of target @@ -1012,11 +1044,9 @@ bool xt_find_jump_offset(const unsigned int *offsets, } EXPORT_SYMBOL(xt_find_jump_offset); -int xt_check_target(struct xt_tgchk_param *par, - unsigned int size, u16 proto, bool inv_proto) +static int xt_check_target_common(struct xt_tgchk_param *par, + unsigned int size, u16 proto, bool inv_proto) { - int ret; - if (XT_ALIGN(par->target->targetsize) != size) { pr_err_ratelimited("%s_tables: %s.%u target: invalid size %u (kernel) != (user) %u\n", xt_prefix[par->family], par->target->name, @@ -1061,6 +1091,23 @@ int 
xt_check_target(struct xt_tgchk_param *par, par->target->proto); return -EINVAL; } + + return 0; +} + +int xt_check_hooks_target(struct xt_tgchk_param *par) +{ + if (par->target->check_hooks != NULL) + return par->target->check_hooks(par); + + return 0; +} +EXPORT_SYMBOL_GPL(xt_check_hooks_target); + +static int xt_checkentry_target(struct xt_tgchk_param *par) +{ + int ret; + if (par->target->checkentry != NULL) { ret = par->target->checkentry(par); if (ret < 0) @@ -1071,6 +1118,22 @@ int xt_check_target(struct xt_tgchk_param *par, } return 0; } + +int xt_check_target(struct xt_tgchk_param *par, + unsigned int size, u16 proto, bool inv_proto) +{ + int ret; + + ret = xt_check_target_common(par, size, proto, inv_proto); + if (ret < 0) + return ret; + + ret = xt_check_hooks_target(par); + if (ret < 0) + return ret; + + return xt_checkentry_target(par); +} EXPORT_SYMBOL_GPL(xt_check_target); /**
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 498f587..d2aeacf 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c
@@ -354,7 +354,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_IPV4, .revision = 1, .targetsize = sizeof(struct xt_ct_target_info_v1), - .usersize = offsetof(struct xt_ct_target_info, ct), + .usersize = offsetof(struct xt_ct_target_info_v1, ct), .checkentry = xt_ct_tg_check_v1, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, @@ -366,7 +366,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_IPV4, .revision = 2, .targetsize = sizeof(struct xt_ct_target_info_v1), - .usersize = offsetof(struct xt_ct_target_info, ct), + .usersize = offsetof(struct xt_ct_target_info_v1, ct), .checkentry = xt_ct_tg_check_v2, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, @@ -398,7 +398,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_IPV6, .revision = 1, .targetsize = sizeof(struct xt_ct_target_info_v1), - .usersize = offsetof(struct xt_ct_target_info, ct), + .usersize = offsetof(struct xt_ct_target_info_v1, ct), .checkentry = xt_ct_tg_check_v1, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, @@ -410,7 +410,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_IPV6, .revision = 2, .targetsize = sizeof(struct xt_ct_target_info_v1), - .usersize = offsetof(struct xt_ct_target_info, ct), + .usersize = offsetof(struct xt_ct_target_info_v1, ct), .checkentry = xt_ct_tg_check_v2, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1,
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 116a885..80e1634 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c
@@ -247,6 +247,21 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par) } #endif +static int tcpmss_tg4_check_hooks(const struct xt_tgchk_param *par) +{ + const struct xt_tcpmss_info *info = par->targinfo; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU && + (par->hook_mask & ~((1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING))) != 0) { + pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); + return -EINVAL; + } + + return 0; +} + /* Must specify -p tcp --syn */ static inline bool find_syn_match(const struct xt_entry_match *m) { @@ -262,17 +277,9 @@ static inline bool find_syn_match(const struct xt_entry_match *m) static int tcpmss_tg4_check(const struct xt_tgchk_param *par) { - const struct xt_tcpmss_info *info = par->targinfo; const struct ipt_entry *e = par->entryinfo; const struct xt_entry_match *ematch; - if (info->mss == XT_TCPMSS_CLAMP_PMTU && - (par->hook_mask & ~((1 << NF_INET_FORWARD) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING))) != 0) { - pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); - return -EINVAL; - } if (par->nft_compat) return 0; @@ -286,17 +293,9 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par) #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static int tcpmss_tg6_check(const struct xt_tgchk_param *par) { - const struct xt_tcpmss_info *info = par->targinfo; const struct ip6t_entry *e = par->entryinfo; const struct xt_entry_match *ematch; - if (info->mss == XT_TCPMSS_CLAMP_PMTU && - (par->hook_mask & ~((1 << NF_INET_FORWARD) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING))) != 0) { - pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); - return -EINVAL; - } if (par->nft_compat) return 0; @@ -312,6 +311,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = { { .family = NFPROTO_IPV4, .name = "TCPMSS", + .check_hooks = 
tcpmss_tg4_check_hooks, .checkentry = tcpmss_tg4_check, .target = tcpmss_tg4, .targetsize = sizeof(struct xt_tcpmss_info), @@ -322,6 +322,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = { { .family = NFPROTO_IPV6, .name = "TCPMSS", + .check_hooks = tcpmss_tg4_check_hooks, .checkentry = tcpmss_tg6_check, .target = tcpmss_tg6, .targetsize = sizeof(struct xt_tcpmss_info),
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index e4bea1d..5f60e72 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c
@@ -86,6 +86,9 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info *tgi = par->targinfo; + if (par->fragoff) + return NF_DROP; + return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); } @@ -95,6 +98,9 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; + if (par->fragoff) + return NF_DROP; + return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); } @@ -106,6 +112,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; + unsigned short fragoff = 0; struct udphdr _hdr, *hp; struct sock *sk; const struct in6_addr *laddr; @@ -113,8 +120,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) int thoff = 0; int tproto; - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); - if (tproto < 0) + tproto = ipv6_find_hdr(skb, &thoff, -1, &fragoff, NULL); + if (tproto < 0 || fragoff) return NF_DROP; hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index a770889..913dbe3 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c
@@ -153,14 +153,10 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) return ret; } -static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) +static int addrtype_mt_check_hooks(const struct xt_mtchk_param *par) { - const char *errmsg = "both incoming and outgoing interface limitation cannot be selected"; struct xt_addrtype_info_v1 *info = par->matchinfo; - - if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN && - info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) - goto err; + const char *errmsg; if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && @@ -176,6 +172,21 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) goto err; } + return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + return -EINVAL; +} + +static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) +{ + const char *errmsg = "both incoming and outgoing interface limitation cannot be selected"; + struct xt_addrtype_info_v1 *info = par->matchinfo; + + if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN && + info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) + goto err; + #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) if (par->family == NFPROTO_IPV6) { if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) { @@ -211,6 +222,7 @@ static struct xt_match addrtype_mt_reg[] __read_mostly = { .family = NFPROTO_IPV4, .revision = 1, .match = addrtype_mt_v1, + .check_hooks = addrtype_mt_check_hooks, .checkentry = addrtype_mt_checkentry_v1, .matchsize = sizeof(struct xt_addrtype_info_v1), .me = THIS_MODULE @@ -221,6 +233,7 @@ static struct xt_match addrtype_mt_reg[] __read_mostly = { .family = NFPROTO_IPV6, .revision = 1, .match = addrtype_mt_v1, + .check_hooks = addrtype_mt_check_hooks, .checkentry = addrtype_mt_checkentry_v1, .matchsize = sizeof(struct xt_addrtype_info_v1), .me = THIS_MODULE
diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c index 9520dd0..6d1a44a 100644 --- a/net/netfilter/xt_devgroup.c +++ b/net/netfilter/xt_devgroup.c
@@ -33,14 +33,10 @@ static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) return true; } -static int devgroup_mt_checkentry(const struct xt_mtchk_param *par) +static int devgroup_mt_check_hooks(const struct xt_mtchk_param *par) { const struct xt_devgroup_info *info = par->matchinfo; - if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC | - XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST)) - return -EINVAL; - if (info->flags & XT_DEVGROUP_MATCH_SRC && par->hook_mask & ~((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | @@ -56,9 +52,21 @@ static int devgroup_mt_checkentry(const struct xt_mtchk_param *par) return 0; } +static int devgroup_mt_checkentry(const struct xt_mtchk_param *par) +{ + const struct xt_devgroup_info *info = par->matchinfo; + + if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC | + XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST)) + return -EINVAL; + + return 0; +} + static struct xt_match devgroup_mt_reg __read_mostly = { .name = "devgroup", .match = devgroup_mt, + .check_hooks = devgroup_mt_check_hooks, .checkentry = devgroup_mt_checkentry, .matchsize = sizeof(struct xt_devgroup_info), .family = NFPROTO_UNSPEC,
diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c index b96e820..a8503f5 100644 --- a/net/netfilter/xt_ecn.c +++ b/net/netfilter/xt_ecn.c
@@ -30,6 +30,10 @@ static bool match_tcp(const struct sk_buff *skb, struct xt_action_param *par) struct tcphdr _tcph; const struct tcphdr *th; + /* this is fine for IPv6 as ecn_mt_check6() enforces -p tcp */ + if (par->fragoff) + return false; + /* In practice, TCP match does this, so can't fail. But let's * be good citizens. */
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 3bd127b..2704b4b 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c
@@ -658,6 +658,8 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, if (!(hinfo->cfg.mode & (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT))) return 0; + if (ntohs(ip_hdr(skb)->frag_off) & IP_OFFSET) + return -1; nexthdr = ip_hdr(skb)->protocol; break; #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) @@ -681,7 +683,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, return 0; nexthdr = ipv6_hdr(skb)->nexthdr; protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); - if ((int)protoff < 0) + if ((int)protoff < 0 || ntohs(frag_off) & IP6_OFFSET) return -1; break; }
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index dc94858..e8807ca 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c
@@ -27,6 +27,9 @@ static bool xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) { + if (p->fragoff) + return false; + return nf_osf_match(skb, xt_family(p), xt_hooknum(p), xt_in(p), xt_out(p), p->matchinfo, xt_net(p), nf_osf_fingers); }
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index d2b0b52..dd98f75 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c
@@ -91,6 +91,21 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par) return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT)); } +static int physdev_mt_check_hooks(const struct xt_mtchk_param *par) +{ + const struct xt_physdev_info *info = par->matchinfo; + + if (info->bitmask & (XT_PHYSDEV_OP_OUT | XT_PHYSDEV_OP_ISOUT) && + (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || + info->invert & XT_PHYSDEV_OP_BRIDGED) && + par->hook_mask & (1 << NF_INET_LOCAL_OUT)) { + pr_info_ratelimited("--physdev-out and --physdev-is-out only supported in the FORWARD and POSTROUTING chains with bridged traffic\n"); + return -EINVAL; + } + + return 0; +} + static int physdev_mt_check(const struct xt_mtchk_param *par) { const struct xt_physdev_info *info = par->matchinfo; @@ -99,13 +114,6 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) return -EINVAL; - if (info->bitmask & (XT_PHYSDEV_OP_OUT | XT_PHYSDEV_OP_ISOUT) && - (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || - info->invert & XT_PHYSDEV_OP_BRIDGED) && - par->hook_mask & (1 << NF_INET_LOCAL_OUT)) { - pr_info_ratelimited("--physdev-out and --physdev-is-out only supported in the FORWARD and POSTROUTING chains with bridged traffic\n"); - return -EINVAL; - } #define X(memb) strnlen(info->memb, sizeof(info->memb)) >= sizeof(info->memb) if (info->bitmask & XT_PHYSDEV_OP_IN) { @@ -141,6 +149,7 @@ static struct xt_match physdev_mt_reg[] __read_mostly = { { .name = "physdev", .family = NFPROTO_IPV4, + .check_hooks = physdev_mt_check_hooks, .checkentry = physdev_mt_check, .match = physdev_mt, .matchsize = sizeof(struct xt_physdev_info), @@ -149,6 +158,7 @@ static struct xt_match physdev_mt_reg[] __read_mostly = { { .name = "physdev", .family = NFPROTO_IPV6, + .check_hooks = physdev_mt_check_hooks, .checkentry = physdev_mt_check, .match = physdev_mt, .matchsize = sizeof(struct xt_physdev_info),
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index b5fa655..ff54e3a 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c
@@ -126,13 +126,10 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par) return ret; } -static int policy_mt_check(const struct xt_mtchk_param *par) +static int policy_mt_check_hooks(const struct xt_mtchk_param *par) { const struct xt_policy_info *info = par->matchinfo; - const char *errmsg = "neither incoming nor outgoing policy selected"; - - if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) - goto err; + const char *errmsg; if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) { @@ -144,6 +141,21 @@ static int policy_mt_check(const struct xt_mtchk_param *par) errmsg = "input policy not valid in POSTROUTING and OUTPUT"; goto err; } + + return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + return -EINVAL; +} + +static int policy_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_policy_info *info = par->matchinfo; + const char *errmsg = "neither incoming nor outgoing policy selected"; + + if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) + goto err; + if (info->len > XT_POLICY_MAX_ELEM) { errmsg = "too many policy elements"; goto err; @@ -158,6 +170,7 @@ static struct xt_match policy_mt_reg[] __read_mostly = { { .name = "policy", .family = NFPROTO_IPV4, + .check_hooks = policy_mt_check_hooks, .checkentry = policy_mt_check, .match = policy_mt, .matchsize = sizeof(struct xt_policy_info), @@ -166,6 +179,7 @@ static struct xt_match policy_mt_reg[] __read_mostly = { { .name = "policy", .family = NFPROTO_IPV6, + .check_hooks = policy_mt_check_hooks, .checkentry = policy_mt_check, .match = policy_mt, .matchsize = sizeof(struct xt_policy_info),
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 731bc2c..4ae04bb 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c
@@ -431,6 +431,29 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) } static int +set_target_v3_check_hooks(const struct xt_tgchk_param *par) +{ + const struct xt_set_info_target_v3 *info = par->targinfo; + + if (info->map_set.index != IPSET_INVALID_ID) { + if (strncmp(par->table, "mangle", 7)) { + pr_info_ratelimited("--map-set only usable from mangle table\n"); + return -EINVAL; + } + if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) | + (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) && + (par->hook_mask & ~(1 << NF_INET_FORWARD | + 1 << NF_INET_LOCAL_OUT | + 1 << NF_INET_POST_ROUTING))) { + pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); + return -EINVAL; + } + } + + return 0; +} + +static int set_target_v3_checkentry(const struct xt_tgchk_param *par) { const struct xt_set_info_target_v3 *info = par->targinfo; @@ -459,20 +482,6 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) } if (info->map_set.index != IPSET_INVALID_ID) { - if (strncmp(par->table, "mangle", 7)) { - pr_info_ratelimited("--map-set only usable from mangle table\n"); - ret = -EINVAL; - goto cleanup_del; - } - if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) | - (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) && - (par->hook_mask & ~(1 << NF_INET_FORWARD | - 1 << NF_INET_LOCAL_OUT | - 1 << NF_INET_POST_ROUTING))) { - pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); - ret = -EINVAL; - goto cleanup_del; - } index = ip_set_nfnl_get_byindex(par->net, info->map_set.index); if (index == IPSET_INVALID_ID) { @@ -672,6 +681,7 @@ static struct xt_target set_targets[] __read_mostly = { .family = NFPROTO_IPV4, .target = set_target_v3, .targetsize = sizeof(struct xt_set_info_target_v3), + .check_hooks = set_target_v3_check_hooks, .checkentry = set_target_v3_checkentry, .destroy = set_target_v3_destroy, .me = THIS_MODULE @@ -682,6 +692,7 @@ static struct xt_target set_targets[] 
__read_mostly = { .family = NFPROTO_IPV6, .target = set_target_v3, .targetsize = sizeof(struct xt_set_info_target_v3), + .check_hooks = set_target_v3_check_hooks, .checkentry = set_target_v3_checkentry, .destroy = set_target_v3_destroy, .me = THIS_MODULE
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index 0d32d48..b9da826 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c
@@ -32,6 +32,10 @@ tcpmss_mt(const struct sk_buff *skb, struct xt_action_param *par) u8 _opt[15 * 4 - sizeof(_tcph)]; unsigned int i, optlen; + /* this is fine for IPv6 as xt_tcpmss enforces -p tcp */ + if (par->fragoff) + return false; + /* If we don't have the whole header, drop packet. */ th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); if (th == NULL)
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index b10e160..cb5ea442 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c
@@ -97,6 +97,9 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms) goto error; } + vport->dev = dev; + netdev_hold(vport->dev, &vport->dev_tracker, GFP_KERNEL); + rtnl_unlock(); return vport; error: @@ -111,7 +114,7 @@ static struct vport *geneve_create(const struct vport_parms *parms) if (IS_ERR(vport)) return vport; - return ovs_netdev_link(vport, parms->name); + return ovs_netdev_link(vport, true); } static struct vport_ops ovs_geneve_vport_ops = {
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 4014c9b..6cb5a69 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c
@@ -63,6 +63,9 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms) return ERR_PTR(err); } + vport->dev = dev; + netdev_hold(vport->dev, &vport->dev_tracker, GFP_KERNEL); + rtnl_unlock(); return vport; } @@ -75,7 +78,7 @@ static struct vport *gre_create(const struct vport_parms *parms) if (IS_ERR(vport)) return vport; - return ovs_netdev_link(vport, parms->name); + return ovs_netdev_link(vport, true); } static struct vport_ops ovs_gre_vport_ops = {
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 12055af..c4264207 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c
@@ -73,37 +73,21 @@ static struct net_device *get_dpdev(const struct datapath *dp) return local->dev; } -struct vport *ovs_netdev_link(struct vport *vport, const char *name) +struct vport *ovs_netdev_link(struct vport *vport, bool tunnel) { int err; - vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), name); - if (!vport->dev) { + if (WARN_ON_ONCE(!vport->dev)) { err = -ENODEV; goto error_free_vport; } - /* Ensure that the device exists and that the provided - * name is not one of its aliases. - */ - if (strcmp(name, ovs_vport_name(vport))) { - err = -ENODEV; - goto error_put; - } - netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL); - if (vport->dev->flags & IFF_LOOPBACK || - (vport->dev->type != ARPHRD_ETHER && - vport->dev->type != ARPHRD_NONE) || - ovs_is_internal_dev(vport->dev)) { - err = -EINVAL; - goto error_put; - } rtnl_lock(); err = netdev_master_upper_dev_link(vport->dev, get_dpdev(vport->dp), NULL, NULL, NULL); if (err) - goto error_unlock; + goto error_put_unlock; err = netdev_rx_handler_register(vport->dev, netdev_frame_hook, vport); @@ -119,10 +103,11 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) error_master_upper_dev_unlink: netdev_upper_dev_unlink(vport->dev, get_dpdev(vport->dp)); -error_unlock: - rtnl_unlock(); -error_put: +error_put_unlock: + if (tunnel && vport->dev->reg_state == NETREG_REGISTERED) + rtnl_delete_link(vport->dev, 0, NULL); netdev_put(vport->dev, &vport->dev_tracker); + rtnl_unlock(); error_free_vport: ovs_vport_free(vport); return ERR_PTR(err); @@ -132,12 +117,39 @@ EXPORT_SYMBOL_GPL(ovs_netdev_link); static struct vport *netdev_create(const struct vport_parms *parms) { struct vport *vport; + int err; vport = ovs_vport_alloc(0, &ovs_netdev_vport_ops, parms); if (IS_ERR(vport)) return vport; - return ovs_netdev_link(vport, parms->name); + vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name); + if (!vport->dev) { + err = -ENODEV; + goto error_free_vport; + } + 
netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL); + + /* Ensure that the provided name is not an alias. */ + if (strcmp(parms->name, ovs_vport_name(vport))) { + err = -ENODEV; + goto error_put; + } + + if (vport->dev->flags & IFF_LOOPBACK || + (vport->dev->type != ARPHRD_ETHER && + vport->dev->type != ARPHRD_NONE) || + ovs_is_internal_dev(vport->dev)) { + err = -EINVAL; + goto error_put; + } + + return ovs_netdev_link(vport, false); +error_put: + netdev_put(vport->dev, &vport->dev_tracker); +error_free_vport: + ovs_vport_free(vport); + return ERR_PTR(err); } static void vport_netdev_free(struct rcu_head *rcu) @@ -196,9 +208,13 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) */ if (vport->dev->reg_state == NETREG_REGISTERED) rtnl_delete_link(vport->dev, 0, NULL); - rtnl_unlock(); + /* We can't put the device reference yet, since it can still be in + * use, but rtnl_unlock()->netdev_run_todo() will block until all + * the references are released, so the RCU call must be before it. + */ call_rcu(&vport->rcu, vport_netdev_free); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy);
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index c5d83a4..6c0d736 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h
@@ -13,7 +13,7 @@ struct vport *ovs_netdev_get_vport(struct net_device *dev); -struct vport *ovs_netdev_link(struct vport *vport, const char *name); +struct vport *ovs_netdev_link(struct vport *vport, bool tunnel); void ovs_netdev_detach_dev(struct vport *); int __init ovs_netdev_init(void);
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 0b881b0..c1b37b5 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c
@@ -126,6 +126,9 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) goto error; } + vport->dev = dev; + netdev_hold(vport->dev, &vport->dev_tracker, GFP_KERNEL); + rtnl_unlock(); return vport; error: @@ -140,7 +143,7 @@ static struct vport *vxlan_create(const struct vport_parms *parms) if (IS_ERR(vport)) return vport; - return ovs_netdev_link(vport, parms->name); + return ovs_netdev_link(vport, true); } static struct vport_ops ovs_vxlan_netdev_vport_ops = {
diff --git a/net/psp/psp_main.c b/net/psp/psp_main.c index 9508b6c..e45549f 100644 --- a/net/psp/psp_main.c +++ b/net/psp/psp_main.c
@@ -263,15 +263,16 @@ EXPORT_SYMBOL(psp_dev_encapsulate); /* Receive handler for PSP packets. * - * Presently it accepts only already-authenticated packets and does not - * support optional fields, such as virtualization cookies. The caller should - * ensure that skb->data is pointing to the mac header, and that skb->mac_len - * is set. This function does not currently adjust skb->csum (CHECKSUM_COMPLETE - * is not supported). + * Accepts only already-authenticated packets. The full PSP header is + * stripped according to psph->hdrlen; any optional fields it advertises + * (virtualization cookies, etc.) are ignored and discarded along with the + * rest of the header. The caller should ensure that skb->data is pointing + * to the mac header, and that skb->mac_len is set. This function does not + * currently adjust skb->csum (CHECKSUM_COMPLETE is not supported). */ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv) { - int l2_hlen = 0, l3_hlen, encap; + int l2_hlen = 0, l3_hlen, encap, psp_hlen; struct psp_skb_ext *pse; struct psphdr *psph; struct ethhdr *eth; @@ -312,18 +313,36 @@ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv) if (unlikely(uh->dest != htons(PSP_DEFAULT_UDP_PORT))) return -EINVAL; - pse = skb_ext_add(skb, SKB_EXT_PSP); - if (!pse) + psph = (struct psphdr *)(skb->data + l2_hlen + l3_hlen + + sizeof(struct udphdr)); + + /* Strip the full PSP header per psph->hdrlen; VC/options are pulled + * into the linear region only so they can be discarded with the + * rest of the header. 
+ */ + psp_hlen = (psph->hdrlen + 1) * 8; + + if (unlikely(psp_hlen < sizeof(struct psphdr))) + return -EINVAL; + + if (psp_hlen > sizeof(struct psphdr) && + !pskb_may_pull(skb, l2_hlen + l3_hlen + + sizeof(struct udphdr) + psp_hlen)) return -EINVAL; psph = (struct psphdr *)(skb->data + l2_hlen + l3_hlen + sizeof(struct udphdr)); + + pse = skb_ext_add(skb, SKB_EXT_PSP); + if (!pse) + return -EINVAL; + pse->spi = psph->spi; pse->dev_id = dev_id; pse->generation = generation; pse->version = FIELD_GET(PSPHDR_VERFL_VERSION, psph->verfl); - encap = PSP_ENCAP_HLEN; + encap = sizeof(struct udphdr) + psp_hlen; encap += strip_icv ? PSP_TRL_SIZE : 0; if (proto == htons(ETH_P_IP)) { @@ -340,8 +359,9 @@ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv) ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) - encap); } - memmove(skb->data + PSP_ENCAP_HLEN, skb->data, l2_hlen + l3_hlen); - skb_pull(skb, PSP_ENCAP_HLEN); + memmove(skb->data + sizeof(struct udphdr) + psp_hlen, + skb->data, l2_hlen + l3_hlen); + skb_pull(skb, sizeof(struct udphdr) + psp_hlen); if (strip_icv) pskb_trim(skb, skb->len - PSP_TRL_SIZE);
diff --git a/net/rds/message.c b/net/rds/message.c index eaa6f22..25fedcb 100644 --- a/net/rds/message.c +++ b/net/rds/message.c
@@ -131,24 +131,34 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs, */ static void rds_message_purge(struct rds_message *rm) { + struct rds_znotifier *znotifier; unsigned long i, flags; - bool zcopy = false; + bool zcopy; if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags))) return; spin_lock_irqsave(&rm->m_rs_lock, flags); + znotifier = rm->data.op_mmp_znotifier; + rm->data.op_mmp_znotifier = NULL; + zcopy = !!znotifier; + if (rm->m_rs) { struct rds_sock *rs = rm->m_rs; - if (rm->data.op_mmp_znotifier) { - zcopy = true; - rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier); + if (znotifier) { + rds_rm_zerocopy_callback(rs, znotifier); rds_wake_sk_sleep(rs); - rm->data.op_mmp_znotifier = NULL; } sock_put(rds_rs_to_sk(rs)); rm->m_rs = NULL; + } else if (znotifier) { + /* + * Zerocopy can fail before the message is queued on the + * socket, so there is no rs to carry the notification. + */ + mm_unaccount_pinned_pages(&znotifier->z_mmp); + kfree(rds_info_from_znotifier(znotifier)); } spin_unlock_irqrestore(&rm->m_rs_lock, flags);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index fdd6832..2b19b25 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c
@@ -334,7 +334,9 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && sp->hdr.securityIndex != 0 && - skb_cloned(skb)) { + (skb_cloned(skb) || + skb_has_frag_list(skb) || + skb_has_shared_frag(skb))) { /* Unshare the packet so that it can be * modified by in-place decryption. */
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index a2130d2..442414d 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c
@@ -245,7 +245,8 @@ static int rxrpc_verify_response(struct rxrpc_connection *conn, { int ret; - if (skb_cloned(skb)) { + if (skb_cloned(skb) || skb_has_frag_list(skb) || + skb_has_shared_frag(skb)) { /* Copy the packet if shared so that we can do in-place * decryption. */
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 13c6d18..5862933 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c
@@ -399,14 +399,14 @@ static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust); * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 */ -static void cobalt_newton_step(struct cobalt_vars *vars) +static void cobalt_newton_step(struct cobalt_vars *vars, u32 count) { u32 invsqrt, invsqrt2; u64 val; invsqrt = vars->rec_inv_sqrt; invsqrt2 = ((u64)invsqrt * invsqrt) >> 32; - val = (3LL << 32) - ((u64)vars->count * invsqrt2); + val = (3LL << 32) - ((u64)count * invsqrt2); val >>= 2; /* avoid overflow in following multiply */ val = (val * invsqrt) >> (32 - 2 + 1); @@ -414,12 +414,12 @@ static void cobalt_newton_step(struct cobalt_vars *vars) vars->rec_inv_sqrt = val; } -static void cobalt_invsqrt(struct cobalt_vars *vars) +static void cobalt_invsqrt(struct cobalt_vars *vars, u32 count) { - if (vars->count < REC_INV_SQRT_CACHE) - vars->rec_inv_sqrt = inv_sqrt_cache[vars->count]; + if (count < REC_INV_SQRT_CACHE) + vars->rec_inv_sqrt = inv_sqrt_cache[count]; else - cobalt_newton_step(vars); + cobalt_newton_step(vars, count); } static void cobalt_vars_init(struct cobalt_vars *vars) @@ -449,16 +449,19 @@ static bool cobalt_queue_full(struct cobalt_vars *vars, bool up = false; if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) { - up = !vars->p_drop; - vars->p_drop += p->p_inc; - if (vars->p_drop < p->p_inc) - vars->p_drop = ~0; - vars->blue_timer = now; + u32 p_drop = vars->p_drop; + + up = !p_drop; + p_drop += p->p_inc; + if (p_drop < p->p_inc) + p_drop = ~0; + WRITE_ONCE(vars->p_drop, p_drop); + WRITE_ONCE(vars->blue_timer, now); } - vars->dropping = true; - vars->drop_next = now; + WRITE_ONCE(vars->dropping, true); + WRITE_ONCE(vars->drop_next, now); if (!vars->count) - vars->count = 1; + WRITE_ONCE(vars->count, 1); return up; } @@ -475,20 +478,20 @@ static bool cobalt_queue_empty(struct cobalt_vars *vars, if (vars->p_drop && ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) { if (vars->p_drop < p->p_dec) - 
vars->p_drop = 0; + WRITE_ONCE(vars->p_drop, 0); else - vars->p_drop -= p->p_dec; - vars->blue_timer = now; + WRITE_ONCE(vars->p_drop, vars->p_drop - p->p_dec); + WRITE_ONCE(vars->blue_timer, now); down = !vars->p_drop; } - vars->dropping = false; + WRITE_ONCE(vars->dropping, false); if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) { - vars->count--; - cobalt_invsqrt(vars); - vars->drop_next = cobalt_control(vars->drop_next, - p->interval, - vars->rec_inv_sqrt); + WRITE_ONCE(vars->count, vars->count - 1); + cobalt_invsqrt(vars, vars->count); + WRITE_ONCE(vars->drop_next, + cobalt_control(vars->drop_next, p->interval, + vars->rec_inv_sqrt)); } return down; @@ -507,6 +510,7 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, bool next_due, over_target; ktime_t schedule; u64 sojourn; + u32 count; /* The 'schedule' variable records, in its sign, whether 'now' is before or * after 'drop_next'. This allows 'drop_next' to be updated before the next @@ -528,21 +532,22 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, over_target = sojourn > p->target && sojourn > p->mtu_time * bulk_flows * 2 && sojourn > p->mtu_time * 4; - next_due = vars->count && ktime_to_ns(schedule) >= 0; + count = vars->count; + next_due = count && ktime_to_ns(schedule) >= 0; vars->ecn_marked = false; if (over_target) { if (!vars->dropping) { - vars->dropping = true; - vars->drop_next = cobalt_control(now, - p->interval, - vars->rec_inv_sqrt); + WRITE_ONCE(vars->dropping, true); + WRITE_ONCE(vars->drop_next, + cobalt_control(now, p->interval, + vars->rec_inv_sqrt)); } - if (!vars->count) - vars->count = 1; + if (!count) + count = 1; } else if (vars->dropping) { - vars->dropping = false; + WRITE_ONCE(vars->dropping, false); } if (next_due && vars->dropping) { @@ -550,23 +555,23 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, if (!(vars->ecn_marked = INET_ECN_set_ce(skb))) reason = 
QDISC_DROP_CONGESTED; - vars->count++; - if (!vars->count) - vars->count--; - cobalt_invsqrt(vars); - vars->drop_next = cobalt_control(vars->drop_next, - p->interval, - vars->rec_inv_sqrt); + count++; + if (!count) + count--; + cobalt_invsqrt(vars, count); + WRITE_ONCE(vars->drop_next, + cobalt_control(vars->drop_next, p->interval, + vars->rec_inv_sqrt)); schedule = ktime_sub(now, vars->drop_next); } else { while (next_due) { - vars->count--; - cobalt_invsqrt(vars); - vars->drop_next = cobalt_control(vars->drop_next, - p->interval, - vars->rec_inv_sqrt); + count--; + cobalt_invsqrt(vars, count); + WRITE_ONCE(vars->drop_next, + cobalt_control(vars->drop_next, p->interval, + vars->rec_inv_sqrt)); schedule = ktime_sub(now, vars->drop_next); - next_due = vars->count && ktime_to_ns(schedule) >= 0; + next_due = count && ktime_to_ns(schedule) >= 0; } } @@ -575,11 +580,12 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, get_random_u32() < vars->p_drop) reason = QDISC_DROP_FLOOD_PROTECTION; + WRITE_ONCE(vars->count, count); /* Overload the drop_next field as an activity timeout */ - if (!vars->count) - vars->drop_next = ktime_add_ns(now, p->interval); + if (!count) + WRITE_ONCE(vars->drop_next, ktime_add_ns(now, p->interval)); else if (ktime_to_ns(schedule) > 0 && reason == QDISC_DROP_UNSPEC) - vars->drop_next = now; + WRITE_ONCE(vars->drop_next, now); return reason; } @@ -914,7 +920,7 @@ static struct sk_buff *dequeue_head(struct cake_flow *flow) struct sk_buff *skb = flow->head; if (skb) { - flow->head = skb->next; + WRITE_ONCE(flow->head, skb->next); skb_mark_not_on_list(skb); } @@ -926,7 +932,7 @@ static struct sk_buff *dequeue_head(struct cake_flow *flow) static void flow_queue_add(struct cake_flow *flow, struct sk_buff *skb) { if (!flow->head) - flow->head = skb; + WRITE_ONCE(flow->head, skb); else flow->tail->next = skb; flow->tail = skb; @@ -1357,7 +1363,7 @@ static struct sk_buff *cake_ack_filter(struct cake_sched_data *q, if 
(elig_ack_prev) elig_ack_prev->next = elig_ack->next; else - flow->head = elig_ack->next; + WRITE_ONCE(flow->head, elig_ack->next); skb_mark_not_on_list(elig_ack); @@ -1595,11 +1601,11 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) len = qdisc_pkt_len(skb); q->buffer_used -= skb->truesize; - b->backlogs[idx] -= len; WRITE_ONCE(b->tin_backlog, b->tin_backlog - len); + WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] - len); sch->qstats.backlog -= len; - flow->dropped++; + WRITE_ONCE(flow->dropped, flow->dropped + 1); WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1); if (q->config->rate_flags & CAKE_FLAG_INGRESS) @@ -1824,11 +1830,11 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } /* stats */ - b->backlogs[idx] += slen; sch->qstats.backlog += slen; q->avg_window_bytes += slen; WRITE_ONCE(b->bytes, b->bytes + slen); WRITE_ONCE(b->tin_backlog, b->tin_backlog + slen); + WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] + slen); qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen); consume_skb(skb); @@ -1861,11 +1867,11 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* stats */ WRITE_ONCE(b->packets, b->packets + 1); - b->backlogs[idx] += len - ack_pkt_len; sch->qstats.backlog += len - ack_pkt_len; q->avg_window_bytes += len - ack_pkt_len; WRITE_ONCE(b->bytes, b->bytes + len - ack_pkt_len); WRITE_ONCE(b->tin_backlog, b->tin_backlog + len - ack_pkt_len); + WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] + len - ack_pkt_len); } if (q->overflow_timeout) @@ -1924,7 +1930,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, flow->set = CAKE_SET_SPARSE; WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count + 1); - flow->deficit = cake_get_flow_quantum(b, flow, q->config->flow_mode); + WRITE_ONCE(flow->deficit, cake_get_flow_quantum(b, flow, q->config->flow_mode)); } else if (flow->set == CAKE_SET_SPARSE_WAIT) { /* this flow was empty, accounted as a sparse flow, but actually * in the bulk rotation. 
@@ -1977,7 +1983,7 @@ static struct sk_buff *cake_dequeue_one(struct Qdisc *sch) if (flow->head) { skb = dequeue_head(flow); len = qdisc_pkt_len(skb); - b->backlogs[q->cur_flow] -= len; + WRITE_ONCE(b->backlogs[q->cur_flow], b->backlogs[q->cur_flow] - len); WRITE_ONCE(b->tin_backlog, b->tin_backlog - len); sch->qstats.backlog -= len; q->buffer_used -= skb->truesize; @@ -2166,7 +2172,8 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) } } - flow->deficit += cake_get_flow_quantum(b, flow, q->config->flow_mode); + WRITE_ONCE(flow->deficit, + flow->deficit + cake_get_flow_quantum(b, flow, q->config->flow_mode)); list_move_tail(&flow->flowchain, &b->old_flows); goto retry; @@ -2232,10 +2239,10 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) if (q->config->rate_flags & CAKE_FLAG_INGRESS) { len = cake_advance_shaper(q, b, skb, now, true); - flow->deficit -= len; + WRITE_ONCE(flow->deficit, flow->deficit - len); b->tin_deficit -= len; } - flow->dropped++; + WRITE_ONCE(flow->dropped, flow->dropped + 1); WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1); qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); qdisc_qstats_drop(sch); @@ -2259,7 +2266,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) delay < b->base_delay ? 
2 : 8)); len = cake_advance_shaper(q, b, skb, now, false); - flow->deficit -= len; + WRITE_ONCE(flow->deficit, flow->deficit - len); b->tin_deficit -= len; if (ktime_after(q->time_next_packet, now) && sch->q.qlen) { @@ -3137,7 +3144,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, flow = &b->flows[idx % CAKE_QUEUES]; - if (flow->head) { + if (READ_ONCE(flow->head)) { sch_tree_lock(sch); skb = flow->head; while (skb) { @@ -3146,13 +3153,15 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, } sch_tree_unlock(sch); } - qs.backlog = b->backlogs[idx % CAKE_QUEUES]; - qs.drops = flow->dropped; + qs.backlog = READ_ONCE(b->backlogs[idx % CAKE_QUEUES]); + qs.drops = READ_ONCE(flow->dropped); } if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; if (flow) { ktime_t now = ktime_get(); + bool dropping; + u32 p_drop; stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP); if (!stats) @@ -3167,21 +3176,23 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, goto nla_put_failure; \ } while (0) - PUT_STAT_S32(DEFICIT, flow->deficit); - PUT_STAT_U32(DROPPING, flow->cvars.dropping); - PUT_STAT_U32(COBALT_COUNT, flow->cvars.count); - PUT_STAT_U32(P_DROP, flow->cvars.p_drop); - if (flow->cvars.p_drop) { + PUT_STAT_S32(DEFICIT, READ_ONCE(flow->deficit)); + dropping = READ_ONCE(flow->cvars.dropping); + PUT_STAT_U32(DROPPING, dropping); + PUT_STAT_U32(COBALT_COUNT, READ_ONCE(flow->cvars.count)); + p_drop = READ_ONCE(flow->cvars.p_drop); + PUT_STAT_U32(P_DROP, p_drop); + if (p_drop) { PUT_STAT_S32(BLUE_TIMER_US, ktime_to_us( ktime_sub(now, - flow->cvars.blue_timer))); + READ_ONCE(flow->cvars.blue_timer)))); } - if (flow->cvars.dropping) { + if (dropping) { PUT_STAT_S32(DROP_NEXT_US, ktime_to_us( ktime_sub(now, - flow->cvars.drop_next))); + READ_ONCE(flow->cvars.drop_next)))); } if (nla_nest_end(d->skb, stats) < 0)
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 0664b2f..24db546 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c
@@ -117,7 +117,7 @@ static inline struct sk_buff *dequeue_head(struct fq_codel_flow *flow) { struct sk_buff *skb = flow->head; - flow->head = skb->next; + WRITE_ONCE(flow->head, skb->next); skb_mark_not_on_list(skb); return skb; } @@ -127,7 +127,7 @@ static inline void flow_queue_add(struct fq_codel_flow *flow, struct sk_buff *skb) { if (flow->head == NULL) - flow->head = skb; + WRITE_ONCE(flow->head, skb); else flow->tail->next = skb; flow->tail = skb; @@ -173,8 +173,8 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets, } while (++i < max_packets && len < threshold); /* Tell codel to increase its signal strength also */ - flow->cvars.count += i; - q->backlogs[idx] -= len; + WRITE_ONCE(flow->cvars.count, flow->cvars.count + i); + WRITE_ONCE(q->backlogs[idx], q->backlogs[idx] - len); q->memory_usage -= mem; sch->qstats.drops += i; sch->qstats.backlog -= len; @@ -204,13 +204,13 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, codel_set_enqueue_time(skb); flow = &q->flows[idx]; flow_queue_add(flow, skb); - q->backlogs[idx] += qdisc_pkt_len(skb); + WRITE_ONCE(q->backlogs[idx], q->backlogs[idx] + qdisc_pkt_len(skb)); qdisc_qstats_backlog_inc(sch, skb); if (list_empty(&flow->flowchain)) { list_add_tail(&flow->flowchain, &q->new_flows); q->new_flow_count++; - flow->deficit = q->quantum; + WRITE_ONCE(flow->deficit, q->quantum); } get_codel_cb(skb)->mem_usage = skb->truesize; q->memory_usage += get_codel_cb(skb)->mem_usage; @@ -263,7 +263,8 @@ static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) flow = container_of(vars, struct fq_codel_flow, cvars); if (flow->head) { skb = dequeue_head(flow); - q->backlogs[flow - q->flows] -= qdisc_pkt_len(skb); + WRITE_ONCE(q->backlogs[flow - q->flows], + q->backlogs[flow - q->flows] - qdisc_pkt_len(skb)); q->memory_usage -= get_codel_cb(skb)->mem_usage; sch->q.qlen--; sch->qstats.backlog -= qdisc_pkt_len(skb); @@ -296,7 +297,7 @@ static struct sk_buff 
*fq_codel_dequeue(struct Qdisc *sch) flow = list_first_entry(head, struct fq_codel_flow, flowchain); if (flow->deficit <= 0) { - flow->deficit += q->quantum; + WRITE_ONCE(flow->deficit, flow->deficit + q->quantum); list_move_tail(&flow->flowchain, &q->old_flows); goto begin; } @@ -314,7 +315,7 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) goto begin; } qdisc_bstats_update(sch, skb); - flow->deficit -= qdisc_pkt_len(skb); + WRITE_ONCE(flow->deficit, flow->deficit - qdisc_pkt_len(skb)); if (q->cstats.drop_count) { qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, @@ -328,7 +329,7 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) static void fq_codel_flow_purge(struct fq_codel_flow *flow) { rtnl_kfree_skbs(flow->head, flow->tail); - flow->head = NULL; + WRITE_ONCE(flow->head, NULL); } static void fq_codel_reset(struct Qdisc *sch) @@ -656,21 +657,21 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, memset(&xstats, 0, sizeof(xstats)); xstats.type = TCA_FQ_CODEL_XSTATS_CLASS; - xstats.class_stats.deficit = flow->deficit; + xstats.class_stats.deficit = READ_ONCE(flow->deficit); xstats.class_stats.ldelay = - codel_time_to_us(flow->cvars.ldelay); - xstats.class_stats.count = flow->cvars.count; - xstats.class_stats.lastcount = flow->cvars.lastcount; - xstats.class_stats.dropping = flow->cvars.dropping; - if (flow->cvars.dropping) { - codel_tdiff_t delta = flow->cvars.drop_next - + codel_time_to_us(READ_ONCE(flow->cvars.ldelay)); + xstats.class_stats.count = READ_ONCE(flow->cvars.count); + xstats.class_stats.lastcount = READ_ONCE(flow->cvars.lastcount); + xstats.class_stats.dropping = READ_ONCE(flow->cvars.dropping); + if (xstats.class_stats.dropping) { + codel_tdiff_t delta = READ_ONCE(flow->cvars.drop_next) - codel_get_time(); xstats.class_stats.drop_next = (delta >= 0) ? 
codel_time_to_us(delta) : -codel_time_to_us(-delta); } - if (flow->head) { + if (READ_ONCE(flow->head)) { sch_tree_lock(sch); skb = flow->head; while (skb) { @@ -679,7 +680,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, } sch_tree_unlock(sch); } - qs.backlog = q->backlogs[idx]; + qs.backlog = READ_ONCE(q->backlogs[idx]); qs.drops = 0; } if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index fb53fbf..b41f2de 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c
@@ -219,16 +219,14 @@ void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params, * packet timestamp. */ if (!params->dq_rate_estimator) { - vars->qdelay = now - pie_get_enqueue_time(skb); + WRITE_ONCE(vars->qdelay, + backlog ? now - pie_get_enqueue_time(skb) : 0); if (vars->dq_tstamp != DTIME_INVALID) dtime = now - vars->dq_tstamp; vars->dq_tstamp = now; - if (backlog == 0) - vars->qdelay = 0; - if (dtime == 0) return; @@ -376,7 +374,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, if (qdelay > (PSCHED_NS2TICKS(250 * NSEC_PER_MSEC))) delta += MAX_PROB / (100 / 2); - vars->prob += delta; + WRITE_ONCE(vars->prob, vars->prob + delta); if (delta > 0) { /* prevent overflow */ @@ -401,7 +399,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, if (qdelay == 0 && qdelay_old == 0 && update_prob) /* Reduce drop probability to 98.4% */ - vars->prob -= vars->prob / 64; + WRITE_ONCE(vars->prob, vars->prob - vars->prob / 64); WRITE_ONCE(vars->qdelay, qdelay); vars->backlog_old = backlog; @@ -501,7 +499,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct pie_sched_data *q = qdisc_priv(sch); struct tc_pie_xstats st = { - .prob = q->vars.prob << BITS_PER_BYTE, + .prob = READ_ONCE(q->vars.prob) << BITS_PER_BYTE, .delay = ((u32)PSCHED_TICKS2NS(READ_ONCE(q->vars.qdelay))) / NSEC_PER_USEC, .packets_in = READ_ONCE(q->stats.packets_in), @@ -512,7 +510,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) }; /* avg_dq_rate is only valid if dq_rate_estimator is enabled */ - st.dq_rate_estimating = q->params.dq_rate_estimator; + st.dq_rate_estimating = READ_ONCE(q->params.dq_rate_estimator); /* unscale and return dq_rate in bytes per sec */ if (st.dq_rate_estimating)
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 432b8a30..4d0e44a 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c
@@ -162,7 +162,7 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch) struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; - skb = child->dequeue(child); + skb = qdisc_dequeue_peeked(child); if (skb) { qdisc_bstats_update(sch, skb); qdisc_qstats_backlog_dec(sch, skb);
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index bd5ef56..d3ee8e5 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c
@@ -441,7 +441,7 @@ static struct sk_buff *sfb_dequeue(struct Qdisc *sch) struct Qdisc *child = q->qdisc; struct sk_buff *skb; - skb = child->dequeue(q->qdisc); + skb = qdisc_dequeue_peeked(child); if (skb) { qdisc_bstats_update(sch, skb);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index c3f3181..f39822b 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c
@@ -225,7 +225,8 @@ static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) sfq_unlink(q, x, n, p); - d = q->slots[x].qlen--; + d = q->slots[x].qlen; + WRITE_ONCE(q->slots[x].qlen, d - 1); if (n == p && q->cur_depth == d) q->cur_depth--; sfq_link(q, x); @@ -238,7 +239,8 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x) sfq_unlink(q, x, n, p); - d = ++q->slots[x].qlen; + d = q->slots[x].qlen + 1; + WRITE_ONCE(q->slots[x].qlen, d); if (q->cur_depth < d) q->cur_depth = d; sfq_link(q, x); @@ -298,7 +300,7 @@ static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free) drop: skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot); len = qdisc_pkt_len(skb); - slot->backlog -= len; + WRITE_ONCE(slot->backlog, slot->backlog - len); sfq_dec(q, x); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); @@ -314,7 +316,7 @@ static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free) q->tail = NULL; /* no more active slots */ else q->tail->next = slot->next; - q->ht[slot->hash] = SFQ_EMPTY_SLOT; + WRITE_ONCE(q->ht[slot->hash], SFQ_EMPTY_SLOT); goto drop; } @@ -364,10 +366,10 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) x = q->dep[0].next; /* get a free slot */ if (x >= SFQ_MAX_FLOWS) return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_MAXFLOWS); - q->ht[hash] = x; + WRITE_ONCE(q->ht[hash], x); slot = &q->slots[x]; slot->hash = hash; - slot->backlog = 0; /* should already be 0 anyway... */ + WRITE_ONCE(slot->backlog, 0); /* should already be 0 anyway... 
*/ red_set_vars(&slot->vars); goto enqueue; } @@ -426,7 +428,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) head = slot_dequeue_head(slot); delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb); sch->qstats.backlog -= delta; - slot->backlog -= delta; + WRITE_ONCE(slot->backlog, slot->backlog - delta); qdisc_drop_reason(head, sch, to_free, QDISC_DROP_FLOW_LIMIT); slot_queue_add(slot, skb); @@ -436,7 +438,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) enqueue: qdisc_qstats_backlog_inc(sch, skb); - slot->backlog += qdisc_pkt_len(skb); + WRITE_ONCE(slot->backlog, slot->backlog + qdisc_pkt_len(skb)); slot_queue_add(slot, skb); sfq_inc(q, x); if (slot->qlen == 1) { /* The flow is new */ @@ -452,7 +454,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) */ q->tail = slot; /* We could use a bigger initial quantum for new flows */ - slot->allot = q->quantum; + WRITE_ONCE(slot->allot, q->quantum); } if (++sch->q.qlen <= q->limit) return NET_XMIT_SUCCESS; @@ -489,7 +491,7 @@ sfq_dequeue(struct Qdisc *sch) slot = &q->slots[a]; if (slot->allot <= 0) { q->tail = slot; - slot->allot += q->quantum; + WRITE_ONCE(slot->allot, slot->allot + q->quantum); goto next_slot; } skb = slot_dequeue_head(slot); @@ -497,10 +499,10 @@ sfq_dequeue(struct Qdisc *sch) qdisc_bstats_update(sch, skb); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); - slot->backlog -= qdisc_pkt_len(skb); + WRITE_ONCE(slot->backlog, slot->backlog - qdisc_pkt_len(skb)); /* Is the slot empty? 
*/ if (slot->qlen == 0) { - q->ht[slot->hash] = SFQ_EMPTY_SLOT; + WRITE_ONCE(q->ht[slot->hash], SFQ_EMPTY_SLOT); next_a = slot->next; if (a == next_a) { q->tail = NULL; /* no more active slots */ @@ -508,7 +510,7 @@ sfq_dequeue(struct Qdisc *sch) } q->tail->next = next_a; } else { - slot->allot -= qdisc_pkt_len(skb); + WRITE_ONCE(slot->allot, slot->allot - qdisc_pkt_len(skb)); } return skb; } @@ -549,9 +551,9 @@ static void sfq_rehash(struct Qdisc *sch) sfq_dec(q, i); __skb_queue_tail(&list, skb); } - slot->backlog = 0; + WRITE_ONCE(slot->backlog, 0); red_set_vars(&slot->vars); - q->ht[slot->hash] = SFQ_EMPTY_SLOT; + WRITE_ONCE(q->ht[slot->hash], SFQ_EMPTY_SLOT); } q->tail = NULL; @@ -570,7 +572,7 @@ static void sfq_rehash(struct Qdisc *sch) dropped++; continue; } - q->ht[hash] = x; + WRITE_ONCE(q->ht[hash], x); slot = &q->slots[x]; slot->hash = hash; } @@ -581,7 +583,7 @@ static void sfq_rehash(struct Qdisc *sch) slot->vars.qavg = red_calc_qavg(q->red_parms, &slot->vars, slot->backlog); - slot->backlog += qdisc_pkt_len(skb); + WRITE_ONCE(slot->backlog, slot->backlog + qdisc_pkt_len(skb)); sfq_inc(q, x); if (slot->qlen == 1) { /* The flow is new */ if (q->tail == NULL) { /* It is the first flow */ @@ -591,7 +593,7 @@ static void sfq_rehash(struct Qdisc *sch) q->tail->next = x; } q->tail = slot; - slot->allot = q->quantum; + WRITE_ONCE(slot->allot, q->quantum); } } sch->q.qlen -= dropped; @@ -905,16 +907,16 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct gnet_dump *d) { struct sfq_sched_data *q = qdisc_priv(sch); - sfq_index idx = q->ht[cl - 1]; + sfq_index idx = READ_ONCE(q->ht[cl - 1]); struct gnet_stats_queue qs = { 0 }; struct tc_sfq_xstats xstats = { 0 }; if (idx != SFQ_EMPTY_SLOT) { const struct sfq_slot *slot = &q->slots[idx]; - xstats.allot = slot->allot; - qs.qlen = slot->qlen; - qs.backlog = slot->backlog; + xstats.allot = READ_ONCE(slot->allot); + qs.qlen = READ_ONCE(slot->qlen); + qs.backlog = READ_ONCE(slot->backlog); } if 
(gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; @@ -930,7 +932,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < q->divisor; i++) { - if (q->ht[i] == SFQ_EMPTY_SLOT) { + if (READ_ONCE(q->ht[i]) == SFQ_EMPTY_SLOT) { arg->count++; continue; }
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 1a56509..185dbed 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c
@@ -1628,12 +1628,8 @@ static void smc_connect_work(struct work_struct *work) lock_sock(&smc->sk); if (rc != 0 || smc->sk.sk_err) { smc->sk.sk_state = SMC_CLOSED; - if (rc == -EPIPE || rc == -EAGAIN) - smc->sk.sk_err = EPIPE; - else if (rc == -ECONNREFUSED) - smc->sk.sk_err = ECONNREFUSED; - else if (signal_pending(current)) - smc->sk.sk_err = -sock_intr_errno(timeo); + if (!smc->sk.sk_err) + smc->sk.sk_err = (rc == -EAGAIN) ? EPIPE : -rc; sock_put(&smc->sk); /* passive closing */ goto out; }
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 798243e..2590e85 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c
@@ -2317,9 +2317,9 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, if (copied < 0) goto splice_requeue; - if (chunk < rxm->full_len) { - rxm->offset += len; - rxm->full_len -= len; + if (copied < rxm->full_len) { + rxm->offset += copied; + rxm->full_len -= copied; goto splice_requeue; }
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e2d787c..1cbf36e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c
@@ -3323,6 +3323,9 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct sk_buff *skb; int answ = 0; + if (sk->sk_type != SOCK_STREAM) + return -EOPNOTSUPP; + mutex_lock(&u->iolock); skb = skb_peek(&sk->sk_receive_queue);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c index a7967a3..0783555 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c
@@ -607,6 +607,8 @@ static void unix_gc(struct work_struct *work) struct sk_buff_head hitlist; struct sk_buff *skb; + WRITE_ONCE(gc_in_progress, true); + spin_lock(&unix_gc_lock); if (unix_graph_state == UNIX_GRAPH_NOT_CYCLIC) { @@ -649,10 +651,8 @@ void unix_schedule_gc(struct user_struct *user) READ_ONCE(user->unix_inflight) < UNIX_INFLIGHT_SANE_USER) return; - if (!READ_ONCE(gc_in_progress)) { - WRITE_ONCE(gc_in_progress, true); + if (!READ_ONCE(gc_in_progress)) queue_work(system_dfl_wq, &unix_gc_work); - } if (user && READ_ONCE(unix_graph_cyclic_sccs)) flush_work(&unix_gc_work);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 416d533..9b80145 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c
@@ -447,7 +447,9 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, u32 len) { - if (vvs->buf_used + len > vvs->buf_alloc) + u64 skb_overhead = (skb_queue_len(&vvs->rx_queue) + 1) * SKB_TRUESIZE(0); + + if (skb_overhead + vvs->buf_used + len > vvs->buf_alloc) return false; vvs->rx_bytes += len;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f334cdef..7db9cd4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c
@@ -1276,6 +1276,18 @@ static int nl80211_prepare_wdev_dump(struct netlink_callback *cb, rtnl_unlock(); return -ENODEV; } + + /* + * The first invocation validated the wdev's netns against + * the caller via __cfg80211_wdev_from_attrs(). The wiphy + * may have moved netns between dumpit invocations (via + * NL80211_CMD_SET_WIPHY_NETNS), so re-check here. + */ + if (!net_eq(wiphy_net(wiphy), sock_net(cb->skb->sk))) { + rtnl_unlock(); + return -ENODEV; + } + *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; @@ -13867,6 +13879,19 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(net)) return PTR_ERR(net); + /* + * The caller already has CAP_NET_ADMIN over the source netns + * (enforced by GENL_UNS_ADMIN_PERM on the genl op). Mirror the + * convention used by net/core/rtnetlink.c::rtnl_get_net_ns_capable() + * and require CAP_NET_ADMIN over the target netns as well, so that + * a caller that is privileged in their own user namespace cannot + * push a wiphy into a netns where they have no privilege. + */ + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + put_net(net); + return -EPERM; + } + err = 0; /* check if anything to do */ @@ -19828,6 +19853,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .cmd = NL80211_CMD_SET_PMK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_pmk, + .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), }, @@ -19835,6 +19861,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .cmd = NL80211_CMD_DEL_PMK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_pmk, + .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, {
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 4c8ea05..d6cd0de 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c
@@ -88,7 +88,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, out->ftm.ftms_per_burst = 0; if (tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]) out->ftm.ftms_per_burst = - nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]); + nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]); if (capa->ftm.max_ftms_per_burst && (out->ftm.ftms_per_burst > capa->ftm.max_ftms_per_burst ||
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 887abed..5e5786c 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c
@@ -646,9 +646,42 @@ static u64 xsk_skb_destructor_get_addr(struct sk_buff *skb) return (u64)((uintptr_t)skb_shinfo(skb)->destructor_arg & ~0x1UL); } -static void xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr) +static struct xsk_addrs *__xsk_addrs_alloc(struct sk_buff *skb, u64 addr) { - skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL); + struct xsk_addrs *xsk_addr; + + xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL); + if (unlikely(!xsk_addr)) + return NULL; + + xsk_addr->addrs[0] = addr; + skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; + return xsk_addr; +} + +static struct xsk_addrs *xsk_addrs_alloc(struct sk_buff *skb) +{ + struct xsk_addrs *xsk_addr; + + if (!xsk_skb_destructor_is_addr(skb)) + return (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; + + xsk_addr = __xsk_addrs_alloc(skb, xsk_skb_destructor_get_addr(skb)); + if (likely(xsk_addr)) + xsk_addr->num_descs = 1; + return xsk_addr; +} + +static int xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr) +{ + if (IS_ENABLED(CONFIG_64BIT)) { + skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL); + return 0; + } + + if (unlikely(!__xsk_addrs_alloc(skb, addr))) + return -ENOMEM; + return 0; } static void xsk_inc_num_desc(struct sk_buff *skb) @@ -685,7 +718,7 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool, spin_lock_irqsave(&pool->cq_prod_lock, flags); idx = xskq_get_prod(pool->cq); - if (unlikely(num_descs > 1)) { + if (unlikely(!xsk_skb_destructor_is_addr(skb))) { xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; for (i = 0; i < num_descs; i++) { @@ -724,14 +757,20 @@ void xsk_destruct_skb(struct sk_buff *skb) sock_wfree(skb); } -static void xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs, - u64 addr) +static int xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs, + u64 addr) { + int err; + + err = xsk_skb_destructor_set_addr(skb, addr); + if (unlikely(err)) + return err; + 
skb->dev = xs->dev; skb->priority = READ_ONCE(xs->sk.sk_priority); skb->mark = READ_ONCE(xs->sk.sk_mark); skb->destructor = xsk_destruct_skb; - xsk_skb_destructor_set_addr(skb, addr); + return 0; } static void xsk_consume_skb(struct sk_buff *skb) @@ -740,7 +779,7 @@ static void xsk_consume_skb(struct sk_buff *skb) u32 num_descs = xsk_get_num_desc(skb); struct xsk_addrs *xsk_addr; - if (unlikely(num_descs > 1)) { + if (unlikely(!xsk_skb_destructor_is_addr(skb))) { xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; kmem_cache_free(xsk_tx_generic_cache, xsk_addr); } @@ -819,28 +858,19 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, return ERR_PTR(err); skb_reserve(skb, hr); - - xsk_skb_init_misc(skb, xs, desc->addr); if (desc->options & XDP_TX_METADATA) { err = xsk_skb_metadata(skb, buffer, desc, pool, hr); - if (unlikely(err)) + if (unlikely(err)) { + kfree_skb(skb); return ERR_PTR(err); + } } } else { struct xsk_addrs *xsk_addr; - if (xsk_skb_destructor_is_addr(skb)) { - xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, - GFP_KERNEL); - if (!xsk_addr) - return ERR_PTR(-ENOMEM); - - xsk_addr->num_descs = 1; - xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb); - skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; - } else { - xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; - } + xsk_addr = xsk_addrs_alloc(skb); + if (!xsk_addr) + return ERR_PTR(-ENOMEM); /* in case of -EOVERFLOW that could happen below, * xsk_consume_skb() will release this node as whole skb @@ -856,8 +886,11 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, addr = buffer - pool->addrs; for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) { - if (unlikely(i >= MAX_SKB_FRAGS)) + if (unlikely(i >= MAX_SKB_FRAGS)) { + if (!xs->skb) + kfree_skb(skb); return ERR_PTR(-EOVERFLOW); + } page = pool->umem->pgs[addr >> PAGE_SHIFT]; get_page(page); @@ -914,7 +947,6 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, 
if (unlikely(err)) goto free_err; - xsk_skb_init_misc(skb, xs, desc->addr); if (desc->options & XDP_TX_METADATA) { err = xsk_skb_metadata(skb, buffer, desc, xs->pool, hr); @@ -927,19 +959,10 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, struct page *page; u8 *vaddr; - if (xsk_skb_destructor_is_addr(skb)) { - xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, - GFP_KERNEL); - if (!xsk_addr) { - err = -ENOMEM; - goto free_err; - } - - xsk_addr->num_descs = 1; - xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb); - skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; - } else { - xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; + xsk_addr = xsk_addrs_alloc(skb); + if (!xsk_addr) { + err = -ENOMEM; + goto free_err; } if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) { @@ -964,18 +987,28 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, } } + if (!xs->skb) { + err = xsk_skb_init_misc(skb, xs, desc->addr); + if (unlikely(err)) + goto free_err; + } xsk_inc_num_desc(skb); return skb; free_err: - if (skb && !skb_shinfo(skb)->nr_frags) + if (skb && !xs->skb) kfree_skb(skb); if (err == -EOVERFLOW) { - /* Drop the packet */ - xsk_inc_num_desc(xs->skb); - xsk_drop_skb(xs->skb); + if (xs->skb) { + /* Drop the packet */ + xsk_inc_num_desc(xs->skb); + xsk_drop_skb(xs->skb); + } else { + xsk_cq_cancel_locked(xs->pool, 1); + xs->tx->invalid_descs++; + } xskq_cons_release(xs->tx); } else { /* Let application retry */
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index cd7bc50..d981cfd 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c
@@ -175,6 +175,9 @@ int xp_assign_dev(struct xsk_buff_pool *pool, if (force_zc && force_copy) return -EINVAL; + if (pool->tx_sw_csum && (netdev->priv_flags & IFF_TX_SKB_NO_LINEAR)) + return -EOPNOTSUPP; + if (xsk_get_pool_from_qid(netdev, queue_id)) return -EBUSY;
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index afa4575..3bff346 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c
@@ -184,6 +184,10 @@ static long xsk_map_update_elem(struct bpf_map *map, void *key, void *value, } xs = (struct xdp_sock *)sock->sk; + if (!READ_ONCE(xs->rx)) { + sockfd_put(sock); + return -ENOBUFS; + } map_entry = &m->xsk_map[i]; node = xsk_map_node_alloc(m, map_entry);
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index a9652b4..cc35c2f 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c
@@ -66,7 +66,9 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph = ip_hdr(skb); int ihl = iph->ihl * 4; - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + if (!skb->inner_protocol) + skb_set_inner_transport_header(skb, + skb_transport_offset(skb)); skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + @@ -167,7 +169,9 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) int hdr_len; iph = ipv6_hdr(skb); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + if (!skb->inner_protocol) + skb_set_inner_transport_header(skb, + skb_transport_offset(skb)); hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr); if (hdr_len < 0) @@ -276,8 +280,10 @@ static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *top_iph; int flags; - skb_set_inner_network_header(skb, skb_network_offset(skb)); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + if (!skb->inner_protocol) { + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + } skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + @@ -321,8 +327,10 @@ static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) struct ipv6hdr *top_iph; int dsfield; - skb_set_inner_network_header(skb, skb_network_offset(skb)); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + if (!skb->inner_protocol) { + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + } skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header +
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1748d37..686014d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c
@@ -818,17 +818,17 @@ int __xfrm_state_delete(struct xfrm_state *x) spin_lock(&net->xfrm.xfrm_state_lock); list_del(&x->km.all); - hlist_del_rcu(&x->bydst); - hlist_del_rcu(&x->bysrc); - if (x->km.seq) - hlist_del_rcu(&x->byseq); + hlist_del_init_rcu(&x->bydst); + hlist_del_init_rcu(&x->bysrc); + if (!hlist_unhashed(&x->byseq)) + hlist_del_init_rcu(&x->byseq); if (!hlist_unhashed(&x->state_cache)) hlist_del_rcu(&x->state_cache); if (!hlist_unhashed(&x->state_cache_input)) hlist_del_rcu(&x->state_cache_input); - if (x->id.spi) - hlist_del_rcu(&x->byspi); + if (!hlist_unhashed(&x->byspi)) + hlist_del_init_rcu(&x->byspi); net->xfrm.state_num--; xfrm_nat_keepalive_state_updated(x); spin_unlock(&net->xfrm.xfrm_state_lock);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d56450f..38a90e5 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c
@@ -3323,6 +3323,7 @@ const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32), + [XFRM_MSG_MAPPING - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_mapping), [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default), [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default), };
diff --git a/rust/Makefile b/rust/Makefile index b361bfe..b9e9f51 100644 --- a/rust/Makefile +++ b/rust/Makefile
@@ -403,6 +403,8 @@ BINDGEN_TARGET_arm64 := aarch64-linux-gnu BINDGEN_TARGET_arm := arm-linux-gnueabi BINDGEN_TARGET_loongarch := loongarch64-linux-gnusf +# This is only for i386 UM builds, which need the 32-bit target not -m32 +BINDGEN_TARGET_i386 := i386-linux-gnu BINDGEN_TARGET_um := $(BINDGEN_TARGET_$(SUBARCH)) BINDGEN_TARGET := $(BINDGEN_TARGET_$(SRCARCH))
diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index adbafe8..403fc35 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs
@@ -119,13 +119,20 @@ pub fn new(dev: &device::Device, data: impl PinInit<T::Data, Error>) -> Result<A // compatible `Layout`. let layout = Kmalloc::aligned_layout(Layout::new::<Self>()); + // Use a temporary vtable without a `release` callback until `data` is initialized, so + // init failure can release the DRM device without dropping uninitialized fields. + let alloc_vtable = bindings::drm_driver { + release: None, + ..Self::VTABLE + }; + // SAFETY: - // - `VTABLE`, as a `const` is pinned to the read-only section of the compilation, + // - `alloc_vtable` reference remains valid until no longer used, // - `dev` is valid by its type invarants, let raw_drm: *mut Self = unsafe { bindings::__drm_dev_alloc( dev.as_raw(), - &Self::VTABLE, + &alloc_vtable, layout.size(), mem::offset_of!(Self, dev), ) @@ -133,6 +140,10 @@ pub fn new(dev: &device::Device, data: impl PinInit<T::Data, Error>) -> Result<A .cast(); let raw_drm = NonNull::new(from_err_ptr(raw_drm)?).ok_or(ENOMEM)?; + // SAFETY: `raw_drm` is a valid pointer to `Self`, given that `__drm_dev_alloc` was + // successful. + let drm_dev = unsafe { Self::into_drm_device(raw_drm) }; + // SAFETY: `raw_drm` is a valid pointer to `Self`. let raw_data = unsafe { ptr::addr_of_mut!((*raw_drm.as_ptr()).data) }; @@ -140,15 +151,14 @@ pub fn new(dev: &device::Device, data: impl PinInit<T::Data, Error>) -> Result<A // - `raw_data` is a valid pointer to uninitialized memory. // - `raw_data` will not move until it is dropped. unsafe { data.__pinned_init(raw_data) }.inspect_err(|_| { - // SAFETY: `raw_drm` is a valid pointer to `Self`, given that `__drm_dev_alloc` was - // successful. - let drm_dev = unsafe { Self::into_drm_device(raw_drm) }; - // SAFETY: `__drm_dev_alloc()` was successful, hence `drm_dev` must be valid and the // refcount must be non-zero. unsafe { bindings::drm_dev_put(drm_dev) }; })?; + // SAFETY: `drm_dev` is still private to this function. 
+ unsafe { (*drm_dev).driver = const { &Self::VTABLE } }; + // SAFETY: The reference count is one, and now we take ownership of that reference as a // `drm::Device`. Ok(unsafe { ARef::from_raw(raw_drm) })
diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs index 75acda7..01b5bd4 100644 --- a/rust/kernel/drm/gem/mod.rs +++ b/rust/kernel/drm/gem/mod.rs
@@ -277,8 +277,17 @@ pub fn new(dev: &drm::Device<T::Driver>, size: usize, args: T::Args) -> Result<A // SAFETY: `obj.as_raw()` is guaranteed to be valid by the initialization above. unsafe { (*obj.as_raw()).funcs = &Self::OBJECT_FUNCS }; - // SAFETY: The arguments are all valid per the type invariants. - to_result(unsafe { bindings::drm_gem_object_init(dev.as_raw(), obj.obj.get(), size) })?; + if let Err(err) = + // SAFETY: The arguments are all valid per the type invariants. + to_result(unsafe { + bindings::drm_gem_object_init(dev.as_raw(), obj.obj.get(), size) + }) + { + // SAFETY: `drm_gem_object_init()` initializes the private GEM object state before + // failing, so `drm_gem_private_object_fini()` is the matching cleanup. + unsafe { bindings::drm_gem_private_object_fini(obj.obj.get()) }; + return Err(err); + } // SAFETY: We will never move out of `Self` as `ARef<Self>` is always treated as pinned. let ptr = KBox::into_raw(unsafe { Pin::into_inner_unchecked(obj) });
diff --git a/rust/kernel/drm/gem/shmem.rs b/rust/kernel/drm/gem/shmem.rs index d025fb0..e1b6489 100644 --- a/rust/kernel/drm/gem/shmem.rs +++ b/rust/kernel/drm/gem/shmem.rs
@@ -19,10 +19,8 @@ }, error::to_result, prelude::*, - types::{ - ARef, - Opaque, // - }, // + sync::aref::ARef, + types::Opaque, // }; use core::{ ops::{
diff --git a/rust/pin-init/internal/src/init.rs b/rust/pin-init/internal/src/init.rs index daa3f1c..487ee00 100644 --- a/rust/pin-init/internal/src/init.rs +++ b/rust/pin-init/internal/src/init.rs
@@ -249,22 +249,6 @@ fn init_fields( }); // Again span for better diagnostics let write = quote_spanned!(ident.span()=> ::core::ptr::write); - // NOTE: the field accessor ensures that the initialized field is properly aligned. - // Unaligned fields will cause the compiler to emit E0793. We do not support - // unaligned fields since `Init::__init` requires an aligned pointer; the call to - // `ptr::write` below has the same requirement. - let accessor = if pinned { - let project_ident = format_ident!("__project_{ident}"); - quote! { - // SAFETY: TODO - unsafe { #data.#project_ident(&mut (*#slot).#ident) } - } - } else { - quote! { - // SAFETY: TODO - unsafe { &mut (*#slot).#ident } - } - }; quote! { #(#attrs)* { @@ -272,51 +256,31 @@ fn init_fields( // SAFETY: TODO unsafe { #write(&raw mut (*#slot).#ident, #value_ident) }; } - #(#cfgs)* - #[allow(unused_variables)] - let #ident = #accessor; } } InitializerKind::Init { ident, value, .. } => { // Again span for better diagnostics let init = format_ident!("init", span = value.span()); - // NOTE: the field accessor ensures that the initialized field is properly aligned. - // Unaligned fields will cause the compiler to emit E0793. We do not support - // unaligned fields since `Init::__init` requires an aligned pointer; the call to - // `ptr::write` below has the same requirement. - let (value_init, accessor) = if pinned { - let project_ident = format_ident!("__project_{ident}"); - ( - quote! { - // SAFETY: - // - `slot` is valid, because we are inside of an initializer closure, we - // return when an error/panic occurs. - // - We also use `#data` to require the correct trait (`Init` or `PinInit`) - // for `#ident`. - unsafe { #data.#ident(&raw mut (*#slot).#ident, #init)? }; - }, - quote! { - // SAFETY: TODO - unsafe { #data.#project_ident(&mut (*#slot).#ident) } - }, - ) + let value_init = if pinned { + quote! 
{ + // SAFETY: + // - `slot` is valid, because we are inside of an initializer closure, we + // return when an error/panic occurs. + // - We also use `#data` to require the correct trait (`Init` or `PinInit`) + // for `#ident`. + unsafe { #data.#ident(&raw mut (*#slot).#ident, #init)? }; + } } else { - ( - quote! { - // SAFETY: `slot` is valid, because we are inside of an initializer - // closure, we return when an error/panic occurs. - unsafe { - ::pin_init::Init::__init( - #init, - &raw mut (*#slot).#ident, - )? - }; - }, - quote! { - // SAFETY: TODO - unsafe { &mut (*#slot).#ident } - }, - ) + quote! { + // SAFETY: `slot` is valid, because we are inside of an initializer + // closure, we return when an error/panic occurs. + unsafe { + ::pin_init::Init::__init( + #init, + &raw mut (*#slot).#ident, + )? + }; + } }; quote! { #(#attrs)* @@ -324,9 +288,6 @@ fn init_fields( let #init = #value; #value_init } - #(#cfgs)* - #[allow(unused_variables)] - let #ident = #accessor; } } InitializerKind::Code { block: value, .. } => quote! { @@ -339,18 +300,41 @@ fn init_fields( if let Some(ident) = kind.ident() { // `mixed_site` ensures that the guard is not accessible to the user-controlled code. let guard = format_ident!("__{ident}_guard", span = Span::mixed_site()); + + // NOTE: The reference is derived from the guard so that it only lives as long as the + // guard does and cannot escape the scope. If it's created via `&mut (*#slot).#ident` + // like the unaligned field guard, it will become effectively `'static`. + let accessor = if pinned { + let project_ident = format_ident!("__project_{ident}"); + quote! { + // SAFETY: the initialization is pinned. + unsafe { #data.#project_ident(#guard.let_binding()) } + } + } else { + quote! { + #guard.let_binding() + } + }; + res.extend(quote! { #(#cfgs)* - // Create the drop guard: + // Create the drop guard. // - // We rely on macro hygiene to make it impossible for users to access this local - // variable. 
- // SAFETY: We forget the guard later when initialization has succeeded. - let #guard = unsafe { + // SAFETY: + // - `&raw mut (*slot).#ident` is valid. + // - `make_field_check` checks that `&raw mut (*slot).#ident` is properly aligned. + // - `(*slot).#ident` has been initialized above. + // - We only need the ownership to the pointee back when initialization has + // succeeded, where we `forget` the guard. + let mut #guard = unsafe { ::pin_init::__internal::DropGuard::new( &raw mut (*slot).#ident ) }; + + #(#cfgs)* + #[allow(unused_variables)] + let #ident = #accessor; }); guards.push(guard); guard_attrs.push(cfgs); @@ -367,49 +351,49 @@ fn init_fields( } } -/// Generate the check for ensuring that every field has been initialized. +/// Generate the check for ensuring that every field has been initialized and aligned. fn make_field_check( fields: &Punctuated<InitializerField, Token![,]>, init_kind: InitKind, path: &Path, ) -> TokenStream { - let field_attrs = fields + let field_attrs: Vec<_> = fields .iter() - .filter_map(|f| f.kind.ident().map(|_| &f.attrs)); - let field_name = fields.iter().filter_map(|f| f.kind.ident()); - match init_kind { - InitKind::Normal => quote! { - // We use unreachable code to ensure that all fields have been mentioned exactly once, - // this struct initializer will still be type-checked and complain with a very natural - // error message if a field is forgotten/mentioned more than once. - #[allow(unreachable_code, clippy::diverging_sub_expression)] - // SAFETY: this code is never executed. - let _ = || unsafe { - ::core::ptr::write(slot, #path { - #( - #(#field_attrs)* - #field_name: ::core::panic!(), - )* - }) - }; - }, - InitKind::Zeroing => quote! { - // We use unreachable code to ensure that all fields have been mentioned at most once. - // Since the user specified `..Zeroable::zeroed()` at the end, all missing fields will - // be zeroed. 
This struct initializer will still be type-checked and complain with a - // very natural error message if a field is mentioned more than once, or doesn't exist. - #[allow(unreachable_code, clippy::diverging_sub_expression, unused_assignments)] - // SAFETY: this code is never executed. - let _ = || unsafe { - ::core::ptr::write(slot, #path { - #( - #(#field_attrs)* - #field_name: ::core::panic!(), - )* - ..::core::mem::zeroed() - }) - }; - }, + .filter_map(|f| f.kind.ident().map(|_| &f.attrs)) + .collect(); + let field_name: Vec<_> = fields.iter().filter_map(|f| f.kind.ident()).collect(); + let zeroing_trailer = match init_kind { + InitKind::Normal => None, + InitKind::Zeroing => Some(quote! { + ..::core::mem::zeroed() + }), + }; + quote! { + #[allow(unreachable_code, clippy::diverging_sub_expression)] + // We use unreachable code to perform field checks. They're still checked by the compiler. + // SAFETY: this code is never executed. + let _ = || unsafe { + // Create references to ensure that the initialized field is properly aligned. + // Unaligned fields will cause the compiler to emit E0793. We do not support + // unaligned fields since `Init::__init` requires an aligned pointer; the call to + // `ptr::write` for value-initialization case has the same requirement. + #( + #(#field_attrs)* + let _ = &(*slot).#field_name; + )* + + // If the zeroing trailer is not present, this checks that all fields have been + // mentioned exactly once. If the zeroing trailer is present, all missing fields will be + // zeroed, so this checks that all fields have been mentioned at most once. The use of + // struct initializer will still generate very natural error messages for any misuse. + ::core::ptr::write(slot, #path { + #( + #(#field_attrs)* + #field_name: ::core::panic!(), + )* + #zeroing_trailer + }) + }; } }
diff --git a/rust/pin-init/src/__internal.rs b/rust/pin-init/src/__internal.rs index 90adbdc..5720a62 100644 --- a/rust/pin-init/src/__internal.rs +++ b/rust/pin-init/src/__internal.rs
@@ -238,32 +238,42 @@ struct Foo { /// When a value of this type is dropped, it drops a `T`. /// /// Can be forgotten to prevent the drop. +/// +/// # Invariants +/// +/// - `ptr` is valid and properly aligned. +/// - `*ptr` is initialized and owned by this guard. pub struct DropGuard<T: ?Sized> { ptr: *mut T, } impl<T: ?Sized> DropGuard<T> { - /// Creates a new [`DropGuard<T>`]. It will [`ptr::drop_in_place`] `ptr` when it gets dropped. + /// Creates a drop guard and transfer the ownership of the pointer content. + /// + /// The ownership is only relinguished if the guard is forgotten via [`core::mem::forget`]. /// /// # Safety /// - /// `ptr` must be a valid pointer. - /// - /// It is the callers responsibility that `self` will only get dropped if the pointee of `ptr`: - /// - has not been dropped, - /// - is not accessible by any other means, - /// - will not be dropped by any other means. + /// - `ptr` is valid and properly aligned. + /// - `*ptr` is initialized, and the ownership is transferred to this guard. #[inline] pub unsafe fn new(ptr: *mut T) -> Self { + // INVARIANT: By safety requirement. Self { ptr } } + + /// Create a let binding for accessor use. + #[inline] + pub fn let_binding(&mut self) -> &mut T { + // SAFETY: Per type invariant. + unsafe { &mut *self.ptr } + } } impl<T: ?Sized> Drop for DropGuard<T> { #[inline] fn drop(&mut self) { - // SAFETY: A `DropGuard` can only be constructed using the unsafe `new` function - // ensuring that this operation is safe. + // SAFETY: `self.ptr` is valid, properly aligned and `*self.ptr` is owned by this guard. unsafe { ptr::drop_in_place(self.ptr) } } }
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index d1f16d7..0babb89 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h
@@ -312,8 +312,6 @@ int security_context_to_sid_default(const char *scontext, u32 scontext_len, int security_context_to_sid_force(const char *scontext, u32 scontext_len, u32 *sid); -int security_get_user_sids(u32 fromsid, const char *username, u32 **sids, u32 *nel); - int security_port_sid(u8 protocol, u16 port, u32 *out_sid); int security_ib_pkey_sid(u64 subnet_prefix, u16 pkey_num, u32 *out_sid);
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 83aa765..25ca7d7 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c
@@ -76,7 +76,6 @@ struct selinux_fs_info { int *bool_pending_values; struct dentry *class_dir; unsigned long last_class_ino; - bool policy_opened; unsigned long last_ino; struct super_block *sb; }; @@ -272,35 +271,13 @@ static ssize_t sel_write_disable(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - char *page; - ssize_t length; - int new_value; - - if (count >= PAGE_SIZE) - return -ENOMEM; - - /* No partial writes. */ - if (*ppos != 0) - return -EINVAL; - - page = memdup_user_nul(buf, count); - if (IS_ERR(page)) - return PTR_ERR(page); - - if (sscanf(page, "%d", &new_value) != 1) { - length = -EINVAL; - goto out; - } - length = count; - - if (new_value) { - pr_err("SELinux: https://github.com/SELinuxProject/selinux-kernel/wiki/DEPRECATE-runtime-disable\n"); - pr_err("SELinux: Runtime disable is not supported, use selinux=0 on the kernel cmdline.\n"); - } - -out: - kfree(page); - return length; + /* + * Setting disable is no longer supported, see + * https://github.com/SELinuxProject/selinux-kernel/wiki/DEPRECATE-runtime-disable + */ + pr_err_once("SELinux: %s (%d) wrote to disable. 
This is no longer supported.\n", + current->comm, current->pid); + return count; } static const struct file_operations sel_disable_ops = { @@ -362,44 +339,31 @@ struct policy_load_memory { static int sel_open_policy(struct inode *inode, struct file *filp) { - struct selinux_fs_info *fsi = inode->i_sb->s_fs_info; struct policy_load_memory *plm = NULL; int rc; - BUG_ON(filp->private_data); - - mutex_lock(&selinux_state.policy_mutex); - rc = avc_has_perm(current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__READ_POLICY, NULL); if (rc) - goto err; + return rc; - rc = -EBUSY; - if (fsi->policy_opened) - goto err; - - rc = -ENOMEM; plm = kzalloc_obj(*plm); if (!plm) - goto err; + return -ENOMEM; + mutex_lock(&selinux_state.policy_mutex); rc = security_read_policy(&plm->data, &plm->len); if (rc) goto err; - if ((size_t)i_size_read(inode) != plm->len) { inode_lock(inode); i_size_write(inode, plm->len); inode_unlock(inode); } - - fsi->policy_opened = 1; + mutex_unlock(&selinux_state.policy_mutex); filp->private_data = plm; - mutex_unlock(&selinux_state.policy_mutex); - return 0; err: mutex_unlock(&selinux_state.policy_mutex); @@ -412,13 +376,8 @@ static int sel_open_policy(struct inode *inode, struct file *filp) static int sel_release_policy(struct inode *inode, struct file *filp) { - struct selinux_fs_info *fsi = inode->i_sb->s_fs_info; struct policy_load_memory *plm = filp->private_data; - BUG_ON(!plm); - - fsi->policy_opened = 0; - vfree(plm->data); kfree(plm); @@ -594,34 +553,31 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, if (!count) return -EINVAL; - mutex_lock(&selinux_state.policy_mutex); - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__LOAD_POLICY, NULL); if (length) - goto out; + return length; data = vmalloc(count); - if (!data) { - length = -ENOMEM; - goto out; - } + if (!data) + return -ENOMEM; if (copy_from_user(data, buf, count) != 0) { length = -EFAULT; goto out; } + 
mutex_lock(&selinux_state.policy_mutex); length = security_load_policy(data, count, &load_state); if (length) { pr_warn_ratelimited("SELinux: failed to load policy\n"); - goto out; + goto out_unlock; } fsi = file_inode(file)->i_sb->s_fs_info; length = sel_make_policy_nodes(fsi, load_state.policy); if (length) { pr_warn_ratelimited("SELinux: failed to initialize selinuxfs\n"); selinux_policy_cancel(&load_state); - goto out; + goto out_unlock; } selinux_policy_commit(&load_state); @@ -631,8 +587,9 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, from_kuid(&init_user_ns, audit_get_loginuid(current)), audit_get_sessionid(current)); -out: +out_unlock: mutex_unlock(&selinux_state.policy_mutex); +out: vfree(data); return length; } @@ -689,46 +646,13 @@ static ssize_t sel_read_checkreqprot(struct file *filp, char __user *buf, static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - char *page; - ssize_t length; - unsigned int new_value; - - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, - SECCLASS_SECURITY, SECURITY__SETCHECKREQPROT, - NULL); - if (length) - return length; - - if (count >= PAGE_SIZE) - return -ENOMEM; - - /* No partial writes. */ - if (*ppos != 0) - return -EINVAL; - - page = memdup_user_nul(buf, count); - if (IS_ERR(page)) - return PTR_ERR(page); - - if (sscanf(page, "%u", &new_value) != 1) { - length = -EINVAL; - goto out; - } - length = count; - - if (new_value) { - char comm[sizeof(current->comm)]; - - strscpy(comm, current->comm); - pr_err("SELinux: %s (%d) set checkreqprot to 1. This is no longer supported.\n", - comm, current->pid); - } - - selinux_ima_measure_state(); - -out: - kfree(page); - return length; + /* + * Setting checkreqprot is no longer supported, see + * https://github.com/SELinuxProject/selinux-kernel/wiki/DEPRECATE-checkreqprot + */ + pr_err_once("SELinux: %s (%d) wrote to checkreqprot. 
This is no longer supported.\n", + current->comm, current->pid); + return count; } static const struct file_operations sel_checkreqprot_ops = { .read = sel_read_checkreqprot, @@ -1073,69 +997,11 @@ static ssize_t sel_write_relabel(struct file *file, char *buf, size_t size) static ssize_t sel_write_user(struct file *file, char *buf, size_t size) { - char *con = NULL, *user = NULL, *ptr; - u32 sid, *sids = NULL; - ssize_t length; - char *newcon; - int rc; - u32 i, len, nsids; - - pr_warn_ratelimited("SELinux: %s (%d) wrote to /sys/fs/selinux/user!" - " This will not be supported in the future; please update your" - " userspace.\n", current->comm, current->pid); - ssleep(5); - - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, - SECCLASS_SECURITY, SECURITY__COMPUTE_USER, - NULL); - if (length) - goto out; - - length = -ENOMEM; - con = kzalloc(size + 1, GFP_KERNEL); - if (!con) - goto out; - - length = -ENOMEM; - user = kzalloc(size + 1, GFP_KERNEL); - if (!user) - goto out; - - length = -EINVAL; - if (sscanf(buf, "%s %s", con, user) != 2) - goto out; - - length = security_context_str_to_sid(con, &sid, GFP_KERNEL); - if (length) - goto out; - - length = security_get_user_sids(sid, user, &sids, &nsids); - if (length) - goto out; - - length = sprintf(buf, "%u", nsids) + 1; - ptr = buf + length; - for (i = 0; i < nsids; i++) { - rc = security_sid_to_context(sids[i], &newcon, &len); - if (rc) { - length = rc; - goto out; - } - if ((length + len) >= SIMPLE_TRANSACTION_LIMIT) { - kfree(newcon); - length = -ERANGE; - goto out; - } - memcpy(ptr, newcon, len); - kfree(newcon); - ptr += len; - length += len; - } -out: - kfree(sids); - kfree(user); - kfree(con); - return length; + pr_err_once("SELinux: %s (%d) wrote to user. This is no longer supported.\n", + current->comm, current->pid); + buf[0] = '0'; + buf[1] = 0; + return 2; } static ssize_t sel_write_member(struct file *file, char *buf, size_t size)
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index e8e7ccb..143021c 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c
@@ -2746,131 +2746,6 @@ int security_node_sid(u16 domain, return rc; } -#define SIDS_NEL 25 - -/** - * security_get_user_sids - Obtain reachable SIDs for a user. - * @fromsid: starting SID - * @username: username - * @sids: array of reachable SIDs for user - * @nel: number of elements in @sids - * - * Generate the set of SIDs for legal security contexts - * for a given user that can be reached by @fromsid. - * Set *@sids to point to a dynamically allocated - * array containing the set of SIDs. Set *@nel to the - * number of elements in the array. - */ - -int security_get_user_sids(u32 fromsid, - const char *username, - u32 **sids, - u32 *nel) -{ - struct selinux_policy *policy; - struct policydb *policydb; - struct sidtab *sidtab; - struct context *fromcon, usercon; - u32 *mysids = NULL, *mysids2, sid; - u32 i, j, mynel, maxnel = SIDS_NEL; - struct user_datum *user; - struct role_datum *role; - struct ebitmap_node *rnode, *tnode; - int rc; - - *sids = NULL; - *nel = 0; - - if (!selinux_initialized()) - return 0; - - mysids = kcalloc(maxnel, sizeof(*mysids), GFP_KERNEL); - if (!mysids) - return -ENOMEM; - -retry: - mynel = 0; - rcu_read_lock(); - policy = rcu_dereference(selinux_state.policy); - policydb = &policy->policydb; - sidtab = policy->sidtab; - - context_init(&usercon); - - rc = -EINVAL; - fromcon = sidtab_search(sidtab, fromsid); - if (!fromcon) - goto out_unlock; - - rc = -EINVAL; - user = symtab_search(&policydb->p_users, username); - if (!user) - goto out_unlock; - - usercon.user = user->value; - - ebitmap_for_each_positive_bit(&user->roles, rnode, i) { - role = policydb->role_val_to_struct[i]; - usercon.role = i + 1; - ebitmap_for_each_positive_bit(&role->types, tnode, j) { - usercon.type = j + 1; - - if (mls_setup_user_range(policydb, fromcon, user, - &usercon)) - continue; - - rc = sidtab_context_to_sid(sidtab, &usercon, &sid); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out_unlock; - if (mynel < maxnel) { - 
mysids[mynel++] = sid; - } else { - rc = -ENOMEM; - maxnel += SIDS_NEL; - mysids2 = kcalloc(maxnel, sizeof(*mysids2), GFP_ATOMIC); - if (!mysids2) - goto out_unlock; - memcpy(mysids2, mysids, mynel * sizeof(*mysids2)); - kfree(mysids); - mysids = mysids2; - mysids[mynel++] = sid; - } - } - } - rc = 0; -out_unlock: - rcu_read_unlock(); - if (rc || !mynel) { - kfree(mysids); - return rc; - } - - rc = -ENOMEM; - mysids2 = kcalloc(mynel, sizeof(*mysids2), GFP_KERNEL); - if (!mysids2) { - kfree(mysids); - return rc; - } - for (i = 0, j = 0; i < mynel; i++) { - struct av_decision dummy_avd; - rc = avc_has_perm_noaudit(fromsid, mysids[i], - SECCLASS_PROCESS, /* kernel value */ - PROCESS__TRANSITION, AVC_STRICT, - &dummy_avd); - if (!rc) - mysids2[j++] = mysids[i]; - cond_resched(); - } - kfree(mysids); - *sids = mysids2; - *nel = j; - return 0; -} - /** * __security_genfs_sid - Helper to obtain a SID for a file in a filesystem * @policy: policy
diff --git a/sound/core/misc.c b/sound/core/misc.c index 5aca09e..833124c 100644 --- a/sound/core/misc.c +++ b/sound/core/misc.c
@@ -148,9 +148,11 @@ EXPORT_SYMBOL_GPL(snd_fasync_helper); void snd_kill_fasync(struct snd_fasync *fasync, int signal, int poll) { - if (!fasync || !fasync->on) + if (!fasync) return; guard(spinlock_irqsave)(&snd_fasync_lock); + if (!fasync->on) + return; fasync->signal = signal; fasync->poll = poll; list_move(&fasync->list, &snd_fasync_list); @@ -163,8 +165,10 @@ void snd_fasync_free(struct snd_fasync *fasync) if (!fasync) return; - scoped_guard(spinlock_irq, &snd_fasync_lock) + scoped_guard(spinlock_irq, &snd_fasync_lock) { + fasync->on = 0; list_del_init(&fasync->list); + } flush_work(&snd_fasync_work); kfree(fasync);
diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 75a7a2a..5719637 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c
@@ -1253,7 +1253,7 @@ static int snd_seq_ioctl_set_client_info(struct snd_seq_client *client, if (client->user_pversion >= SNDRV_PROTOCOL_VERSION(1, 0, 3)) client->midi_version = client_info->midi_version; memcpy(client->event_filter, client_info->event_filter, 32); - client->group_filter = client_info->group_filter; + client->group_filter = client_info->group_filter & SND_SEQ_GROUP_FILTER_MASK; /* notify the change */ snd_seq_system_client_ev_client_change(client->number);
diff --git a/sound/core/seq/seq_clientmgr.h b/sound/core/seq/seq_clientmgr.h index ece02c5..feea8bb 100644 --- a/sound/core/seq/seq_clientmgr.h +++ b/sound/core/seq/seq_clientmgr.h
@@ -14,6 +14,9 @@ /* client manager */ +#define SND_SEQ_GROUP_FILTER_MASK GENMASK(SNDRV_UMP_MAX_GROUPS, 0) +#define SND_SEQ_GROUP_FILTER_GROUPS GENMASK(SNDRV_UMP_MAX_GROUPS, 1) + struct snd_seq_user_client { struct file *file; /* file struct of client */ /* ... */ @@ -40,7 +43,7 @@ struct snd_seq_client { int number; /* client number */ unsigned int filter; /* filter flags */ DECLARE_BITMAP(event_filter, 256); - unsigned short group_filter; + unsigned int group_filter; snd_use_lock_t use_lock; int event_lost; /* ports */
diff --git a/sound/core/seq/seq_ump_client.c b/sound/core/seq/seq_ump_client.c index fdc76f2..9079ccf 100644 --- a/sound/core/seq/seq_ump_client.c +++ b/sound/core/seq/seq_ump_client.c
@@ -369,7 +369,7 @@ static void setup_client_group_filter(struct seq_ump_client *client) cptr = snd_seq_kernel_client_get(client->seq_client); if (!cptr) return; - filter = ~(1U << 0); /* always allow groupless messages */ + filter = SND_SEQ_GROUP_FILTER_GROUPS; /* always allow groupless messages */ for (p = 0; p < SNDRV_UMP_MAX_GROUPS; p++) { if (client->ump->groups[p].active) filter &= ~(1U << (p + 1));
diff --git a/sound/drivers/pcmtest.c b/sound/drivers/pcmtest.c index 5bfec4c..7f93557 100644 --- a/sound/drivers/pcmtest.c +++ b/sound/drivers/pcmtest.c
@@ -679,9 +679,9 @@ static ssize_t pattern_read(struct file *file, char __user *u_buff, size_t len, return 0; if (copy_to_user(u_buff, patt_buf->buf + *off, to_read)) - to_read = 0; - else - *off += to_read; + return -EFAULT; + + *off += to_read; return to_read; }
diff --git a/sound/firewire/tascam/tascam-hwdep.c b/sound/firewire/tascam/tascam-hwdep.c index 867b4ea..6270263 100644 --- a/sound/firewire/tascam/tascam-hwdep.c +++ b/sound/firewire/tascam/tascam-hwdep.c
@@ -73,6 +73,7 @@ static long tscm_hwdep_read_queue(struct snd_tscm *tscm, char __user *buf, length = rounddown(remained, sizeof(*entries)); if (length == 0) break; + tail_pos = head_pos + length / sizeof(*entries); spin_unlock_irq(&tscm->lock); if (copy_to_user(pos, &entries[head_pos], length))
diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index a9cd03b..11d0ea8 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c
@@ -1669,6 +1669,21 @@ static void alc295_fixup_hp_mute_led_coefbit11(struct hda_codec *codec, } } +/* Override wrong pin to NID 0x1b (F.32 BIOS reports 0x18 via DMI OEM string) + * on HP pavilion 15-cs1xxx laptops + */ +static void alc295_fixup_hp_pavilion_mute_led_1b(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + struct alc_spec *spec = codec->spec; + + alc269_fixup_hp_mute_led(codec, fix, action); + + if (action == HDA_FIXUP_ACT_PRE_PROBE) + spec->mute_led_nid = 0x1b; +} + static void alc233_fixup_lenovo_coef_micmute_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -3390,6 +3405,19 @@ static void alc256_fixup_mic_no_presence_and_resume(struct hda_codec *codec, } } +static void alc256_fixup_xiaomi_pro15_resume(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + /* + * On the Xiaomi Mi Laptop Pro 15 (TM1905, SSID 1d72:1905) the ALC256 + * codec sets coefficient 0x10 bit 9 to 1 after S3 resume, silencing + * the internal speaker. Bluetooth and HDMI audio are unaffected. + * Clear the bit so the speaker keeps working across suspend cycles. 
+ */ + alc_update_coef_idx(codec, 0x10, 1<<9, 0); +} + static void alc256_decrease_headphone_amp_val(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -3857,6 +3885,7 @@ enum { ALC290_FIXUP_SUBWOOFER, ALC290_FIXUP_SUBWOOFER_HSJACK, ALC295_FIXUP_HP_MUTE_LED_COEFBIT11, + ALC295_FIXUP_HP_PAVILION_MUTE_LED_1B, ALC269_FIXUP_THINKPAD_ACPI, ALC269_FIXUP_LENOVO_XPAD_ACPI, ALC269_FIXUP_DMIC_THINKPAD_ACPI, @@ -4052,6 +4081,7 @@ enum { ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE, ALC233_FIXUP_NO_AUDIO_JACK, ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME, + ALC256_FIXUP_XIAOMI_PRO15_RESUME, ALC285_FIXUP_LEGION_Y9000X_SPEAKERS, ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE, ALC287_FIXUP_LEGION_16ACHG6, @@ -4123,6 +4153,7 @@ enum { ALC245_FIXUP_CS35L41_I2C_2_MUTE_LED, ALC236_FIXUP_HP_DMIC, ALC256_FIXUP_HONOR_MRB_XXX_M1020_AUDIO, + ALC245_FIXUP_HP_ENVY_X360_15_FH0XXX, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -5700,6 +5731,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc295_fixup_hp_mute_led_coefbit11, }, + [ALC295_FIXUP_HP_PAVILION_MUTE_LED_1B] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc295_fixup_hp_pavilion_mute_led_1b, + }, [ALC298_FIXUP_SAMSUNG_AMP] = { .type = HDA_FIXUP_FUNC, .v.func = alc298_fixup_samsung_amp, @@ -6240,6 +6275,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC }, + [ALC256_FIXUP_XIAOMI_PRO15_RESUME] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc256_fixup_xiaomi_pro15_resume, + }, [ALC287_FIXUP_LEGION_16ACHG6] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_legion_16achg6_speakers, @@ -6675,6 +6714,12 @@ static const struct hda_fixup alc269_fixups[] = { { 0x1b, 0x90170110 }, { } } + }, + [ALC245_FIXUP_HP_ENVY_X360_15_FH0XXX] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35l41_fixup_i2c_two, + .chained = true, + .chain_id = ALC245_FIXUP_HP_X360_MUTE_LEDS } }; @@ -6906,6 +6951,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = 
{ SND_PCI_QUIRK(0x103c, 0x8537, "HP ProBook 440 G6", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8548, "HP EliteBook x360 830 G6", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x854a, "HP EliteBook 830 G6", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x856a, "HP Pavilion 15-cs1xxx", ALC295_FIXUP_HP_PAVILION_MUTE_LED_1B), SND_PCI_QUIRK(0x103c, 0x85c6, "HP Pavilion x360 Convertible 14-dy1xxx", ALC295_FIXUP_HP_MUTE_LED_COEFBIT11), SND_PCI_QUIRK(0x103c, 0x85de, "HP Envy x360 13-ar0xxx", ALC285_FIXUP_HP_ENVY_X360), SND_PCI_QUIRK(0x103c, 0x8603, "HP Omen 17-cb0xxx", ALC285_FIXUP_HP_MUTE_LED), @@ -7097,7 +7143,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8be6, "HP Envy 16", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8be7, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8be8, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), - SND_PCI_QUIRK(0x103c, 0x8be9, "HP Envy 15", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8be9, "HP Envy x360 2-in-1 Laptop 15-fh0xxx", ALC245_FIXUP_HP_ENVY_X360_15_FH0XXX), SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c15, "HP Spectre x360 2-in-1 Laptop 14-eu0xxx", ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX), SND_PCI_QUIRK(0x103c, 0x8c16, "HP Spectre x360 2-in-1 Laptop 16-aa0xxx", ALC245_FIXUP_HP_SPECTRE_X360_16_AA0XXX), @@ -7629,6 +7675,10 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3801, "Lenovo Yoga9 14IAP7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), HDA_CODEC_QUIRK(0x17aa, 0x3802, "DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga Pro 9 14IRP8", ALC287_FIXUP_TAS2781_I2C), + /* Yoga Pro 9 16IMH9 shares PCI SSID 17aa:3811 with Legion S7 15IMH05; + * use codec SSID to distinguish them + */ + HDA_CODEC_QUIRK(0x17aa, 0x38d6, "Lenovo Yoga Pro 9 16IMH9", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3811, "Legion S7 15IMH05", 
ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7), @@ -7774,6 +7824,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1905, "Xiaomi Mi Laptop Pro 15", ALC256_FIXUP_XIAOMI_PRO15_RESUME), SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1e39, 0xca14, "MEDION NM14LNL", ALC233_FIXUP_MEDION_MTL_SPK),
diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index c5cf458..7a637d6 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -62,6 +62,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "OMEN Gaming Laptop 16-ap0xxx"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Dell G15 5525"), } @@ -483,6 +490,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { .driver_data = &acp6x_card, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 C7VE"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 C7VF"), } }, @@ -671,6 +685,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8E35"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "MECHREVO"), DMI_MATCH(DMI_BOARD_NAME, "MRID6"), }
diff --git a/sound/soc/codecs/ab8500-codec.c b/sound/soc/codecs/ab8500-codec.c index 8ab2e60..6e8ef9c 100644 --- a/sound/soc/codecs/ab8500-codec.c +++ b/sound/soc/codecs/ab8500-codec.c
@@ -60,19 +60,6 @@ low before proceeding with the configuration sequence */ #define AB8500_ANC_SM_DELAY 2000 -#define AB8500_FILTER_CONTROL(xname, xcount, xmin, xmax) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname), \ - .info = filter_control_info, \ - .get = filter_control_get, .put = filter_control_put, \ - .private_value = (unsigned long)&(struct filter_control) \ - {.count = xcount, .min = xmin, .max = xmax} } - -struct filter_control { - long min, max; - unsigned int count; - long value[128]; -}; - /* Sidetone states */ static const char * const enum_sid_state[] = { "Unconfigured", @@ -85,45 +72,13 @@ enum sid_state { SID_FIR_CONFIGURED = 2, }; -static const char * const enum_anc_state[] = { - "Unconfigured", - "Apply FIR and IIR", - "FIR and IIR are configured", - "Apply FIR", - "FIR is configured", - "Apply IIR", - "IIR is configured" -}; -enum anc_state { - ANC_UNCONFIGURED = 0, - ANC_APPLY_FIR_IIR = 1, - ANC_FIR_IIR_CONFIGURED = 2, - ANC_APPLY_FIR = 3, - ANC_FIR_CONFIGURED = 4, - ANC_APPLY_IIR = 5, - ANC_IIR_CONFIGURED = 6 -}; - -/* Analog microphones */ -enum amic_idx { - AMIC_IDX_1A, - AMIC_IDX_1B, - AMIC_IDX_2 -}; - /* Private data for AB8500 device-driver */ struct ab8500_codec_drvdata { struct regmap *regmap; struct mutex ctrl_lock; /* Sidetone */ - long *sid_fir_values; enum sid_state sid_status; - - /* ANC */ - long *anc_fir_values; - long *anc_iir_values; - enum anc_state anc_status; }; static inline const char *amic_micbias_str(enum amic_micbias micbias) @@ -1024,89 +979,6 @@ static const struct snd_soc_dapm_route ab8500_dapm_routes_mic2_vamicx[] = { {"MIC2 V-AMICx Enable", NULL, "V-AMIC2"}, }; -/* ANC FIR-coefficients configuration sequence */ -static void anc_fir(struct snd_soc_component *component, - unsigned int bnk, unsigned int par, unsigned int val) -{ - if (par == 0 && bnk == 0) - snd_soc_component_update_bits(component, AB8500_ANCCONF1, - BIT(AB8500_ANCCONF1_ANCFIRUPDATE), - BIT(AB8500_ANCCONF1_ANCFIRUPDATE)); - - 
snd_soc_component_write(component, AB8500_ANCCONF5, val >> 8 & 0xff); - snd_soc_component_write(component, AB8500_ANCCONF6, val & 0xff); - - if (par == AB8500_ANC_FIR_COEFFS - 1 && bnk == 1) - snd_soc_component_update_bits(component, AB8500_ANCCONF1, - BIT(AB8500_ANCCONF1_ANCFIRUPDATE), 0); -} - -/* ANC IIR-coefficients configuration sequence */ -static void anc_iir(struct snd_soc_component *component, unsigned int bnk, - unsigned int par, unsigned int val) -{ - if (par == 0) { - if (bnk == 0) { - snd_soc_component_update_bits(component, AB8500_ANCCONF1, - BIT(AB8500_ANCCONF1_ANCIIRINIT), - BIT(AB8500_ANCCONF1_ANCIIRINIT)); - usleep_range(AB8500_ANC_SM_DELAY, AB8500_ANC_SM_DELAY*2); - snd_soc_component_update_bits(component, AB8500_ANCCONF1, - BIT(AB8500_ANCCONF1_ANCIIRINIT), 0); - usleep_range(AB8500_ANC_SM_DELAY, AB8500_ANC_SM_DELAY*2); - } else { - snd_soc_component_update_bits(component, AB8500_ANCCONF1, - BIT(AB8500_ANCCONF1_ANCIIRUPDATE), - BIT(AB8500_ANCCONF1_ANCIIRUPDATE)); - } - } else if (par > 3) { - snd_soc_component_write(component, AB8500_ANCCONF7, 0); - snd_soc_component_write(component, AB8500_ANCCONF8, val >> 16 & 0xff); - } - - snd_soc_component_write(component, AB8500_ANCCONF7, val >> 8 & 0xff); - snd_soc_component_write(component, AB8500_ANCCONF8, val & 0xff); - - if (par == AB8500_ANC_IIR_COEFFS - 1 && bnk == 1) - snd_soc_component_update_bits(component, AB8500_ANCCONF1, - BIT(AB8500_ANCCONF1_ANCIIRUPDATE), 0); -} - -/* ANC IIR-/FIR-coefficients configuration sequence */ -static void anc_configure(struct snd_soc_component *component, - bool apply_fir, bool apply_iir) -{ - struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(component->dev); - unsigned int bnk, par, val; - - dev_dbg(component->dev, "%s: Enter.\n", __func__); - - if (apply_fir) - snd_soc_component_update_bits(component, AB8500_ANCCONF1, - BIT(AB8500_ANCCONF1_ENANC), 0); - - snd_soc_component_update_bits(component, AB8500_ANCCONF1, - BIT(AB8500_ANCCONF1_ENANC), 
BIT(AB8500_ANCCONF1_ENANC)); - - if (apply_fir) - for (bnk = 0; bnk < AB8500_NR_OF_ANC_COEFF_BANKS; bnk++) - for (par = 0; par < AB8500_ANC_FIR_COEFFS; par++) { - val = snd_soc_component_read(component, - drvdata->anc_fir_values[par]); - anc_fir(component, bnk, par, val); - } - - if (apply_iir) - for (bnk = 0; bnk < AB8500_NR_OF_ANC_COEFF_BANKS; bnk++) - for (par = 0; par < AB8500_ANC_IIR_COEFFS; par++) { - val = snd_soc_component_read(component, - drvdata->anc_iir_values[par]); - anc_iir(component, bnk, par, val); - } - - dev_dbg(component->dev, "%s: Exit.\n", __func__); -} - /* * Control-events */ @@ -1130,7 +1002,7 @@ static int sid_status_control_put(struct snd_kcontrol *kcontrol, { struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(component->dev); - unsigned int param, sidconf, val; + unsigned int param, sidconf; int status = 1; dev_dbg(component->dev, "%s: Enter\n", __func__); @@ -1159,9 +1031,8 @@ static int sid_status_control_put(struct snd_kcontrol *kcontrol, snd_soc_component_write(component, AB8500_SIDFIRADR, 0); for (param = 0; param < AB8500_SID_FIR_COEFFS; param++) { - val = snd_soc_component_read(component, drvdata->sid_fir_values[param]); - snd_soc_component_write(component, AB8500_SIDFIRCOEF1, val >> 8 & 0xff); - snd_soc_component_write(component, AB8500_SIDFIRCOEF2, val & 0xff); + snd_soc_component_write(component, AB8500_SIDFIRCOEF1, 0); + snd_soc_component_write(component, AB8500_SIDFIRCOEF2, 0); } snd_soc_component_update_bits(component, AB8500_SIDFIRADR, @@ -1180,136 +1051,6 @@ static int sid_status_control_put(struct snd_kcontrol *kcontrol, return status; } -static int anc_status_control_get(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); - struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(component->dev); - - mutex_lock(&drvdata->ctrl_lock); - 
ucontrol->value.enumerated.item[0] = drvdata->anc_status; - mutex_unlock(&drvdata->ctrl_lock); - - return 0; -} - -static int anc_status_control_put(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); - struct snd_soc_dapm_context *dapm = snd_soc_component_to_dapm(component); - struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(component->dev); - struct device *dev = component->dev; - bool apply_fir, apply_iir; - unsigned int req; - int status; - - dev_dbg(dev, "%s: Enter.\n", __func__); - - mutex_lock(&drvdata->ctrl_lock); - - req = ucontrol->value.enumerated.item[0]; - if (req >= ARRAY_SIZE(enum_anc_state)) { - status = -EINVAL; - goto cleanup; - } - if (req != ANC_APPLY_FIR_IIR && req != ANC_APPLY_FIR && - req != ANC_APPLY_IIR) { - dev_err(dev, "%s: ERROR: Unsupported status to set '%s'!\n", - __func__, enum_anc_state[req]); - status = -EINVAL; - goto cleanup; - } - apply_fir = req == ANC_APPLY_FIR || req == ANC_APPLY_FIR_IIR; - apply_iir = req == ANC_APPLY_IIR || req == ANC_APPLY_FIR_IIR; - - status = snd_soc_dapm_force_enable_pin(dapm, "ANC Configure Input"); - if (status < 0) { - dev_err(dev, - "%s: ERROR: Failed to enable power (status = %d)!\n", - __func__, status); - goto cleanup; - } - snd_soc_dapm_sync(dapm); - - anc_configure(component, apply_fir, apply_iir); - - if (apply_fir) { - if (drvdata->anc_status == ANC_IIR_CONFIGURED) - drvdata->anc_status = ANC_FIR_IIR_CONFIGURED; - else if (drvdata->anc_status != ANC_FIR_IIR_CONFIGURED) - drvdata->anc_status = ANC_FIR_CONFIGURED; - } - if (apply_iir) { - if (drvdata->anc_status == ANC_FIR_CONFIGURED) - drvdata->anc_status = ANC_FIR_IIR_CONFIGURED; - else if (drvdata->anc_status != ANC_FIR_IIR_CONFIGURED) - drvdata->anc_status = ANC_IIR_CONFIGURED; - } - - status = snd_soc_dapm_disable_pin(dapm, "ANC Configure Input"); - snd_soc_dapm_sync(dapm); - -cleanup: - mutex_unlock(&drvdata->ctrl_lock); - - if (status < 0) - 
dev_err(dev, "%s: Unable to configure ANC! (status = %d)\n", - __func__, status); - - dev_dbg(dev, "%s: Exit.\n", __func__); - - return (status < 0) ? status : 1; -} - -static int filter_control_info(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_info *uinfo) -{ - struct filter_control *fc = - (struct filter_control *)kcontrol->private_value; - - uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; - uinfo->count = fc->count; - uinfo->value.integer.min = fc->min; - uinfo->value.integer.max = fc->max; - - return 0; -} - -static int filter_control_get(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); - struct ab8500_codec_drvdata *drvdata = snd_soc_component_get_drvdata(component); - struct filter_control *fc = - (struct filter_control *)kcontrol->private_value; - unsigned int i; - - mutex_lock(&drvdata->ctrl_lock); - for (i = 0; i < fc->count; i++) - ucontrol->value.integer.value[i] = fc->value[i]; - mutex_unlock(&drvdata->ctrl_lock); - - return 0; -} - -static int filter_control_put(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); - struct ab8500_codec_drvdata *drvdata = snd_soc_component_get_drvdata(component); - struct filter_control *fc = - (struct filter_control *)kcontrol->private_value; - unsigned int i; - - mutex_lock(&drvdata->ctrl_lock); - for (i = 0; i < fc->count; i++) - fc->value[i] = ucontrol->value.integer.value[i]; - mutex_unlock(&drvdata->ctrl_lock); - - return 0; -} - /* * Controls - Non-DAPM ASoC */ @@ -1597,7 +1338,6 @@ static SOC_ENUM_SINGLE_DECL(soc_enum_bfifomast, static SOC_ENUM_SINGLE_EXT_DECL(soc_enum_sidstate, enum_sid_state); /* ANC */ -static SOC_ENUM_SINGLE_EXT_DECL(soc_enum_ancstate, enum_anc_state); static struct snd_kcontrol_new ab8500_ctrls[] = { /* Charge pump */ @@ -1873,8 +1613,6 @@ static struct snd_kcontrol_new ab8500_ctrls[] = { 
AB8500_FIFOCONF6_BFIFOSAMPLE_MAX, 0), /* ANC */ - SOC_ENUM_EXT("ANC Status", soc_enum_ancstate, - anc_status_control_get, anc_status_control_put), SOC_SINGLE_XR_SX("ANC Warp Delay Shift", AB8500_ANCCONF2, 1, AB8500_ANCCONF2_SHIFT, AB8500_ANCCONF2_MIN, AB8500_ANCCONF2_MAX, 0), @@ -1895,21 +1633,6 @@ static struct snd_kcontrol_new ab8500_ctrls[] = { AB8500_SIDFIRADR, AB8500_SIDFIRADR_FIRSIDSET, 0), }; -static struct snd_kcontrol_new ab8500_filter_controls[] = { - AB8500_FILTER_CONTROL("ANC FIR Coefficients", AB8500_ANC_FIR_COEFFS, - AB8500_ANC_FIR_COEFF_MIN, AB8500_ANC_FIR_COEFF_MAX), - AB8500_FILTER_CONTROL("ANC IIR Coefficients", AB8500_ANC_IIR_COEFFS, - AB8500_ANC_IIR_COEFF_MIN, AB8500_ANC_IIR_COEFF_MAX), - AB8500_FILTER_CONTROL("Sidetone FIR Coefficients", - AB8500_SID_FIR_COEFFS, AB8500_SID_FIR_COEFF_MIN, - AB8500_SID_FIR_COEFF_MAX) -}; -enum ab8500_filter { - AB8500_FILTER_ANC_FIR = 0, - AB8500_FILTER_ANC_IIR = 1, - AB8500_FILTER_SID_FIR = 2, -}; - /* * Extended interface for codec-driver */ @@ -2454,7 +2177,6 @@ static int ab8500_codec_probe(struct snd_soc_component *component) struct device_node *np = dev->of_node; struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(dev); struct ab8500_codec_platform_data codec_pdata; - struct filter_control *fc; int status; dev_dbg(dev, "%s: Enter.\n", __func__); @@ -2486,25 +2208,6 @@ static int ab8500_codec_probe(struct snd_soc_component *component) snd_soc_component_write(component, AB8500_SHORTCIRCONF, BIT(AB8500_SHORTCIRCONF_HSZCDDIS)); - /* Add filter controls */ - status = snd_soc_add_component_controls(component, ab8500_filter_controls, - ARRAY_SIZE(ab8500_filter_controls)); - if (status < 0) { - dev_err(dev, - "%s: failed to add ab8500 filter controls (%d).\n", - __func__, status); - return status; - } - fc = (struct filter_control *) - ab8500_filter_controls[AB8500_FILTER_ANC_FIR].private_value; - drvdata->anc_fir_values = (long *)fc->value; - fc = (struct filter_control *) - 
ab8500_filter_controls[AB8500_FILTER_ANC_IIR].private_value; - drvdata->anc_iir_values = (long *)fc->value; - fc = (struct filter_control *) - ab8500_filter_controls[AB8500_FILTER_SID_FIR].private_value; - drvdata->sid_fir_values = (long *)fc->value; - snd_soc_dapm_disable_pin(dapm, "ANC Configure Input"); mutex_init(&drvdata->ctrl_lock); @@ -2538,7 +2241,6 @@ static int ab8500_codec_driver_probe(struct platform_device *pdev) if (!drvdata) return -ENOMEM; drvdata->sid_status = SID_UNCONFIGURED; - drvdata->anc_status = ANC_UNCONFIGURED; dev_set_drvdata(&pdev->dev, drvdata); drvdata->regmap = devm_regmap_init(&pdev->dev, NULL, &pdev->dev,
diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 033e56d..795e276 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c
@@ -851,9 +851,11 @@ int cs35l56_runtime_resume_common(struct cs35l56_base *cs35l56_base, bool is_sou err: regcache_cache_only(cs35l56_base->regmap, true); - regmap_multi_reg_write_bypassed(cs35l56_base->regmap, - cs35l56_hibernate_seq, - ARRAY_SIZE(cs35l56_hibernate_seq)); + if (cs35l56_base->can_hibernate) { + regmap_multi_reg_write_bypassed(cs35l56_base->regmap, + cs35l56_hibernate_seq, + ARRAY_SIZE(cs35l56_hibernate_seq)); + } return ret; } @@ -1728,8 +1730,7 @@ int cs35l56_read_onchip_spkid(struct cs35l56_base *cs35l56_base) ret = regmap_read(regmap, CS35L56_GPIO_STATUS1, &val); if (ret) { - dev_err(cs35l56_base->dev, "GPIO%d status read failed: %d\n", - cs35l56_base->onchip_spkid_gpios[i] + 1, ret); + dev_err(cs35l56_base->dev, "GPIO status read failed: %d\n", ret); return ret; }
diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 378017f..849d70c 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c
@@ -1956,9 +1956,9 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56) goto err; } - ret = devm_snd_soc_register_component(cs35l56->base.dev, - &soc_component_dev_cs35l56, - cs35l56_dai, ARRAY_SIZE(cs35l56_dai)); + ret = snd_soc_register_component(cs35l56->base.dev, + &soc_component_dev_cs35l56, + cs35l56_dai, ARRAY_SIZE(cs35l56_dai)); if (ret < 0) { dev_err_probe(cs35l56->base.dev, ret, "Register codec failed\n"); goto err; @@ -1970,6 +1970,9 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56) gpiod_set_value_cansleep(cs35l56->base.reset_gpio, 0); regulator_bulk_disable(ARRAY_SIZE(cs35l56->supplies), cs35l56->supplies); + if (cs35l56->dsp_wq) + destroy_workqueue(cs35l56->dsp_wq); + return ret; } EXPORT_SYMBOL_NS_GPL(cs35l56_common_probe, "SND_SOC_CS35L56_CORE"); @@ -2057,6 +2060,8 @@ EXPORT_SYMBOL_NS_GPL(cs35l56_init, "SND_SOC_CS35L56_CORE"); void cs35l56_remove(struct cs35l56_private *cs35l56) { + snd_soc_unregister_component(cs35l56->base.dev); + cs35l56->base.init_done = false; /*
diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 6aab6d2..5521126 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c
@@ -684,18 +684,33 @@ static int tas2764_read_die_temp(struct tas2764_priv *tas2764, long *result) * As per datasheet, subtract 93 from raw value to get degrees * Celsius. hwmon wants millidegrees. * - * NOTE: The chip will initialise the TAS2764_TEMP register to - * 2.6 *C to avoid triggering temperature protection. Since the - * ADC is powered down during software shutdown, this value will - * persist until the chip is fully powered up (e.g. the PCM it's - * attached to is opened). The ADC will power down again when - * the chip is put back into software shutdown, with the last - * value sampled persisting in the ADC's register. + * NOTE: The TAS2764 datasheet mentions initialising TAS2764_TEMP + * such that the temperature is 2.6 *C, however the register + * is actually initialised to 0. The ADC is also powered down during + * software shutdown. The last sampled temperature will persist + * in the register while the amp is in this power state. */ + if (reg == 0) + return -ENODATA; + *result = (reg - 93) * 1000; return 0; } +static int tas2764_hwmon_is_fault(struct tas2764_priv *tas2764, long *result) +{ + int ret; + long temp; + + ret = tas2764_read_die_temp(tas2764, &temp); + if (ret == -ENODATA) { + *result = true; + return 0; + } + + return ret; +} + static umode_t tas2764_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, int channel) @@ -705,6 +720,7 @@ static umode_t tas2764_hwmon_is_visible(const void *data, switch (attr) { case hwmon_temp_input: + case hwmon_temp_fault: return 0444; default: break; @@ -724,6 +740,9 @@ static int tas2764_hwmon_read(struct device *dev, case hwmon_temp_input: ret = tas2764_read_die_temp(tas2764, val); break; + case hwmon_temp_fault: + ret = tas2764_hwmon_is_fault(tas2764, val); + break; default: ret = -EOPNOTSUPP; break; @@ -733,7 +752,7 @@ static int tas2764_hwmon_read(struct device *dev, } static const struct hwmon_channel_info *const tas2764_hwmon_info[] = { - HWMON_CHANNEL_INFO(temp, 
HWMON_T_INPUT), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_FAULT), NULL };
diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c index 50501bc..dbda9f3 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c
@@ -633,10 +633,27 @@ static int tas2770_read_die_temp(struct tas2770_priv *tas2770, long *result) * value read back from its registers will be the last value sampled * before entering software shutdown. */ + if (reading == 0) + return -ENODATA; + *result = (reading - (93 * 16)) * 1000 / 16; return 0; } +static int tas2770_hwmon_is_fault(struct tas2770_priv *tas2770, long *result) +{ + int ret; + long temp; + + ret = tas2770_read_die_temp(tas2770, &temp); + if (ret == -ENODATA) { + *result = true; + return 0; + } + + return ret; +} + static umode_t tas2770_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, int channel) @@ -646,6 +663,7 @@ static umode_t tas2770_hwmon_is_visible(const void *data, switch (attr) { case hwmon_temp_input: + case hwmon_temp_fault: return 0444; default: break; @@ -665,6 +683,9 @@ static int tas2770_hwmon_read(struct device *dev, case hwmon_temp_input: ret = tas2770_read_die_temp(tas2770, val); break; + case hwmon_temp_fault: + ret = tas2770_hwmon_is_fault(tas2770, val); + break; default: ret = -EOPNOTSUPP; break; @@ -674,7 +695,7 @@ static int tas2770_hwmon_read(struct device *dev, } static const struct hwmon_channel_info *const tas2770_hwmon_info[] = { - HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_FAULT), NULL };
diff --git a/sound/soc/codecs/wm_adsp_fw_find_test.c b/sound/soc/codecs/wm_adsp_fw_find_test.c index d0c7fb3..ae686dc 100644 --- a/sound/soc/codecs/wm_adsp_fw_find_test.c +++ b/sound/soc/codecs/wm_adsp_fw_find_test.c
@@ -45,6 +45,34 @@ struct wm_adsp_fw_find_test_params { /* Dummy struct firmware to return from wm_adsp_request_firmware_files */ static const struct firmware wm_adsp_find_test_dummy_firmware; +static void wm_adsp_fw_find_test_release_firmware_files_stub(struct wm_adsp_fw_files *fw) +{ + /* + * fw->wmfw.firmware and fw->coeff.firmware allocated by this KUnit + * test are dummies not allocated by the real request_firmware() call + * so they must not be passed to release_firmware(). + * This function replaces wm_adsp_release_firmware_files(). + */ + + if (!fw) + return; + + kfree(fw->wmfw.filename); + kfree(fw->coeff.filename); + + fw->wmfw.firmware = NULL; + fw->coeff.firmware = NULL; + fw->wmfw.filename = NULL; + fw->coeff.filename = NULL; +} + +static void wm_adsp_free_found_fw(struct kunit *test) +{ + struct wm_adsp_fw_find_test *priv = test->priv; + + wm_adsp_fw_find_test_release_firmware_files_stub(&priv->found_fw); +} + /* Simple lookup of a filename in a list of names */ static int wm_adsp_fw_find_test_firmware_request_simple_stub(const struct firmware **firmware, const char *filename, @@ -97,9 +125,14 @@ static void wm_adsp_fw_find_test_pick_file(struct kunit *test) kunit_activate_static_stub(test, wm_adsp_firmware_request, wm_adsp_fw_find_test_firmware_request_simple_stub); + kunit_activate_static_stub(test, + wm_adsp_release_firmware_files, + wm_adsp_fw_find_test_release_firmware_files_stub); ret = wm_adsp_request_firmware_files(dsp, &priv->found_fw); kunit_deactivate_static_stub(test, wm_adsp_firmware_request); + kunit_deactivate_static_stub(test, wm_adsp_release_firmware_files); + KUNIT_EXPECT_EQ_MSG(test, ret, (params->expect_wmfw || params->expect_bin) ? 
0 : -ENOENT, "%s\n", priv->searched_fw_files); @@ -173,10 +206,13 @@ static void wm_adsp_fw_find_test_search_order(struct kunit *test) kunit_activate_static_stub(test, wm_adsp_firmware_request, wm_adsp_fw_find_test_firmware_request_stub); + kunit_activate_static_stub(test, + wm_adsp_release_firmware_files, + wm_adsp_fw_find_test_release_firmware_files_stub); wm_adsp_request_firmware_files(dsp, &priv->found_fw); - kunit_deactivate_static_stub(test, wm_adsp_firmware_request); + kunit_deactivate_static_stub(test, wm_adsp_release_firmware_files); KUNIT_EXPECT_STREQ(test, priv->searched_fw_files, params->expected_searches); @@ -201,6 +237,7 @@ static void wm_adsp_fw_find_test_find_firmware_byindex(struct kunit *test) dsp->cs_dsp.name = "cs1234"; dsp->part = "dsp1"; + for (dsp->fw = 0;; dsp->fw++) { fw_name = wm_adsp_get_fwf_name_by_index(dsp->fw); if (!fw_name) @@ -209,14 +246,22 @@ static void wm_adsp_fw_find_test_find_firmware_byindex(struct kunit *test) kunit_activate_static_stub(test, wm_adsp_firmware_request, wm_adsp_fw_find_test_firmware_request_stub); + kunit_activate_static_stub(test, + wm_adsp_release_firmware_files, + wm_adsp_fw_find_test_release_firmware_files_stub); wm_adsp_request_firmware_files(dsp, &priv->found_fw); + kunit_deactivate_static_stub(test, wm_adsp_firmware_request); + kunit_deactivate_static_stub(test, wm_adsp_release_firmware_files); KUNIT_EXPECT_NOT_NULL_MSG(test, strstr(priv->searched_fw_files, fw_name), "fw#%d Did not find '%s' in '%s'\n", dsp->fw, fw_name, priv->searched_fw_files); + + wm_adsp_free_found_fw(test); + memset(priv->searched_fw_files, 0, sizeof(priv->searched_fw_files)); } } @@ -255,15 +300,7 @@ static int wm_adsp_fw_find_test_case_init(struct kunit *test) static void wm_adsp_fw_find_test_case_exit(struct kunit *test) { - struct wm_adsp_fw_find_test *priv = test->priv; - - /* - * priv->found_wmfw_firmware and priv->found_bin_firmware are - * dummies not allocated by the real request_firmware() call they - * must not be 
passed to release_firmware(). - */ - kfree(priv->found_fw.wmfw.filename); - kfree(priv->found_fw.coeff.filename); + wm_adsp_free_found_fw(test); } static void wm_adsp_fw_find_test_param_desc(const struct wm_adsp_fw_find_test_params *param,
diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c index ee16cf6..6677d3b 100644 --- a/sound/soc/fsl/fsl_xcvr.c +++ b/sound/soc/fsl/fsl_xcvr.c
@@ -228,10 +228,14 @@ static int fsl_xcvr_capds_put(struct snd_kcontrol *kcontrol, { struct snd_soc_dai *dai = snd_kcontrol_chip(kcontrol); struct fsl_xcvr *xcvr = snd_soc_dai_get_drvdata(dai); + int changed; - memcpy(xcvr->cap_ds, ucontrol->value.bytes.data, FSL_XCVR_CAPDS_SIZE); + changed = memcmp(xcvr->cap_ds, ucontrol->value.bytes.data, + sizeof(xcvr->cap_ds)) != 0; + memcpy(xcvr->cap_ds, ucontrol->value.bytes.data, + sizeof(xcvr->cap_ds)); - return 0; + return changed; } static struct snd_kcontrol_new fsl_xcvr_earc_capds_kctl = { @@ -1040,10 +1044,15 @@ static int fsl_xcvr_tx_cs_put(struct snd_kcontrol *kcontrol, { struct snd_soc_dai *dai = snd_kcontrol_chip(kcontrol); struct fsl_xcvr *xcvr = snd_soc_dai_get_drvdata(dai); + int changed; - memcpy(xcvr->tx_iec958.status, ucontrol->value.iec958.status, 24); + changed = memcmp(xcvr->tx_iec958.status, + ucontrol->value.iec958.status, + sizeof(xcvr->tx_iec958.status)) != 0; + memcpy(xcvr->tx_iec958.status, ucontrol->value.iec958.status, + sizeof(xcvr->tx_iec958.status)); - return 0; + return changed; } static struct snd_kcontrol_new fsl_xcvr_rx_ctls[] = {
diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index 1637cc3f..849ae87 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c
@@ -1608,6 +1608,7 @@ int asoc_sdw_get_dai_type(u32 type) switch (type) { case SDCA_FUNCTION_TYPE_SMART_AMP: case SDCA_FUNCTION_TYPE_SIMPLE_AMP: + case SDCA_FUNCTION_TYPE_COMPANION_AMP: return SOC_SDW_DAI_TYPE_AMP; case SDCA_FUNCTION_TYPE_SMART_MIC: case SDCA_FUNCTION_TYPE_SIMPLE_MIC:
diff --git a/sound/soc/spacemit/k1_i2s.c b/sound/soc/spacemit/k1_i2s.c index 43481f3..5420ca2 100644 --- a/sound/soc/spacemit/k1_i2s.c +++ b/sound/soc/spacemit/k1_i2s.c
@@ -148,10 +148,6 @@ static int spacemit_i2s_hw_params(struct snd_pcm_substream *substream, u32 val; int ret; - val = readl(i2s->base + SSCR); - if (val & SSCR_SSE) - return 0; - dma_data = &i2s->playback_dma_data; if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) @@ -199,6 +195,9 @@ static int spacemit_i2s_hw_params(struct snd_pcm_substream *substream, } val = readl(i2s->base + SSCR); + if (val & SSCR_SSE) + return 0; + val &= ~SSCR_DW_32BYTE; val |= data_width; writel(val, i2s->base + SSCR);
diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 75f82a9..2f5f620 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c
@@ -592,6 +592,7 @@ static __u32 reverse_bytes(__u32 b, int len) fallthrough; case 2: b = ((b & 0xaaaaaaaa) >> 1) | ((b & 0x55555555) << 1); + fallthrough; case 1: case 0: break;
diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 842ba5b..2e0c18e 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c
@@ -208,11 +208,18 @@ static bool uac_clock_source_is_valid_quirk(struct snd_usb_audio *chip, } /* - * MOTU MicroBook IIc - * Sample rate changes takes more than 2 seconds for this device. Clock - * validity request returns false during that period. + * Quirk for older MOTU AVB / hybrid interfaces + * + * These devices take more than 2 seconds to switch sample rate or + * clock source. During this period the clock validity request + * returns false, causing ALSA to fail prematurely. + * + * Affected models (all use vendor 0x07fd): + * - MicroBook IIc → 0x0004 + * - 1248, 624, 8A, UltraLite AVB, 8M, 16A, ... → 0x0005 */ - if (chip->usb_id == USB_ID(0x07fd, 0x0004)) { + if (chip->usb_id == USB_ID(0x07fd, 0x0004) || /* MicroBook IIc */ + chip->usb_id == USB_ID(0x07fd, 0x0005)) { /* 1248 / 624 / 8A / UltraLite AVB / ... */ count = 0; while ((!ret) && (count < 50)) {
diff --git a/sound/usb/midi2.c b/sound/usb/midi2.c index 3546ba9..2785600 100644 --- a/sound/usb/midi2.c +++ b/sound/usb/midi2.c
@@ -227,7 +227,7 @@ static void kill_midi_urbs(struct snd_usb_midi2_endpoint *ep, bool suspending) if (!ep) return; if (suspending) - ep->suspended = ep->running; + atomic_set(&ep->suspended, atomic_read(&ep->running)); atomic_set(&ep->running, 0); for (i = 0; i < ep->num_urbs; i++) { if (!ep->urbs[i].urb) @@ -1188,10 +1188,11 @@ void snd_usb_midi_v2_suspend_all(struct snd_usb_audio *chip) static void resume_midi2_endpoint(struct snd_usb_midi2_endpoint *ep) { - ep->running = ep->suspended; - if (ep->direction == STR_IN) + atomic_set(&ep->running, atomic_read(&ep->suspended)); + atomic_set(&ep->suspended, 0); + + if (ep->direction == STR_IN || atomic_read(&ep->running)) submit_io_urbs(ep); - /* FIXME: does it all? */ } void snd_usb_midi_v2_resume_all(struct snd_usb_audio *chip)
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 0b4ecc2..17983d9 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c
@@ -2277,6 +2277,9 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x05e1, 0x0480, /* Hauppauge Woodbury */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x05fc, 0x0231, /* JBL Pebbles */ + QUIRK_FLAG_MIXER_PLAYBACK_LINEAR_VOL | QUIRK_FLAG_MIXER_CAPTURE_LINEAR_VOL | + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x0624, 0x3d3f, /* AB13X USB Audio */ QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), DEVICE_FLG(0x0644, 0x8043, /* TEAC UD-501/UD-501V2/UD-503/NT-503 */ @@ -2366,6 +2369,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x152a, 0x880a, /* NeuralDSP Quad Cortex */ 0), /* Doesn't have the vendor quirk which would otherwise apply */ + DEVICE_FLG(0x1532, 0x055e, /* Razer Nommo V2 X */ + QUIRK_FLAG_MIXER_PLAYBACK_MIN_MUTE), DEVICE_FLG(0x154e, 0x1002, /* Denon DCD-1500RE */ QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY), DEVICE_FLG(0x154e, 0x1003, /* Denon DA-300USB */ @@ -2458,6 +2463,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2b73, 0x0047, /* AlphaTheta EUPHONIA */ + QUIRK_FLAG_PLAYBACK_FIRST | QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x2d95, 0x8011, /* VIVO USB-C HEADSET */ QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x2d95, 0x8021, /* VIVO USB-C-XE710 HEADSET */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 6673601..eff2964 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h
@@ -793,9 +793,10 @@ #define MSR_AMD64_LBR_SELECT 0xc000010e /* Zen4 */ -#define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG 0xc001102e #define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4 #define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 +#define MSR_ZEN2_BP_CFG_BUG_FIX_BIT 33 /* Fam 19h MSRs */ #define MSR_F19H_UMC_PERF_CTL 0xc0010800
diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h index c53cde4..4574982 100644 --- a/tools/include/uapi/linux/stddef.h +++ b/tools/include/uapi/linux/stddef.h
@@ -3,7 +3,6 @@ #define _LINUX_STDDEF_H - #ifndef __always_inline #define __always_inline __inline__ #endif @@ -36,6 +35,11 @@ struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \ } ATTRS +#ifdef __cplusplus +/* sizeof(struct{}) is 1 in C++, not 0, can't use C version of the macro. */ +#define __DECLARE_FLEX_ARRAY(T, member) \ + T member[0] +#else /** * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union * @@ -52,3 +56,23 @@ TYPE NAME[]; \ } #endif + +#ifndef __counted_by +#define __counted_by(m) +#endif + +#ifndef __counted_by_le +#define __counted_by_le(m) +#endif + +#ifndef __counted_by_be +#define __counted_by_be(m) +#endif + +#ifndef __counted_by_ptr +#define __counted_by_ptr(m) +#endif + +#define __kernel_nonstring + +#endif /* _LINUX_STDDEF_H */
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index f829b6f..fe30181 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -112,6 +112,10 @@ static void test_cubic(void) ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called"); + ASSERT_TRUE(cubic_skel->bss->nodelay_init_reject, "init reject nodelay option"); + ASSERT_TRUE(cubic_skel->bss->nodelay_cwnd_event_tx_start_reject, + "cwnd_event_tx_start reject nodelay option"); + bpf_link__destroy(link); bpf_cubic__destroy(cubic_skel); }
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index 5363743..3a41c51 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -190,7 +190,7 @@ static int getsetsockopt(void) fd = socket(AF_NETLINK, SOCK_RAW, 0); if (fd < 0) { log_err("Failed to create AF_NETLINK socket"); - return -1; + goto err; } buf.u32 = 1; @@ -211,6 +211,21 @@ static int getsetsockopt(void) } ASSERT_EQ(optlen, 8, "Unexpected NETLINK_LIST_MEMBERSHIPS value"); + /* Trick bpf_tcp_sock() with IPPROTO_TCP */ + close(fd); + fd = socket(AF_INET, SOCK_RAW, IPPROTO_TCP); + if (!ASSERT_OK_FD(fd, "socket")) + goto err; + + /* The BPF prog intercepts this before the kernel sees it, any + * optlen works. Go with 4 bytes for simplicity. + */ + buf.u32 = 1; + optlen = sizeof(buf.u32); + err = setsockopt(fd, SOL_TCP, TCP_SAVED_SYN, &buf, optlen); + if (!ASSERT_ERR(err, "setsockopt(TCP_SAVED_SYN)")) + goto err; + free(big_buf); close(fd); return 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 56685fc..80e6315 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -507,6 +507,10 @@ static void misc(void) ASSERT_EQ(misc_skel->bss->nr_hwtstamp, 0, "nr_hwtstamp"); + ASSERT_TRUE(misc_skel->bss->nodelay_est_ok, "nodelay_est_ok"); + ASSERT_TRUE(misc_skel->bss->nodelay_hdr_len_reject, "nodelay_hdr_len_reject"); + ASSERT_TRUE(misc_skel->bss->nodelay_write_hdr_reject, "nodelay_write_hdr_reject"); + check_linum: ASSERT_FALSE(check_error_linum(&sk_fds), "check_error_linum"); sk_fds_close(&sk_fds);
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index ce18a4d..ebd5a1e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -16,6 +16,7 @@ #include "bpf_tracing_net.h" #include <bpf/bpf_tracing.h> +#include <errno.h> char _license[] SEC("license") = "GPL"; @@ -170,10 +171,18 @@ static void bictcp_hystart_reset(struct sock *sk) ca->sample_cnt = 0; } +bool nodelay_init_reject = false; +bool nodelay_cwnd_event_tx_start_reject = false; + SEC("struct_ops") void BPF_PROG(bpf_cubic_init, struct sock *sk) { struct bpf_bictcp *ca = inet_csk_ca(sk); + int true_val = 1, ret; + + ret = bpf_setsockopt(sk, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val)); + if (ret == -EOPNOTSUPP) + nodelay_init_reject = true; bictcp_reset(ca); @@ -189,8 +198,13 @@ void BPF_PROG(bpf_cubic_cwnd_event_tx_start, struct sock *sk) { struct bpf_bictcp *ca = inet_csk_ca(sk); __u32 now = tcp_jiffies32; + int true_val = 1, ret; __s32 delta; + ret = bpf_setsockopt(sk, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val)); + if (ret == -EOPNOTSUPP) + nodelay_cwnd_event_tx_start_reject = true; + delta = now - tcp_sk(sk)->lsndtime; /* We were application limited (idle) for a while.
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index cb990a7..5e0b27e 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -149,6 +149,20 @@ int _setsockopt(struct bpf_sockopt *ctx) if (sk && sk->family == AF_NETLINK) goto out; + if (sk && sk->family == AF_INET && sk->type == SOCK_RAW) { + struct bpf_tcp_sock *tp = bpf_tcp_sock(sk); + + if (tp) { + char saved_syn[60]; + + bpf_getsockopt(sk, SOL_TCP, TCP_SAVED_SYN, + &saved_syn, sizeof(saved_syn)); + goto consumed; + } + + goto out; + } + /* Make sure bpf_get_netns_cookie is callable. */ if (bpf_get_netns_cookie(NULL) == 0) @@ -224,6 +238,8 @@ int _setsockopt(struct bpf_sockopt *ctx) return 0; /* couldn't get sk storage */ storage->val = optval[0]; + +consumed: ctx->optlen = -1; /* BPF has consumed this option, don't call kernel * setsockopt handler. */
diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c index d487153..ed5a0011 100644 --- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
@@ -29,6 +29,10 @@ unsigned int nr_syn = 0; unsigned int nr_fin = 0; unsigned int nr_hwtstamp = 0; +bool nodelay_est_ok = false; +bool nodelay_hdr_len_reject = false; +bool nodelay_write_hdr_reject = false; + /* Check the header received from the active side */ static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn) { @@ -300,7 +304,7 @@ static int handle_passive_estab(struct bpf_sock_ops *skops) SEC("sockops") int misc_estab(struct bpf_sock_ops *skops) { - int true_val = 1; + int true_val = 1, false_val = 0, ret; switch (skops->op) { case BPF_SOCK_OPS_TCP_LISTEN_CB: @@ -316,10 +320,19 @@ int misc_estab(struct bpf_sock_ops *skops) case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: return handle_parse_hdr(skops); case BPF_SOCK_OPS_HDR_OPT_LEN_CB: + ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val)); + if (ret == -EOPNOTSUPP) + nodelay_hdr_len_reject = true; return handle_hdr_opt_len(skops); case BPF_SOCK_OPS_WRITE_HDR_OPT_CB: + ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val)); + if (ret == -EOPNOTSUPP) + nodelay_write_hdr_reject = true; return handle_write_hdr_opt(skops); case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &false_val, sizeof(false_val)); + if (!ret) + nodelay_est_ok = true; return handle_passive_estab(skops); }
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 85ca4d1..82809d5 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -31,6 +31,7 @@ hw_stats_l3.sh \ hw_stats_l3_gre.sh \ iou-zcrx.py \ + ipsec_vxlan.py \ irq.py \ loopback.sh \ nic_timestamp.py \
diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config index dd50cb8..8c132ac 100644 --- a/tools/testing/selftests/drivers/net/hw/config +++ b/tools/testing/selftests/drivers/net/hw/config
@@ -3,6 +3,10 @@ CONFIG_FAULT_INJECTION=y CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FUNCTION_ERROR_INJECTION=y +CONFIG_INET6_ESP=y +CONFIG_INET6_ESP_OFFLOAD=y +CONFIG_INET_ESP=y +CONFIG_INET_ESP_OFFLOAD=y CONFIG_IO_URING=y CONFIG_IPV6=y CONFIG_IPV6_GRE=y @@ -14,3 +18,4 @@ CONFIG_NET_SCH_INGRESS=y CONFIG_UDMABUF=y CONFIG_VXLAN=y +CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py b/tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py new file mode 100755 index 0000000..0740a4d --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py
@@ -0,0 +1,204 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +"""Traffic test for VXLAN + IPsec crypto-offload.""" + +import os + +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge +from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx +from lib.py import CmdExitFailure, NetDrvEpEnv, cmd, defer, ethtool, ip +from lib.py import Iperf3Runner + +# Inner tunnel addresses - TEST-NET-2 (RFC 5737) / doc prefix (RFC 3849) +INNER_V4_LOCAL = "198.51.100.1" +INNER_V4_REMOTE = "198.51.100.2" +INNER_V6_LOCAL = "2001:db8:100::1" +INNER_V6_REMOTE = "2001:db8:100::2" + +# ESP parameters +SPI_OUT = "0x1000" +SPI_IN = "0x1001" +# 128-bit key + 32-bit salt = 20 bytes hex, 128-bit ICV +ESP_AEAD = "aead 'rfc4106(gcm(aes))' 0x" + "01" * 20 + " 128" + + +def xfrm(args, host=None): + """Runs 'ip xfrm' via shell to preserve parentheses in algo names.""" + cmd(f"ip xfrm {args}", shell=True, host=host) + + +def check_xfrm_offload_support(): + """Skips if iproute2 lacks xfrm offload support.""" + out = cmd("ip xfrm state help", fail=False) + if "offload" not in out.stdout + out.stderr: + raise KsftSkipEx("iproute2 too old, missing xfrm offload") + + +def check_esp_hw_offload(cfg): + """Skips if device lacks esp-hw-offload support.""" + check_xfrm_offload_support() + try: + feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + except (CmdExitFailure, IndexError) as e: + raise KsftSkipEx(f"can't query features: {e}") from e + if not feat.get("esp-hw-offload", {}).get("active"): + raise KsftSkipEx("Device does not support esp-hw-offload") + + +def get_tx_drops(cfg): + """Returns TX dropped counter from the physical device.""" + stats = ip("-s -s link show dev " + cfg.ifname, json=True)[0] + return stats["stats64"]["tx"]["dropped"] + + +def setup_vxlan_ipsec(cfg, outer_ipver, inner_ipver): + """Sets up VXLAN tunnel with IPsec transport-mode crypto-offload.""" + vxlan_name = f"vx{os.getpid()}" + local_addr = cfg.addr_v[outer_ipver] + remote_addr = 
cfg.remote_addr_v[outer_ipver] + + if inner_ipver == "4": + inner_local = f"{INNER_V4_LOCAL}/24" + inner_remote = f"{INNER_V4_REMOTE}/24" + addr_extra = "" + else: + inner_local = f"{INNER_V6_LOCAL}/64" + inner_remote = f"{INNER_V6_REMOTE}/64" + addr_extra = " nodad" + + if outer_ipver == "6": + vxlan_opts = "udp6zerocsumtx udp6zerocsumrx" + else: + vxlan_opts = "noudpcsum" + + # VXLAN tunnel - local side + ip(f"link add {vxlan_name} type vxlan id 100 dstport 4789 {vxlan_opts} " + f"local {local_addr} remote {remote_addr} dev {cfg.ifname}") + defer(ip, f"link del {vxlan_name}") + ip(f"addr add {inner_local} dev {vxlan_name}{addr_extra}") + ip(f"link set {vxlan_name} up") + + # VXLAN tunnel - remote side + ip(f"link add {vxlan_name} type vxlan id 100 dstport 4789 {vxlan_opts} " + f"local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}", + host=cfg.remote) + defer(ip, f"link del {vxlan_name}", host=cfg.remote) + ip(f"addr add {inner_remote} dev {vxlan_name}{addr_extra}", + host=cfg.remote) + ip(f"link set {vxlan_name} up", host=cfg.remote) + + # xfrm state - local outbound SA + xfrm(f"state add src {local_addr} dst {remote_addr} " + f"proto esp spi {SPI_OUT} " + f"{ESP_AEAD} " + f"mode transport offload crypto dev {cfg.ifname} dir out") + defer(xfrm, f"state del src {local_addr} dst {remote_addr} " + f"proto esp spi {SPI_OUT}") + + # xfrm state - local inbound SA + xfrm(f"state add src {remote_addr} dst {local_addr} " + f"proto esp spi {SPI_IN} " + f"{ESP_AEAD} " + f"mode transport offload crypto dev {cfg.ifname} dir in") + defer(xfrm, f"state del src {remote_addr} dst {local_addr} " + f"proto esp spi {SPI_IN}") + + # xfrm state - remote outbound SA (mirror, software crypto) + xfrm(f"state add src {remote_addr} dst {local_addr} " + f"proto esp spi {SPI_IN} " + f"{ESP_AEAD} " + f"mode transport", + host=cfg.remote) + defer(xfrm, f"state del src {remote_addr} dst {local_addr} " + f"proto esp spi {SPI_IN}", host=cfg.remote) + + # xfrm state - remote inbound SA 
(mirror, software crypto) + xfrm(f"state add src {local_addr} dst {remote_addr} " + f"proto esp spi {SPI_OUT} " + f"{ESP_AEAD} " + f"mode transport", + host=cfg.remote) + defer(xfrm, f"state del src {local_addr} dst {remote_addr} " + f"proto esp spi {SPI_OUT}", host=cfg.remote) + + # xfrm policy - local out + xfrm(f"policy add src {local_addr} dst {remote_addr} " + f"proto udp dport 4789 dir out " + f"tmpl src {local_addr} dst {remote_addr} proto esp mode transport") + defer(xfrm, f"policy del src {local_addr} dst {remote_addr} " + f"proto udp dport 4789 dir out") + + # xfrm policy - local in + xfrm(f"policy add src {remote_addr} dst {local_addr} " + f"proto udp dport 4789 dir in " + f"tmpl src {remote_addr} dst {local_addr} proto esp mode transport") + defer(xfrm, f"policy del src {remote_addr} dst {local_addr} " + f"proto udp dport 4789 dir in") + + # xfrm policy - remote out + xfrm(f"policy add src {remote_addr} dst {local_addr} " + f"proto udp dport 4789 dir out " + f"tmpl src {remote_addr} dst {local_addr} proto esp mode transport", + host=cfg.remote) + defer(xfrm, f"policy del src {remote_addr} dst {local_addr} " + f"proto udp dport 4789 dir out", host=cfg.remote) + + # xfrm policy - remote in + xfrm(f"policy add src {local_addr} dst {remote_addr} " + f"proto udp dport 4789 dir in " + f"tmpl src {local_addr} dst {remote_addr} proto esp mode transport", + host=cfg.remote) + defer(xfrm, f"policy del src {local_addr} dst {remote_addr} " + f"proto udp dport 4789 dir in", host=cfg.remote) + + +def _vxlan_ipsec_variants(): + """Generates outer/inner IP version variants.""" + for outer in ["4", "6"]: + for inner in ["4", "6"]: + yield KsftNamedVariant(f"outer_v{outer}_inner_v{inner}", outer, inner) + + +@ksft_variants(_vxlan_ipsec_variants()) +def test_vxlan_ipsec_crypto_offload(cfg, outer_ipver, inner_ipver): + """Tests VXLAN+IPsec crypto-offload has no TX drops.""" + cfg.require_ipver(outer_ipver) + check_esp_hw_offload(cfg) + + setup_vxlan_ipsec(cfg, outer_ipver, 
inner_ipver) + + if inner_ipver == "4": + inner_local = INNER_V4_LOCAL + inner_remote = INNER_V4_REMOTE + ping = "ping" + else: + inner_local = INNER_V6_LOCAL + inner_remote = INNER_V6_REMOTE + ping = "ping -6" + + cmd(f"{ping} -c 1 -W 2 {inner_remote}") + + drops_before = get_tx_drops(cfg) + + runner = Iperf3Runner(cfg, server_ip=inner_local, + client_ip=inner_remote) + bw_gbps = runner.measure_bandwidth(reverse=True) + + cfg.wait_hw_stats_settle() + drops_after = get_tx_drops(cfg) + + ksft_eq(drops_after - drops_before, 0, + comment="TX drops during VXLAN+IPsec") + ksft_ge(bw_gbps, 0.1, + comment="Minimum 100Mbps over VXLAN+IPsec") + + +def main(): + """Runs VXLAN+IPsec crypto-offload GSO selftest.""" + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + ksft_run([test_vxlan_ipsec_crypto_offload], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main()
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py index f181fa2..e24660e 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -48,7 +48,10 @@ Starts the iperf3 client with the configured options. """ cmdline = self._build_client(streams, duration, reverse) - return cmd(cmdline, background=background, host=self.env.remote) + kwargs = {"background": background, "host": self.env.remote} + if not background: + kwargs["timeout"] = duration + 5 + return cmd(cmdline, **kwargs) def measure_bandwidth(self, reverse=False): """
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 6d809f0..60838b6 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h
@@ -450,7 +450,7 @@ static inline __noreturn __printf(1, 2) void ksft_exit_skip(const char *msg, ... */ if (ksft_plan || ksft_test_num()) { ksft_cnt.ksft_xskip++; - printf("ok %u # SKIP ", 1 + ksft_test_num()); + printf("ok %u # SKIP ", ksft_test_num()); } else { printf("1..0 # SKIP "); }
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 75fb016..cfdce9c 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h
@@ -76,7 +76,7 @@ static inline void __kselftest_memset_safe(void *s, int c, size_t n) memset(s, c, n); } -#define KSELFTEST_PRIO_TEST_F 20000 +#define KSELFTEST_PRIO_TEST 20000 #define KSELFTEST_PRIO_XFAIL 20001 #define TEST_TIMEOUT_DEFAULT 30 @@ -194,7 +194,7 @@ static inline void __kselftest_memset_safe(void *s, int c, size_t n) .fixture = &_fixture_global, \ .termsig = _signal, \ .timeout = TEST_TIMEOUT_DEFAULT, }; \ - static void __attribute__((constructor)) _register_##test_name(void) \ + static void __attribute__((constructor(KSELFTEST_PRIO_TEST))) _register_##test_name(void) \ { \ __register_test(&_##test_name##_object); \ } \ @@ -238,7 +238,7 @@ static inline void __kselftest_memset_safe(void *s, int c, size_t n) FIXTURE_VARIANT(fixture_name); \ static struct __fixture_metadata _##fixture_name##_fixture_object = \ { .name = #fixture_name, }; \ - static void __attribute__((constructor)) \ + static void __attribute__((constructor(KSELFTEST_PRIO_TEST))) \ _register_##fixture_name##_data(void) \ { \ __register_fixture(&_##fixture_name##_fixture_object); \ @@ -364,7 +364,7 @@ static inline void __kselftest_memset_safe(void *s, int c, size_t n) _##fixture_name##_##variant_name##_object = \ { .name = #variant_name, \ .data = &_##fixture_name##_##variant_name##_variant}; \ - static void __attribute__((constructor)) \ + static void __attribute__((constructor(KSELFTEST_PRIO_TEST))) \ _register_##fixture_name##_##variant_name(void) \ { \ __register_fixture_variant(&_##fixture_name##_fixture_object, \ @@ -468,7 +468,7 @@ static inline void __kselftest_memset_safe(void *s, int c, size_t n) fixture_name##_teardown(_metadata, self, variant); \ } \ static struct __test_metadata *_##fixture_name##_##test_name##_object; \ - static void __attribute__((constructor(KSELFTEST_PRIO_TEST_F))) \ + static void __attribute__((constructor(KSELFTEST_PRIO_TEST))) \ _register_##fixture_name##_##test_name(void) \ { \ struct __test_metadata *object = mmap(NULL, sizeof(*object), \ @@ 
-1323,7 +1323,7 @@ static int test_harness_run(int argc, char **argv) return KSFT_FAIL; } -static void __attribute__((constructor)) __constructor_order_first(void) +static void __attribute__((constructor(KSELFTEST_PRIO_TEST))) __constructor_order_first(void) { __constructor_order_forward = true; }
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index a275ed5..f3da38c 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile
@@ -96,6 +96,7 @@ srv6_hl2encap_red_l2vpn_test.sh \ srv6_iptunnel_cache.sh \ stress_reuseport_listen.sh \ + tcp_ecmp_failover.sh \ tcp_fastopen_backup_key.sh \ test_bpf.sh \ test_bridge_backup_port.sh \
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 5fea7e7..989a597 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -474,20 +474,24 @@ wait_local_port_listen "${@}" "tcp" } +# $1: error file, $2: cmd, $3: expected msg, [$4: expected error] mptcp_lib_check_output() { local err="${1}" local cmd="${2}" local expected="${3}" + local exp_error="${4:-0}" local cmd_ret=0 local out - if ! out=$(${cmd} 2>"${err}"); then - cmd_ret=${?} - fi + out=$(${cmd} 2>"${err}") || cmd_ret=1 - if [ ${cmd_ret} -ne 0 ]; then - mptcp_lib_pr_fail "command execution '${cmd}' stderr" - cat "${err}" + if [ "${cmd_ret}" != "${exp_error}" ]; then + mptcp_lib_pr_fail "unexpected returned code for '${cmd}', info:" + if [ "${exp_error}" = 0 ]; then + cat "${err}" + else + echo "${out}" + fi return 2 elif [ "${out}" = "${expected}" ]; then return 0
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 123d9d7..04594df 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -122,10 +122,12 @@ local cmd="$1" local expected="$2" local msg="$3" + local exp_error="$4" local rc=0 mptcp_lib_print_title "$msg" - mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} + mptcp_lib_check_output "${err}" "${cmd}" "${expected}" "${exp_error}" || + rc=${?} if [ ${rc} -eq 2 ]; then mptcp_lib_result_fail "${msg} # error ${rc}" ret=${KSFT_FAIL} @@ -158,13 +160,13 @@ "3,10.0.1.3,signal backup")" "dump addrs" del_endpoint 2 -check "get_endpoint 2" "" "simple del addr" +check "get_endpoint 2" "" "simple del addr" 1 check "show_endpoints" \ "$(format_endpoints "1,10.0.1.1" \ "3,10.0.1.3,signal backup")" "dump addrs after del" add_endpoint 10.0.1.3 2>/dev/null -check "get_endpoint 4" "" "duplicate addr" +check "get_endpoint 4" "" "duplicate addr" 1 add_endpoint 10.0.1.4 flags signal check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment" @@ -173,7 +175,7 @@ add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1 done check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit" -check "get_endpoint 10" "" "above hard addr limit" +check "get_endpoint 10" "" "above hard addr limit" 1 del_endpoint 9 for i in $(seq 10 255); do @@ -192,9 +194,13 @@ flush_endpoint check "show_endpoints" "" "flush addrs" -add_endpoint 10.0.1.1 flags unknown -check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags" -flush_endpoint +# "unknown" flag is only supported by pm_nl_ctl +if ! mptcp_lib_is_ip_mptcp; then + add_endpoint 10.0.1.1 flags unknown + check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" \ + "ignore unknown flags" + flush_endpoint +fi set_limits 9 1 2>/dev/null check "get_limits" "${default_limits}" "rcv addrs above hard limit"
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index b327d30..3cdd953 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -26,6 +26,7 @@ netlink_checks ovsnl: validate netlink attrs and settings upcall_interfaces ovs: test the upcall interfaces tunnel_metadata ovs: test extraction of tunnel metadata + tunnel_refcount ovs: test tunnel vport reference cleanup drop_reason drop: test drop reasons are emitted psample psample: Sampling packets with psample" @@ -830,6 +831,42 @@ return 0 } +test_tunnel_refcount() { + sbxname="test_tunnel_refcount" + sbx_add "${sbxname}" || return 1 + + ovs_sbx "${sbxname}" ip netns add trefns || return 1 + on_exit "ovs_sbx ${sbxname} ip netns del trefns" + + for tun_type in gre vxlan geneve; do + info "testing ${tun_type} tunnel vport refcount" + + ovs_sbx "${sbxname}" ip netns exec trefns \ + python3 $ovs_base/ovs-dpctl.py \ + add-dp dp-${tun_type} || return 1 + + ovs_sbx "${sbxname}" ip netns exec trefns \ + python3 $ovs_base/ovs-dpctl.py \ + add-if --no-lwt -t ${tun_type} \ + dp-${tun_type} ovs-${tun_type}0 || return 1 + + ovs_wait ip -netns trefns link show \ + ovs-${tun_type}0 >/dev/null 2>&1 || return 1 + + info "deleting dp - may hang if reference counting is broken" + ovs_sbx "${sbxname}" ip netns exec trefns \ + python3 $ovs_base/ovs-dpctl.py \ + del-dp dp-${tun_type} & + + dev_removed() { + ! ip -netns trefns link show "$1" >/dev/null 2>&1 + } + ovs_wait dev_removed dp-${tun_type} || return 1 + ovs_wait dev_removed ovs-${tun_type}0 || return 1 + done + return 0 +} + run_test() { ( tname="$1"
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 848f61f..bbe35e2 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -11,7 +11,6 @@ import math import multiprocessing import re -import socket import struct import sys import time @@ -2069,7 +2068,7 @@ elif vport_type == "internal": return OvsVport.OVS_VPORT_TYPE_INTERNAL elif vport_type == "gre": - return OvsVport.OVS_VPORT_TYPE_INTERNAL + return OvsVport.OVS_VPORT_TYPE_GRE elif vport_type == "vxlan": return OvsVport.OVS_VPORT_TYPE_VXLAN elif vport_type == "geneve": @@ -2121,6 +2120,7 @@ ) TUNNEL_DEFAULTS = [("geneve", 6081), + ("gre", 0), ("vxlan", 4789)] for tnl in TUNNEL_DEFAULTS: @@ -2129,9 +2129,13 @@ dport = tnl[1] if not lwt: + if tnl[0] == "gre": + # GRE tunnels have no options. + break + vportopt = OvsVport.ovs_vport_msg.vportopts() vportopt["attrs"].append( - ["OVS_TUNNEL_ATTR_DST_PORT", socket.htons(dport)] + ["OVS_TUNNEL_ATTR_DST_PORT", dport] ) msg["attrs"].append( ["OVS_VPORT_ATTR_OPTIONS", vportopt] @@ -2145,6 +2149,9 @@ geneve_port=dport, geneve_collect_metadata=True, geneve_udp_zero_csum6_rx=1) + elif tnl[0] == "gre": + ipr.link("add", ifname=vport_ifname, kind="gretap", + gre_collect_metadata=True) elif tnl[0] == "vxlan": ipr.link("add", ifname=vport_ifname, kind=tnl[0], vxlan_learning=0, vxlan_collect_metadata=1, @@ -2563,7 +2570,7 @@ if vpo: dpo = vpo.get_attr("OVS_TUNNEL_ATTR_DST_PORT") if dpo: - opts += " tnl-dport:%s" % socket.ntohs(dpo) + opts += " tnl-dport:%s" % dpo print( " port %d: %s (%s%s)" % ( @@ -2632,7 +2639,7 @@ "--ptype", type=str, default="netdev", - choices=["netdev", "internal", "geneve", "vxlan"], + choices=["netdev", "internal", "gre", "geneve", "vxlan"], help="Interface type (default netdev)", ) addifcmd.add_argument( @@ -2645,7 +2652,7 @@ addifcmd.add_argument( "-l", "--lwt", - type=bool, + action=argparse.BooleanOptionalAction, default=True, help="Use LWT infrastructure instead of vport (default true)." )
diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh index b50dbe4..c06e313 100755 --- a/tools/testing/selftests/net/ovpn/test.sh +++ b/tools/testing/selftests/net/ovpn/test.sh
@@ -98,10 +98,10 @@ sleep 0.3 ovpn_cmd_ok "send baseline traffic to peer ${p}" \ ip netns exec ovpn_peer0 \ - ping -qfc 500 -w 3 5.5.5.$((p + 1)) + ping -qfc 100 -w 3 5.5.5.$((p + 1)) ovpn_cmd_ok "send large-payload traffic to peer ${p}" \ ip netns exec ovpn_peer0 \ - ping -qfc 500 -s 3000 -w 3 5.5.5.$((p + 1)) + ping -qfc 100 -s 3000 -w 3 5.5.5.$((p + 1)) wait "${tcpdump_pid1}" || return 1 wait "${tcpdump_pid2}" || return 1
diff --git a/tools/testing/selftests/net/tcp_ecmp_failover.sh b/tools/testing/selftests/net/tcp_ecmp_failover.sh new file mode 100755 index 0000000..5768aa8 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ecmp_failover.sh
@@ -0,0 +1,216 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright 2026 Google LLC. +# +# This test verifies TCP flow failover between ECMP routes +# upon carrier loss on the active device. +# +# socat -----------------------------> socat +# | +# .-- veth-c1 -|- veth-s1 --. +# dummy0 -| | |-- dummy0 +# '-- veth-c2 -|- veth-s2 --' +# | +# + +REQUIRE_JQ=no +REQUIRE_MZ=no +NUM_NETIFS=0 + +source forwarding/lib.sh + +CLIENT_IP="10.0.59.1" +SERVER_IP="10.0.92.1" +CLIENT_IP6="2001:db8:5a9a::1" +SERVER_IP6="2001:db8:9292::1" + +setup_server() +{ + IP="ip -n $server" + NS_EXEC="ip netns exec $server" + + $IP link add dummy0 type dummy + $IP link set dummy0 up + + $IP -4 addr add $SERVER_IP/32 dev dummy0 + $IP -6 addr add $SERVER_IP6/128 dev dummy0 nodad + + $IP link set veth-s1 up + $IP link set veth-s2 up + + $IP -4 addr add 192.168.1.2/24 dev veth-s1 + $IP -4 addr add 192.168.2.2/24 dev veth-s2 + + $IP -4 route add $CLIENT_IP/32 \ + nexthop via 192.168.1.1 dev veth-s1 weight 1 \ + nexthop via 192.168.2.1 dev veth-s2 weight 1 + + $IP -6 addr add 2001:db8:1::2/64 dev veth-s1 nodad + $IP -6 addr add 2001:db8:2::2/64 dev veth-s2 nodad + + $IP -6 route add $CLIENT_IP6/128 \ + nexthop via 2001:db8:1::1 dev veth-s1 weight 1 \ + nexthop via 2001:db8:2::1 dev veth-s2 weight 1 +} + +setup_client() +{ + IP="ip -n $client" + NS_EXEC="ip netns exec $client" + + $IP link add dummy0 type dummy + $IP link set dummy0 up + + $IP -4 addr add $CLIENT_IP/32 dev dummy0 + $IP -6 addr add $CLIENT_IP6/128 dev dummy0 nodad + + $IP link set veth-c1 up + $IP link set veth-c2 up + + $IP -4 addr add 192.168.1.1/24 dev veth-c1 + $IP -4 addr add 192.168.2.1/24 dev veth-c2 + + $IP -4 route add $SERVER_IP/32 \ + nexthop via 192.168.1.2 dev veth-c1 weight 1 \ + nexthop via 192.168.2.2 dev veth-c2 weight 1 + + $IP -6 addr add 2001:db8:1::1/64 dev veth-c1 nodad + $IP -6 addr add 2001:db8:2::1/64 dev veth-c2 nodad + + $IP -6 route add $SERVER_IP6/128 \ + nexthop via 2001:db8:1::2 dev veth-c1 
weight 1 \ + nexthop via 2001:db8:2::2 dev veth-c2 weight 1 + + # By default, tcp_retries1=3 triggers a route refresh + # after 3 retransmits (~5s). Ensure this never occurs + # for test stability. + $NS_EXEC sysctl -qw net.ipv4.tcp_retries1=100 + + # When NETDEV_CHANGE is issued for a dev tied to an ECMP + # route, RTNH_F_LINKDOWN is flagged and the sernum is + # bumped to invalidate the route via sk_dst_check(). + # + # Without ignore_routes_with_linkdown=1, subsequent + # lookups may still select the same RTNH_F_LINKDOWN route. + $NS_EXEC sysctl -qw net.ipv4.conf.veth-c1.ignore_routes_with_linkdown=1 + $NS_EXEC sysctl -qw net.ipv4.conf.veth-c2.ignore_routes_with_linkdown=1 + + $NS_EXEC sysctl -qw net.ipv6.conf.veth-c1.ignore_routes_with_linkdown=1 + $NS_EXEC sysctl -qw net.ipv6.conf.veth-c2.ignore_routes_with_linkdown=1 +} + +setup() +{ + setup_ns client server + + ip -n "$client" link add veth-c1 type veth peer veth-s1 netns "$server" + ip -n "$client" link add veth-c2 type veth peer veth-s2 netns "$server" + + setup_server + setup_client +} + +cleanup() +{ + cleanup_all_ns > /dev/null 2>&1 +} + +tcp_ecmp_failover() +{ + local pf=$1; shift + local server_ip=$1; shift + local client_ip=$1; shift + + RET=0 + + tcpdump_start veth-s1 "$server" + tcpdump_start veth-s2 "$server" + + ip netns exec "$server" \ + socat -u TCP-LISTEN:8080,pf="$pf",bind="$server_ip",reuseaddr /dev/null & + server_pid=$! + + # Wait for server to start listening. + # Sometimes client fails without this sleep. + sleep 1 + + ip netns exec "$client" \ + socat -u /dev/zero TCP:"$server_ip":8080,pf="$pf",bind="$client_ip" & + client_pid=$! + + # To capture enough packets. 
+ sleep 3 + + tcpdump_stop veth-s1 + tcpdump_stop veth-s2 + + pkts_s1=$(tcpdump_show veth-s1 | wc -l) + pkts_s2=$(tcpdump_show veth-s2 | wc -l) + + tcpdump_cleanup veth-s1 + tcpdump_cleanup veth-s2 + + # Detect the device chosen by the client + if [ "$pkts_s1" -gt "$pkts_s2" ]; then + veth_down=veth-s1 + veth_up=veth-s2 + else + veth_down=veth-s2 + veth_up=veth-s1 + fi + + # Taking down $veth_down causes its peer to lose carrier, + # triggering NETDEV_CHANGE. This flags RTNH_F_LINKDOWN + # and bumps the sernum for the route associated with that + # peer, invalidating the cached dst in the TCP socket. + # + # Consequently, sk_dst_check() fails, forcing the subsequent + # lookup to select the remaining healthy route via $veth_up. + ip -n "$server" link set "$veth_down" down + + tcpdump_start "$veth_up" "$server" + + # To capture enough packets. + sleep 3 + + tcpdump_stop "$veth_up" + + kill -9 "$client_pid" > /dev/null 2>&1 + kill -9 "$server_pid" > /dev/null 2>&1 + wait 2> /dev/null + + pkts=$(tcpdump_show $veth_up | wc -l) + + tcpdump_cleanup "$veth_up" + + if [ "$pkts" -lt 1000 ]; then + RET=$ksft_fail + fi +} + +test_ipv4() +{ + setup + tcp_ecmp_failover IPv4 $SERVER_IP $CLIENT_IP + log_test "TCP IPv4 failover" + cleanup +} + +test_ipv6() +{ + setup + tcp_ecmp_failover IPv6 "[$SERVER_IP6]" "[$CLIENT_IP6]" + log_test "TCP IPv6 failover" + cleanup +} + +require_command socat +require_command tcpdump + +trap cleanup EXIT + +test_ipv4 +test_ipv6 + +exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 9e2ccea..30a236b 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c
@@ -946,6 +946,49 @@ TEST_F(tls, peek_and_splice) EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } +TEST_F(tls, splice_to_pipe_small) +{ + int send_len = TLS_PAYLOAD_MAX_LEN; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + size_t total = 0; + int p[2]; + + memrnd(mem_send, sizeof(mem_send)); + + ASSERT_GE(pipe(p), 0); + + /* Shrink pipe to 1 page (typically 4096 bytes) to force multiple + * splice iterations for a 16384-byte TLS record. + */ + EXPECT_GE(fcntl(p[1], F_SETPIPE_SZ, 4096), 4096); + + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + + while (total < (size_t)send_len) { + ssize_t spliced, drained; + + spliced = splice(self->cfd, NULL, p[1], NULL, + send_len - total, 0); + EXPECT_GT(spliced, 0); + if (spliced <= 0) + break; + + drained = read(p[0], mem_recv + total, spliced); + EXPECT_EQ(drained, spliced); + if (drained <= 0) + break; + + total += drained; + } + + EXPECT_EQ(total, (size_t)send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); + + close(p[0]); + close(p[1]); +} + #define MAX_FRAGS 48 TEST_F(tls, splice_short) {
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index 4ef9082..50d69e2 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile
@@ -14,14 +14,20 @@ # still track changes to header files and depend on shared object. OVERRIDE_TARGETS = 1 -TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \ - param_test_benchmark param_test_compare_twice param_test_mm_cid \ - param_test_mm_cid_benchmark param_test_mm_cid_compare_twice \ - syscall_errors_test slice_test +TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test \ + param_test_benchmark param_test_mm_cid_benchmark -TEST_GEN_PROGS_EXTENDED = librseq.so +TEST_GEN_PROGS_EXTENDED = librseq.so \ + param_test \ + param_test_compare_twice \ + param_test_mm_cid \ + param_test_mm_cid_compare_twice \ + syscall_errors_test \ + legacy_check \ + slice_test \ + check_optimized -TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh +TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh run_legacy_check.sh run_timeslice_test.sh TEST_FILES := settings @@ -62,3 +68,6 @@ $(OUTPUT)/slice_test: slice_test.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@ + +$(OUTPUT)/check_optimized: check_optimized.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h + $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@
diff --git a/tools/testing/selftests/rseq/check_optimized.c b/tools/testing/selftests/rseq/check_optimized.c new file mode 100644 index 0000000..a13e3f2 --- /dev/null +++ b/tools/testing/selftests/rseq/check_optimized.c
@@ -0,0 +1,17 @@ +// SPDX-License-Identifier: LGPL-2.1 +#define _GNU_SOURCE +#include <assert.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <string.h> +#include <sys/time.h> + +#include "rseq.h" + +int main(int argc, char **argv) +{ + if (__rseq_register_current_thread(true, false)) + return -1; + return 0; +}
diff --git a/tools/testing/selftests/rseq/legacy_check.c b/tools/testing/selftests/rseq/legacy_check.c new file mode 100644 index 0000000..3f7de4e --- /dev/null +++ b/tools/testing/selftests/rseq/legacy_check.c
@@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <errno.h> +#include <signal.h> +#include <stdint.h> +#include <unistd.h> + +#include "rseq.h" + +#include "../kselftest_harness.h" + +FIXTURE(legacy) +{ +}; + +static int cpu_id_in_sigfn = -1; + +static void sigfn(int sig) +{ + struct rseq_abi *rs = rseq_get_abi(); + + cpu_id_in_sigfn = rs->cpu_id_start; +} + +FIXTURE_SETUP(legacy) +{ + int res = __rseq_register_current_thread(true, true); + + switch (res) { + case -ENOSYS: + SKIP(return, "RSEQ not enabled\n"); + case -EBUSY: + SKIP(return, "GLIBC owns RSEQ. Disable GLIBC RSEQ registration\n"); + default: + ASSERT_EQ(res, 0); + } + + ASSERT_NE(signal(SIGUSR1, sigfn), SIG_ERR); +} + +FIXTURE_TEARDOWN(legacy) +{ +} + +TEST_F(legacy, legacy_test) +{ + struct rseq_abi *rs = rseq_get_abi(); + + ASSERT_NE(rs, NULL); + + /* Overwrite rs::cpu_id_start */ + rs->cpu_id_start = -1; + sleep(1); + ASSERT_NE(rs->cpu_id_start, -1); + + rs->cpu_id_start = -1; + ASSERT_EQ(raise(SIGUSR1), 0); + ASSERT_NE(rs->cpu_id_start, -1); + ASSERT_NE(cpu_id_in_sigfn, -1); +} + +TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 05d03e6..e1e98db 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c
@@ -38,7 +38,7 @@ static int opt_modulo, verbose; static int opt_yield, opt_signal, opt_sleep, opt_disable_rseq, opt_threads = 200, opt_disable_mod = 0, opt_test = 's'; - +static bool opt_rseq_legacy; static long long opt_reps = 5000; static __thread __attribute__((tls_model("initial-exec"))) @@ -281,9 +281,12 @@ unsigned int yield_mod_cnt, nr_abort; } \ } +#define rseq_no_glibc true + #else #define printf_verbose(fmt, ...) +#define rseq_no_glibc false #endif /* BENCHMARK */ @@ -481,7 +484,7 @@ void *test_percpu_spinlock_thread(void *arg) long long i, reps; if (!opt_disable_rseq && thread_data->reg && - rseq_register_current_thread()) + __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) abort(); reps = thread_data->reps; for (i = 0; i < reps; i++) { @@ -558,7 +561,7 @@ void *test_percpu_inc_thread(void *arg) long long i, reps; if (!opt_disable_rseq && thread_data->reg && - rseq_register_current_thread()) + __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) abort(); reps = thread_data->reps; for (i = 0; i < reps; i++) { @@ -712,7 +715,7 @@ void *test_percpu_list_thread(void *arg) long long i, reps; struct percpu_list *list = (struct percpu_list *)arg; - if (!opt_disable_rseq && rseq_register_current_thread()) + if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) abort(); reps = opt_reps; @@ -895,7 +898,7 @@ void *test_percpu_buffer_thread(void *arg) long long i, reps; struct percpu_buffer *buffer = (struct percpu_buffer *)arg; - if (!opt_disable_rseq && rseq_register_current_thread()) + if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) abort(); reps = opt_reps; @@ -1105,7 +1108,7 @@ void *test_percpu_memcpy_buffer_thread(void *arg) long long i, reps; struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg; - if (!opt_disable_rseq && rseq_register_current_thread()) + if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, 
opt_rseq_legacy)) abort(); reps = opt_reps; @@ -1258,7 +1261,7 @@ void *test_membarrier_worker_thread(void *arg) const int iters = opt_reps; int i; - if (rseq_register_current_thread()) { + if (__rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) { fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", errno, strerror(errno)); abort(); @@ -1323,7 +1326,7 @@ void *test_membarrier_manager_thread(void *arg) intptr_t expect_a = 0, expect_b = 0; int cpu_a = 0, cpu_b = 0; - if (rseq_register_current_thread()) { + if (__rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) { fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", errno, strerror(errno)); abort(); @@ -1475,6 +1478,7 @@ static void show_usage(int argc, char **argv) printf(" [-D M] Disable rseq for each M threads\n"); printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n"); printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n"); + printf(" [-O] Test with optimized RSEQ\n"); printf(" [-v] Verbose output.\n"); printf(" [-h] Show this help.\n"); printf("\n"); @@ -1602,6 +1606,9 @@ int main(int argc, char **argv) case 'M': opt_mo = RSEQ_MO_RELEASE; break; + case 'L': + opt_rseq_legacy = true; + break; default: show_usage(argc, argv); goto error; @@ -1618,7 +1625,7 @@ int main(int argc, char **argv) if (set_signal_handler()) goto error; - if (!opt_disable_rseq && rseq_register_current_thread()) + if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) goto error; if (!opt_disable_rseq && !rseq_validate_cpu_id()) { fprintf(stderr, "Error: cpu id getter unavailable\n");
diff --git a/tools/testing/selftests/rseq/rseq-abi.h b/tools/testing/selftests/rseq/rseq-abi.h index ecef315..5f4ea21 100644 --- a/tools/testing/selftests/rseq/rseq-abi.h +++ b/tools/testing/selftests/rseq/rseq-abi.h
@@ -192,9 +192,14 @@ struct rseq_abi { struct rseq_abi_slice_ctrl slice_ctrl; /* + * Place holder to push the size above 32 bytes. + */ + __u8 __reserved; + + /* * Flexible array member at end of structure, after last feature field. */ char end[]; -} __attribute__((aligned(4 * sizeof(__u64)))); +} __attribute__((aligned(256))); #endif /* _RSEQ_ABI_H */
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index a736727..be0d0a9 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c
@@ -56,6 +56,7 @@ ptrdiff_t rseq_offset; * unsuccessful. */ unsigned int rseq_size = -1U; +static unsigned int rseq_alloc_size; /* Flags used during rseq registration. */ unsigned int rseq_flags; @@ -115,29 +116,17 @@ bool rseq_available(void) } } -/* The rseq areas need to be at least 32 bytes. */ -static -unsigned int get_rseq_min_alloc_size(void) -{ - unsigned int alloc_size = rseq_size; - - if (alloc_size < ORIG_RSEQ_ALLOC_SIZE) - alloc_size = ORIG_RSEQ_ALLOC_SIZE; - return alloc_size; -} - /* * Return the feature size supported by the kernel. * * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE): * - * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) + * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE). * * It should never return a value below ORIG_RSEQ_FEATURE_SIZE. */ -static -unsigned int get_rseq_kernel_feature_size(void) +static unsigned int get_rseq_kernel_feature_size(void) { unsigned long auxv_rseq_feature_size, auxv_rseq_align; @@ -152,15 +141,24 @@ unsigned int get_rseq_kernel_feature_size(void) return ORIG_RSEQ_FEATURE_SIZE; } -int rseq_register_current_thread(void) +int __rseq_register_current_thread(bool nolibc, bool legacy) { + unsigned int size; int rc; if (!rseq_ownership) { /* Treat libc's ownership as a successful registration. */ - return 0; + return nolibc ? -EBUSY : 0; } - rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); + + /* The minimal allocation size is 32, which is the legacy allocation size */ + size = get_rseq_kernel_feature_size(); + if (legacy || size < ORIG_RSEQ_ALLOC_SIZE) + rseq_alloc_size = ORIG_RSEQ_ALLOC_SIZE; + else + rseq_alloc_size = size; + + rc = sys_rseq(&__rseq.abi, rseq_alloc_size, 0, RSEQ_SIG); if (rc) { /* * After at least one thread has registered successfully @@ -179,9 +177,8 @@ int rseq_register_current_thread(void) * The first thread to register sets the rseq_size to mimic the libc * behavior. 
*/ - if (RSEQ_READ_ONCE(rseq_size) == 0) { - RSEQ_WRITE_ONCE(rseq_size, get_rseq_kernel_feature_size()); - } + if (RSEQ_READ_ONCE(rseq_size) == 0) + RSEQ_WRITE_ONCE(rseq_size, size); return 0; } @@ -194,7 +191,7 @@ int rseq_unregister_current_thread(void) /* Treat libc's ownership as a successful unregistration. */ return 0; } - rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + rc = sys_rseq(&__rseq.abi, rseq_alloc_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); if (rc) return -1; return 0;
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index f51a5fd..c62ebb9 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h
@@ -8,6 +8,7 @@ #ifndef RSEQ_H #define RSEQ_H +#include <assert.h> #include <stdint.h> #include <stdbool.h> #include <pthread.h> @@ -142,7 +143,12 @@ static inline struct rseq_abi *rseq_get_abi(void) * succeed. A restartable sequence executed from a non-registered * thread will always fail. */ -int rseq_register_current_thread(void); +int __rseq_register_current_thread(bool nolibc, bool legacy); + +static inline int rseq_register_current_thread(void) +{ + return __rseq_register_current_thread(false, false); +} /* * Unregister rseq for current thread.
diff --git a/tools/testing/selftests/rseq/run_legacy_check.sh b/tools/testing/selftests/rseq/run_legacy_check.sh new file mode 100755 index 0000000..5577b46 --- /dev/null +++ b/tools/testing/selftests/rseq/run_legacy_check.sh
@@ -0,0 +1,4 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" ./legacy_check
diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh index 8d31426..69a3fa0 100755 --- a/tools/testing/selftests/rseq/run_param_test.sh +++ b/tools/testing/selftests/rseq/run_param_test.sh
@@ -34,6 +34,11 @@ SLOW_REPS=100 NR_THREADS=$((6*${NR_CPUS})) +# Prevent GLIBC from registering RSEQ so the selftest can run in legacy and +# performance optimized mode. +GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" +export GLIBC_TUNABLES + function do_tests() { local i=0 @@ -103,6 +108,40 @@ NR_LOOPS= } +echo "Testing in legacy RSEQ mode" +echo "Yield injection (25%)" +inject_blocking -m 4 -y -L + +echo "Yield injection (50%)" +inject_blocking -m 2 -y -L + +echo "Yield injection (100%)" +inject_blocking -m 1 -y -L + +echo "Kill injection (25%)" +inject_blocking -m 4 -k -L + +echo "Kill injection (50%)" +inject_blocking -m 2 -k -L + +echo "Kill injection (100%)" +inject_blocking -m 1 -k -L + +echo "Sleep injection (1ms, 25%)" +inject_blocking -m 4 -s 1 -L + +echo "Sleep injection (1ms, 50%)" +inject_blocking -m 2 -s 1 -L + +echo "Sleep injection (1ms, 100%)" +inject_blocking -m 1 -s 1 -L + +./check_optimized || { + echo "Skipping optimized RSEQ mode test. Not supported"; + exit 0 +} + +echo "Testing in optimized RSEQ mode" echo "Yield injection (25%)" inject_blocking -m 4 -y
diff --git a/tools/testing/selftests/rseq/run_timeslice_test.sh b/tools/testing/selftests/rseq/run_timeslice_test.sh new file mode 100755 index 0000000..551ebed --- /dev/null +++ b/tools/testing/selftests/rseq/run_timeslice_test.sh
@@ -0,0 +1,14 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ + +# Prevent GLIBC from registering RSEQ so the selftest can run in legacy +# and performance optimized mode. +GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" +export GLIBC_TUNABLES + +./check_optimized || { + echo "Skipping optimized RSEQ mode test. Not supported"; + exit 0 +} + +./slice_test
diff --git a/tools/testing/selftests/rseq/slice_test.c b/tools/testing/selftests/rseq/slice_test.c index 357122d..e402d44 100644 --- a/tools/testing/selftests/rseq/slice_test.c +++ b/tools/testing/selftests/rseq/slice_test.c
@@ -124,6 +124,13 @@ FIXTURE_SETUP(slice_ext) { cpu_set_t affinity; + if (__rseq_register_current_thread(true, false)) + SKIP(return, "RSEQ not supported\n"); + + if (prctl(PR_RSEQ_SLICE_EXTENSION, PR_RSEQ_SLICE_EXTENSION_SET, + PR_RSEQ_SLICE_EXT_ENABLE, 0, 0)) + SKIP(return, "Time slice extension not supported\n"); + ASSERT_EQ(sched_getaffinity(0, sizeof(affinity), &affinity), 0); /* Pin it on a single CPU. Avoid CPU 0 */ @@ -137,11 +144,6 @@ FIXTURE_SETUP(slice_ext) break; } - ASSERT_EQ(rseq_register_current_thread(), 0); - - ASSERT_EQ(prctl(PR_RSEQ_SLICE_EXTENSION, PR_RSEQ_SLICE_EXTENSION_SET, - PR_RSEQ_SLICE_EXT_ENABLE, 0, 0), 0); - self->noise_params.noise_nsecs = variant->noise_nsecs; self->noise_params.sleep_nsecs = variant->sleep_nsecs; self->noise_params.run = 1;
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index eefadd0..b1f856c 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -1136,5 +1136,153 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "7a5f", + "name": "Force red to dequeue from its child's gso_skb with qfq leaf", + "category": [ + "qdisc", + "tbf", + "red", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 88bit burst 1661b peakrate 2257333 minburst 1024 limit 7b", + "$TC qdisc add dev $DUMMY parent 1: handle 2: red limit 757 min 16 max 24 avpkt 16", + "$TC qdisc add dev $DUMMY parent 2: handle 3: qfq", + "$TC class add dev $DUMMY classid 3:1 parent 3: qfq maxpkt 512 weight 1", + "$TC filter add dev $DUMMY parent 3: protocol ip prio 1 matchall classid 3:1 action ok" + ], + "cmdUnderTest": "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true", + "expExitCode": "0", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:", + "matchJSON": [ + { + "kind": "red", + "handle": "2:", + "bytes": 98, + "packets": 1, + "backlog": 0, + "qlen": 0 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "cdae", + "name": "Force sfb to dequeue from its child's gso_skb with qfq leaf", + "category": [ + "qdisc", + "tbf", + "sfb", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 88bit burst 1661b peakrate 2257333 minburst 1024 limit 7b", + "$TC qdisc add dev $DUMMY parent 1: handle 2: sfb", + "$TC qdisc add dev $DUMMY parent 2: handle 3: qfq", + "$TC class add dev $DUMMY classid 3:1 parent 3: qfq maxpkt 512 weight 1", + "$TC filter add dev $DUMMY parent 3: protocol ip prio 1 matchall classid 3:1 action ok" + ], + "cmdUnderTest": "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true", + "expExitCode": "0", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:", + 
"matchJSON": [ + { + "kind": "sfb", + "handle": "2:", + "bytes": 98, + "packets": 1, + "backlog": 0, + "qlen": 0 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "291d", + "name": "Force red to dequeue from its child's gso_skb with dualpi2 leaf", + "category": [ + "qdisc", + "tbf", + "red", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 88bit burst 1661b peakrate 2257333 minburst 1024 limit 7b", + "$TC qdisc add dev $DUMMY parent 1: handle 2: red limit 757 min 16 max 24 avpkt 16", + "$TC qdisc add dev $DUMMY parent 2: handle 3: dualpi2" + ], + "cmdUnderTest": "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true", + "expExitCode": "0", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:", + "matchJSON": [ + { + "kind": "red", + "handle": "2:", + "bytes": 98, + "packets": 1, + "backlog": 0, + "qlen": 0 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "9c6d", + "name": "Force sfb to dequeue from its child's gso_skb with dualpi2 leaf", + "category": [ + "qdisc", + "tbf", + "sfb", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 88bit burst 1661b peakrate 2257333 minburst 1024 limit 7b", + "$TC qdisc add dev $DUMMY parent 1: handle 2: sfb", + "$TC qdisc add dev $DUMMY parent 2: handle 3: dualpi2" + ], + "cmdUnderTest": "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true", + "expExitCode": "0", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:", + "matchJSON": [ + { + "kind": "sfb", + "handle": "2:", + "bytes": 98, + "packets": 1, + "backlog": 0, + "qlen": 0 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] } ]