| From f158116100c2b0043997be5098d3bfdc5ebbb786 Mon Sep 17 00:00:00 2001 |
| From: Julien Grall <julien.grall@arm.com> |
| Date: Tue, 21 May 2019 18:21:39 +0100 |
| Subject: [PATCH] arm64/fpsimd: Don't disable softirq when touching FPSIMD/SVE |
| state |
| |
| commit 6dcdefcde413c1068b394eeabdfdf6a85213ebe2 upstream. |
| |
| When the kernel is compiled with CONFIG_KERNEL_MODE_NEON, some part of |
| the kernel may be able to use FPSIMD/SVE. This is for instance the case |
| for crypto code. |
| |
| Any use of FPSIMD/SVE in the kernel are clearly marked by using the |
| function kernel_neon_{begin, end}. Furthermore, this can only be used |
| when may_use_simd() returns true. |
| |
| The current implementation of may_use_simd() allows softirq to use |
| FPSIMD/SVE unless it is currently in use (i.e kernel_neon_busy is true). |
| When in use, softirqs usually fall back to a software method. |
| |
| At the moment, as a softirq may use FPSIMD/SVE, softirqs are disabled |
| when touching the FPSIMD/SVE context. This has the drawback to disable |
| all softirqs even if they are not using FPSIMD/SVE. |
| |
| Since a softirq is supposed to check may_use_simd() anyway before |
| attempting to use FPSIMD/SVE, there is limited reason to keep softirq |
| disabled when touching the FPSIMD/SVE context. Instead, we can simply |
| disable preemption and mark the FPSIMD/SVE context as in use by setting |
| CPU's fpsimd_context_busy flag. |
| |
| Two new helpers {get, put}_cpu_fpsimd_context are introduced to mark |
| the area using FPSIMD/SVE context and they are used to replace |
| local_bh_{disable, enable}. The functions kernel_neon_{begin, end} are |
| also re-implemented to use the new helpers. |
| |
| Additionally, double-underscored versions of the helpers are provided to |
| called when preemption is already disabled. These are only relevant on |
| paths where irqs are disabled anyway, so they are not needed for |
| correctness in the current code. Let's use them anyway though: this |
| marks critical sections clearly and will help to avoid mistakes during |
| future maintenance. |
| |
| The change has been benchmarked on Linux 5.1-rc4 with defconfig. |
| |
| On Juno2: |
| * hackbench 100 process 1000 (10 times) |
| * .7% quicker |
| |
| On ThunderX 2: |
| * hackbench 1000 process 1000 (20 times) |
| * 3.4% quicker |
| |
| Reviewed-by: Dave Martin <dave.martin@arm.com> |
| Acked-by: Marc Zyngier <marc.zyngier@arm.com> |
| Signed-off-by: Julien Grall <julien.grall@arm.com> |
| Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h |
| index 7e245b9e03a5..7434844036d3 100644 |
| --- a/arch/arm64/include/asm/simd.h |
| +++ b/arch/arm64/include/asm/simd.h |
| @@ -12,9 +12,9 @@ |
| #include <linux/preempt.h> |
| #include <linux/types.h> |
| |
| -#ifdef CONFIG_KERNEL_MODE_NEON |
| +DECLARE_PER_CPU(bool, fpsimd_context_busy); |
| |
| -DECLARE_PER_CPU(bool, kernel_neon_busy); |
| +#ifdef CONFIG_KERNEL_MODE_NEON |
| |
| /* |
| * may_use_simd - whether it is allowable at this time to issue SIMD |
| @@ -26,15 +26,15 @@ DECLARE_PER_CPU(bool, kernel_neon_busy); |
| static __must_check inline bool may_use_simd(void) |
| { |
| /* |
| - * kernel_neon_busy is only set while preemption is disabled, |
| + * fpsimd_context_busy is only set while preemption is disabled, |
| * and is clear whenever preemption is enabled. Since |
| - * this_cpu_read() is atomic w.r.t. preemption, kernel_neon_busy |
| + * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy |
| * cannot change under our feet -- if it's set we cannot be |
| * migrated, and if it's clear we cannot be migrated to a CPU |
| * where it is set. |
| */ |
| return !in_irq() && !irqs_disabled() && !in_nmi() && |
| - !this_cpu_read(kernel_neon_busy); |
| + !this_cpu_read(fpsimd_context_busy); |
| } |
| |
| #else /* ! CONFIG_KERNEL_MODE_NEON */ |
| diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c |
| index 1041c6a8f072..eec4776ae5f0 100644 |
| --- a/arch/arm64/kernel/fpsimd.c |
| +++ b/arch/arm64/kernel/fpsimd.c |
| @@ -82,7 +82,8 @@ |
| * To prevent this from racing with the manipulation of the task's FPSIMD state |
| * from task context and thereby corrupting the state, it is necessary to |
| * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE |
| - * flag with local_bh_disable() unless softirqs are already masked. |
| + * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to |
| + * run but prevent them to use FPSIMD. |
| * |
| * For a certain task, the sequence may look something like this: |
| * - the task gets scheduled in; if both the task's fpsimd_cpu field |
| @@ -145,6 +146,56 @@ extern void __percpu *efi_sve_state; |
| |
| #endif /* ! CONFIG_ARM64_SVE */ |
| |
| +DEFINE_PER_CPU(bool, fpsimd_context_busy); |
| +EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy); |
| + |
| +static void __get_cpu_fpsimd_context(void) |
| +{ |
| + bool busy = __this_cpu_xchg(fpsimd_context_busy, true); |
| + |
| + WARN_ON(busy); |
| +} |
| + |
| +/* |
| + * Claim ownership of the CPU FPSIMD context for use by the calling context. |
| + * |
| + * The caller may freely manipulate the FPSIMD context metadata until |
| + * put_cpu_fpsimd_context() is called. |
| + * |
| + * The double-underscore version must only be called if you know the task |
| + * can't be preempted. |
| + */ |
| +static void get_cpu_fpsimd_context(void) |
| +{ |
| + preempt_disable(); |
| + __get_cpu_fpsimd_context(); |
| +} |
| + |
| +static void __put_cpu_fpsimd_context(void) |
| +{ |
| + bool busy = __this_cpu_xchg(fpsimd_context_busy, false); |
| + |
| + WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */ |
| +} |
| + |
| +/* |
| + * Release the CPU FPSIMD context. |
| + * |
| + * Must be called from a context in which get_cpu_fpsimd_context() was |
| + * previously called, with no call to put_cpu_fpsimd_context() in the |
| + * meantime. |
| + */ |
| +static void put_cpu_fpsimd_context(void) |
| +{ |
| + __put_cpu_fpsimd_context(); |
| + preempt_enable(); |
| +} |
| + |
| +static bool have_cpu_fpsimd_context(void) |
| +{ |
| + return !preemptible() && __this_cpu_read(fpsimd_context_busy); |
| +} |
| + |
| /* |
| * Call __sve_free() directly only if you know task can't be scheduled |
| * or preempted. |
| @@ -215,12 +266,10 @@ static void sve_free(struct task_struct *task) |
| * This function should be called only when the FPSIMD/SVE state in |
| * thread_struct is known to be up to date, when preparing to enter |
| * userspace. |
| - * |
| - * Softirqs (and preemption) must be disabled. |
| */ |
| static void task_fpsimd_load(void) |
| { |
| - WARN_ON(!in_softirq() && !irqs_disabled()); |
| + WARN_ON(!have_cpu_fpsimd_context()); |
| |
| if (system_supports_sve() && test_thread_flag(TIF_SVE)) |
| sve_load_state(sve_pffr(¤t->thread), |
| @@ -233,8 +282,6 @@ static void task_fpsimd_load(void) |
| /* |
| * Ensure FPSIMD/SVE storage in memory for the loaded context is up to |
| * date with respect to the CPU registers. |
| - * |
| - * Softirqs (and preemption) must be disabled. |
| */ |
| static void fpsimd_save(void) |
| { |
| @@ -242,7 +289,7 @@ static void fpsimd_save(void) |
| this_cpu_ptr(&fpsimd_last_state); |
| /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ |
| |
| - WARN_ON(!in_softirq() && !irqs_disabled()); |
| + WARN_ON(!have_cpu_fpsimd_context()); |
| |
| if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { |
| if (system_supports_sve() && test_thread_flag(TIF_SVE)) { |
| @@ -364,7 +411,8 @@ static __uint128_t arm64_cpu_to_le128(__uint128_t x) |
| * task->thread.sve_state. |
| * |
| * Task can be a non-runnable task, or current. In the latter case, |
| - * softirqs (and preemption) must be disabled. |
| + * the caller must have ownership of the cpu FPSIMD context before calling |
| + * this function. |
| * task->thread.sve_state must point to at least sve_state_size(task) |
| * bytes of allocated kernel memory. |
| * task->thread.uw.fpsimd_state must be up to date before calling this |
| @@ -393,7 +441,8 @@ static void fpsimd_to_sve(struct task_struct *task) |
| * task->thread.uw.fpsimd_state. |
| * |
| * Task can be a non-runnable task, or current. In the latter case, |
| - * softirqs (and preemption) must be disabled. |
| + * the caller must have ownership of the cpu FPSIMD context before calling |
| + * this function. |
| * task->thread.sve_state must point to at least sve_state_size(task) |
| * bytes of allocated kernel memory. |
| * task->thread.sve_state must be up to date before calling this function. |
| @@ -557,7 +606,7 @@ int sve_set_vector_length(struct task_struct *task, |
| * non-SVE thread. |
| */ |
| if (task == current) { |
| - local_bh_disable(); |
| + get_cpu_fpsimd_context(); |
| |
| fpsimd_save(); |
| } |
| @@ -567,7 +616,7 @@ int sve_set_vector_length(struct task_struct *task, |
| sve_to_fpsimd(task); |
| |
| if (task == current) |
| - local_bh_enable(); |
| + put_cpu_fpsimd_context(); |
| |
| /* |
| * Force reallocation of task SVE state to the correct size |
| @@ -880,7 +929,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) |
| |
| sve_alloc(current); |
| |
| - local_bh_disable(); |
| + get_cpu_fpsimd_context(); |
| |
| fpsimd_save(); |
| |
| @@ -891,7 +940,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) |
| if (test_and_set_thread_flag(TIF_SVE)) |
| WARN_ON(1); /* SVE access shouldn't have trapped */ |
| |
| - local_bh_enable(); |
| + put_cpu_fpsimd_context(); |
| } |
| |
| /* |
| @@ -935,6 +984,8 @@ void fpsimd_thread_switch(struct task_struct *next) |
| if (!system_supports_fpsimd()) |
| return; |
| |
| + __get_cpu_fpsimd_context(); |
| + |
| /* Save unsaved fpsimd state, if any: */ |
| fpsimd_save(); |
| |
| @@ -949,6 +1000,8 @@ void fpsimd_thread_switch(struct task_struct *next) |
| |
| update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE, |
| wrong_task || wrong_cpu); |
| + |
| + __put_cpu_fpsimd_context(); |
| } |
| |
| void fpsimd_flush_thread(void) |
| @@ -958,7 +1011,7 @@ void fpsimd_flush_thread(void) |
| if (!system_supports_fpsimd()) |
| return; |
| |
| - local_bh_disable(); |
| + get_cpu_fpsimd_context(); |
| |
| fpsimd_flush_task_state(current); |
| memset(¤t->thread.uw.fpsimd_state, 0, |
| @@ -999,7 +1052,7 @@ void fpsimd_flush_thread(void) |
| current->thread.sve_vl_onexec = 0; |
| } |
| |
| - local_bh_enable(); |
| + put_cpu_fpsimd_context(); |
| } |
| |
| /* |
| @@ -1011,9 +1064,9 @@ void fpsimd_preserve_current_state(void) |
| if (!system_supports_fpsimd()) |
| return; |
| |
| - local_bh_disable(); |
| + get_cpu_fpsimd_context(); |
| fpsimd_save(); |
| - local_bh_enable(); |
| + put_cpu_fpsimd_context(); |
| } |
| |
| /* |
| @@ -1030,7 +1083,8 @@ void fpsimd_signal_preserve_current_state(void) |
| |
| /* |
| * Associate current's FPSIMD context with this cpu |
| - * Preemption must be disabled when calling this function. |
| + * The caller must have ownership of the cpu FPSIMD context before calling |
| + * this function. |
| */ |
| void fpsimd_bind_task_to_cpu(void) |
| { |
| @@ -1076,14 +1130,14 @@ void fpsimd_restore_current_state(void) |
| if (!system_supports_fpsimd()) |
| return; |
| |
| - local_bh_disable(); |
| + get_cpu_fpsimd_context(); |
| |
| if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { |
| task_fpsimd_load(); |
| fpsimd_bind_task_to_cpu(); |
| } |
| |
| - local_bh_enable(); |
| + put_cpu_fpsimd_context(); |
| } |
| |
| /* |
| @@ -1096,7 +1150,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) |
| if (!system_supports_fpsimd()) |
| return; |
| |
| - local_bh_disable(); |
| + get_cpu_fpsimd_context(); |
| |
| current->thread.uw.fpsimd_state = *state; |
| if (system_supports_sve() && test_thread_flag(TIF_SVE)) |
| @@ -1107,7 +1161,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) |
| |
| clear_thread_flag(TIF_FOREIGN_FPSTATE); |
| |
| - local_bh_enable(); |
| + put_cpu_fpsimd_context(); |
| } |
| |
| /* |
| @@ -1133,7 +1187,8 @@ void fpsimd_flush_task_state(struct task_struct *t) |
| |
| /* |
| * Invalidate any task's FPSIMD state that is present on this cpu. |
| - * This function must be called with softirqs disabled. |
| + * The FPSIMD context should be acquired with get_cpu_fpsimd_context() |
| + * before calling this function. |
| */ |
| static void fpsimd_flush_cpu_state(void) |
| { |
| @@ -1143,19 +1198,19 @@ static void fpsimd_flush_cpu_state(void) |
| |
| /* |
| * Save the FPSIMD state to memory and invalidate cpu view. |
| - * This function must be called with softirqs (and preemption) disabled. |
| + * This function must be called with preemption disabled. |
| */ |
| void fpsimd_save_and_flush_cpu_state(void) |
| { |
| + WARN_ON(preemptible()); |
| + __get_cpu_fpsimd_context(); |
| fpsimd_save(); |
| fpsimd_flush_cpu_state(); |
| + __put_cpu_fpsimd_context(); |
| } |
| |
| #ifdef CONFIG_KERNEL_MODE_NEON |
| |
| -DEFINE_PER_CPU(bool, kernel_neon_busy); |
| -EXPORT_PER_CPU_SYMBOL(kernel_neon_busy); |
| - |
| /* |
| * Kernel-side NEON support functions |
| */ |
| @@ -1180,19 +1235,13 @@ void kernel_neon_begin(void) |
| |
| BUG_ON(!may_use_simd()); |
| |
| - local_bh_disable(); |
| - |
| - __this_cpu_write(kernel_neon_busy, true); |
| + get_cpu_fpsimd_context(); |
| |
| /* Save unsaved fpsimd state, if any: */ |
| fpsimd_save(); |
| |
| /* Invalidate any task state remaining in the fpsimd regs: */ |
| fpsimd_flush_cpu_state(); |
| - |
| - preempt_disable(); |
| - |
| - local_bh_enable(); |
| } |
| EXPORT_SYMBOL(kernel_neon_begin); |
| |
| @@ -1207,15 +1256,10 @@ EXPORT_SYMBOL(kernel_neon_begin); |
| */ |
| void kernel_neon_end(void) |
| { |
| - bool busy; |
| - |
| if (!system_supports_fpsimd()) |
| return; |
| |
| - busy = __this_cpu_xchg(kernel_neon_busy, false); |
| - WARN_ON(!busy); /* No matching kernel_neon_begin()? */ |
| - |
| - preempt_enable(); |
| + put_cpu_fpsimd_context(); |
| } |
| EXPORT_SYMBOL(kernel_neon_end); |
| |
| -- |
| 2.7.4 |
| |