| // SPDX-License-Identifier: GPL-2.0 | 
 | /* | 
 |  * ChaCha and HChaCha functions (ARM optimized) | 
 |  * | 
 |  * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> | 
 |  * Copyright (C) 2015 Martin Willi | 
 |  */ | 
 |  | 
 | #include <crypto/chacha.h> | 
 | #include <crypto/internal/simd.h> | 
 | #include <linux/jump_label.h> | 
 | #include <linux/kernel.h> | 
 | #include <linux/module.h> | 
 |  | 
 | #include <asm/cputype.h> | 
 | #include <asm/hwcap.h> | 
 | #include <asm/neon.h> | 
 | #include <asm/simd.h> | 
 |  | 
 | asmlinkage void chacha_block_xor_neon(const struct chacha_state *state, | 
 | 				      u8 *dst, const u8 *src, int nrounds); | 
 | asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state, | 
 | 				       u8 *dst, const u8 *src, | 
 | 				       int nrounds, unsigned int nbytes); | 
 | asmlinkage void hchacha_block_arm(const struct chacha_state *state, | 
 | 				  u32 out[HCHACHA_OUT_WORDS], int nrounds); | 
 | asmlinkage void hchacha_block_neon(const struct chacha_state *state, | 
 | 				   u32 out[HCHACHA_OUT_WORDS], int nrounds); | 
 |  | 
 | asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, | 
 | 			     const struct chacha_state *state, int nrounds); | 
 |  | 
 | static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon); | 
 |  | 
 | static inline bool neon_usable(void) | 
 | { | 
 | 	return static_branch_likely(&use_neon) && crypto_simd_usable(); | 
 | } | 
 |  | 
 | static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src, | 
 | 			  unsigned int bytes, int nrounds) | 
 | { | 
 | 	u8 buf[CHACHA_BLOCK_SIZE]; | 
 |  | 
 | 	while (bytes > CHACHA_BLOCK_SIZE) { | 
 | 		unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U); | 
 |  | 
 | 		chacha_4block_xor_neon(state, dst, src, nrounds, l); | 
 | 		bytes -= l; | 
 | 		src += l; | 
 | 		dst += l; | 
 | 		state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); | 
 | 	} | 
 | 	if (bytes) { | 
 | 		const u8 *s = src; | 
 | 		u8 *d = dst; | 
 |  | 
 | 		if (bytes != CHACHA_BLOCK_SIZE) | 
 | 			s = d = memcpy(buf, src, bytes); | 
 | 		chacha_block_xor_neon(state, d, s, nrounds); | 
 | 		if (d != dst) | 
 | 			memcpy(dst, buf, bytes); | 
 | 		state->x[12]++; | 
 | 	} | 
 | } | 
 |  | 
 | void hchacha_block_arch(const struct chacha_state *state, | 
 | 			u32 out[HCHACHA_OUT_WORDS], int nrounds) | 
 | { | 
 | 	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { | 
 | 		hchacha_block_arm(state, out, nrounds); | 
 | 	} else { | 
 | 		kernel_neon_begin(); | 
 | 		hchacha_block_neon(state, out, nrounds); | 
 | 		kernel_neon_end(); | 
 | 	} | 
 | } | 
 | EXPORT_SYMBOL(hchacha_block_arch); | 
 |  | 
 | void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, | 
 | 		       unsigned int bytes, int nrounds) | 
 | { | 
 | 	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() || | 
 | 	    bytes <= CHACHA_BLOCK_SIZE) { | 
 | 		chacha_doarm(dst, src, bytes, state, nrounds); | 
 | 		state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE); | 
 | 		return; | 
 | 	} | 
 |  | 
 | 	do { | 
 | 		unsigned int todo = min_t(unsigned int, bytes, SZ_4K); | 
 |  | 
 | 		kernel_neon_begin(); | 
 | 		chacha_doneon(state, dst, src, todo, nrounds); | 
 | 		kernel_neon_end(); | 
 |  | 
 | 		bytes -= todo; | 
 | 		src += todo; | 
 | 		dst += todo; | 
 | 	} while (bytes); | 
 | } | 
 | EXPORT_SYMBOL(chacha_crypt_arch); | 
 |  | 
 | bool chacha_is_arch_optimized(void) | 
 | { | 
 | 	/* We always can use at least the ARM scalar implementation. */ | 
 | 	return true; | 
 | } | 
 | EXPORT_SYMBOL(chacha_is_arch_optimized); | 
 |  | 
 | static int __init chacha_arm_mod_init(void) | 
 | { | 
 | 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { | 
 | 		switch (read_cpuid_part()) { | 
 | 		case ARM_CPU_PART_CORTEX_A7: | 
 | 		case ARM_CPU_PART_CORTEX_A5: | 
 | 			/* | 
 | 			 * The Cortex-A7 and Cortex-A5 do not perform well with | 
 | 			 * the NEON implementation but do incredibly with the | 
 | 			 * scalar one and use less power. | 
 | 			 */ | 
 | 			break; | 
 | 		default: | 
 | 			static_branch_enable(&use_neon); | 
 | 		} | 
 | 	} | 
 | 	return 0; | 
 | } | 
 | subsys_initcall(chacha_arm_mod_init); | 
 |  | 
 | static void __exit chacha_arm_mod_exit(void) | 
 | { | 
 | } | 
 | module_exit(chacha_arm_mod_exit); | 
 |  | 
 | MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM optimized)"); | 
 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | 
 | MODULE_LICENSE("GPL v2"); |