// SPDX-License-Identifier: GPL-2.0
/*
 * ChaCha and HChaCha functions (ARM optimized)
 *
 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 * Copyright (C) 2015 Martin Willi
 */

#include <crypto/chacha.h>
#include <crypto/internal/simd.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>

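/*
 * These routines are implemented in the accompanying assembly sources:
 * scalar ARM code (chacha_doarm, hchacha_block_arm) and NEON code that
 * XORs one block or up to four blocks at a time.  chacha_doarm() handles
 * a whole request with the scalar code but does not advance the block
 * counter; that is left to the caller.
 */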
asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
				      u8 *dst, const u8 *src, int nrounds);
asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state,
				       u8 *dst, const u8 *src,
				       int nrounds, unsigned int nbytes);
asmlinkage void hchacha_block_arm(const struct chacha_state *state,
				  u32 out[HCHACHA_OUT_WORDS], int nrounds);
asmlinkage void hchacha_block_neon(const struct chacha_state *state,
				   u32 out[HCHACHA_OUT_WORDS], int nrounds);

asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
			     const struct chacha_state *state, int nrounds);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);

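/*
 * NEON may be used only when the static key was enabled at boot (the CPU
 * has NEON and is not on the scalar-preferred list below) and the current
 * context permits kernel-mode SIMD.
 */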
static inline bool neon_usable(void)
{
	return static_branch_likely(&use_neon) && crypto_simd_usable();
}

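/*
 * NEON bulk path: while more than one block remains, process up to four
 * blocks per call, advancing the block counter in state->x[12] as we go.
 * The final full or partial block goes through the single-block routine;
 * a partial block is bounced through an on-stack buffer so the assembly
 * code can always load and store a whole block.
 */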
static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
{
	u8 buf[CHACHA_BLOCK_SIZE];

	while (bytes > CHACHA_BLOCK_SIZE) {
		unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);

		chacha_4block_xor_neon(state, dst, src, nrounds, l);
		bytes -= l;
		src += l;
		dst += l;
		state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
	}
	if (bytes) {
		const u8 *s = src;
		u8 *d = dst;

		if (bytes != CHACHA_BLOCK_SIZE)
			s = d = memcpy(buf, src, bytes);
		chacha_block_xor_neon(state, d, s, nrounds);
		if (d != dst)
			memcpy(dst, buf, bytes);
		state->x[12]++;
	}
}

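/*
 * Compute the HChaCha core transform (as used for XChaCha key derivation),
 * preferring the NEON code when it is usable in the current context.
 */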
void hchacha_block_arch(const struct chacha_state *state,
			u32 out[HCHACHA_OUT_WORDS], int nrounds)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
		hchacha_block_arm(state, out, nrounds);
	} else {
		kernel_neon_begin();
		hchacha_block_neon(state, out, nrounds);
		kernel_neon_end();
	}
}
EXPORT_SYMBOL(hchacha_block_arch);

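/*
 * Encrypt/decrypt with the best available implementation.  Requests of at
 * most one block, or any request when NEON cannot be used, go to the
 * scalar code.  Larger requests run the NEON path in chunks of at most
 * SZ_4K, so each kernel_neon_begin()/kernel_neon_end() section stays
 * short and preemption is not held off for too long at a time.
 */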
void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
		       unsigned int bytes, int nrounds)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
	    bytes <= CHACHA_BLOCK_SIZE) {
		chacha_doarm(dst, src, bytes, state, nrounds);
		state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
		return;
	}

	do {
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, todo, nrounds);
		kernel_neon_end();

		bytes -= todo;
		src += todo;
		dst += todo;
	} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);

bool chacha_is_arch_optimized(void)
{
	/* We can always use at least the ARM scalar implementation. */
	return true;
}
EXPORT_SYMBOL(chacha_is_arch_optimized);

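/*
 * Enable the NEON fast path at boot when the CPU advertises NEON, except
 * on cores known to run the scalar code faster.
 */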
static int __init chacha_arm_mod_init(void)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
		switch (read_cpuid_part()) {
		case ARM_CPU_PART_CORTEX_A7:
		case ARM_CPU_PART_CORTEX_A5:
			/*
			 * The Cortex-A7 and Cortex-A5 do not perform well
			 * with the NEON implementation, but perform
			 * incredibly well with the scalar one, while also
			 * using less power.
			 */
			break;
		default:
			static_branch_enable(&use_neon);
		}
	}
	return 0;
}
subsys_initcall(chacha_arm_mod_init);

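/* Deliberately empty; providing module_exit() keeps the module unloadable. */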
static void __exit chacha_arm_mod_exit(void)
{
}
module_exit(chacha_arm_mod_exit);

MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM optimized)");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");