aarch64: Enable use of SME by EL2 and below

Allow lower ELs to use SME when booted on a system that supports it. This
requires us to set two new bits, one in each of SCR_EL3 and CPTR_EL3, to
set the maximum vector length in a similar fashion to SVE, and, if the
optional FA64 feature is present, to set another feature bit in the new
SMCR register.

Signed-off-by: Mark Brown <broonie@kernel.org>
[Mark R: use BIT() for ID_AA64SMFR0_EL1_FA64, sort ID_AA64PFR1_EL1 fields]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20220201172132.2399026-2-broonie@kernel.org
diff --git a/arch/aarch64/include/asm/cpu.h b/arch/aarch64/include/asm/cpu.h
index ce80b6e..69dfcd5 100644
--- a/arch/aarch64/include/asm/cpu.h
+++ b/arch/aarch64/include/asm/cpu.h
@@ -49,6 +49,7 @@
 #define SCR_EL3_FGTEN			BIT(27)
 #define SCR_EL3_ECVEN			BIT(28)
 #define SCR_EL3_TME			BIT(34)
+#define SCR_EL3_EnTP2			BIT(41)
 
 #define HCR_EL2_RES1			BIT(1)
 
@@ -70,8 +71,12 @@
 #define ID_AA64MMFR0_EL1_ECV		BITS(63, 60)
 
 #define ID_AA64PFR1_EL1_MTE		BITS(11, 8)
+#define ID_AA64PFR1_EL1_SME		BITS(27, 24)
 #define ID_AA64PFR0_EL1_SVE		BITS(35, 32)
 
+#define ID_AA64SMFR0_EL1		s3_0_c0_c4_5
+#define ID_AA64SMFR0_EL1_FA64		BIT(63)
+
 /*
  * Initial register values required for the boot-wrapper to run out-of-reset.
  */
@@ -96,6 +101,7 @@
 #define SPSR_EL2H		(9 << 0)	/* EL2 Handler mode */
 #define SPSR_HYP		(0x1a << 0)	/* M[3:0] = hyp, M[4] = AArch32 */
 
+#define CPTR_EL3_ESM		(1 << 12)
 #define CPTR_EL3_EZ		(1 << 8)
 
 #define ICC_SRE_EL2		S3_4_C12_C9_5
@@ -107,6 +113,10 @@
 #define ZCR_EL3			s3_6_c1_c2_0
 #define ZCR_EL3_LEN_MAX		0xf
 
+#define SMCR_EL3		s3_6_c1_c2_6
+#define SMCR_EL3_FA64		BIT(31)
+#define SMCR_EL3_LEN_MAX	0xf
+
 #define ID_AA64ISAR2_EL1	s3_0_c0_c6_2
 
 #define SCTLR_EL1_CP15BEN	(1 << 5)
diff --git a/arch/aarch64/init.c b/arch/aarch64/init.c
index 8bb0524..db73b58 100644
--- a/arch/aarch64/init.c
+++ b/arch/aarch64/init.c
@@ -47,6 +47,7 @@
 	unsigned long scr = SCR_EL3_RES1 | SCR_EL3_NS | SCR_EL3_HCE;
 	unsigned long mdcr = 0;
 	unsigned long cptr = 0;
+	unsigned long smcr = 0;
 
 	if (cpu_has_pauth())
 		scr |= SCR_EL3_APK | SCR_EL3_API;
@@ -95,6 +96,27 @@
 		msr(ZCR_EL3, ZCR_EL3_LEN_MAX);
 	}
 
+	if (mrs_field(ID_AA64PFR1_EL1, SME)) {
+		cptr |= CPTR_EL3_ESM;
+		msr(CPTR_EL3, cptr);
+		isb();
+
+		scr |= SCR_EL3_EnTP2;
+		msr(SCR_EL3, scr);
+		isb();
+
+		/*
+		 * Write the maximum possible vector length, hardware
+		 * will constrain to the actual limit.
+		 */
+		smcr = SMCR_EL3_LEN_MAX;
+
+		if (mrs_field(ID_AA64SMFR0_EL1, FA64))
+			smcr |= SMCR_EL3_FA64;
+
+		msr(SMCR_EL3, smcr);
+	}
+
 	msr(CNTFRQ_EL0, COUNTER_FREQ);
 }