arm64: Use level-2 for largest block mappings when FEAT_HAFDBS is present

In order to make invalidation by VA more efficient, limit the largest
block mapping to 2MB (i.e. a level-2 block) when FEAT_HAFDBS is present.
This has no material impact on U-Boot's runtime performance, and allows
a huge speedup when cleaning the cache.

Signed-off-by: Marc Zyngier <maz@kernel.org>
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
index deca435..1e6f7ba 100644
--- a/arch/arm/cpu/armv8/cache_v8.c
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -294,7 +294,7 @@
 	for (i = idx; size; i++) {
 		u64 next_size, *next_table;
 
-		if (level >= 1 &&
+		if (level >= gd->arch.first_block_level &&
 		    size >= map_size && !(virt & (map_size - 1))) {
 			if (level == 3)
 				table[i] = phys | attrs | PTE_TYPE_PAGE;
@@ -333,6 +333,9 @@
 	if (va_bits < 39)
 		level = 1;
 
+	if (!gd->arch.first_block_level)
+		gd->arch.first_block_level = 1;
+
 	if (gd->arch.has_hafdbs)
 		attrs |= PTE_DBM | PTE_RDONLY;
 
@@ -349,7 +352,7 @@
 	for (i = idx; size; i++) {
 		u64 next_size;
 
-		if (level >= 1 &&
+		if (level >= gd->arch.first_block_level &&
 		    size >= map_size && !(virt & (map_size - 1))) {
 			virt += map_size;
 			size -= map_size;
@@ -392,8 +395,10 @@
 	asm volatile("mrs %0, id_aa64mmfr1_el1" : "=r" (mmfr1));
 	if ((mmfr1 & 0xf) == 2) {
 		gd->arch.has_hafdbs = true;
+		gd->arch.first_block_level = 2;
 	} else {
 		gd->arch.has_hafdbs = false;
+		gd->arch.first_block_level = 1;
 	}
 
 	/* Account for all page tables we would need to cover our memory map */
diff --git a/arch/arm/include/asm/global_data.h b/arch/arm/include/asm/global_data.h
index a5ccdb6..9d80396 100644
--- a/arch/arm/include/asm/global_data.h
+++ b/arch/arm/include/asm/global_data.h
@@ -52,6 +52,7 @@
 #if defined(CONFIG_ARM64)
 	unsigned long tlb_fillptr;
 	unsigned long tlb_emerg;
+	unsigned int first_block_level;
 	bool has_hafdbs;
 #endif
 #endif