s390/stackleak: provide fast __stackleak_poison() implementation

Provide an s390 specific __stackleak_poison() implementation which is
faster than the generic variant.

With the original implementation and an enforced 4kb stack frame for the
getpid() system call, the system call overhead increases by a factor of 3
if the stackleak feature is enabled. Using the s390 mvc based variant, this
is reduced to an increase of 25% instead.

This is within the expected range, since the mvc based implementation is
more or less a memset64() variant, which yields similar results. See
commit 0b77d6701cf8 ("s390: implement memset16, memset32 & memset64").

Reviewed-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Link: https://lore.kernel.org/r/20230405130841.1350565-3-hca@linux.ibm.com
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index efffc28..dc17896 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -118,6 +118,41 @@
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 
+#define __stackleak_poison __stackleak_poison /* s390 variant of __stackleak_poison(); presumably detected by generic code via #ifndef - confirm */
+static __always_inline void __stackleak_poison(unsigned long erase_low,
+					       unsigned long erase_high,
+					       unsigned long poison)
+{	/* Fill erase_high - erase_low bytes, starting at erase_low, with the 8-byte value poison. */
+	unsigned long tmp, count;
+
+	count = erase_high - erase_low;	/* byte count; NOTE(review): the asm looks to assume a multiple of 8 - confirm with callers */
+	if (!count)
+		return;	/* nothing to poison */
+	asm volatile(
+		"	cghi	%[count],8\n"	/* exactly one 8-byte slot? */
+		"	je	2f\n"	/* yes: a single store at label 2 is enough */
+		"	aghi	%[count],-(8+1)\n"	/* bytes left after the first stg, minus 1 (mvc length code is length - 1) */
+		"	srlg	%[tmp],%[count],8\n"	/* tmp = number of full 256-byte chunks */
+		"	ltgr	%[tmp],%[tmp]\n"	/* set cc from tmp */
+		"	jz	1f\n"	/* no full chunk: go straight to the tail */
+		"0:	stg	%[poison],0(%[addr])\n"	/* seed the chunk with one poison value */
+		"	mvc	8(256-8,%[addr]),0(%[addr])\n"	/* overlapping copy replicates it over the remaining 248 bytes */
+		"	la	%[addr],256(%[addr])\n"	/* advance to the next chunk */
+		"	brctg	%[tmp],0b\n"	/* decrement tmp, loop while non-zero */
+		"1:	stg	%[poison],0(%[addr])\n"	/* tail: seed with one poison value */
+		"	larl	%[tmp],3f\n"	/* tmp = address of the mvc template at label 3 */
+		"	ex	%[count],0(%[tmp])\n"	/* execute it with the low byte of count as length code */
+		"	j	4f\n"
+		"2:	stg	%[poison],0(%[addr])\n"	/* count == 8: one store covers everything */
+		"	j	4f\n"
+		"3:	mvc	8(1,%[addr]),0(%[addr])\n"	/* template only; reached solely through ex above */
+		"4:\n"
+		: [addr] "+&a" (erase_low), [count] "+&d" (count), [tmp] "=&a" (tmp)	/* addr and count are modified, tmp is scratch */
+		: [poison] "d" (poison)
+		: "memory", "cc"	/* writes memory and changes the condition code */
+		);
+}
+
 /*
  * Thread structure
  */