rand: add AES random buffer generator

By default this is used only for buffer data generation, as I suspect
the overhead of doing it for smaller bits of data would mean it's not
really a win.

I'm pretty sure there is a lot of room for improvement here; this is
just meant to serve as a proof-of-concept using the simplest possible
implementation. The bulk of this work is really doing the detection
properly and ensuring it's abstracted out nicely.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/Makefile b/Makefile
index 634d2c9..1f3cbb5 100644
--- a/Makefile
+++ b/Makefile
@@ -31,7 +31,7 @@
   FIO_CFLAGS += -O3
 endif
 ifdef CONFIG_BUILD_NATIVE
-  FIO_CFLAGS += -march=native
+  FIO_CFLAGS += -march=native -maes
 endif
 
 ifdef CONFIG_PDB
diff --git a/arch/arch-x86-common.h b/arch/arch-x86-common.h
index f32835c..6a35673 100644
--- a/arch/arch-x86-common.h
+++ b/arch/arch-x86-common.h
@@ -15,7 +15,7 @@
 #define ARCH_HAVE_INIT
 
 extern bool tsc_reliable;
-extern int arch_random;
+extern int arch_random, arch_aes;
 
 static inline void arch_init_intel(void)
 {
@@ -37,11 +37,12 @@
 	tsc_reliable = (edx & (1U << 8)) != 0;
 
 	/*
-	 * Check for FDRAND
+	 * Check for RDRAND / AES
 	 */
 	eax = 0x1;
 	do_cpuid(&eax, &ebx, &ecx, &edx);
 	arch_random = (ecx & (1U << 30)) != 0;
+	arch_aes = (ecx & (1U << 25)) != 0;
 }
 
 static inline void arch_init_amd(void)
@@ -54,6 +55,13 @@
 
 	cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
 	tsc_reliable = (edx & (1U << 8)) != 0;
+
+	/*
+	 * Check for AES
+	 */
+	eax = 0x1;
+	do_cpuid(&eax, &ebx, &ecx, &edx);
+	arch_aes = (ecx & (1U << 25)) != 0;
 }
 
 static inline void arch_init(char *envp[])
diff --git a/configure b/configure
index a2b9bd4..7c1f38f 100755
--- a/configure
+++ b/configure
@@ -2908,6 +2908,28 @@
 fi
 print_config "timerfd_create" "$timerfd_create"
 
+##########################################
+# check for AES-NI intrinsics (<wmmintrin.h> compiled with -maes)
+arch_aes="no"
+if test "$disable_native" != "yes" ; then
+cat > $TMPC << EOF
+#include <stdint.h>
+#include <wmmintrin.h>
+
+int main(int argc, char **argv)
+{
+  __m128i key = _mm_setr_epi32(0, 2, 4, 8);
+  __m128i tmp = _mm_setr_epi32(1, 3, 5, 9);
+  tmp = _mm_aesenc_si128(tmp, key);
+  return _mm_cvtsi128_si32(tmp) == 0;
+}
+EOF
+  if compile_prog "-march=native -maes" "" "arch_aes"; then
+    arch_aes="yes"
+  fi
+fi
+print_config "arch_aes" "$arch_aes"
+
 #############################################################################
 
 if test "$wordsize" = "64" ; then
@@ -3262,6 +3284,11 @@
   CFLAGS="$CFLAGS -fsanitize=address"
   LDFLAGS="$LDFLAGS -fsanitize=address"
 fi
+if test "$arch_aes" = "yes"; then
+  CFLAGS="$CFLAGS -maes"
+  output_sym "CONFIG_ARCH_AES"
+fi
+
 print_config "Lib-based ioengines dynamic" "$dynamic_engines"
 cat > $TMPC << EOF
 int main(int argc, char **argv)
diff --git a/init.c b/init.c
index f6d724a..0cebe37 100644
--- a/init.c
+++ b/init.c
@@ -1111,7 +1111,7 @@
 	td_fill_rand_seeds_internal(td, rand_type);
 
 	init_rand_seed(&td->buf_state, td->rand_seeds[FIO_RAND_BUF_OFF],
-			rand_type);
+			arch_aes ? FIO_RAND_AES : rand_type);
 	frand_copy(&td->buf_state_prev, &td->buf_state);
 }
 
diff --git a/io_u.c b/io_u.c
index eec378d..45f7f95 100644
--- a/io_u.c
+++ b/io_u.c
@@ -2260,12 +2260,18 @@
 			left -= this_write;
 			save_buf_state(td, rs);
 		} while (left);
-	} else if (o->buffer_pattern_bytes)
+	} else if (o->buffer_pattern_bytes) {
 		fill_buffer_pattern(td, buf, max_bs);
-	else if (o->zero_buffers)
+	} else if (o->zero_buffers) {
 		memset(buf, 0, max_bs);
-	else
-		fill_random_buf(get_buf_state(td), buf, max_bs);
+	} else {
+#ifdef CONFIG_ARCH_AES
+		if (arch_aes)
+			fill_random_buf_aes(&td->buf_state, buf, max_bs);
+		else
+#endif
+			fill_random_buf(get_buf_state(td), buf, max_bs);
+	}
 }
 
 /*
diff --git a/lib/rand.c b/lib/rand.c
index 2c59356..62aa978 100644
--- a/lib/rand.c
+++ b/lib/rand.c
@@ -34,11 +34,14 @@
 */
 
 #include <string.h>
+#ifdef CONFIG_ARCH_AES
+#include <wmmintrin.h>
+#endif
 #include "rand.h"
 #include "pattern.h"
 #include "../hash.h"
 
-int arch_random;
+int arch_random, arch_aes;
 
 static inline uint64_t __seed(uint64_t x, uint64_t m)
 {
@@ -75,6 +78,14 @@
 		__rand64(state);
 }
 
+static void __init_rand_aes(struct frand_state *state, unsigned int seed)
+{
+#ifdef CONFIG_ARCH_AES
+	if (arch_aes)	/* when arch_aes is 0, state is left untouched */
+		aes_seed(state, seed);
+#endif	/* without CONFIG_ARCH_AES this function body is empty */
+}
+
 void init_rand(struct frand_state *state, enum fio_rand_type rand_type)
 {
 	state->rand_type = rand_type;
@@ -86,6 +97,9 @@
 	case FIO_RAND_64:
 		__init_rand64(&state->state64, 1);
 		break;
+	case FIO_RAND_AES:
+		__init_rand_aes(state, 1);
+		break;
 	}
 }
 
@@ -98,6 +112,14 @@
 	case FIO_RAND_32:
 		__init_rand32(&state->state32, (unsigned int) seed);
 		break;
+	case FIO_RAND_AES:
+#ifdef CONFIG_ARCH_AES
+		if (arch_aes) {
+			aes_seed(state, seed);
+			break;
+		}
+#endif
+		fio_fallthrough;
 	case FIO_RAND_64:
 		__init_rand64(&state->state64, seed);
 		break;
@@ -219,3 +241,53 @@
 					pattern, pbytes);
 	return r;
 }
+
+#ifdef CONFIG_ARCH_AES
+/*
+ * Derive the AES generator state from seed: the chained block is built
+ * from seed * 1..4 and the round key from seed * 5..8, one multiple per
+ * 32-bit lane.
+ */
+void aes_seed(struct frand_state *fs, unsigned int seed)
+{
+	fs->aes_accum = _mm_setr_epi32(seed, seed * 2, seed * 3, seed * 4);
+	fs->aes_key = _mm_setr_epi32(seed * 5, seed * 6, seed * 7, seed * 8);
+}
+
+/*
+ * Fill buf with len pseudo-random bytes. Each 16-byte block is one AES
+ * encryption round applied to the previous block under the key held in
+ * fs; the last block generated is stored back into fs so the next call
+ * continues the sequence.
+ *
+ * buf may have any alignment and len need not be a multiple of 16: a
+ * trailing partial block is cut from one extra round. A zero len does
+ * nothing and leaves fs unchanged.
+ */
+void fill_random_buf_aes(struct frand_state *fs, void *buf, unsigned int len)
+{
+	const __m128i key = fs->aes_key;
+	__m128i accum = fs->aes_accum;
+	unsigned char *out = buf;
+	unsigned int tail = len & 15;
+	unsigned int i, blocks = len / 16;
+
+	if (fio_unlikely(!len))
+		return;
+
+	for (i = 0; i < blocks; i++) {
+		accum = _mm_aesenc_si128(accum, key);
+		/* unaligned store: buf is not required to be 16-byte aligned */
+		_mm_storeu_si128((__m128i *) out, accum);
+		out += 16;
+	}
+
+	if (fio_unlikely(tail)) {
+		/* copy just the tail so nothing past buf + len is touched */
+		accum = _mm_aesenc_si128(accum, key);
+		memcpy(out, &accum, tail);
+	}
+
+	fs->aes_accum = accum;
+}
+#endif
diff --git a/lib/rand.h b/lib/rand.h
index 77548f9..4b9c8d8 100644
--- a/lib/rand.h
+++ b/lib/rand.h
@@ -4,8 +4,13 @@
 #include <inttypes.h>
 #include <assert.h>
 #include <stdio.h>
+#ifdef CONFIG_ARCH_AES
+#include <xmmintrin.h>
+#endif
 #include "types.h"
 
+extern int arch_random, arch_aes;
+
 #define FRAND32_MAX	(-1U)
 #define FRAND32_MAX_PLUS_ONE	(1.0 * (1ULL << 32))
 #define FRAND64_MAX	(-1ULL)
@@ -14,6 +19,7 @@
 enum fio_rand_type {
 	FIO_RAND_32,
 	FIO_RAND_64,
+	FIO_RAND_AES,
 };
 
 struct taus88_state {
@@ -30,6 +36,10 @@
 		struct taus88_state state32;
 		struct taus258_state state64;
 	};
+#ifdef CONFIG_ARCH_AES
+	__m128i aes_key;
+	__m128i aes_accum;
+#endif
 };
 
 static inline uint64_t rand_max(struct frand_state *state)
@@ -62,6 +72,15 @@
 	dst->s5 = src->s5;
 }
 
+static inline void __frand_aes_copy(struct frand_state *dst,
+				    struct frand_state *src)
+{
+#ifdef CONFIG_ARCH_AES
+	dst->aes_key = src->aes_key;		/* round key */
+	dst->aes_accum = src->aes_accum;	/* chained output block */
+#endif	/* NOTE(review): state64 is not copied on the non-AES fallback */
+}
+
 static inline void frand_copy(struct frand_state *dst, struct frand_state *src)
 {
 	switch (src->rand_type) {
@@ -71,6 +90,9 @@
 	case FIO_RAND_32:
 		__frand32_copy(&dst->state32, &src->state32);
 		break;
+	case FIO_RAND_AES:
+		__frand_aes_copy(dst, src);
+		break;
 	}
 	dst->rand_type = src->rand_type;
 }
@@ -196,4 +218,9 @@
 extern void __fill_random_buf_percentage(uint64_t, void *, unsigned int, unsigned int, unsigned int, char *, unsigned int);
 extern uint64_t fill_random_buf_percentage(struct frand_state *, void *, unsigned int, unsigned int, unsigned int, char *, unsigned int);
 
+#ifdef CONFIG_ARCH_AES
+void fill_random_buf_aes(struct frand_state *, void *, unsigned int);
+void aes_seed(struct frand_state *, unsigned int);
+#endif
+
 #endif
diff --git a/t/arch.c b/t/arch.c
index a72cef3..2955b23 100644
--- a/t/arch.c
+++ b/t/arch.c
@@ -1,4 +1,4 @@
 #include "../arch/arch.h"
 
 unsigned long arch_flags = 0;
-int arch_random;
+int arch_random, arch_aes;