add support for data content verification

Every data file is now filled with a repeating pseudo-random pattern,
laid out in 1KB verify blocks.  Each block starts with a header holding
an xxhash32 crc, the block length, the block's offset in the file, the
seed used to generate the pattern, and a magic string.  Readers recompute
the crc and validate the header fields; on a miscompare the first byte
that differs from the expected pattern is dumped before exiting.

File, read and write sizes are rounded up to the verify alignment, the
read/write buffers are page aligned, BUF_SIZE grows to 1MB, and the du
threads now default to 0.  The checksums come from a bundled copy of
Yann Collet's xxhash (BSD licensed).

Signed-off-by: Chris Mason <clm@fb.com>
diff --git a/Makefile b/Makefile
index b2221f3..4aa0fc4 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@
 %.o: %.c
 	$(CC) -o $*.o -c $(ALL_CFLAGS) $<
 
-simoop: simoop.o
+simoop: simoop.o xxhash.o
 	$(CC) $(ALL_CFLAGS) -o $@ $(filter %.o,$^) -lpthread
 
 depend:
diff --git a/simoop.c b/simoop.c
index 4b61f3e..b2025e4 100644
--- a/simoop.c
+++ b/simoop.c
@@ -25,6 +25,8 @@
 #include <locale.h>
 #include <ctype.h>
 #include <limits.h>
+#include <stdint.h>
+#include "xxhash.h"
 
 /* these are part of the histogram accounting */
 #define PLAT_BITS	8
@@ -37,7 +39,9 @@
 #define DIR_LEVEL 64
 
 /* buffer size for reads and writes */
-#define BUF_SIZE 65536
+#define BUF_SIZE (1 * 1024 * 1024)
+
+#define NAME_LEN 256
 
 /*
  * we make a few different kinds of files, these are appended onto the
@@ -68,7 +72,7 @@
 /* -T number of files to read */
 static int rw_threads = 8;
 /* -D number of threads running du */
-static int du_threads = 1;
+static int du_threads = 0;
 /* memory to allocate and use during each task */
 static int thinking_mem = 128 * 1024 * 1024;
 /* should we do a truncate and fsync after every write */
@@ -87,6 +91,8 @@
 /* how long we sleep while processing requests */
 static int sleeptime = 10000;
 
+static uint64_t global_rand_seed = 0x89ABCEF;
+
 /*
  * after warmup_seconds, we reset the counters to get rid of noise from
  * early in the run
@@ -159,19 +165,168 @@
 	struct stats stats[TOTAL_VMSTATS];
 };
 
-static void save_vmstat_rates(struct vmstat_info *vmstat_info)
+#define VERIFY_MAGIC "simoopv1"
+#define VERIFY_MAGIC_LEN 8
+
+/* our verify blocks are pretty small, which allows us to do sub-page writes */
+#define VERIFY_ALIGNMENT ((loff_t)1024)
+char pattern_buffer[VERIFY_ALIGNMENT];
+
+struct verify_header {
+	uint64_t crc;
+	/* how many bytes did we crc */
+	uint64_t length;
+	/* starting offset of this block in the file */
+	uint64_t offset;
+	/* seed for recreating the random data */
+	uint64_t rand_seed;
+	uint64_t spare[2];
+	/* VERIFY_MAGIC above (zero filled first) */
+	char magic[VERIFY_MAGIC_LEN];
+};
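+
+/*
+ * Each file is written as a sequence of VERIFY_ALIGNMENT sized blocks.
+ * A block starts with a verify_header followed by pattern data, and the
+ * crc covers everything in the block after the crc field itself.
+ */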
+
+static void *verify_crc_start(struct verify_header *verify_header)
 {
+	return &verify_header->length;
+}
+
+static unsigned long verify_crc_offset(void)
+{
+	struct verify_header *verify_header = NULL;
+	return (unsigned long)(&verify_header->length);
+}
+
+static uint64_t __calc_crc(void *xxhash_state,
+			   struct verify_header *verify_header)
+{
+	int ret;
+
+	ret = XXH32_resetState(xxhash_state, verify_header->rand_seed);
+	if (ret) {
+		fprintf(stderr, "error %d during XXH32_resetState\n", ret);
+		exit(1);
+	}
+
+	ret = XXH32_update(xxhash_state, verify_crc_start(verify_header),
+			   verify_header->length - verify_crc_offset());
+	if (ret) {
+		fprintf(stderr, "error %d from XXH32_update\n", ret);
+		exit(1);
+	}
+
+	return XXH32_intermediateDigest(xxhash_state);
+}
+
+static void init_pattern_buffer(char *buffer, uint64_t seed)
+{
+	char pattern[128];
 	int i;
-	for (i = 0; i < TOTAL_VMSTATS; i++) {
-		vmstat_info->last_rate[i] = vmstat_info->rate[i];
+	int this_copy;
+	int length = VERIFY_ALIGNMENT;
+	char *p = buffer;
+
+	srand(seed);
+	for (i = 0; i < 128; i++)
+		pattern[i] = rand();
+
+	while (length > 0) {
+		if (length > 128)
+			this_copy = 128;
+		else
+			this_copy = length;
+		memcpy(p, pattern, this_copy);
+		p += this_copy;
+		length -= this_copy;
 	}
 }
 
-static void save_instant_vmstat_rates(struct vmstat_info *vmstat_info)
+static void crc_block(void *xxhash_state, char *block, uint64_t offset,
+		      uint64_t length, uint64_t rand_seed)
 {
+	struct verify_header *verify_header = (struct verify_header *)block;
+
+	if (length < sizeof(*verify_header)) {
+		fprintf(stderr, "blocksize too small for crc header\n");
+		exit(1);
+	}
+
+	memset(verify_header, 0, sizeof(*verify_header));
+	verify_header->length = length;
+	verify_header->offset = offset;
+	verify_header->rand_seed = rand_seed;
+	strncpy(verify_header->magic, VERIFY_MAGIC, VERIFY_MAGIC_LEN);
+	verify_header->crc = __calc_crc(xxhash_state, verify_header);
+
+}
+
+static loff_t verify_align(loff_t value)
+{
+	return (value / VERIFY_ALIGNMENT) * VERIFY_ALIGNMENT;
+}
+
+static loff_t verify_align_up(loff_t value)
+{
+
+	value += VERIFY_ALIGNMENT - 1;
+	return verify_align(value);
+}
+
+static void dump_bad_block(char *block, uint64_t offset)
+{
+	struct verify_header *verify_header = (struct verify_header *)block;
+	uint64_t seed = verify_header->rand_seed;
+	char *tmp = malloc(VERIFY_ALIGNMENT);
 	int i;
-	for (i = 0; i < TOTAL_VMSTATS; i++) {
-		vmstat_info->instant_rate[i] = vmstat_info->rate[i];
+
+	if (!tmp) {
+		fprintf(stderr, "malloc failed\n");
+		exit(1);
+	}
+
+	init_pattern_buffer(tmp, seed);
+	memcpy(tmp, verify_header, sizeof(*verify_header));
+	for (i = 0; i < VERIFY_ALIGNMENT; i++) {
+		if (tmp[i] != block[i]) {
+			fprintf(stderr, "bad_block offset %lu (index %d) found 0x%x wanted 0x%x\n",
+				offset + i, i, (unsigned char)block[i], (unsigned char)tmp[i]);
+			break;
+		}
+	}
+	free(tmp);
+}
+
+static void check_block_headers(void *xxhash_state, char *filename,
+				char *block, uint64_t offset, uint64_t length)
+{
+	struct verify_header *verify_header = (struct verify_header *)block;
+	uint64_t crc;
+
+	if (strncmp(verify_header->magic, VERIFY_MAGIC, VERIFY_MAGIC_LEN)) {
+		fprintf(stderr, "bad magic file %s offset %lu\n", filename, offset);
+		exit(1);
+	}
+	if (verify_header->offset != offset) {
+		fprintf(stderr, "bad offset file %s offset %lu found %lu\n",
+			filename, offset, verify_header->offset);
+		exit(1);
+	}
+	if (verify_header->length != length) {
+		fprintf(stderr, "bad buffer length file %s length %lu found %lu\n",
+			filename, length, verify_header->length);
+		exit(1);
+	}
+	crc = __calc_crc(xxhash_state, verify_header);
+	if (crc != verify_header->crc) {
+		fprintf(stderr, "bad crc file %s crc %lx found %lx\n",
+			filename, crc, verify_header->crc);
+		dump_bad_block(block, offset);
+		exit(1);
 	}
 }
 
@@ -259,7 +408,7 @@
 	return ret;
 }
 
-char *option_string = "t:s:C:c:r:n:f:FR:T:m:W:M:w:i:D:oa";
+char *option_string = "t:s:C:c:r:n:f:FR:T:m:W:M:w:i:D:oaI";
 static struct option long_options[] = {
 	{"appendmode", required_argument, 0, 'a'},
 	{"mmapsize", required_argument, 0, 'M'},
@@ -362,15 +511,18 @@
 			break;
 		case 'f':
 			file_size = parse_size(optarg);
+			file_size = verify_align_up(file_size);
 			break;
 		case 'n':
 			num_files = atoi(optarg);
 			break;
 		case 'R':
 			read_size = parse_size(optarg);
+			read_size = verify_align_up(read_size);
 			break;
 		case 'W':
 			write_size = parse_size(optarg);
+			write_size = verify_align_up(write_size);
 			break;
 		case 'T':
 			rw_threads = atoi(optarg);
@@ -657,15 +809,15 @@
  */
 static void make_one_dir(char *path, int a, int b)
 {
-	char subdir[256];
+	char subdir[NAME_LEN];
 	int ret;
 
 	if (b >= 0)
-		ret = snprintf(subdir, 256, "%s/%d/%d", path, a, b);
+		ret = snprintf(subdir, NAME_LEN, "%s/%d/%d", path, a, b);
 	else
-		ret = snprintf(subdir, 256, "%s/%d", path, a);
+		ret = snprintf(subdir, NAME_LEN, "%s/%d", path, a);
 
-	if (ret >= 256 || ret < 0) {
+	if (ret >= NAME_LEN || ret < 0) {
 		perror("file name too long\n");
 		exit(1);
 	}
@@ -705,11 +857,11 @@
 	b = (seq / DIR_LEVEL) % DIR_LEVEL;
 
 	if (postfix)
-		ret = snprintf(name, 256, "%s/%d/%d/%d-%s", path, a, b, seq, postfix);
+		ret = snprintf(name, NAME_LEN, "%s/%d/%d/%d-%s", path, a, b, seq, postfix);
 	else
-		ret = snprintf(name, 256, "%s/%d/%d/%d", path, a, b, seq);
+		ret = snprintf(name, NAME_LEN, "%s/%d/%d/%d", path, a, b, seq);
 
-	if (ret >= 256 || ret < 0) {
+	if (ret >= NAME_LEN || ret < 0) {
 		perror("file name too long\n");
 		exit(1);
 	}
@@ -718,7 +870,7 @@
 /* unlink working files not part of the main dataset for a given filename. */
 static void unlink_extra(char *path, int seq)
 {
-	char name[256];
+	char name[NAME_LEN];
 	int ret;
 
 	join_path(name, path, seq, RESULT_FILE);
@@ -739,7 +891,7 @@
 static int open_path(char *path, int seq, char *postfix, int flags)
 {
 	int fd;
-	char name[256];
+	char name[NAME_LEN];
 
 	join_path(name, path, seq, postfix);
 	fd = open(name, O_RDWR | O_CREAT | flags, 0600);
@@ -750,7 +902,7 @@
 	return fd;
 }
 
-static int randomize_size(int sz)
+static loff_t randomize_size(int sz)
 {
 	if (!oddsizes)
 		return sz;
@@ -758,14 +910,95 @@
 	return rand() % sz;
 }
 
+static void write_pattern(int fd, void *xxhash_state, char *buf,
+			  int buffer_len, loff_t start, off_t length)
+{
+	ssize_t ret;
+	loff_t aligned_start;
+	char *p;
+	ssize_t this_write;
+	ssize_t cur_len;
+
+	/* round down the start */
+	aligned_start = verify_align(start);
+	length += start - aligned_start;
+
+	/* round up the length */
+	length = verify_align_up(length);
+
+	while (length > 0) {
+		if (length > buffer_len)
+			this_write = buffer_len;
+		else
+			this_write = length;
+
+		/* fill the buffer with the pattern and headers */
+		cur_len = 0;
+		p = buf;
+		while (cur_len < this_write) {
+			memcpy(p, pattern_buffer, VERIFY_ALIGNMENT);
+			crc_block(xxhash_state, p, aligned_start + cur_len,
+				  VERIFY_ALIGNMENT, global_rand_seed);
+			cur_len += VERIFY_ALIGNMENT;
+			p += VERIFY_ALIGNMENT;
+		}
+
+		ret = pwrite(fd, buf, this_write, aligned_start);
+		if (ret != this_write) {
+			perror("pwrite");
+			exit(1);
+		}
+		aligned_start += this_write;
+		length -= this_write;
+	}
+}
+
+static void read_and_crc(int fd, char *filename,
+			 void *xxhash_state, char *buf,
+			 int buffer_len, loff_t start, off_t length)
+{
+	ssize_t ret;
+	loff_t aligned_start;
+	char *p;
+	ssize_t this_read;
+	ssize_t cur_len;
+
+	aligned_start = verify_align(start);
+	length += start - aligned_start;
+	length = verify_align_up(length);
+
+	while (length > 0) {
+		if (length > buffer_len)
+			this_read = buffer_len;
+		else
+			this_read = length;
+		ret = pread(fd, buf, this_read, aligned_start);
+		if (ret != this_read) {
+			perror("pread");
+			exit(1);
+		}
+		p = buf;
+		cur_len = 0;
+		while (cur_len < this_read) {
+			check_block_headers(xxhash_state, filename,
+					    p, aligned_start + cur_len,
+					    VERIFY_ALIGNMENT);
+			cur_len += VERIFY_ALIGNMENT;
+			p += VERIFY_ALIGNMENT;
+		}
+		aligned_start += this_read;
+		length -= this_read;
+	}
+}
+
 /* helper for startup, do initial writes to a given fd */
-static void fill_one_file(int fd)
+static void fill_one_file(int fd, void *xxhash_state)
 {
 	struct stat st;
 	int ret;
-	unsigned long long cur_size;
+	loff_t cur_size;
 	char *buf;
-	unsigned long long this_size = randomize_size(file_size);
+	loff_t this_size = randomize_size(file_size);
 
 	ret = fstat(fd, &st);
 	if (ret < 0) {
@@ -774,41 +1007,16 @@
 	}
 	cur_size = st.st_size;
 
-	if (append_mode && oddsizes && this_size > 4096 && rand() % 2) {
-		this_size = this_size % 4096;
-	}
-
-	if (cur_size >= this_size) {
-		if (append_mode) {
-			ftruncate(fd, this_size);
-		}
+	if (cur_size >= this_size)
 		return;
-	}
 
-	buf = malloc(BUF_SIZE);
-	if (!buf) {
-		perror("malloc");
+	ret = posix_memalign((void **)(&buf), getpagesize(), BUF_SIZE);
+	if (ret) {
+		perror("posix_memalign");
 		exit(1);
 	}
 
-	memset(buf, 'a', BUF_SIZE);
-	while (cur_size < this_size) {
-		int this_write = this_size - cur_size;
-
-		if (this_write > BUF_SIZE)
-			this_write = BUF_SIZE;
-
-		ret = write(fd, buf, this_write);
-		if (ret < 0) {
-			perror("write");
-			exit(1);
-		}
-		if (ret < this_write) {
-			fprintf(stderr, "short write\n");
-			exit(1);
-		}
-		cur_size += ret;
-	}
+	write_pattern(fd, xxhash_state, buf, BUF_SIZE, cur_size, this_size - cur_size);
 	free(buf);
 }
 
@@ -880,79 +1088,68 @@
 	int fd;
 	int ret;
 	int i;
-	unsigned long read_bytes = read_size;
-	unsigned long long offset;
+	off_t offset;
+	ssize_t read_bytes = read_size;
+	struct stat st;
+	void *xxhash_state = XXH32_init(global_rand_seed);
+	char name[NAME_LEN];
 
-	if (read_bytes > file_size)
-		read_bytes = file_size;
-
-	/* pick a random MB starting point */
-	offset = rand() % (file_size / (1024 * 1024));
-	offset *= 1024 * 1024;
-	if (offset + read_bytes > file_size)
-		offset = file_size - read_bytes;
+	join_path(name, path, seq, DATA_FILE);
 
 	fd = open_path(path, seq, DATA_FILE, 0);
 
-	while (read_bytes > 0) {
-		ret = pread(fd, buf, BUF_SIZE, offset);
-		if (ret == 0)
-			break;
-		if (ret < 0) {
-			fprintf(stderr, "bad read %s seq %d ret %d offset %Lu\n", path, seq, ret, offset);
-			perror("read");
-			exit(1);
-		}
-		offset += ret;
-		read_bytes -= ret;
+	ret = fstat(fd, &st);
+	if (ret < 0) {
+		perror("stat");
+		exit(1);
 	}
 
+	offset = rand() % 100;
+	offset = (offset * st.st_size) / 100;
+	offset = verify_align(offset);
+
+	/* does the read extend past the end of the file? */
+	if (offset + read_bytes > st.st_size)
+		offset = 0;
+	if (offset + read_bytes > st.st_size)
+		read_bytes = verify_align(st.st_size);
+
+	read_and_crc(fd, name, xxhash_state, buf, read_bytes, offset,
+		     read_bytes);
+
 	/* if we don't have writers making dirty inodes, make some here */
 	if (!write_size) {
 		for (i = 0; i < 8; i++)
 			dirty_an_inode(path);
 	}
 	close(fd);
+	XXH32_digest(xxhash_state);
 }
 
 /* creates a temp file in one of the subdirs and sends down write_bytes to it */
 static void write_to_file(char *path, int seq, char *buf)
 {
 	int fd;
-	int ret;
 	int i;
 	int write_bytes = randomize_size(write_size);
-	unsigned long long offset = 0;
+	loff_t offset;
+	void *xxhash_state = XXH32_init(global_rand_seed);
 
-	if (append_mode)
+	if (append_mode) {
 		fd = open_path(path, seq, DATA_FILE, O_APPEND);
-	else
-		fd = open_path(path, seq, RESULT_FILE, 0);
-
-	if (oddsizes && write_bytes > 4096)
-		write_bytes = write_bytes % 4096;
-
-	while (write_bytes > 0) {
-		int this_write = write_bytes;
-		if (this_write > BUF_SIZE)
-			this_write = BUF_SIZE;
-
-		ret = write(fd, buf, this_write);
-		if (ret == 0)
-			break;
-		if (ret < 0) {
-			fprintf(stderr, "bad write %s seq %d ret %d offset %Lu\n", path, seq, ret, offset);
-			perror("write");
+		offset = lseek(fd, 0, SEEK_CUR);
+		if (offset < 0) {
+			perror("lseek");
 			exit(1);
 		}
-		offset += ret;
-		write_bytes -= ret;
+	} else {
+		fd = open_path(path, seq, RESULT_FILE, 0);
+		offset = 0;
 	}
-	if (funksync) {
-		fsync(fd);
-		ftruncate(fd, 0);
-		fsync(fd);
-	}
+
+	write_pattern(fd, xxhash_state, buf, write_bytes, offset, write_bytes);
+	XXH32_digest(xxhash_state);
+
 	close(fd);
 
 	/* make some dirty inodes */
@@ -970,15 +1167,19 @@
 {
 	unsigned long seq;
 	int fd;
+	void *xxhash_state = XXH32_init(global_rand_seed);
 
 	for (seq = 0; seq < num_files; seq++) {
 		fd = open_path(path, seq, DATA_FILE, O_APPEND);
-		fill_one_file(fd);
+		fill_one_file(fd, xxhash_state);
 		close(fd);
 
 		/* cleanup from the last run */
 		unlink_extra(path, seq);
 	}
+
+	/* just to free the state */
+	XXH32_digest(xxhash_state);
 }
 
 void *filler_thread(void *arg)
@@ -1083,14 +1284,15 @@
 	struct timeval now;
 	struct timeval start;
 	struct thread_data *td = arg;
-	char *read_buf;
-	char *write_buf;
+	char *read_buf = NULL;
+	char *write_buf = NULL;
 	char *mem = NULL;
 	pthread_t *read_tids;
 	pthread_t *write_tids;
 	char *mmap_ptr;
-	int i;
 	int warmup_zerod = 0;
+	int i;
+	int ret;
 
 	read_tids = malloc(sizeof(*read_tids) * rw_threads);
 	write_tids = malloc(sizeof(*write_tids) * rw_threads);
@@ -1111,10 +1313,10 @@
 			memset(td->stats, 0, sizeof(*td->stats) * TOTAL_STATS);
 			warmup_zerod = 1;
 		}
-		read_buf = malloc(rw_threads * read_size);
-		write_buf = malloc(rw_threads * write_size);
+		ret = posix_memalign((void **)(&read_buf), getpagesize(), rw_threads * read_size);
+		ret |= posix_memalign((void **)(&write_buf), getpagesize(), rw_threads * write_size);
 
-		if (!read_buf || !write_buf) {
+		if (ret) {
 			perror("allocation");
 			exit(1);
 		}
@@ -1217,6 +1419,22 @@
 	return NULL;
 }
 
+static void save_vmstat_rates(struct vmstat_info *vmstat_info)
+{
+	int i;
+	for (i = 0; i < TOTAL_VMSTATS; i++) {
+		vmstat_info->last_rate[i] = vmstat_info->rate[i];
+	}
+}
+
+static void save_instant_vmstat_rates(struct vmstat_info *vmstat_info)
+{
+	int i;
+	for (i = 0; i < TOTAL_VMSTATS; i++) {
+		vmstat_info->instant_rate[i] = vmstat_info->rate[i];
+	}
+}
+
 /*
  * read in /proc/vmstat so we can sum the allocation stall lines and
  * print them out
@@ -1441,6 +1659,8 @@
 	if (du_threads > total_paths)
 		du_threads = total_paths;
 
+	init_pattern_buffer(pattern_buffer, global_rand_seed);
+
 	/* du threads might be zero */
 	du_tids = calloc(du_threads + 1, sizeof(pthread_t));
 
diff --git a/xxhash.c b/xxhash.c
new file mode 100644
index 0000000..4736c52
--- /dev/null
+++ b/xxhash.c
@@ -0,0 +1,421 @@
+/*
+xxHash - Fast Hash algorithm
+Copyright (C) 2012-2014, Yann Collet.
+BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+You can contact the author at :
+- xxHash source repository : http://code.google.com/p/xxhash/
+*/
+
+
+//**************************************
+// Tuning parameters
+//**************************************
+// Unaligned memory access is automatically enabled for "common" CPU, such as x86.
+// For other CPUs, the compiler will be more cautious, and insert extra code to ensure aligned access is respected.
+// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance.
+// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for uint32_t).
+#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#  define XXH_USE_UNALIGNED_ACCESS 1
+#endif
+
+// XXH_ACCEPT_NULL_INPUT_POINTER :
+// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
+// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
+// This option has a very small performance cost (only measurable on small inputs).
+// By default, this option is disabled. To enable it, uncomment below define :
+//#define XXH_ACCEPT_NULL_INPUT_POINTER 1
+
+// XXH_FORCE_NATIVE_FORMAT :
+// By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
+// Results are therefore identical for little-endian and big-endian CPU.
+// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
+// Should endian-independence be of no importance for your application, you may set the #define below to 1.
+// It will improve speed for Big-endian CPU.
+// This option has no impact on Little-endian CPU.
+#define XXH_FORCE_NATIVE_FORMAT 0
+
+
+//**************************************
+// Includes & Memory related functions
+//**************************************
+#include "xxhash.h"
+#include <stdlib.h>
+#include <string.h>
+
+
+#if defined(__GNUC__)  && !defined(XXH_USE_UNALIGNED_ACCESS)
+#  define _PACKED __attribute__ ((packed))
+#else
+#  define _PACKED
+#endif
+
+#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
+#  ifdef __IBMC__
+#    pragma pack(1)
+#  else
+#    pragma pack(push, 1)
+#  endif
+#endif
+
+typedef struct _uint32_t_S { uint32_t v; } _PACKED uint32_t_S;
+
+#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
+#  pragma pack(pop)
+#endif
+
+#define A32(x) (((uint32_t_S *)(x))->v)
+
+
+//***************************************
+// Compiler-specific Functions and Macros
+//***************************************
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+// Note : although _rotl exists for minGW (GCC under windows), performance seems poor
+#if defined(_MSC_VER)
+#  define XXH_rotl32(x,r) _rotl(x,r)
+#else
+#  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+#endif
+
+#if defined(_MSC_VER)     // Visual Studio
+#  define XXH_swap32 _byteswap_ulong
+#elif GCC_VERSION >= 403
+#  define XXH_swap32 __builtin_bswap32
+#else
+static inline uint32_t XXH_swap32 (uint32_t x)
+{
+    return  ((x << 24) & 0xff000000 ) |
+        ((x <<  8) & 0x00ff0000 ) |
+        ((x >>  8) & 0x0000ff00 ) |
+        ((x >> 24) & 0x000000ff );
+}
+#endif
+
+
+//**************************************
+// Constants
+//**************************************
+#define PRIME32_1   2654435761U
+#define PRIME32_2   2246822519U
+#define PRIME32_3   3266489917U
+#define PRIME32_4    668265263U
+#define PRIME32_5    374761393U
+
+
+//**************************************
+// Architecture Macros
+//**************************************
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+#ifndef XXH_CPU_LITTLE_ENDIAN   // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch
+    static const int one = 1;
+#   define XXH_CPU_LITTLE_ENDIAN   (*(char*)(&one))
+#endif
+
+
+//**************************************
+// Macros
+//**************************************
+#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }    // use only *after* variable declarations
+
+
+//****************************
+// Memory reads
+//****************************
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+static uint32_t XXH_readLE32_align(const uint32_t* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr));
+    else
+        return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr);
+}
+
+static uint32_t XXH_readLE32(const uint32_t* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); }
+
+
+//****************************
+// Simple Hash Functions
+//****************************
+static uint32_t XXH32_endian_align(const void* input, int len, uint32_t seed, XXH_endianess endian, XXH_alignment align)
+{
+    const uint8_t *p = (const uint8_t *)input;
+    const uint8_t * const bEnd = p + len;
+    uint32_t h32;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL) { len=0; p=(const uint8_t *)(size_t)16; }
+#endif
+
+    if (len>=16)
+    {
+        const uint8_t * const limit = bEnd - 16;
+        uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
+        uint32_t v2 = seed + PRIME32_2;
+        uint32_t v3 = seed + 0;
+        uint32_t v4 = seed - PRIME32_1;
+
+        do
+        {
+            v1 += XXH_readLE32_align((const uint32_t*)p, endian, align) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
+            v2 += XXH_readLE32_align((const uint32_t*)p, endian, align) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
+            v3 += XXH_readLE32_align((const uint32_t*)p, endian, align) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
+            v4 += XXH_readLE32_align((const uint32_t*)p, endian, align) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
+        } while (p<=limit);
+
+        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+    }
+    else
+    {
+        h32  = seed + PRIME32_5;
+    }
+
+    h32 += (uint32_t) len;
+
+    while (p<=bEnd-4)
+    {
+        h32 += XXH_readLE32_align((const uint32_t*)p, endian, align) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+uint32_t XXH32(const void* input, uint32_t len, uint32_t seed)
+{
+#if 0
+    // Simple version, good for code maintenance, but unfortunately slow for small inputs
+    void* state = XXH32_init(seed);
+    XXH32_update(state, input, len);
+    return XXH32_digest(state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+#  if !defined(XXH_USE_UNALIGNED_ACCESS)
+    if ((((size_t)input) & 3) == 0)   // Input is aligned, let's leverage the speed advantage
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        else
+            return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+#  endif
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+
+//****************************
+// Advanced Hash Functions
+//****************************
+
+int XXH32_sizeofState(void)
+{
+    XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t));   // A compilation error here means XXH32_SIZEOFSTATE is not large enough
+    return sizeof(struct XXH_state32_t);
+}
+
+
+XXH_errorcode XXH32_resetState(void* state_in, uint32_t seed)
+{
+    struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
+    state->seed = seed;
+    state->v1 = seed + PRIME32_1 + PRIME32_2;
+    state->v2 = seed + PRIME32_2;
+    state->v3 = seed + 0;
+    state->v4 = seed - PRIME32_1;
+    state->total_len = 0;
+    state->memsize = 0;
+    return XXH_OK;
+}
+
+
+void* XXH32_init (uint32_t seed)
+{
+    void *state = malloc (sizeof(struct XXH_state32_t));
+    XXH32_resetState(state, seed);
+    return state;
+}
+
+
+static XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian)
+{
+    struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
+    const uint8_t *p = (const uint8_t *)input;
+    const uint8_t * const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 16)   // fill in tmp buffer
+    {
+        memcpy(state->memory + state->memsize, input, len);
+        state->memsize +=  len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   // some data left from previous update
+    {
+        memcpy(state->memory + state->memsize, input, 16-state->memsize);
+        {
+            const uint32_t* p32 = (const uint32_t*)state->memory;
+            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++;
+            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++;
+            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++;
+            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++;
+        }
+        p += 16-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p <= bEnd-16)
+    {
+        const uint8_t * const limit = bEnd - 16;
+        uint32_t v1 = state->v1;
+        uint32_t v2 = state->v2;
+        uint32_t v3 = state->v3;
+        uint32_t v4 = state->v4;
+
+        do
+        {
+            v1 += XXH_readLE32((const uint32_t*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
+            v2 += XXH_readLE32((const uint32_t*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
+            v3 += XXH_readLE32((const uint32_t*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
+            v4 += XXH_readLE32((const uint32_t*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
+        } while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)
+    {
+        memcpy(state->memory, p, bEnd-p);
+        state->memsize = (int)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+static uint32_t XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian)
+{
+    struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
+    const uint8_t *p = (const uint8_t *)state->memory;
+    uint8_t * bEnd = (uint8_t *)state->memory + state->memsize;
+    uint32_t h32;
+
+    if (state->total_len >= 16)
+    {
+        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
+    }
+    else
+    {
+        h32  = state->seed + PRIME32_5;
+    }
+
+    h32 += (uint32_t) state->total_len;
+
+    while (p<=bEnd-4)
+    {
+        h32 += XXH_readLE32((const uint32_t*)p, endian) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+uint32_t XXH32_intermediateDigest (void* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian);
+}
+
+
+uint32_t XXH32_digest (void* state_in)
+{
+    uint32_t h32 = XXH32_intermediateDigest(state_in);
+
+    free(state_in);
+
+    return h32;
+}
diff --git a/xxhash.h b/xxhash.h
new file mode 100644
index 0000000..8850d20
--- /dev/null
+++ b/xxhash.h
@@ -0,0 +1,197 @@
+/*
+   xxHash - Fast Hash algorithm
+   Header File
+   Copyright (C) 2012-2014, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+  
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+  
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - xxHash source repository : http://code.google.com/p/xxhash/
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name            Speed       Q.Score   Author
+xxHash          5.4 GB/s     10
+CrapWow         3.2 GB/s      2       Andrew
+MurmurHash 3a   2.7 GB/s     10       Austin Appleby
+SpookyHash      2.0 GB/s     10       Bob Jenkins
+SBox            1.4 GB/s      9       Bret Mulvey
+Lookup3         1.2 GB/s      9       Bob Jenkins
+SuperFastHash   1.2 GB/s      1       Paul Hsieh
+CityHash64      1.05 GB/s    10       Pike & Alakuijala
+FNV             0.55 GB/s     5       Fowler, Noll, Vo
+CRC32           0.43 GB/s     9
+MD5-32          0.33 GB/s    10       Ronald L. Rivest
+SHA1-32         0.28 GB/s    10
+
+Q.Score is a measure of quality of the hash function. 
+It depends on successfully passing SMHasher test set. 
+10 is a perfect score.
+*/
+
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#include <inttypes.h>
+
+struct XXH_state32_t
+{
+    uint64_t total_len;
+    uint32_t seed;
+    uint32_t v1;
+    uint32_t v2;
+    uint32_t v3;
+    uint32_t v4;
+    int memsize;
+    char memory[16];
+};
+
+//****************************
+// Type
+//****************************
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+
+//****************************
+// Simple Hash Functions
+//****************************
+
+uint32_t XXH32 (const void* input, uint32_t len, uint32_t seed);
+
+/*
+XXH32() :
+    Calculate the 32-bits hash of sequence of length "len" stored at memory address "input".
+    The memory between input & input+len must be valid (allocated and read-accessible).
+    "seed" can be used to alter the result predictably.
+    This function successfully passes all SMHasher tests.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+    Note that "len" is type "int", which means it is limited to 2^31-1.
+    If your data is larger, use the advanced functions below.
+*/
+
+
+
+//****************************
+// Advanced Hash Functions
+//****************************
+
+void*         XXH32_init   (unsigned int seed);
+XXH_errorcode XXH32_update (void* state, const void* input, int len);
+unsigned int  XXH32_digest (void* state);
+
+/*
+These functions calculate the xxhash of an input provided in several small packets,
+as opposed to an input provided as a single block.
+
+It must be started with :
+void* XXH32_init()
+The function returns a pointer which holds the state of calculation.
+
+This pointer must be provided as "void* state" parameter for XXH32_update().
+XXH32_update() can be called as many times as necessary.
+The user must provide a valid (allocated) input.
+The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+Note that "len" is type "int", which means it is limited to 2^31-1. 
+If your data is larger, it is recommended to chunk your data into blocks 
+of size for example 2^30 (1GB) to avoid any "int" overflow issue.
+
+Finally, you can end the calculation anytime, by using XXH32_digest().
+This function returns the final 32-bits hash.
+You must provide the same "void* state" parameter created by XXH32_init().
+Memory will be freed by XXH32_digest().
+*/
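+
+/*
+Illustrative sketch of the sequence described above (seed, chunk1/len1 and
+chunk2/len2 are placeholder variables, error checking omitted):
+
+    void* state = XXH32_init(seed);
+    XXH32_update(state, chunk1, len1);
+    XXH32_update(state, chunk2, len2);
+    unsigned int hash = XXH32_digest(state);   // returns the hash and frees the state
+*/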
+
+
+int           XXH32_sizeofState(void);
+XXH_errorcode XXH32_resetState(void* state, unsigned int seed);
+
+#define       XXH32_SIZEOFSTATE 48
+typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t;
+/*
+These functions allow user application to make its own allocation for state.
+
+XXH32_sizeofState() is used to know how much space must be allocated for the xxHash 32-bits state.
+Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer.
+This pointer must then be provided as 'state' into XXH32_resetState(), which initializes the state.
+
+For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()),
+use the structure XXH32_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields.
+*/
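+
+/*
+Illustrative sketch of a caller-allocated state, using the structure above
+(seed, buf and len are placeholder variables):
+
+    XXH32_stateSpace_t space;
+    XXH32_resetState(&space, seed);
+    XXH32_update(&space, buf, len);
+    unsigned int hash = XXH32_intermediateDigest(&space);   // does not free the state
+*/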
+
+
+unsigned int XXH32_intermediateDigest (void* state);
+/*
+This function does the same as XXH32_digest(), generating a 32-bit hash,
+but preserve memory context.
+This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXH32_update().
+To free memory context, use XXH32_digest(), or free().
+*/
+
+
+
+//****************************
+// Deprecated function names
+//****************************
+// The following translations are provided to ease code transition
+// You are encouraged to no longer use these function names
+#define XXH32_feed   XXH32_update
+#define XXH32_result XXH32_digest
+#define XXH32_getIntermediateResult XXH32_intermediateDigest
+
+
+
+#if defined (__cplusplus)
+}
+#endif