fssum: be less strict on holes and data sections (-s and -S added)

By default, do not include information about a file's block structure in
the sum (this is the -S behavior). This effectively treats a hole like a run
of zero bytes for the purpose of checksum computation.

The old behavior is still available with the -s option.
diff --git a/fssum.c b/fssum.c
index 639d1c0..3f93ffc 100644
--- a/fssum.c
+++ b/fssum.c
@@ -54,6 +54,13 @@
 	int len;
 };
 
+typedef struct _sum {
+	MD5_CTX 	md5;
+	unsigned char	out[16];
+} sum_t;
+
+typedef int (*sum_file_data_t)(int fd, sum_t *dst);
+
 int gen_manifest = 0;
 int in_manifest = 0;
 char *checksum = NULL;
@@ -71,13 +78,14 @@
 	FLAG_CTIME,
 	FLAG_DATA,
 	FLAG_OPEN_ERROR,
+	FLAG_STRUCTURE,
 	NUM_FLAGS
 };
 
-const char flchar[] = "ugoamcde";
+const char flchar[] = "ugoamcdes";
 char line[65536];
 
-int flags[NUM_FLAGS] = { 1, 1, 1, 1, 1, 0, 1, 0 };
+int flags[NUM_FLAGS] = {1, 1, 1, 1, 1, 0, 1, 0, 0};
 
 char *
 getln(char *buf, int size, FILE *fp)
@@ -141,12 +149,13 @@
 	fprintf(stderr, "         c      : include ctime\n");
 	fprintf(stderr, "         d      : include file data\n");
 	fprintf(stderr, "         e      : include open errors (aborts otherwise)\n");
-	fprintf(stderr, "    -[UGOAMCDE] : exclude respective field from calculation\n");
+	fprintf(stderr, "         s      : include block structure (holes)\n");
+	fprintf(stderr, "    -[UGOAMCDES]: exclude respective field from calculation\n");
 	fprintf(stderr, "    -n          : reset all flags\n");
 	fprintf(stderr, "    -N          : set all flags\n");
 	fprintf(stderr, "    -x path     : exclude path when building checksum (multiple ok)\n");
 	fprintf(stderr, "    -h          : this help\n\n");
-	fprintf(stderr, "The default field mask is ugoamCdE. If the checksum/manifest is read from a\n");
+	fprintf(stderr, "The default field mask is ugoamCdES. If the checksum/manifest is read from a\n");
 	fprintf(stderr, "file, the mask is taken from there and the values given on the command line\n");
 	fprintf(stderr, "are ignored.\n");
 	exit(-1);
@@ -154,11 +163,6 @@
 
 static char buf[65536];
 
-typedef struct _sum {
-	MD5_CTX 	md5;
-	unsigned char	out[16];
-} sum_t;
-
 void *
 alloc(size_t sz)
 {
@@ -222,7 +226,67 @@
 }
 
 int
-sum_file_data(int fd, sum_t *dst)
+sum_file_data_permissive(int fd, sum_t *dst)
+{
+	/* Sum fd from its current offset; each run of holes/zero bytes is hashed
+	 * as (0, run length). Returns 0, -2 (lseek failed) or read()'s result. */
+	int ret, i;
+	off_t pos, old;
+	uint64_t zeros = 0;
+
+	pos = lseek(fd, 0, SEEK_CUR);
+	if (pos == (off_t)-1)
+		return errno == ENXIO ? 0 : -2;
+
+	while (1) {
+		old = pos;
+		pos = lseek(fd, pos, SEEK_DATA);
+		if (pos == (off_t)-1) {
+			if (errno == ENXIO) {
+				ret = 0;
+				pos = lseek(fd, 0, SEEK_END);
+				if (pos != (off_t)-1)
+					zeros += pos - old;
+			} else {
+				ret = -2;
+			}
+			break;
+		}
+		ret = read(fd, buf, sizeof(buf));
+		assert(ret); /* eof found by lseek */
+		if (ret <= 0)
+			break;
+		if (old < pos) /* hole */
+			zeros += pos - old;
+		for (i = 0; i < ret; ) { /* no ++i: the inner loops advance i */
+			/* check the bound first so buf[ret] is never read */
+			for (old = i; i < ret && buf[i] == 0; ++i)
+				;
+			zeros += i - old; /* code zero bytes like a hole */
+			if (i == ret)
+				break;
+			if (zeros) {
+				sum_add_u64(dst, 0);
+				sum_add_u64(dst, zeros);
+				zeros = 0;
+			}
+			for (old = i; i < ret && buf[i] != 0; ++i)
+				;
+			sum_add(dst, buf + old, i - old);
+		}
+		pos += ret;
+	}
+
+	if (zeros) {
+		sum_add_u64(dst, 0);
+		sum_add_u64(dst, zeros);
+	}
+
+	return ret;
+}
+
+int
+sum_file_data_strict(int fd, sum_t *dst)
 {
 	int ret;
 	off_t pos;
@@ -403,6 +467,8 @@
 	int ret;
 	int fd;
 	int excl;
+	sum_file_data_t sum_file_data = flags[FLAG_STRUCTURE] ?
+			sum_file_data_strict : sum_file_data_permissive;
 
 	d = fdopendir(dirfd);
 	if (!d) {
@@ -563,9 +629,10 @@
 	int plen;
 	int elen;
 	int n_flags = 0;
+	const char *allopts = "heEfuUgGoOaAmMcCdDsSnNw:r:x:";
 
 	out_fp = stdout;
-	while ((c = getopt(argc, argv, "heEfuUgGoOaAmMcCdDnNw:r:x:")) != EOF) {
+	while ((c = getopt(argc, argv, allopts)) != EOF) {
 		switch(c) {
 		case 'f':
 			gen_manifest = 1;
@@ -586,6 +653,8 @@
 		case 'D':
 		case 'e':
 		case 'E':
+		case 's':
+		case 'S':
 			++n_flags;
 			parse_flag(c);
 			break;