fssum: be less strict on holes and data sections (-s and -S added)
By default, do not include information about a file's block structure in
the sum (-S option). This effectively treats a hole like a run of zeros for
the purpose of checksum computation.
The old behavior is still available with the -s option.
diff --git a/fssum.c b/fssum.c
index 639d1c0..3f93ffc 100644
--- a/fssum.c
+++ b/fssum.c
@@ -54,6 +54,13 @@
int len;
};
+typedef struct _sum { /* checksum accumulator passed to sum_add()/sum_add_u64() */
+ MD5_CTX md5; /* MD5 context */
+ unsigned char out[16]; /* presumably the finished 16-byte digest -- TODO confirm */
+} sum_t;
+
+typedef int (*sum_file_data_t)(int fd, sum_t *dst); /* strict or permissive file data summer */
+
int gen_manifest = 0;
int in_manifest = 0;
char *checksum = NULL;
@@ -71,13 +78,14 @@
FLAG_CTIME,
FLAG_DATA,
FLAG_OPEN_ERROR,
+ FLAG_STRUCTURE,
NUM_FLAGS
};
-const char flchar[] = "ugoamcde";
+const char flchar[] = "ugoamcdes";
char line[65536];
-int flags[NUM_FLAGS] = { 1, 1, 1, 1, 1, 0, 1, 0 };
+int flags[NUM_FLAGS] = {1, 1, 1, 1, 1, 0, 1, 0, 0};
char *
getln(char *buf, int size, FILE *fp)
@@ -141,12 +149,13 @@
fprintf(stderr, " c : include ctime\n");
fprintf(stderr, " d : include file data\n");
fprintf(stderr, " e : include open errors (aborts otherwise)\n");
- fprintf(stderr, " -[UGOAMCDE] : exclude respective field from calculation\n");
+ fprintf(stderr, " s : include block structure (holes)\n");
+ fprintf(stderr, " -[UGOAMCDES]: exclude respective field from calculation\n");
fprintf(stderr, " -n : reset all flags\n");
fprintf(stderr, " -N : set all flags\n");
fprintf(stderr, " -x path : exclude path when building checksum (multiple ok)\n");
fprintf(stderr, " -h : this help\n\n");
- fprintf(stderr, "The default field mask is ugoamCdE. If the checksum/manifest is read from a\n");
+ fprintf(stderr, "The default field mask is ugoamCdES. If the checksum/manifest is read from a\n");
fprintf(stderr, "file, the mask is taken from there and the values given on the command line\n");
fprintf(stderr, "are ignored.\n");
exit(-1);
@@ -154,11 +163,6 @@
static char buf[65536];
-typedef struct _sum {
- MD5_CTX md5;
- unsigned char out[16];
-} sum_t;
-
void *
alloc(size_t sz)
{
@@ -222,7 +226,67 @@
}
int
-sum_file_data(int fd, sum_t *dst)
+sum_file_data_permissive(int fd, sum_t *dst)
+{
+	/*
+	 * Sum the data of fd into dst while ignoring block structure: a hole
+	 * and a run of zero bytes inside data are coded alike, as a zero marker
+	 * followed by the run length. Returns 0 once all data is summed, -2 if
+	 * lseek() fails, or read()'s result if the read returns nothing.
+	 */
+	int ret, i;
+	off_t pos, old;
+	uint64_t zeros = 0;	/* zero bytes seen but not yet coded */
+
+	pos = lseek(fd, 0, SEEK_CUR);
+	if (pos == (off_t)-1)
+		return errno == ENXIO ? 0 : -2;
+	while (1) {
+		old = pos;
+		pos = lseek(fd, pos, SEEK_DATA);
+		if (pos == (off_t)-1) {
+			ret = errno == ENXIO ? 0 : -2;
+			if (ret == 0) {	/* no data left: count a trailing hole */
+				pos = lseek(fd, 0, SEEK_END);
+				if (pos != (off_t)-1)
+					zeros += pos - old;
+			}
+			break;
+		}
+		ret = read(fd, buf, sizeof(buf));
+		assert(ret); /* eof found by lseek */
+		if (ret <= 0)
+			break;
+		if (old < pos) /* hole */
+			zeros += pos - old;
+		/* Each scan tests i < ret before reading buf[i] and stops on the
+		 * first unconsumed byte, so the outer loop must not step i. */
+		for (i = 0; i < ret; ) {
+			for (old = i; i < ret && buf[i] == 0; ++i)
+				;
+			zeros += i - old; /* code like a hole */
+			if (i == ret)
+				break;
+			if (zeros) {
+				sum_add_u64(dst, 0);
+				sum_add_u64(dst, zeros);
+				zeros = 0;
+			}
+			for (old = i; i < ret && buf[i] != 0; ++i)
+				;
+			sum_add(dst, buf + old, i - old);
+		}
+		pos += ret;
+	}
+	if (zeros) {
+		sum_add_u64(dst, 0);
+		sum_add_u64(dst, zeros);
+	}
+	return ret;
+}
+
+int
+sum_file_data_strict(int fd, sum_t *dst)
{
int ret;
off_t pos;
@@ -403,6 +467,8 @@
int ret;
int fd;
int excl;
+ sum_file_data_t sum_file_data = flags[FLAG_STRUCTURE] ?
+ sum_file_data_strict : sum_file_data_permissive;
d = fdopendir(dirfd);
if (!d) {
@@ -563,9 +629,10 @@
int plen;
int elen;
int n_flags = 0;
+ const char *allopts = "heEfuUgGoOaAmMcCdDsSnNw:r:x:";
out_fp = stdout;
- while ((c = getopt(argc, argv, "heEfuUgGoOaAmMcCdDnNw:r:x:")) != EOF) {
+ while ((c = getopt(argc, argv, allopts)) != EOF) {
switch(c) {
case 'f':
gen_manifest = 1;
@@ -586,6 +653,8 @@
case 'D':
case 'e':
case 'E':
+ case 's':
+ case 'S':
++n_flags;
parse_flag(c);
break;