| // SPDX-License-Identifier: GPL-2.0 | 
 | #include <linux/fs.h> | 
 | #include <sys/types.h> | 
 | #include <sys/stat.h> | 
 | #include <sys/ioctl.h> | 
 | #include <fcntl.h> | 
 | #include <stdio.h> | 
 | #include <stdlib.h> | 
 | #include <errno.h> | 
 | #include <unistd.h> | 
 | #include <string.h> | 
 | #include "log-writes.h" | 
 |  | 
 | int log_writes_verbose = 0; | 
 |  | 
 | /* | 
 |  * @log: the log to free. | 
 |  * | 
 |  * This will close any open fd's the log has and free up its memory. | 
 |  */ | 
 | void log_free(struct log *log) | 
 | { | 
 | 	if (log->replayfd >= 0) | 
 | 		close(log->replayfd); | 
 | 	if (log->logfd >= 0) | 
 | 		close(log->logfd); | 
 | 	free(log); | 
 | } | 
 |  | 
 | static int discard_range(struct log *log, u64 start, u64 len) | 
 | { | 
 | 	u64 range[2] = { start, len }; | 
 |  | 
 | 	if (ioctl(log->replayfd, BLKDISCARD, &range) < 0) { | 
 | 		if (log_writes_verbose) | 
 | 			printf("replay device doesn't support discard, " | 
 | 			       "switching to writing zeros\n"); | 
 | 		log->flags |= LOG_DISCARD_NOT_SUPP; | 
 | 	} | 
 | 	return 0; | 
 | } | 
 |  | 
 | static int zero_range(struct log *log, u64 start, u64 len) | 
 | { | 
 | 	u64 bufsize = len; | 
 | 	ssize_t ret; | 
 | 	char *buf = NULL; | 
 |  | 
 | 	if (log->max_zero_size < len) { | 
 | 		if (log_writes_verbose) | 
 | 			printf("discard len %llu larger than max %llu\n", | 
 | 			       (unsigned long long)len, | 
 | 			       (unsigned long long)log->max_zero_size); | 
 | 		return 0; | 
 | 	} | 
 |  | 
 | 	while (!buf) { | 
 | 		buf = malloc(bufsize); | 
 | 		if (!buf) | 
 | 			bufsize >>= 1; | 
 | 		if (!bufsize) { | 
 | 			fprintf(stderr, "Couldn't allocate zero buffer"); | 
 | 			return -1; | 
 | 		} | 
 | 	} | 
 |  | 
 | 	memset(buf, 0, bufsize); | 
 | 	while (len) { | 
 | 		if (len < bufsize) | 
 | 			bufsize = len; | 
 |  | 
 | 		ret = pwrite(log->replayfd, buf, bufsize, start); | 
 | 		if (ret != bufsize) { | 
 | 			fprintf(stderr, "Error zeroing file: %d\n", errno); | 
 | 			free(buf); | 
 | 			return -1; | 
 | 		} | 
 | 		len -= ret; | 
 | 		start += ret; | 
 | 	} | 
 | 	free(buf); | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* | 
 |  * @log: the log we are replaying. | 
 |  * @entry: the discard entry. | 
 |  * | 
 |  * Discard the given length.  If the device supports discard we will call that | 
 |  * ioctl, otherwise we will write 0's to emulate discard.  If the discard size | 
 |  * is larger than log->max_zero_size then we will simply skip the zero'ing if | 
 |  * the drive doesn't support discard. | 
 |  */ | 
 | int log_discard(struct log *log, struct log_write_entry *entry) | 
 | { | 
 | 	u64 start = le64_to_cpu(entry->sector) * log->sectorsize; | 
 | 	u64 size = le64_to_cpu(entry->nr_sectors) * log->sectorsize; | 
 | 	u64 max_chunk = 1 * 1024 * 1024 * 1024; | 
 |  | 
 | 	if (log->flags & LOG_IGNORE_DISCARD) | 
 | 		return 0; | 
 |  | 
 | 	while (size) { | 
 | 		u64 len = size > max_chunk ? max_chunk : size; | 
 | 		int ret; | 
 |  | 
 | 		/* | 
 | 		 * Do this check first in case it is our first discard, that way | 
 | 		 * if we return EOPNOTSUPP we will fall back to the 0 method | 
 | 		 * automatically. | 
 | 		 */ | 
 | 		if (!(log->flags & LOG_DISCARD_NOT_SUPP)) | 
 | 			ret = discard_range(log, start, len); | 
 | 		if (log->flags & LOG_DISCARD_NOT_SUPP) | 
 | 			ret = zero_range(log, start, len); | 
 | 		if (ret) | 
 | 			return -1; | 
 | 		size -= len; | 
 | 		start += len; | 
 | 	} | 
 | 	return 0; | 
 | } | 
 |  | 
 | #define DEFINE_LOG_FLAGS_STR_ENTRY(x)	\ | 
 | 	{LOG_##x##_FLAG, #x} | 
 |  | 
 | struct flags_to_str_entry { | 
 | 	u64 flags; | 
 | 	const char *str; | 
 | } log_flags_table[] = { | 
 | 	DEFINE_LOG_FLAGS_STR_ENTRY(FLUSH), | 
 | 	DEFINE_LOG_FLAGS_STR_ENTRY(FUA), | 
 | 	DEFINE_LOG_FLAGS_STR_ENTRY(DISCARD), | 
 | 	DEFINE_LOG_FLAGS_STR_ENTRY(MARK), | 
 | 	DEFINE_LOG_FLAGS_STR_ENTRY(METADATA) | 
 | }; | 
 |  | 
 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) | 
 | #define LOG_FLAGS_BUF_SIZE	128 | 
 | /* | 
 |  * Convert numeric flags to human readable flags. | 
 |  * @flags:	numeric flags | 
 |  * @buf:	output buffer for human readable string. | 
 |  * 		must have enough space (LOG_FLAGS_BUF_SIZE) to contain all | 
 |  * 		the string | 
 |  */ | 
 | static void entry_flags_to_str(u64 flags, char *buf) | 
 | { | 
 | 	int empty = 1; | 
 | 	int left_len; | 
 | 	int i; | 
 |  | 
 | 	buf[0] = '\0'; | 
 | 	for (i = 0; i < ARRAY_SIZE(log_flags_table); i++) { | 
 | 		if (flags & log_flags_table[i].flags) { | 
 | 			if (!empty) | 
 | 				strncat(buf, "|", LOG_FLAGS_BUF_SIZE); | 
 | 			empty = 0; | 
 | 			strncat(buf, log_flags_table[i].str, LOG_FLAGS_BUF_SIZE); | 
 | 			flags &= ~log_flags_table[i].flags; | 
 | 		} | 
 | 	} | 
 | 	if (flags) { | 
 | 		if (!empty) | 
 | 			strncat(buf, "|", LOG_FLAGS_BUF_SIZE); | 
 | 		empty = 0; | 
 | 		left_len = LOG_FLAGS_BUF_SIZE - strnlen(buf, | 
 | 						        LOG_FLAGS_BUF_SIZE); | 
 | 		if (left_len > 0) | 
 | 			snprintf(buf + strnlen(buf, LOG_FLAGS_BUF_SIZE), | 
 | 				 left_len, "UNKNOWN.0x%llx", flags); | 
 | 	} | 
 | 	if (empty) | 
 | 		strncpy(buf, "NONE", LOG_FLAGS_BUF_SIZE); | 
 | } | 
 |  | 
 | /* | 
 |  * @log: the log we are replaying. | 
 |  * @entry: entry to be replayed. | 
 |  * | 
 |  * @return: 0 if we should replay the entry, > 0 if we should skip it. | 
 |  * | 
 |  * Should we skip the entry in our log or replay onto the replay device. | 
 |  */ | 
 | int log_should_skip(struct log *log, struct log_write_entry *entry) | 
 | { | 
 | 	u64 sector = le64_to_cpu(entry->sector); | 
 | 	u64 nr_sectors = le64_to_cpu(entry->nr_sectors); | 
 |  | 
 | 	if (!nr_sectors) | 
 | 		return 0; | 
 | 	if (sector + nr_sectors <= log->start_sector || | 
 | 	    sector > log->end_sector) | 
 | 		return 1; | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* | 
 |  * @entry: entry to be replayed. | 
 |  * | 
 |  * @return: 1 if the entry is sane, 0 if it is invalid. | 
 |  * | 
 |  * Check if this is a sane log entry. | 
 |  */ | 
 | int log_entry_valid(struct log_write_entry *entry) | 
 | { | 
 | 	u64 flags = le64_to_cpu(entry->flags); | 
 |  | 
 | 	/* Suspect all zeroes entry */ | 
 | 	if (!flags && !entry->nr_sectors) | 
 | 		return 0; | 
 | 	/* Suspect non zero padded entry */ | 
 | 	if (flags != LOG_MARK_FLAG && entry->data[0] != 0) | 
 | 		return 0; | 
 | 	return 1; | 
 | } | 
 |  | 
 | /* | 
 |  * @log: the log we are replaying. | 
 |  * @entry: where we put the entry. | 
 |  * @read_data: read the entry data as well, entry must be log->sectorsize sized | 
 |  * if this is set. | 
 |  * | 
 |  * @return: 0 if we replayed, 1 if we are at the end, -1 if there was an error. | 
 |  * | 
 |  * Replay the next entry in our log onto the replay device. | 
 |  */ | 
 | int log_replay_next_entry(struct log *log, struct log_write_entry *entry, | 
 | 			  int read_data) | 
 | { | 
 | 	u64 size; | 
 | 	u64 flags; | 
 | 	size_t read_size = read_data ? log->sectorsize : | 
 | 		sizeof(struct log_write_entry); | 
 | 	char *buf; | 
 | 	char flags_buf[LOG_FLAGS_BUF_SIZE]; | 
 | 	ssize_t ret; | 
 | 	off_t offset; | 
 | 	int skip = 0; | 
 |  | 
 | 	if (log->cur_entry >= log->nr_entries) | 
 | 		return 1; | 
 |  | 
 | 	ret = read(log->logfd, entry, read_size); | 
 | 	if (ret != read_size) { | 
 | 		fprintf(stderr, "Error reading entry: %d\n", errno); | 
 | 		return -1; | 
 | 	} | 
 | 	if (!log_entry_valid(entry)) { | 
 | 		fprintf(stderr, "Malformed entry @%llu\n", | 
 | 				log->cur_pos / log->sectorsize); | 
 | 		return -1; | 
 | 	} | 
 | 	log->cur_entry++; | 
 |  | 
 | 	size = le64_to_cpu(entry->nr_sectors) * log->sectorsize; | 
 | 	if (read_size < log->sectorsize) { | 
 | 		log->cur_pos = lseek(log->logfd, | 
 | 			log->sectorsize - sizeof(struct log_write_entry), SEEK_CUR); | 
 | 		if (log->cur_pos == (off_t)-1) { | 
 | 			fprintf(stderr, "Error seeking in log: %d\n", errno); | 
 | 			return -1; | 
 | 		} | 
 | 	} else { | 
 | 		log->cur_pos += read_size; | 
 | 	} | 
 |  | 
 | 	flags = le64_to_cpu(entry->flags); | 
 | 	entry_flags_to_str(flags, flags_buf); | 
 | 	skip = log_should_skip(log, entry); | 
 | 	if (log_writes_verbose > 1 || (log_writes_verbose && !skip)) { | 
 | 		printf("%s %d@%llu: sector %llu, size %llu, flags 0x%llx(%s)\n", | 
 | 		       skip ? "skipping" : "replaying", | 
 | 		       (int)log->cur_entry - 1, log->cur_pos / log->sectorsize, | 
 | 		       (unsigned long long)le64_to_cpu(entry->sector), | 
 | 		       (unsigned long long)size, | 
 | 		       (unsigned long long)flags, flags_buf); | 
 | 	} | 
 | 	if (!size) | 
 | 		return 0; | 
 |  | 
 | 	if (flags & LOG_DISCARD_FLAG) | 
 | 		return log_discard(log, entry); | 
 |  | 
 | 	if (skip) { | 
 | 		log->cur_pos = lseek(log->logfd, size, SEEK_CUR); | 
 | 		if (log->cur_pos == (off_t)-1) { | 
 | 			fprintf(stderr, "Error seeking in log: %d\n", errno); | 
 | 			return -1; | 
 | 		} | 
 | 		return 0; | 
 | 	} | 
 |  | 
 | 	buf = malloc(size); | 
 | 	if (!buf) { | 
 | 		fprintf(stderr, "Error allocating buffer %llu entry %llu\n", (unsigned long long)size, (unsigned long long)log->cur_entry - 1); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	ret = read(log->logfd, buf, size); | 
 | 	if (ret != size) { | 
 | 		fprintf(stderr, "Error reading data: %d\n", errno); | 
 | 		free(buf); | 
 | 		return -1; | 
 | 	} | 
 | 	log->cur_pos += size; | 
 |  | 
 | 	offset = le64_to_cpu(entry->sector) * log->sectorsize; | 
 | 	ret = pwrite(log->replayfd, buf, size, offset); | 
 | 	free(buf); | 
 | 	if (ret != size) { | 
 | 		fprintf(stderr, "Error writing data: %d\n", errno); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* | 
 |  * @log: the log we are manipulating. | 
 |  * @entry_num: the entry we want. | 
 |  * | 
 |  * Seek to the given entry in the log, starting at 0 and ending at | 
 |  * log->nr_entries - 1. | 
 |  */ | 
 | int log_seek_entry(struct log *log, u64 entry_num) | 
 | { | 
 | 	u64 i = 0; | 
 |  | 
 | 	if (entry_num >= log->nr_entries) { | 
 | 		fprintf(stderr, "Invalid entry number\n"); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	/* Skip the first sector containing the log super block */ | 
 | 	log->cur_pos = lseek(log->logfd, log->sectorsize, SEEK_SET); | 
 | 	if (log->cur_pos == (off_t)-1) { | 
 | 		fprintf(stderr, "Error seeking in file: %d\n", errno); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	log->cur_entry = 0; | 
 | 	for (i = 0; i < entry_num; i++) { | 
 | 		struct log_write_entry entry; | 
 | 		ssize_t ret; | 
 | 		off_t seek_size; | 
 | 		u64 flags; | 
 |  | 
 | 		ret = read(log->logfd, &entry, sizeof(entry)); | 
 | 		if (ret != sizeof(entry)) { | 
 | 			fprintf(stderr, "Error reading entry: %d\n", errno); | 
 | 			return -1; | 
 | 		} | 
 | 		if (!log_entry_valid(&entry)) { | 
 | 			fprintf(stderr, "Malformed entry @%llu\n", | 
 | 					log->cur_pos / log->sectorsize); | 
 | 			return -1; | 
 | 		} | 
 | 		if (log_writes_verbose > 1) | 
 | 			printf("seek entry %d@%llu: %llu, size %llu, flags 0x%llx\n", | 
 | 			       (int)i, log->cur_pos / log->sectorsize, | 
 | 			       (unsigned long long)le64_to_cpu(entry.sector), | 
 | 			       (unsigned long long)le64_to_cpu(entry.nr_sectors), | 
 | 			       (unsigned long long)le64_to_cpu(entry.flags)); | 
 | 		flags = le64_to_cpu(entry.flags); | 
 | 		seek_size = log->sectorsize - sizeof(entry); | 
 | 		if (!(flags & LOG_DISCARD_FLAG)) | 
 | 			seek_size += le64_to_cpu(entry.nr_sectors) * | 
 | 				log->sectorsize; | 
 | 		log->cur_pos = lseek(log->logfd, seek_size, SEEK_CUR); | 
 | 		if (log->cur_pos == (off_t)-1) { | 
 | 			fprintf(stderr, "Error seeking in file: %d\n", errno); | 
 | 			return -1; | 
 | 		} | 
 | 		log->cur_entry++; | 
 | 	} | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* | 
 |  * @log: the log we are manipulating. | 
 |  * @entry: the entry we read. | 
 |  * @read_data: read the extra data for the entry, your entry must be | 
 |  * log->sectorsize large. | 
 |  * | 
 |  * @return: 1 if we hit the end of the log, 0 we got the next entry, < 0 if | 
 |  * there was an error. | 
 |  * | 
 |  * Seek to the next entry in the log. | 
 |  */ | 
 | int log_seek_next_entry(struct log *log, struct log_write_entry *entry, | 
 | 			int read_data) | 
 | { | 
 | 	size_t read_size = read_data ? log->sectorsize : | 
 | 		sizeof(struct log_write_entry); | 
 | 	u64 flags; | 
 | 	char flags_buf[LOG_FLAGS_BUF_SIZE]; | 
 | 	ssize_t ret; | 
 |  | 
 | 	if (log->cur_entry >= log->nr_entries) | 
 | 		return 1; | 
 |  | 
 | 	ret = read(log->logfd, entry, read_size); | 
 | 	if (ret != read_size) { | 
 | 		fprintf(stderr, "Error reading entry: %d\n", errno); | 
 | 		return -1; | 
 | 	} | 
 | 	if (!log_entry_valid(entry)) { | 
 | 		fprintf(stderr, "Malformed entry @%llu\n", | 
 | 				log->cur_pos / log->sectorsize); | 
 | 		return -1; | 
 | 	} | 
 | 	log->cur_entry++; | 
 |  | 
 | 	if (read_size < log->sectorsize) { | 
 | 		log->cur_pos = lseek(log->logfd, | 
 | 			log->sectorsize - sizeof(struct log_write_entry), SEEK_CUR); | 
 | 		if (log->cur_pos == (off_t)-1) { | 
 | 			fprintf(stderr, "Error seeking in log: %d\n", errno); | 
 | 			return -1; | 
 | 		} | 
 | 	} else { | 
 | 		log->cur_pos += read_size; | 
 | 	} | 
 | 	flags = le64_to_cpu(entry->flags); | 
 | 	entry_flags_to_str(flags, flags_buf); | 
 | 	if (log_writes_verbose > 1) | 
 | 		printf("seek entry %d@%llu: %llu, size %llu, flags 0x%llx(%s)\n", | 
 | 		       (int)log->cur_entry - 1, log->cur_pos / log->sectorsize, | 
 | 		       (unsigned long long)le64_to_cpu(entry->sector), | 
 | 		       (unsigned long long)le64_to_cpu(entry->nr_sectors), | 
 | 		       (unsigned long long)flags, flags_buf); | 
 |  | 
 | 	read_size = le64_to_cpu(entry->nr_sectors) * log->sectorsize; | 
 | 	if (!read_size || (flags & LOG_DISCARD_FLAG)) | 
 | 		return 0; | 
 |  | 
 | 	log->cur_pos = lseek(log->logfd, read_size, SEEK_CUR); | 
 | 	if (log->cur_pos == (off_t)-1) { | 
 | 		fprintf(stderr, "Error seeking in log: %d\n", errno); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* | 
 |  * @logfile: the file that contains the write log. | 
 |  * @replayfile: the file/device to replay onto, can be NULL. | 
 |  * | 
 |  * Opens a logfile and makes sure it is valid and returns a struct log. | 
 |  */ | 
 | struct log *log_open(char *logfile, char *replayfile) | 
 | { | 
 | 	struct log *log; | 
 | 	struct log_write_super super; | 
 | 	ssize_t ret; | 
 |  | 
 | 	log = malloc(sizeof(struct log)); | 
 | 	if (!log) { | 
 | 		fprintf(stderr, "Couldn't alloc log\n"); | 
 | 		return NULL; | 
 | 	} | 
 |  | 
 | 	log->replayfd = -1; | 
 |  | 
 | 	log->logfd = open(logfile, O_RDONLY); | 
 | 	if (log->logfd < 0) { | 
 | 		fprintf(stderr, "Couldn't open log %s: %d\n", logfile, | 
 | 			errno); | 
 | 		log_free(log); | 
 | 		return NULL; | 
 | 	} | 
 |  | 
 | 	if (replayfile) { | 
 | 		log->replayfd = open(replayfile, O_WRONLY); | 
 | 		if (log->replayfd < 0) { | 
 | 			fprintf(stderr, "Couldn't open replay file %s: %d\n", | 
 | 				replayfile, errno); | 
 | 			log_free(log); | 
 | 			return NULL; | 
 | 		} | 
 | 	} | 
 |  | 
 | 	ret = read(log->logfd, &super, sizeof(struct log_write_super)); | 
 | 	if (ret < sizeof(struct log_write_super)) { | 
 | 		fprintf(stderr, "Error reading super: %d\n", errno); | 
 | 		log_free(log); | 
 | 		return NULL; | 
 | 	} | 
 |  | 
 | 	if (le64_to_cpu(super.magic) != WRITE_LOG_MAGIC) { | 
 | 		fprintf(stderr, "Magic doesn't match\n"); | 
 | 		log_free(log); | 
 | 		return NULL; | 
 | 	} | 
 |  | 
 | 	if (le64_to_cpu(super.version) != WRITE_LOG_VERSION) { | 
 | 		fprintf(stderr, "Version mismatch, wanted %d, have %d\n", | 
 | 			WRITE_LOG_VERSION, (int)le64_to_cpu(super.version)); | 
 | 		log_free(log); | 
 | 		return NULL; | 
 | 	} | 
 |  | 
 | 	log->sectorsize = le32_to_cpu(super.sectorsize); | 
 | 	log->nr_entries = le64_to_cpu(super.nr_entries); | 
 | 	log->max_zero_size = 128 * 1024 * 1024; | 
 |  | 
 | 	log->cur_pos = lseek(log->logfd, log->sectorsize - sizeof(super), SEEK_CUR); | 
 | 	if (log->cur_pos == (off_t) -1) { | 
 | 		fprintf(stderr, "Error seeking to first entry: %d\n", errno); | 
 | 		log_free(log); | 
 | 		return NULL; | 
 | 	} | 
 | 	log->cur_entry = 0; | 
 |  | 
 | 	return log; | 
 | } |