blob: aa53473974d9e85d4a12a193ba492d6e0e74108c [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
#include <linux/fs.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include "log-writes.h"
/*
 * Verbosity level for the diagnostics this file prints to stdout. Any
 * non-zero value enables the basic messages; values above 1 additionally
 * report skipped entries and per-entry seek traces.
 */
int log_writes_verbose = 0;
/*
 * @log: the log to free, may be NULL.
 *
 * This will close any open fd's the log has and free up its memory.
 * Passing NULL is a no-op, mirroring free().
 */
void log_free(struct log *log)
{
	if (!log)
		return;

	/* Descriptors that were never opened are kept at a negative value. */
	if (log->replayfd >= 0)
		close(log->replayfd);
	if (log->logfd >= 0)
		close(log->logfd);
	free(log);
}
/*
 * @log: the log we are replaying.
 * @start: byte offset of the range to discard.
 * @len: length in bytes of the range to discard.
 *
 * @return: always 0.
 *
 * Issue a BLKDISCARD ioctl on the replay fd. If the ioctl fails for any
 * reason, LOG_DISCARD_NOT_SUPP is set in log->flags so that the caller can
 * fall back to writing zeros.
 */
static int discard_range(struct log *log, u64 start, u64 len)
{
	u64 range[2];

	range[0] = start;
	range[1] = len;

	if (ioctl(log->replayfd, BLKDISCARD, &range) >= 0)
		return 0;

	if (log_writes_verbose)
		printf("replay device doesn't support discard, "
		       "switching to writing zeros\n");
	log->flags |= LOG_DISCARD_NOT_SUPP;
	return 0;
}
/*
 * @log: the log we are replaying.
 * @start: byte offset on the replay fd to start zeroing at.
 * @len: number of bytes to zero.
 *
 * @return: 0 on success or when the range was skipped, -1 on error.
 *
 * Emulate a discard by writing zeros. Ranges larger than log->max_zero_size
 * are skipped (with a message in verbose mode) rather than written.
 */
static int zero_range(struct log *log, u64 start, u64 len)
{
	u64 bufsize = len;
	ssize_t ret;
	char *buf = NULL;

	if (log->max_zero_size < len) {
		if (log_writes_verbose)
			printf("discard len %llu larger than max %llu\n",
			       (unsigned long long)len,
			       (unsigned long long)log->max_zero_size);
		return 0;
	}

	/* Nothing to write; also avoids depending on what malloc(0) returns. */
	if (!len)
		return 0;

	/* Halve the request until an allocation succeeds. */
	while (!buf) {
		buf = calloc(1, bufsize);
		if (buf)
			break;
		bufsize >>= 1;
		if (!bufsize) {
			fprintf(stderr, "Couldn't allocate zero buffer\n");
			return -1;
		}
	}

	while (len) {
		size_t chunk = len < bufsize ? (size_t)len : (size_t)bufsize;

		ret = pwrite(log->replayfd, buf, chunk, start);
		if (ret < 0) {
			fprintf(stderr, "Error zeroing file: %d\n", errno);
			free(buf);
			return -1;
		}
		if (ret == 0) {
			/* No progress and no errno: bail out instead of spinning. */
			fprintf(stderr, "Error zeroing file: no progress\n");
			free(buf);
			return -1;
		}
		/* A short write is not an error; continue where it stopped. */
		len -= ret;
		start += ret;
	}
	free(buf);
	return 0;
}
/*
 * @log: the log we are replaying.
 * @entry: the discard entry.
 *
 * @return: 0 on success (or when discards are ignored), -1 on error.
 *
 * Discard the range described by the entry, working in chunks of at most
 * 1GiB. The discard ioctl is tried first; once the device has been marked
 * as not supporting it, zeros are written instead. Zeroing is silently
 * skipped for chunks larger than log->max_zero_size.
 */
int log_discard(struct log *log, struct log_write_entry *entry)
{
	const u64 chunk_limit = 1 * 1024 * 1024 * 1024;
	u64 offset = le64_to_cpu(entry->sector) * log->sectorsize;
	u64 remaining = le64_to_cpu(entry->nr_sectors) * log->sectorsize;
	u64 chunk;
	int ret;

	if (log->flags & LOG_IGNORE_DISCARD)
		return 0;

	for (; remaining; remaining -= chunk, offset += chunk) {
		chunk = remaining;
		if (chunk > chunk_limit)
			chunk = chunk_limit;

		ret = 0;
		/*
		 * Try the ioctl first. If this attempt is the one that finds
		 * out discard is unsupported, the flag is set and the same
		 * chunk immediately falls through to the zeroing path below.
		 */
		if (!(log->flags & LOG_DISCARD_NOT_SUPP))
			ret = discard_range(log, offset, chunk);
		if (log->flags & LOG_DISCARD_NOT_SUPP)
			ret = zero_range(log, offset, chunk);
		if (ret)
			return -1;
	}
	return 0;
}
/*
 * Build one table row from a flag's short name: DEFINE_LOG_FLAGS_STR_ENTRY(FUA)
 * expands to {LOG_FUA_FLAG, "FUA"}.
 */
#define DEFINE_LOG_FLAGS_STR_ENTRY(x) \
{LOG_##x##_FLAG, #x}
/*
 * Maps each known entry flag bit to its printable name. The order of the
 * rows is the order in which entry_flags_to_str() emits the names.
 */
struct flags_to_str_entry {
u64 flags;
const char *str;
} log_flags_table[] = {
DEFINE_LOG_FLAGS_STR_ENTRY(FLUSH),
DEFINE_LOG_FLAGS_STR_ENTRY(FUA),
DEFINE_LOG_FLAGS_STR_ENTRY(DISCARD),
DEFINE_LOG_FLAGS_STR_ENTRY(MARK),
DEFINE_LOG_FLAGS_STR_ENTRY(METADATA)
};
/* Element count of a true array (not valid on pointers). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* Size of the buffer callers hand to entry_flags_to_str(). */
#define LOG_FLAGS_BUF_SIZE 128
/*
 * Convert numeric flags to human readable flags.
 * @flags: numeric flags
 * @buf: output buffer for human readable string.
 *	 must be at least LOG_FLAGS_BUF_SIZE bytes large
 *
 * Known flags are emitted in table order joined by '|'. Any bits left over
 * are appended as "UNKNOWN.0x<hex>". A value with no bits set yields "NONE".
 * The result is always NUL-terminated and is truncated rather than written
 * past LOG_FLAGS_BUF_SIZE.
 */
static void entry_flags_to_str(u64 flags, char *buf)
{
	const char *sep = "";
	size_t used = 0;
	size_t i;
	int n;

	buf[0] = '\0';
	for (i = 0; i < ARRAY_SIZE(log_flags_table); i++) {
		if (!(flags & log_flags_table[i].flags))
			continue;
		flags &= ~log_flags_table[i].flags;

		/* snprintf is bounded by the space actually left in buf. */
		n = snprintf(buf + used, LOG_FLAGS_BUF_SIZE - used, "%s%s",
			     sep, log_flags_table[i].str);
		if (n < 0 || (size_t)n >= LOG_FLAGS_BUF_SIZE - used)
			return;	/* error or truncated; buf stays terminated */
		used += n;
		sep = "|";
	}

	if (flags) {
		/* Bits that no table entry accounted for. */
		snprintf(buf + used, LOG_FLAGS_BUF_SIZE - used,
			 "%sUNKNOWN.0x%llx", sep, (unsigned long long)flags);
		return;
	}

	if (!used)
		snprintf(buf, LOG_FLAGS_BUF_SIZE, "NONE");
}
/*
 * @log: the log we are replaying.
 * @entry: entry to be replayed.
 *
 * @return: 0 if we should replay the entry, > 0 if we should skip it.
 *
 * Decide whether an entry falls outside the sector window described by
 * log->start_sector and log->end_sector. Entries that cover no sectors are
 * never skipped.
 */
int log_should_skip(struct log *log, struct log_write_entry *entry)
{
	u64 first = le64_to_cpu(entry->sector);
	u64 count = le64_to_cpu(entry->nr_sectors);

	if (count == 0)
		return 0;

	/* Ends at or before the start of the window. */
	if (first + count <= log->start_sector)
		return 1;

	/* Begins after the end of the window. */
	if (first > log->end_sector)
		return 1;

	return 0;
}
/*
 * @entry: entry to be replayed.
 *
 * @return: 1 if the entry is sane, 0 if it is invalid.
 *
 * Sanity check a log entry header. An entry with no flags and no sectors is
 * rejected, as is any entry whose flags are not exactly LOG_MARK_FLAG and
 * whose first data byte is non-zero.
 */
int log_entry_valid(struct log_write_entry *entry)
{
	u64 flags = le64_to_cpu(entry->flags);

	/* Suspect all zeroes entry */
	if (flags == 0 && entry->nr_sectors == 0)
		return 0;

	/*
	 * Suspect non zero padded entry: only a pure mark entry may have a
	 * non-zero first data byte.
	 */
	return flags == LOG_MARK_FLAG || entry->data[0] == 0;
}
/*
 * @log: the log we are replaying.
 * @entry: where we put the entry.
 * @read_data: read the entry data as well, entry must be log->sectorsize sized
 * if this is set.
 *
 * @return: 0 if we replayed, 1 if we are at the end, -1 if there was an error.
 *
 * Replay the next entry in our log onto the replay device.
 *
 * The entry header is read from log->logfd at the current file position and
 * validated, then log->cur_entry and log->cur_pos are advanced. Zero sized
 * entries return right after that. Discard entries are handed to
 * log_discard(). Entries that log_should_skip() rejects have their data
 * seeked over. Everything else is read from the log and written to
 * log->replayfd at the entry's sector offset.
 */
int log_replay_next_entry(struct log *log, struct log_write_entry *entry,
int read_data)
{
u64 size;
u64 flags;
/* Either the whole header sector or just the entry struct itself. */
size_t read_size = read_data ? log->sectorsize :
sizeof(struct log_write_entry);
char *buf;
char flags_buf[LOG_FLAGS_BUF_SIZE];
ssize_t ret;
off_t offset;
int skip = 0;
/* Already consumed every entry the super block announced. */
if (log->cur_entry >= log->nr_entries)
return 1;
ret = read(log->logfd, entry, read_size);
if (ret != read_size) {
fprintf(stderr, "Error reading entry: %d\n", errno);
return -1;
}
if (!log_entry_valid(entry)) {
fprintf(stderr, "Malformed entry @%llu\n",
log->cur_pos / log->sectorsize);
return -1;
}
log->cur_entry++;
/* Payload size in bytes as described by the entry header. */
size = le64_to_cpu(entry->nr_sectors) * log->sectorsize;
/*
 * If only the entry struct was read, seek over the remainder of the header
 * sector so the file position sits right after it.
 */
if (read_size < log->sectorsize) {
log->cur_pos = lseek(log->logfd,
log->sectorsize - sizeof(struct log_write_entry), SEEK_CUR);
if (log->cur_pos == (off_t)-1) {
fprintf(stderr, "Error seeking in log: %d\n", errno);
return -1;
}
} else {
log->cur_pos += read_size;
}
flags = le64_to_cpu(entry->flags);
entry_flags_to_str(flags, flags_buf);
skip = log_should_skip(log, entry);
/* Skipped entries are only reported at verbosity levels above 1. */
if (log_writes_verbose > 1 || (log_writes_verbose && !skip)) {
printf("%s %d@%llu: sector %llu, size %llu, flags 0x%llx(%s)\n",
skip ? "skipping" : "replaying",
(int)log->cur_entry - 1, log->cur_pos / log->sectorsize,
(unsigned long long)le64_to_cpu(entry->sector),
(unsigned long long)size,
(unsigned long long)flags, flags_buf);
}
/* Nothing to replay for an entry that covers no sectors. */
if (!size)
return 0;
/*
 * Discards are handled before the skip check and return without reading or
 * seeking over any further bytes in the log file.
 */
if (flags & LOG_DISCARD_FLAG)
return log_discard(log, entry);
/* Out of range entry: step over its data without replaying it. */
if (skip) {
log->cur_pos = lseek(log->logfd, size, SEEK_CUR);
if (log->cur_pos == (off_t)-1) {
fprintf(stderr, "Error seeking in log: %d\n", errno);
return -1;
}
return 0;
}
buf = malloc(size);
if (!buf) {
fprintf(stderr, "Error allocating buffer %llu entry %llu\n", (unsigned long long)size, (unsigned long long)log->cur_entry - 1);
return -1;
}
/* A short read is treated the same as a failed read. */
ret = read(log->logfd, buf, size);
if (ret != size) {
fprintf(stderr, "Error reading data: %d\n", errno);
free(buf);
return -1;
}
log->cur_pos += size;
/* Byte offset on the replay target that this entry was written to. */
offset = le64_to_cpu(entry->sector) * log->sectorsize;
ret = pwrite(log->replayfd, buf, size, offset);
free(buf);
/* A short write is treated the same as a failed write. */
if (ret != size) {
fprintf(stderr, "Error writing data: %d\n", errno);
return -1;
}
return 0;
}
/*
 * @log: the log we are manipulating.
 * @entry_num: the entry we want.
 *
 * @return: 0 on success, -1 on error.
 *
 * Position the log at the given entry, counting from 0 up to
 * log->nr_entries - 1. The walk always restarts from the first entry and
 * steps over each preceding entry's header sector and data.
 */
int log_seek_entry(struct log *log, u64 entry_num)
{
	u64 idx;

	if (entry_num >= log->nr_entries) {
		fprintf(stderr, "Invalid entry number\n");
		return -1;
	}

	/* Skip the first sector containing the log super block */
	log->cur_pos = lseek(log->logfd, log->sectorsize, SEEK_SET);
	if (log->cur_pos == (off_t)-1) {
		fprintf(stderr, "Error seeking in file: %d\n", errno);
		return -1;
	}

	log->cur_entry = 0;
	idx = 0;
	while (idx < entry_num) {
		struct log_write_entry hdr;
		off_t skip;
		u64 flags;

		if (read(log->logfd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
			fprintf(stderr, "Error reading entry: %d\n", errno);
			return -1;
		}
		if (!log_entry_valid(&hdr)) {
			fprintf(stderr, "Malformed entry @%llu\n",
				log->cur_pos / log->sectorsize);
			return -1;
		}

		flags = le64_to_cpu(hdr.flags);
		if (log_writes_verbose > 1)
			printf("seek entry %d@%llu: %llu, size %llu, flags 0x%llx\n",
			       (int)idx, log->cur_pos / log->sectorsize,
			       (unsigned long long)le64_to_cpu(hdr.sector),
			       (unsigned long long)le64_to_cpu(hdr.nr_sectors),
			       (unsigned long long)flags);

		/* Rest of the header sector, which is always present ... */
		skip = log->sectorsize - sizeof(hdr);
		/* ... plus the data, which is not skipped for discard entries. */
		if (!(flags & LOG_DISCARD_FLAG))
			skip += le64_to_cpu(hdr.nr_sectors) *
				log->sectorsize;

		log->cur_pos = lseek(log->logfd, skip, SEEK_CUR);
		if (log->cur_pos == (off_t)-1) {
			fprintf(stderr, "Error seeking in file: %d\n", errno);
			return -1;
		}

		log->cur_entry++;
		idx++;
	}
	return 0;
}
/*
 * @log: the log we are manipulating.
 * @entry: the entry we read.
 * @read_data: read the extra data for the entry, your entry must be
 * log->sectorsize large.
 *
 * @return: 1 if we hit the end of the log, 0 we got the next entry, < 0 if
 * there was an error.
 *
 * Read the next entry header and move the log position past the entry
 * without replaying anything.
 */
int log_seek_next_entry(struct log *log, struct log_write_entry *entry,
			int read_data)
{
	char flags_buf[LOG_FLAGS_BUF_SIZE];
	size_t hdr_size = sizeof(struct log_write_entry);
	size_t data_size;
	ssize_t nread;
	u64 flags;

	/* With read_data the caller wants the whole header sector. */
	if (read_data)
		hdr_size = log->sectorsize;

	if (log->cur_entry >= log->nr_entries)
		return 1;

	nread = read(log->logfd, entry, hdr_size);
	if (nread != hdr_size) {
		fprintf(stderr, "Error reading entry: %d\n", errno);
		return -1;
	}
	if (!log_entry_valid(entry)) {
		fprintf(stderr, "Malformed entry @%llu\n",
			log->cur_pos / log->sectorsize);
		return -1;
	}
	log->cur_entry++;

	if (hdr_size < log->sectorsize) {
		/* Only the struct was read; hop over the rest of its sector. */
		log->cur_pos = lseek(log->logfd,
			log->sectorsize - sizeof(struct log_write_entry),
			SEEK_CUR);
		if (log->cur_pos == (off_t)-1) {
			fprintf(stderr, "Error seeking in log: %d\n", errno);
			return -1;
		}
	} else {
		log->cur_pos += hdr_size;
	}

	flags = le64_to_cpu(entry->flags);
	entry_flags_to_str(flags, flags_buf);
	if (log_writes_verbose > 1)
		printf("seek entry %d@%llu: %llu, size %llu, flags 0x%llx(%s)\n",
		       (int)log->cur_entry - 1, log->cur_pos / log->sectorsize,
		       (unsigned long long)le64_to_cpu(entry->sector),
		       (unsigned long long)le64_to_cpu(entry->nr_sectors),
		       (unsigned long long)flags, flags_buf);

	/* Empty entries and discard entries have nothing to seek over. */
	data_size = le64_to_cpu(entry->nr_sectors) * log->sectorsize;
	if (!data_size || (flags & LOG_DISCARD_FLAG))
		return 0;

	log->cur_pos = lseek(log->logfd, data_size, SEEK_CUR);
	if (log->cur_pos == (off_t)-1) {
		fprintf(stderr, "Error seeking in log: %d\n", errno);
		return -1;
	}
	return 0;
}
/*
 * @logfile: the file that contains the write log.
 * @replayfile: the file/device to replay onto, can be NULL.
 *
 * @return: a newly allocated log positioned at the first entry, or NULL on
 * failure (a message is printed to stderr). Release it with log_free().
 *
 * Opens a logfile, checks the super block (magic, version, sector size) and
 * returns a struct log. Fields that are not derived from the super block
 * start out as zero.
 */
struct log *log_open(char *logfile, char *replayfile)
{
	struct log *log;
	struct log_write_super super;
	ssize_t ret;

	/*
	 * Zero the allocation so that fields not assigned below (such as
	 * flags, which log_discard() reads) never hold indeterminate values.
	 */
	log = calloc(1, sizeof(*log));
	if (!log) {
		fprintf(stderr, "Couldn't alloc log\n");
		return NULL;
	}

	/* Mark both descriptors unopened so log_free() is safe on any path. */
	log->replayfd = -1;
	log->logfd = open(logfile, O_RDONLY);
	if (log->logfd < 0) {
		fprintf(stderr, "Couldn't open log %s: %d\n", logfile,
			errno);
		log_free(log);
		return NULL;
	}

	if (replayfile) {
		log->replayfd = open(replayfile, O_WRONLY);
		if (log->replayfd < 0) {
			fprintf(stderr, "Couldn't open replay file %s: %d\n",
				replayfile, errno);
			log_free(log);
			return NULL;
		}
	}

	ret = read(log->logfd, &super, sizeof(super));
	/*
	 * Test for failure before comparing sizes: comparing a negative
	 * ssize_t directly against a size_t converts it to a huge unsigned
	 * value and the error would go unnoticed.
	 */
	if (ret < 0 || (size_t)ret < sizeof(super)) {
		fprintf(stderr, "Error reading super: %d\n", errno);
		log_free(log);
		return NULL;
	}

	if (le64_to_cpu(super.magic) != WRITE_LOG_MAGIC) {
		fprintf(stderr, "Magic doesn't match\n");
		log_free(log);
		return NULL;
	}

	if (le64_to_cpu(super.version) != WRITE_LOG_VERSION) {
		fprintf(stderr, "Version mismatch, wanted %d, have %d\n",
			WRITE_LOG_VERSION, (int)le64_to_cpu(super.version));
		log_free(log);
		return NULL;
	}

	log->sectorsize = le32_to_cpu(super.sectorsize);
	/*
	 * The super block occupies the first sector, and the sector size is
	 * used as a divisor elsewhere, so it can be neither zero nor smaller
	 * than the super block itself.
	 */
	if (log->sectorsize < sizeof(super)) {
		fprintf(stderr, "Invalid sector size %llu\n",
			(unsigned long long)log->sectorsize);
		log_free(log);
		return NULL;
	}
	log->nr_entries = le64_to_cpu(super.nr_entries);
	log->max_zero_size = 128 * 1024 * 1024;

	/* Skip the rest of the super block sector to reach the first entry. */
	log->cur_pos = lseek(log->logfd, log->sectorsize - sizeof(super),
			     SEEK_CUR);
	if (log->cur_pos == (off_t) -1) {
		fprintf(stderr, "Error seeking to first entry: %d\n", errno);
		log_free(log);
		return NULL;
	}
	log->cur_entry = 0;
	return log;
}