/*
* sg engine
*
* IO engine that uses the Linux SG v3 interface to talk to SCSI devices
*
 * This ioengine can operate in two modes:
 *   sync:  with block devices (/dev/sdX), or with character devices
 *          (/dev/sgY) when direct=1 or sync=1
 *   async: with character devices (/dev/sgY) when direct=0 and sync=0
 *
 * What value does queue() return for the different cases?
 * (R = read, W = write, T = trim)
 *                              queue() return value
 * In sync mode:
 *   /dev/sdX            RWT    FIO_Q_COMPLETED
 *   /dev/sgY            RWT    FIO_Q_COMPLETED
 *     (with direct=1 or sync=1)
 *
 * In async mode:
 *   /dev/sgY            RWT    FIO_Q_QUEUED
 *     (with direct=0 and sync=0)
*
 * Because FIO_SYNCIO is set for this ioengine, td_io_queue() will fill in
 * issue_time *before* each IO is sent to queue().
*
 * Where are the IO counting functions called for the different cases?
 *
 * In sync mode:
 *   /dev/sdX (commit == NULL)
 *     RWT:
 *       io_u_mark_depth() called in td_io_queue()
 *       io_u_mark_submit/complete() called in td_io_queue()
 *       issue_time set in td_io_queue()
 *
 *   /dev/sgY with direct=1 or sync=1 (commit does nothing)
 *     RWT:
 *       io_u_mark_depth() called in td_io_queue()
 *       io_u_mark_submit/complete() called in queue()
 *       issue_time set in td_io_queue()
 *
 * In async mode:
 *   /dev/sgY with direct=0 and sync=0
 *     RW: read and write operations are submitted in queue()
 *       io_u_mark_depth() called in td_io_commit()
 *       io_u_mark_submit() called in queue()
 *       issue_time set in td_io_queue()
 *
 *     T: trim operations are queued in queue() and submitted in commit()
 *       io_u_mark_depth() called in td_io_commit()
 *       io_u_mark_submit() called in commit()
 *       issue_time set in commit()
*
*/
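/*
 * A minimal example job, assuming a hypothetical /dev/sg0 character
 * device, that exercises the async path described above:
 *
 *   [sg-async-read]
 *   ioengine=sg
 *   filename=/dev/sg0
 *   rw=randread
 *   bs=512
 *   iodepth=16
 *   direct=0
 *   sync=0
 */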
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <poll.h>
#include "../fio.h"
#include "../optgroup.h"
#ifdef FIO_HAVE_SGIO
enum {
FIO_SG_WRITE = 1,
FIO_SG_WRITE_VERIFY = 2,
FIO_SG_WRITE_SAME = 3
};
struct sg_options {
void *pad;
unsigned int readfua;
unsigned int writefua;
unsigned int write_mode;
};
static struct fio_option options[] = {
{
.name = "readfua",
.lname = "sg engine read fua flag support",
.type = FIO_OPT_BOOL,
.off1 = offsetof(struct sg_options, readfua),
.help = "Set FUA flag (force unit access) for all Read operations",
.def = "0",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_SG,
},
{
.name = "writefua",
.lname = "sg engine write fua flag support",
.type = FIO_OPT_BOOL,
.off1 = offsetof(struct sg_options, writefua),
.help = "Set FUA flag (force unit access) for all Write operations",
.def = "0",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_SG,
},
{
.name = "sg_write_mode",
.lname = "specify sg write mode",
.type = FIO_OPT_STR,
.off1 = offsetof(struct sg_options, write_mode),
.help = "Specify SCSI WRITE mode",
.def = "write",
.posval = {
{ .ival = "write",
.oval = FIO_SG_WRITE,
.help = "Issue standard SCSI WRITE commands",
},
{ .ival = "verify",
.oval = FIO_SG_WRITE_VERIFY,
.help = "Issue SCSI WRITE AND VERIFY commands",
},
{ .ival = "same",
.oval = FIO_SG_WRITE_SAME,
.help = "Issue SCSI WRITE SAME commands",
},
},
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_SG,
},
{
.name = NULL,
},
};
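/*
 * For example, a job using a hypothetical /dev/sg0 device could request
 * WRITE AND VERIFY commands with:
 *
 *   fio --name=sgverify --ioengine=sg --filename=/dev/sg0 \
 *       --rw=write --bs=4k --sg_write_mode=verify
 */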
#define MAX_10B_LBA 0xFFFFFFFFULL
#define SCSI_TIMEOUT_MS 30000 // 30 second timeout; currently no method to override
#define MAX_SB 64 // sense block maximum return size
/*
#define FIO_SGIO_DEBUG
*/
struct sgio_cmd {
unsigned char cdb[16]; // enhanced from 10 to support 16 byte commands
unsigned char sb[MAX_SB]; // add sense block to commands
int nr;
};
struct sgio_trim {
	uint8_t *unmap_param;		// UNMAP parameter list buffer
	unsigned int unmap_range_count;	// ranges queued so far in unmap_param
	struct io_u **trim_io_us;	// io_us covered by this UNMAP command
};
struct sgio_data {
	struct sgio_cmd *cmds;		// per-io_u cdb and sense buffers
	struct io_u **events;		// completed io_us handed back by event()
	struct pollfd *pfds;
	int *fd_flags;
	void *sgbuf;			// scratch space for reaping sg_io_hdrs
	unsigned int bs;		// device logical block size in bytes
	int type_checked;
	struct sgio_trim **trim_queues;	// one pending UNMAP per io_u index
	int current_queue;		// trim queue being filled, or -1 if none
#ifdef FIO_SGIO_DEBUG
	unsigned int *trim_queue_map;	// io_u index -> owning trim queue
#endif
};
static inline uint32_t sgio_get_be32(uint8_t *buf)
{
return be32_to_cpu(*((uint32_t *) buf));
}
static inline uint64_t sgio_get_be64(uint8_t *buf)
{
return be64_to_cpu(*((uint64_t *) buf));
}
static inline void sgio_set_be16(uint16_t val, uint8_t *buf)
{
uint16_t t = cpu_to_be16(val);
memcpy(buf, &t, sizeof(uint16_t));
}
static inline void sgio_set_be32(uint32_t val, uint8_t *buf)
{
uint32_t t = cpu_to_be32(val);
memcpy(buf, &t, sizeof(uint32_t));
}
static inline void sgio_set_be64(uint64_t val, uint8_t *buf)
{
uint64_t t = cpu_to_be64(val);
memcpy(buf, &t, sizeof(uint64_t));
}
static inline bool sgio_unbuffered(struct thread_data *td)
{
return (td->o.odirect || td->o.sync_io);
}
static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
struct io_u *io_u, int fs)
{
struct sgio_cmd *sc = &sd->cmds[io_u->index];
memset(hdr, 0, sizeof(*hdr));
memset(sc->cdb, 0, sizeof(sc->cdb));
hdr->interface_id = 'S';
hdr->cmdp = sc->cdb;
hdr->cmd_len = sizeof(sc->cdb);
hdr->sbp = sc->sb;
	hdr->mx_sb_len = sizeof(sc->sb);
	/* pack_id and usr_ptr let completions be matched back to io_us */
	hdr->pack_id = io_u->index;
	hdr->usr_ptr = io_u;
hdr->timeout = SCSI_TIMEOUT_MS;
if (fs) {
hdr->dxferp = io_u->xfer_buf;
hdr->dxfer_len = io_u->xfer_buflen;
}
}
static int pollin_events(struct pollfd *pfds, int fds)
{
int i;
for (i = 0; i < fds; i++)
if (pfds[i].revents & POLLIN)
return 1;
return 0;
}
static int sg_fd_read(int fd, void *data, size_t size)
{
int err = 0;
while (size) {
ssize_t ret;
ret = read(fd, data, size);
if (ret < 0) {
if (errno == EAGAIN || errno == EINTR)
continue;
err = errno;
break;
} else if (!ret)
break;
else {
data += ret;
size -= ret;
}
}
if (err)
return err;
if (size)
return EAGAIN;
return 0;
}
static int fio_sgio_getevents(struct thread_data *td, unsigned int min,
unsigned int max,
const struct timespec fio_unused *t)
{
struct sgio_data *sd = td->io_ops_data;
int left = max, eventNum, ret, r = 0, trims = 0;
void *buf = sd->sgbuf;
unsigned int i, j, events;
struct fio_file *f;
struct io_u *io_u;
/*
* Fill in the file descriptors
*/
for_each_file(td, f, i) {
/*
* don't block for min events == 0
*/
if (!min)
sd->fd_flags[i] = fio_set_fd_nonblocking(f->fd, "sg");
else
sd->fd_flags[i] = -1;
sd->pfds[i].fd = f->fd;
sd->pfds[i].events = POLLIN;
}
/*
** There are two counters here:
** - number of SCSI commands completed
** - number of io_us completed
**
** These are the same with reads and writes, but
** could differ with trim/unmap commands because
** a single unmap can include multiple io_us
*/
while (left > 0) {
char *p;
dprint(FD_IO, "sgio_getevents: sd %p: min=%d, max=%d, left=%d\n", sd, min, max, left);
do {
if (!min)
break;
ret = poll(sd->pfds, td->o.nr_files, -1);
if (ret < 0) {
if (!r)
r = -errno;
td_verror(td, errno, "poll");
break;
} else if (!ret)
continue;
if (pollin_events(sd->pfds, td->o.nr_files))
break;
} while (1);
if (r < 0)
break;
re_read:
p = buf;
events = 0;
for_each_file(td, f, i) {
for (eventNum = 0; eventNum < left; eventNum++) {
ret = sg_fd_read(f->fd, p, sizeof(struct sg_io_hdr));
dprint(FD_IO, "sgio_getevents: sg_fd_read ret: %d\n", ret);
if (ret) {
r = -ret;
td_verror(td, r, "sg_read");
break;
}
io_u = ((struct sg_io_hdr *)p)->usr_ptr;
if (io_u->ddir == DDIR_TRIM) {
events += sd->trim_queues[io_u->index]->unmap_range_count;
eventNum += sd->trim_queues[io_u->index]->unmap_range_count - 1;
} else
events++;
p += sizeof(struct sg_io_hdr);
dprint(FD_IO, "sgio_getevents: events: %d, eventNum: %d, left: %d\n", events, eventNum, left);
}
}
if (r < 0 && !events)
break;
if (!events) {
usleep(1000);
goto re_read;
}
left -= events;
r += events;
for (i = 0; i < events; i++) {
struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
sd->events[i + trims] = hdr->usr_ptr;
io_u = (struct io_u *)(hdr->usr_ptr);
if (hdr->info & SG_INFO_CHECK) {
/* record if an io error occurred, ignore resid */
memcpy(&io_u->hdr, hdr, sizeof(struct sg_io_hdr));
sd->events[i + trims]->error = EIO;
}
if (io_u->ddir == DDIR_TRIM) {
struct sgio_trim *st = sd->trim_queues[io_u->index];
#ifdef FIO_SGIO_DEBUG
assert(st->trim_io_us[0] == io_u);
assert(sd->trim_queue_map[io_u->index] == io_u->index);
dprint(FD_IO, "sgio_getevents: reaping %d io_us from trim queue %d\n", st->unmap_range_count, io_u->index);
dprint(FD_IO, "sgio_getevents: reaped io_u %d and stored in events[%d]\n", io_u->index, i+trims);
#endif
for (j = 1; j < st->unmap_range_count; j++) {
++trims;
sd->events[i + trims] = st->trim_io_us[j];
#ifdef FIO_SGIO_DEBUG
dprint(FD_IO, "sgio_getevents: reaped io_u %d and stored in events[%d]\n", st->trim_io_us[j]->index, i+trims);
assert(sd->trim_queue_map[st->trim_io_us[j]->index] == io_u->index);
#endif
if (hdr->info & SG_INFO_CHECK) {
/* record if an io error occurred, ignore resid */
memcpy(&st->trim_io_us[j]->hdr, hdr, sizeof(struct sg_io_hdr));
sd->events[i + trims]->error = EIO;
}
}
events -= st->unmap_range_count - 1;
st->unmap_range_count = 0;
}
}
}
if (!min) {
for_each_file(td, f, i) {
if (sd->fd_flags[i] == -1)
continue;
if (fcntl(f->fd, F_SETFL, sd->fd_flags[i]) < 0)
log_err("fio: sg failed to restore fcntl flags: %s\n", strerror(errno));
}
}
return r;
}
static enum fio_q_status fio_sgio_ioctl_doio(struct thread_data *td,
struct fio_file *f,
struct io_u *io_u)
{
struct sgio_data *sd = td->io_ops_data;
struct sg_io_hdr *hdr = &io_u->hdr;
int ret;
sd->events[0] = io_u;
ret = ioctl(f->fd, SG_IO, hdr);
if (ret < 0)
return ret;
/* record if an io error occurred */
if (hdr->info & SG_INFO_CHECK)
io_u->error = EIO;
return FIO_Q_COMPLETED;
}
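/*
 * For character devices the sg v3 interface is asynchronous: write()ing
 * an sg_io_hdr to the fd submits the command, and a later read() reaps a
 * completed one. In the do_sync case we block in read() right away to
 * wait for the command we just submitted to complete.
 */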
static enum fio_q_status fio_sgio_rw_doio(struct fio_file *f,
struct io_u *io_u, int do_sync)
{
struct sg_io_hdr *hdr = &io_u->hdr;
int ret;
ret = write(f->fd, hdr, sizeof(*hdr));
if (ret < 0)
return ret;
if (do_sync) {
ret = read(f->fd, hdr, sizeof(*hdr));
if (ret < 0)
return ret;
/* record if an io error occurred */
if (hdr->info & SG_INFO_CHECK)
io_u->error = EIO;
return FIO_Q_COMPLETED;
}
return FIO_Q_QUEUED;
}
static enum fio_q_status fio_sgio_doio(struct thread_data *td,
struct io_u *io_u, int do_sync)
{
struct fio_file *f = io_u->file;
enum fio_q_status ret;
if (f->filetype == FIO_TYPE_BLOCK) {
ret = fio_sgio_ioctl_doio(td, f, io_u);
td_verror(td, io_u->error, __func__);
} else {
ret = fio_sgio_rw_doio(f, io_u, do_sync);
if (do_sync)
td_verror(td, io_u->error, __func__);
}
return ret;
}
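/*
 * Fill in the LBA and transfer length fields of the cdb. Per SBC, 10-byte
 * read/write cdbs carry a 32-bit LBA at bytes 2-5 and a 16-bit transfer
 * length at bytes 7-8, while 16-byte cdbs carry a 64-bit LBA at bytes 2-9
 * and a 32-bit transfer length at bytes 10-13.
 */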
static void fio_sgio_rw_lba(struct sg_io_hdr *hdr, unsigned long long lba,
unsigned long long nr_blocks)
{
if (lba < MAX_10B_LBA) {
sgio_set_be32((uint32_t) lba, &hdr->cmdp[2]);
sgio_set_be16((uint16_t) nr_blocks, &hdr->cmdp[7]);
} else {
sgio_set_be64(lba, &hdr->cmdp[2]);
sgio_set_be32((uint32_t) nr_blocks, &hdr->cmdp[10]);
}
return;
}
static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
{
struct sg_io_hdr *hdr = &io_u->hdr;
struct sg_options *o = td->eo;
struct sgio_data *sd = td->io_ops_data;
unsigned long long nr_blocks, lba;
int offset;
if (io_u->xfer_buflen & (sd->bs - 1)) {
log_err("read/write not sector aligned\n");
return EINVAL;
}
nr_blocks = io_u->xfer_buflen / sd->bs;
lba = io_u->offset / sd->bs;
if (io_u->ddir == DDIR_READ) {
sgio_hdr_init(sd, hdr, io_u, 1);
hdr->dxfer_direction = SG_DXFER_FROM_DEV;
if (lba < MAX_10B_LBA)
hdr->cmdp[0] = 0x28; // read(10)
else
hdr->cmdp[0] = 0x88; // read(16)
if (o->readfua)
hdr->cmdp[1] |= 0x08;
fio_sgio_rw_lba(hdr, lba, nr_blocks);
} else if (io_u->ddir == DDIR_WRITE) {
sgio_hdr_init(sd, hdr, io_u, 1);
hdr->dxfer_direction = SG_DXFER_TO_DEV;
switch(o->write_mode) {
case FIO_SG_WRITE:
if (lba < MAX_10B_LBA)
hdr->cmdp[0] = 0x2a; // write(10)
else
hdr->cmdp[0] = 0x8a; // write(16)
if (o->writefua)
hdr->cmdp[1] |= 0x08;
break;
		case FIO_SG_WRITE_VERIFY:
			if (lba < MAX_10B_LBA)
				hdr->cmdp[0] = 0x2e; // write and verify(10)
			else
				hdr->cmdp[0] = 0x8e; // write and verify(16)
			// BYTCHK is disabled by virtue of the memset in sgio_hdr_init
			break;
		case FIO_SG_WRITE_SAME:
			hdr->dxfer_len = sd->bs;
			if (lba < MAX_10B_LBA)
				hdr->cmdp[0] = 0x41; // write same(10)
			else
				hdr->cmdp[0] = 0x93; // write same(16)
			break;
		}
fio_sgio_rw_lba(hdr, lba, nr_blocks);
} else if (io_u->ddir == DDIR_TRIM) {
struct sgio_trim *st;
if (sd->current_queue == -1) {
sgio_hdr_init(sd, hdr, io_u, 0);
hdr->cmd_len = 10;
hdr->dxfer_direction = SG_DXFER_TO_DEV;
hdr->cmdp[0] = 0x42; // unmap
sd->current_queue = io_u->index;
st = sd->trim_queues[sd->current_queue];
hdr->dxferp = st->unmap_param;
#ifdef FIO_SGIO_DEBUG
assert(sd->trim_queues[io_u->index]->unmap_range_count == 0);
dprint(FD_IO, "sg: creating new queue based on io_u %d\n", io_u->index);
#endif
}
else
st = sd->trim_queues[sd->current_queue];
dprint(FD_IO, "sg: adding io_u %d to trim queue %d\n", io_u->index, sd->current_queue);
st->trim_io_us[st->unmap_range_count] = io_u;
#ifdef FIO_SGIO_DEBUG
sd->trim_queue_map[io_u->index] = sd->current_queue;
#endif
offset = 8 + 16 * st->unmap_range_count;
sgio_set_be64(lba, &st->unmap_param[offset]);
sgio_set_be32((uint32_t) nr_blocks, &st->unmap_param[offset + 8]);
st->unmap_range_count++;
} else if (ddir_sync(io_u->ddir)) {
sgio_hdr_init(sd, hdr, io_u, 0);
hdr->dxfer_direction = SG_DXFER_NONE;
if (lba < MAX_10B_LBA)
hdr->cmdp[0] = 0x35; // synccache(10)
else
hdr->cmdp[0] = 0x91; // synccache(16)
} else
assert(0);
return 0;
}
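/*
 * Finish the UNMAP parameter list. Per SBC its layout is an 8-byte header
 * (UNMAP DATA LENGTH in bytes 0-1, UNMAP BLOCK DESCRIPTOR DATA LENGTH in
 * bytes 2-3, bytes 4-7 reserved) followed by 16-byte block descriptors,
 * each holding an 8-byte LBA, a 4-byte block count and 4 reserved bytes;
 * this is why fio_sgio_prep() stores each range at offset 8 + 16 * n. The
 * cdb's PARAMETER LIST LENGTH field at bytes 7-8 holds the total size.
 */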
static void fio_sgio_unmap_setup(struct sg_io_hdr *hdr, struct sgio_trim *st)
{
uint16_t cnt = st->unmap_range_count * 16;
hdr->dxfer_len = cnt + 8;
sgio_set_be16(cnt + 8, &hdr->cmdp[7]);
sgio_set_be16(cnt + 6, st->unmap_param);
sgio_set_be16(cnt, &st->unmap_param[2]);
return;
}
static enum fio_q_status fio_sgio_queue(struct thread_data *td,
struct io_u *io_u)
{
struct sg_io_hdr *hdr = &io_u->hdr;
struct sgio_data *sd = td->io_ops_data;
int ret, do_sync = 0;
fio_ro_check(td, io_u);
if (sgio_unbuffered(td) || ddir_sync(io_u->ddir))
do_sync = 1;
if (io_u->ddir == DDIR_TRIM) {
if (do_sync || io_u->file->filetype == FIO_TYPE_BLOCK) {
struct sgio_trim *st = sd->trim_queues[sd->current_queue];
/* finish cdb setup for unmap because we are
** doing unmap commands synchronously */
#ifdef FIO_SGIO_DEBUG
assert(st->unmap_range_count == 1);
assert(io_u == st->trim_io_us[0]);
#endif
hdr = &io_u->hdr;
fio_sgio_unmap_setup(hdr, st);
st->unmap_range_count = 0;
sd->current_queue = -1;
} else
/* queue up trim ranges and submit in commit() */
return FIO_Q_QUEUED;
}
ret = fio_sgio_doio(td, io_u, do_sync);
if (ret < 0)
io_u->error = errno;
else if (hdr->status) {
io_u->resid = hdr->resid;
io_u->error = EIO;
} else if (td->io_ops->commit != NULL) {
if (do_sync && !ddir_sync(io_u->ddir)) {
io_u_mark_submit(td, 1);
io_u_mark_complete(td, 1);
} else if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) {
io_u_mark_submit(td, 1);
io_u_queued(td, io_u);
}
}
if (io_u->error) {
td_verror(td, io_u->error, "xfer");
return FIO_Q_COMPLETED;
}
return ret;
}
static int fio_sgio_commit(struct thread_data *td)
{
struct sgio_data *sd = td->io_ops_data;
struct sgio_trim *st;
struct io_u *io_u;
struct sg_io_hdr *hdr;
struct timespec now;
unsigned int i;
int ret;
if (sd->current_queue == -1)
return 0;
st = sd->trim_queues[sd->current_queue];
io_u = st->trim_io_us[0];
hdr = &io_u->hdr;
fio_sgio_unmap_setup(hdr, st);
sd->current_queue = -1;
ret = fio_sgio_rw_doio(io_u->file, io_u, 0);
if (ret < 0 || hdr->status) {
int error;
if (ret < 0)
error = errno;
else {
error = EIO;
ret = -EIO;
}
for (i = 0; i < st->unmap_range_count; i++) {
st->trim_io_us[i]->error = error;
clear_io_u(td, st->trim_io_us[i]);
if (hdr->status)
st->trim_io_us[i]->resid = hdr->resid;
}
td_verror(td, error, "xfer");
return ret;
}
if (fio_fill_issue_time(td)) {
fio_gettime(&now, NULL);
		for (i = 0; i < st->unmap_range_count; i++) {
			memcpy(&st->trim_io_us[i]->issue_time, &now, sizeof(now));
			io_u_queued(td, st->trim_io_us[i]);
		}
}
io_u_mark_submit(td, st->unmap_range_count);
return 0;
}
static struct io_u *fio_sgio_event(struct thread_data *td, int event)
{
struct sgio_data *sd = td->io_ops_data;
return sd->events[event];
}
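/*
 * READ CAPACITY(10) returns an 8-byte payload with the highest LBA in
 * bytes 0-3 and the block size in bytes 4-7. READ CAPACITY(16), service
 * action 0x10 of opcode 0x9e, returns the highest LBA in bytes 0-7 and
 * the block size in bytes 8-11.
 */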
static int fio_sgio_read_capacity(struct thread_data *td, unsigned int *bs,
unsigned long long *max_lba)
{
/*
* need to do read capacity operation w/o benefit of sd or
* io_u structures, which are not initialized until later.
*/
struct sg_io_hdr hdr;
unsigned long long hlba;
unsigned int blksz = 0;
unsigned char cmd[16];
unsigned char sb[64];
unsigned char buf[32]; // read capacity return
int ret;
int fd = -1;
struct fio_file *f = td->files[0];
/* open file independent of rest of application */
fd = open(f->file_name, O_RDONLY);
if (fd < 0)
return -errno;
memset(&hdr, 0, sizeof(hdr));
memset(cmd, 0, sizeof(cmd));
memset(sb, 0, sizeof(sb));
memset(buf, 0, sizeof(buf));
/* First let's try a 10 byte read capacity. */
hdr.interface_id = 'S';
hdr.cmdp = cmd;
hdr.cmd_len = 10;
hdr.sbp = sb;
hdr.mx_sb_len = sizeof(sb);
hdr.timeout = SCSI_TIMEOUT_MS;
hdr.cmdp[0] = 0x25; // Read Capacity(10)
hdr.dxfer_direction = SG_DXFER_FROM_DEV;
hdr.dxferp = buf;
hdr.dxfer_len = sizeof(buf);
ret = ioctl(fd, SG_IO, &hdr);
if (ret < 0) {
close(fd);
return ret;
}
if (hdr.info & SG_INFO_CHECK) {
/* RCAP(10) might be unsupported by device. Force RCAP(16) */
hlba = MAX_10B_LBA;
} else {
blksz = sgio_get_be32(&buf[4]);
hlba = sgio_get_be32(buf);
}
	/*
	 * If the max lba reported is MAX_10B_LBA, the capacity may be too
	 * large for a 10-byte command to report, so retry with the 16-byte
	 * Read Capacity command.
	 */
if (hlba == MAX_10B_LBA) {
hdr.cmd_len = 16;
hdr.cmdp[0] = 0x9e; // service action
hdr.cmdp[1] = 0x10; // Read Capacity(16)
sgio_set_be32(sizeof(buf), &hdr.cmdp[10]);
hdr.dxfer_direction = SG_DXFER_FROM_DEV;
hdr.dxferp = buf;
hdr.dxfer_len = sizeof(buf);
ret = ioctl(fd, SG_IO, &hdr);
if (ret < 0) {
close(fd);
return ret;
}
/* record if an io error occurred */
if (hdr.info & SG_INFO_CHECK)
td_verror(td, EIO, "fio_sgio_read_capacity");
blksz = sgio_get_be32(&buf[8]);
hlba = sgio_get_be64(buf);
}
if (blksz) {
*bs = blksz;
*max_lba = hlba;
ret = 0;
} else {
ret = EIO;
}
close(fd);
return ret;
}
static void fio_sgio_cleanup(struct thread_data *td)
{
struct sgio_data *sd = td->io_ops_data;
int i;
if (sd) {
free(sd->events);
free(sd->cmds);
free(sd->fd_flags);
free(sd->pfds);
free(sd->sgbuf);
#ifdef FIO_SGIO_DEBUG
free(sd->trim_queue_map);
#endif
for (i = 0; i < td->o.iodepth; i++) {
free(sd->trim_queues[i]->unmap_param);
free(sd->trim_queues[i]->trim_io_us);
free(sd->trim_queues[i]);
}
free(sd->trim_queues);
free(sd);
}
}
static int fio_sgio_init(struct thread_data *td)
{
struct sgio_data *sd;
struct sgio_trim *st;
int i;
sd = calloc(1, sizeof(*sd));
sd->cmds = calloc(td->o.iodepth, sizeof(struct sgio_cmd));
sd->sgbuf = calloc(td->o.iodepth, sizeof(struct sg_io_hdr));
sd->events = calloc(td->o.iodepth, sizeof(struct io_u *));
sd->pfds = calloc(td->o.nr_files, sizeof(struct pollfd));
sd->fd_flags = calloc(td->o.nr_files, sizeof(int));
sd->type_checked = 0;
sd->trim_queues = calloc(td->o.iodepth, sizeof(struct sgio_trim *));
sd->current_queue = -1;
#ifdef FIO_SGIO_DEBUG
sd->trim_queue_map = calloc(td->o.iodepth, sizeof(int));
#endif
for (i = 0; i < td->o.iodepth; i++) {
sd->trim_queues[i] = calloc(1, sizeof(struct sgio_trim));
st = sd->trim_queues[i];
		st->unmap_param = calloc(td->o.iodepth + 1, sizeof(char[16])); // 16 bytes per range plus room for the 8-byte header
st->unmap_range_count = 0;
st->trim_io_us = calloc(td->o.iodepth, sizeof(struct io_u *));
}
td->io_ops_data = sd;
/*
* we want to do it, regardless of whether odirect is set or not
*/
td->o.override_sync = 1;
return 0;
}
static int fio_sgio_type_check(struct thread_data *td, struct fio_file *f)
{
struct sgio_data *sd = td->io_ops_data;
unsigned int bs = 0;
unsigned long long max_lba = 0;
if (f->filetype == FIO_TYPE_BLOCK) {
if (ioctl(f->fd, BLKSSZGET, &bs) < 0) {
td_verror(td, errno, "ioctl");
return 1;
}
} else if (f->filetype == FIO_TYPE_CHAR) {
int version, ret;
if (ioctl(f->fd, SG_GET_VERSION_NUM, &version) < 0) {
td_verror(td, errno, "ioctl");
return 1;
}
ret = fio_sgio_read_capacity(td, &bs, &max_lba);
if (ret) {
td_verror(td, td->error, "fio_sgio_read_capacity");
log_err("ioengine sg unable to read capacity successfully\n");
return 1;
}
} else {
td_verror(td, EINVAL, "wrong file type");
log_err("ioengine sg only works on block or character devices\n");
return 1;
}
sd->bs = bs;
// Determine size of commands needed based on max_lba
if (max_lba >= MAX_10B_LBA) {
dprint(FD_IO, "sgio_type_check: using 16 byte read/write "
"commands for lba above 0x%016llx/0x%016llx\n",
MAX_10B_LBA, max_lba);
}
if (f->filetype == FIO_TYPE_BLOCK) {
td->io_ops->getevents = NULL;
td->io_ops->event = NULL;
td->io_ops->commit = NULL;
/*
** Setting these functions to null may cause problems
** with filename=/dev/sda:/dev/sg0 since we are only
** considering a single file
*/
}
sd->type_checked = 1;
return 0;
}
static int fio_sgio_open(struct thread_data *td, struct fio_file *f)
{
struct sgio_data *sd = td->io_ops_data;
int ret;
ret = generic_open_file(td, f);
if (ret)
return ret;
if (sd && !sd->type_checked && fio_sgio_type_check(td, f)) {
ret = generic_close_file(td, f);
return 1;
}
return 0;
}
/*
* Build an error string with details about the driver, host or scsi
 * error contained in the sg header. The caller will use it as necessary.
*/
static char *fio_sgio_errdetails(struct io_u *io_u)
{
struct sg_io_hdr *hdr = &io_u->hdr;
#define MAXERRDETAIL 1024
#define MAXMSGCHUNK 128
char *msg, msgchunk[MAXMSGCHUNK];
int i;
	msg = calloc(1, MAXERRDETAIL);
/*
 * can't seem to find sg_err.h, so I'll just echo the define values
 * so others can search the internet for clearer clues of meaning.
*/
if (hdr->info & SG_INFO_CHECK) {
if (hdr->host_status) {
snprintf(msgchunk, MAXMSGCHUNK, "SG Host Status: 0x%02x; ", hdr->host_status);
strlcat(msg, msgchunk, MAXERRDETAIL);
switch (hdr->host_status) {
case 0x01:
strlcat(msg, "SG_ERR_DID_NO_CONNECT", MAXERRDETAIL);
break;
case 0x02:
strlcat(msg, "SG_ERR_DID_BUS_BUSY", MAXERRDETAIL);
break;
case 0x03:
strlcat(msg, "SG_ERR_DID_TIME_OUT", MAXERRDETAIL);
break;
case 0x04:
strlcat(msg, "SG_ERR_DID_BAD_TARGET", MAXERRDETAIL);
break;
case 0x05:
strlcat(msg, "SG_ERR_DID_ABORT", MAXERRDETAIL);
break;
case 0x06:
strlcat(msg, "SG_ERR_DID_PARITY", MAXERRDETAIL);
break;
case 0x07:
strlcat(msg, "SG_ERR_DID_ERROR (internal error)", MAXERRDETAIL);
break;
case 0x08:
strlcat(msg, "SG_ERR_DID_RESET", MAXERRDETAIL);
break;
case 0x09:
strlcat(msg, "SG_ERR_DID_BAD_INTR (unexpected)", MAXERRDETAIL);
break;
case 0x0a:
strlcat(msg, "SG_ERR_DID_PASSTHROUGH", MAXERRDETAIL);
break;
case 0x0b:
strlcat(msg, "SG_ERR_DID_SOFT_ERROR (driver retry?)", MAXERRDETAIL);
break;
case 0x0c:
strlcat(msg, "SG_ERR_DID_IMM_RETRY", MAXERRDETAIL);
break;
case 0x0d:
strlcat(msg, "SG_ERR_DID_REQUEUE", MAXERRDETAIL);
break;
case 0x0e:
strlcat(msg, "SG_ERR_DID_TRANSPORT_DISRUPTED", MAXERRDETAIL);
break;
case 0x0f:
strlcat(msg, "SG_ERR_DID_TRANSPORT_FAILFAST", MAXERRDETAIL);
break;
case 0x10:
strlcat(msg, "SG_ERR_DID_TARGET_FAILURE", MAXERRDETAIL);
break;
case 0x11:
strlcat(msg, "SG_ERR_DID_NEXUS_FAILURE", MAXERRDETAIL);
break;
case 0x12:
strlcat(msg, "SG_ERR_DID_ALLOC_FAILURE", MAXERRDETAIL);
break;
case 0x13:
strlcat(msg, "SG_ERR_DID_MEDIUM_ERROR", MAXERRDETAIL);
break;
default:
strlcat(msg, "Unknown", MAXERRDETAIL);
break;
}
strlcat(msg, ". ", MAXERRDETAIL);
}
if (hdr->driver_status) {
snprintf(msgchunk, MAXMSGCHUNK, "SG Driver Status: 0x%02x; ", hdr->driver_status);
strlcat(msg, msgchunk, MAXERRDETAIL);
switch (hdr->driver_status & 0x0F) {
case 0x01:
strlcat(msg, "SG_ERR_DRIVER_BUSY", MAXERRDETAIL);
break;
case 0x02:
strlcat(msg, "SG_ERR_DRIVER_SOFT", MAXERRDETAIL);
break;
case 0x03:
strlcat(msg, "SG_ERR_DRIVER_MEDIA", MAXERRDETAIL);
break;
case 0x04:
strlcat(msg, "SG_ERR_DRIVER_ERROR", MAXERRDETAIL);
break;
case 0x05:
strlcat(msg, "SG_ERR_DRIVER_INVALID", MAXERRDETAIL);
break;
case 0x06:
strlcat(msg, "SG_ERR_DRIVER_TIMEOUT", MAXERRDETAIL);
break;
case 0x07:
strlcat(msg, "SG_ERR_DRIVER_HARD", MAXERRDETAIL);
break;
case 0x08:
strlcat(msg, "SG_ERR_DRIVER_SENSE", MAXERRDETAIL);
break;
default:
strlcat(msg, "Unknown", MAXERRDETAIL);
break;
}
strlcat(msg, "; ", MAXERRDETAIL);
switch (hdr->driver_status & 0xF0) {
case 0x10:
strlcat(msg, "SG_ERR_SUGGEST_RETRY", MAXERRDETAIL);
break;
case 0x20:
strlcat(msg, "SG_ERR_SUGGEST_ABORT", MAXERRDETAIL);
break;
case 0x30:
strlcat(msg, "SG_ERR_SUGGEST_REMAP", MAXERRDETAIL);
break;
case 0x40:
strlcat(msg, "SG_ERR_SUGGEST_DIE", MAXERRDETAIL);
break;
case 0x80:
strlcat(msg, "SG_ERR_SUGGEST_SENSE", MAXERRDETAIL);
break;
}
strlcat(msg, ". ", MAXERRDETAIL);
}
if (hdr->status) {
snprintf(msgchunk, MAXMSGCHUNK, "SG SCSI Status: 0x%02x; ", hdr->status);
strlcat(msg, msgchunk, MAXERRDETAIL);
// SCSI 3 status codes
switch (hdr->status) {
case 0x02:
strlcat(msg, "CHECK_CONDITION", MAXERRDETAIL);
break;
case 0x04:
strlcat(msg, "CONDITION_MET", MAXERRDETAIL);
break;
case 0x08:
strlcat(msg, "BUSY", MAXERRDETAIL);
break;
case 0x10:
strlcat(msg, "INTERMEDIATE", MAXERRDETAIL);
break;
case 0x14:
strlcat(msg, "INTERMEDIATE_CONDITION_MET", MAXERRDETAIL);
break;
case 0x18:
strlcat(msg, "RESERVATION_CONFLICT", MAXERRDETAIL);
break;
case 0x22:
strlcat(msg, "COMMAND_TERMINATED", MAXERRDETAIL);
break;
case 0x28:
strlcat(msg, "TASK_SET_FULL", MAXERRDETAIL);
break;
case 0x30:
strlcat(msg, "ACA_ACTIVE", MAXERRDETAIL);
break;
case 0x40:
strlcat(msg, "TASK_ABORTED", MAXERRDETAIL);
break;
default:
strlcat(msg, "Unknown", MAXERRDETAIL);
break;
}
strlcat(msg, ". ", MAXERRDETAIL);
}
if (hdr->sb_len_wr) {
snprintf(msgchunk, MAXMSGCHUNK, "Sense Data (%d bytes):", hdr->sb_len_wr);
strlcat(msg, msgchunk, MAXERRDETAIL);
for (i = 0; i < hdr->sb_len_wr; i++) {
snprintf(msgchunk, MAXMSGCHUNK, " %02x", hdr->sbp[i]);
strlcat(msg, msgchunk, MAXERRDETAIL);
}
strlcat(msg, ". ", MAXERRDETAIL);
}
if (hdr->resid != 0) {
snprintf(msgchunk, MAXMSGCHUNK, "SG Driver: %d bytes out of %d not transferred. ", hdr->resid, hdr->dxfer_len);
strlcat(msg, msgchunk, MAXERRDETAIL);
}
if (hdr->cmdp) {
strlcat(msg, "cdb:", MAXERRDETAIL);
for (i = 0; i < hdr->cmd_len; i++) {
snprintf(msgchunk, MAXMSGCHUNK, " %02x", hdr->cmdp[i]);
strlcat(msg, msgchunk, MAXERRDETAIL);
}
strlcat(msg, ". ", MAXERRDETAIL);
if (io_u->ddir == DDIR_TRIM) {
unsigned char *param_list = hdr->dxferp;
strlcat(msg, "dxferp:", MAXERRDETAIL);
for (i = 0; i < hdr->dxfer_len; i++) {
snprintf(msgchunk, MAXMSGCHUNK, " %02x", param_list[i]);
strlcat(msg, msgchunk, MAXERRDETAIL);
}
strlcat(msg, ". ", MAXERRDETAIL);
}
}
}
if (!(hdr->info & SG_INFO_CHECK) && !strlen(msg))
strncpy(msg, "SG Driver did not report a Host, Driver or Device check",
MAXERRDETAIL - 1);
return msg;
}
/*
* get max file size from read capacity.
*/
static int fio_sgio_get_file_size(struct thread_data *td, struct fio_file *f)
{
	/*
	 * get_file_size is called even before sgio_init, so none of the
	 * sg_io structures in the thread_data are initialized yet. We
	 * therefore have to do the ReadCapacity without any of those
	 * helpers. One of the effects is that ReadCapacity may get called
	 * 4 times on each open: readcap(10) followed by readcap(16) if
	 * needed - just to get the file size after the init occurs - and
	 * it will be called again when "type_check" is called during
	 * structure initialization. I'm not sure how to prevent this
	 * little inefficiency.
	 */
unsigned int bs = 0;
unsigned long long max_lba = 0;
int ret;
if (fio_file_size_known(f))
return 0;
if (f->filetype != FIO_TYPE_BLOCK && f->filetype != FIO_TYPE_CHAR) {
td_verror(td, EINVAL, "wrong file type");
log_err("ioengine sg only works on block or character devices\n");
return 1;
}
ret = fio_sgio_read_capacity(td, &bs, &max_lba);
	if (ret) {
td_verror(td, td->error, "fio_sgio_read_capacity");
log_err("ioengine sg unable to successfully execute read capacity to get block size and maximum lba\n");
return 1;
}
f->real_file_size = (max_lba + 1) * bs;
fio_file_set_size_known(f);
return 0;
}
static struct ioengine_ops ioengine = {
.name = "sg",
.version = FIO_IOOPS_VERSION,
.init = fio_sgio_init,
.prep = fio_sgio_prep,
.queue = fio_sgio_queue,
.commit = fio_sgio_commit,
.getevents = fio_sgio_getevents,
.errdetails = fio_sgio_errdetails,
.event = fio_sgio_event,
.cleanup = fio_sgio_cleanup,
.open_file = fio_sgio_open,
.close_file = generic_close_file,
.get_file_size = fio_sgio_get_file_size,
.flags = FIO_SYNCIO | FIO_RAWIO,
.options = options,
.option_struct_size = sizeof(struct sg_options)
};
#else /* FIO_HAVE_SGIO */
/*
 * When we have a proper configure system in place, we simply won't build
 * and install this io engine. For now, install a crippled version that
 * just complains and fails to load.
*/
static int fio_sgio_init(struct thread_data fio_unused *td)
{
log_err("fio: ioengine sg not available\n");
return 1;
}
static struct ioengine_ops ioengine = {
.name = "sg",
.version = FIO_IOOPS_VERSION,
.init = fio_sgio_init,
};
#endif
static void fio_init fio_sgio_register(void)
{
register_ioengine(&ioengine);
}
static void fio_exit fio_sgio_unregister(void)
{
unregister_ioengine(&ioengine);
}