blob: 664e8b13c727107fd2659f8a9795c1fabcc4d5ae [file] [log] [blame]
/**
* FIO engine for DAOS File System (dfs).
*
* (C) Copyright 2020-2021 Intel Corporation.
*/
#include <fio.h>
#include <optgroup.h>
#include <daos.h>
#include <daos_fs.h>
static bool daos_initialized;
static int num_threads;
static pthread_mutex_t daos_mutex = PTHREAD_MUTEX_INITIALIZER;
daos_handle_t poh; /* pool handle */
daos_handle_t coh; /* container handle */
daos_oclass_id_t cid = OC_UNKNOWN; /* object class */
dfs_t *dfs; /* dfs mount reference */
struct daos_iou {
struct io_u *io_u;
daos_event_t ev;
d_sg_list_t sgl;
d_iov_t iov;
daos_size_t size;
bool complete;
};
struct daos_data {
daos_handle_t eqh;
dfs_obj_t *obj;
struct io_u **io_us;
int queued;
int num_ios;
};
struct daos_fio_options {
void *pad;
char *pool; /* Pool UUID */
char *cont; /* Container UUID */
daos_size_t chsz; /* Chunk size */
char *oclass; /* object class */
#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1
char *svcl; /* service replica list, deprecated */
#endif
};
static struct fio_option options[] = {
{
.name = "pool",
.lname = "pool uuid or label",
.type = FIO_OPT_STR_STORE,
.off1 = offsetof(struct daos_fio_options, pool),
.help = "DAOS pool uuid or label",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_DFS,
},
{
.name = "cont",
.lname = "container uuid or label",
.type = FIO_OPT_STR_STORE,
.off1 = offsetof(struct daos_fio_options, cont),
.help = "DAOS container uuid or label",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_DFS,
},
{
.name = "chunk_size",
.lname = "DFS chunk size",
.type = FIO_OPT_ULL,
.off1 = offsetof(struct daos_fio_options, chsz),
.help = "DFS chunk size in bytes",
.def = "0", /* use container default */
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_DFS,
},
{
.name = "object_class",
.lname = "object class",
.type = FIO_OPT_STR_STORE,
.off1 = offsetof(struct daos_fio_options, oclass),
.help = "DAOS object class",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_DFS,
},
#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1
{
.name = "svcl",
.lname = "List of service ranks",
.type = FIO_OPT_STR_STORE,
.off1 = offsetof(struct daos_fio_options, svcl),
.help = "List of pool replicated service ranks",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_DFS,
},
#endif
{
.name = NULL,
},
};
static int daos_fio_global_init(struct thread_data *td)
{
struct daos_fio_options *eo = td->eo;
daos_pool_info_t pool_info;
daos_cont_info_t co_info;
int rc = 0;
#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1
if (!eo->pool || !eo->cont || !eo->svcl) {
#else
if (!eo->pool || !eo->cont) {
#endif
log_err("Missing required DAOS options\n");
return EINVAL;
}
rc = daos_init();
if (rc != -DER_ALREADY && rc) {
log_err("Failed to initialize daos %d\n", rc);
td_verror(td, rc, "daos_init");
return rc;
}
#if !defined(DAOS_API_VERSION_MAJOR) || \
(DAOS_API_VERSION_MAJOR == 1 && DAOS_API_VERSION_MINOR < 3)
uuid_t pool_uuid, co_uuid;
rc = uuid_parse(eo->pool, pool_uuid);
if (rc) {
log_err("Failed to parse 'Pool uuid': %s\n", eo->pool);
td_verror(td, EINVAL, "uuid_parse(eo->pool)");
return EINVAL;
}
rc = uuid_parse(eo->cont, co_uuid);
if (rc) {
log_err("Failed to parse 'Cont uuid': %s\n", eo->cont);
td_verror(td, EINVAL, "uuid_parse(eo->cont)");
return EINVAL;
}
#endif
/* Connect to the DAOS pool */
#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1
d_rank_list_t *svcl = NULL;
svcl = daos_rank_list_parse(eo->svcl, ":");
if (svcl == NULL) {
log_err("Failed to parse svcl\n");
td_verror(td, EINVAL, "daos_rank_list_parse");
return EINVAL;
}
rc = daos_pool_connect(pool_uuid, NULL, svcl, DAOS_PC_RW,
&poh, &pool_info, NULL);
d_rank_list_free(svcl);
#elif (DAOS_API_VERSION_MAJOR == 1 && DAOS_API_VERSION_MINOR < 3)
rc = daos_pool_connect(pool_uuid, NULL, DAOS_PC_RW, &poh, &pool_info,
NULL);
#else
rc = daos_pool_connect(eo->pool, NULL, DAOS_PC_RW, &poh, &pool_info,
NULL);
#endif
if (rc) {
log_err("Failed to connect to pool %d\n", rc);
td_verror(td, rc, "daos_pool_connect");
return rc;
}
/* Open the DAOS container */
#if !defined(DAOS_API_VERSION_MAJOR) || \
(DAOS_API_VERSION_MAJOR == 1 && DAOS_API_VERSION_MINOR < 3)
rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, &co_info, NULL);
#else
rc = daos_cont_open(poh, eo->cont, DAOS_COO_RW, &coh, &co_info, NULL);
#endif
if (rc) {
log_err("Failed to open container: %d\n", rc);
td_verror(td, rc, "daos_cont_open");
(void)daos_pool_disconnect(poh, NULL);
return rc;
}
/* Mount encapsulated filesystem */
rc = dfs_mount(poh, coh, O_RDWR, &dfs);
if (rc) {
log_err("Failed to mount DFS namespace: %d\n", rc);
td_verror(td, rc, "dfs_mount");
(void)daos_pool_disconnect(poh, NULL);
(void)daos_cont_close(coh, NULL);
return rc;
}
/* Retrieve object class to use, if specified */
if (eo->oclass)
cid = daos_oclass_name2id(eo->oclass);
return 0;
}
static int daos_fio_global_cleanup()
{
int rc;
int ret = 0;
rc = dfs_umount(dfs);
if (rc) {
log_err("failed to umount dfs: %d\n", rc);
ret = rc;
}
rc = daos_cont_close(coh, NULL);
if (rc) {
log_err("failed to close container: %d\n", rc);
if (ret == 0)
ret = rc;
}
rc = daos_pool_disconnect(poh, NULL);
if (rc) {
log_err("failed to disconnect pool: %d\n", rc);
if (ret == 0)
ret = rc;
}
rc = daos_fini();
if (rc) {
log_err("failed to finalize daos: %d\n", rc);
if (ret == 0)
ret = rc;
}
return ret;
}
static int daos_fio_setup(struct thread_data *td)
{
return 0;
}
static int daos_fio_init(struct thread_data *td)
{
struct daos_data *dd;
int rc = 0;
pthread_mutex_lock(&daos_mutex);
dd = malloc(sizeof(*dd));
if (dd == NULL) {
log_err("Failed to allocate DAOS-private data\n");
rc = ENOMEM;
goto out;
}
dd->queued = 0;
dd->num_ios = td->o.iodepth;
dd->io_us = calloc(dd->num_ios, sizeof(struct io_u *));
if (dd->io_us == NULL) {
log_err("Failed to allocate IO queue\n");
rc = ENOMEM;
goto out;
}
/* initialize DAOS stack if not already up */
if (!daos_initialized) {
rc = daos_fio_global_init(td);
if (rc)
goto out;
daos_initialized = true;
}
rc = daos_eq_create(&dd->eqh);
if (rc) {
log_err("Failed to create event queue: %d\n", rc);
td_verror(td, rc, "daos_eq_create");
goto out;
}
td->io_ops_data = dd;
num_threads++;
out:
if (rc) {
if (dd) {
free(dd->io_us);
free(dd);
}
if (num_threads == 0 && daos_initialized) {
/* don't clobber error return value */
(void)daos_fio_global_cleanup();
daos_initialized = false;
}
}
pthread_mutex_unlock(&daos_mutex);
return rc;
}
static void daos_fio_cleanup(struct thread_data *td)
{
struct daos_data *dd = td->io_ops_data;
int rc;
if (dd == NULL)
return;
rc = daos_eq_destroy(dd->eqh, DAOS_EQ_DESTROY_FORCE);
if (rc < 0) {
log_err("failed to destroy event queue: %d\n", rc);
td_verror(td, rc, "daos_eq_destroy");
}
free(dd->io_us);
free(dd);
pthread_mutex_lock(&daos_mutex);
num_threads--;
if (daos_initialized && num_threads == 0) {
int ret;
ret = daos_fio_global_cleanup();
if (ret < 0 && rc == 0) {
log_err("failed to clean up: %d\n", ret);
td_verror(td, ret, "daos_fio_global_cleanup");
}
daos_initialized = false;
}
pthread_mutex_unlock(&daos_mutex);
}
static int daos_fio_get_file_size(struct thread_data *td, struct fio_file *f)
{
char *file_name = f->file_name;
struct stat stbuf = {0};
int rc;
dprint(FD_FILE, "dfs stat %s\n", f->file_name);
if (!daos_initialized)
return 0;
rc = dfs_stat(dfs, NULL, file_name, &stbuf);
if (rc) {
log_err("Failed to stat %s: %d\n", f->file_name, rc);
td_verror(td, rc, "dfs_stat");
return rc;
}
f->real_file_size = stbuf.st_size;
return 0;
}
static int daos_fio_close(struct thread_data *td, struct fio_file *f)
{
struct daos_data *dd = td->io_ops_data;
int rc;
dprint(FD_FILE, "dfs release %s\n", f->file_name);
rc = dfs_release(dd->obj);
if (rc) {
log_err("Failed to release %s: %d\n", f->file_name, rc);
td_verror(td, rc, "dfs_release");
return rc;
}
return 0;
}
static int daos_fio_open(struct thread_data *td, struct fio_file *f)
{
struct daos_data *dd = td->io_ops_data;
struct daos_fio_options *eo = td->eo;
int flags = 0;
int rc;
dprint(FD_FILE, "dfs open %s (%s/%d/%d)\n",
f->file_name, td_write(td) & !read_only ? "rw" : "r",
td->o.create_on_open, td->o.allow_create);
if (td->o.create_on_open && td->o.allow_create)
flags |= O_CREAT;
if (td_write(td)) {
if (!read_only)
flags |= O_RDWR;
if (td->o.allow_create)
flags |= O_CREAT;
} else if (td_read(td)) {
flags |= O_RDONLY;
}
rc = dfs_open(dfs, NULL, f->file_name,
S_IFREG | S_IRUSR | S_IWUSR,
flags, cid, eo->chsz, NULL, &dd->obj);
if (rc) {
log_err("Failed to open %s: %d\n", f->file_name, rc);
td_verror(td, rc, "dfs_open");
return rc;
}
return 0;
}
static int daos_fio_unlink(struct thread_data *td, struct fio_file *f)
{
int rc;
dprint(FD_FILE, "dfs remove %s\n", f->file_name);
rc = dfs_remove(dfs, NULL, f->file_name, false, NULL);
if (rc) {
log_err("Failed to remove %s: %d\n", f->file_name, rc);
td_verror(td, rc, "dfs_remove");
return rc;
}
return 0;
}
static int daos_fio_invalidate(struct thread_data *td, struct fio_file *f)
{
dprint(FD_FILE, "dfs invalidate %s\n", f->file_name);
return 0;
}
static void daos_fio_io_u_free(struct thread_data *td, struct io_u *io_u)
{
struct daos_iou *io = io_u->engine_data;
if (io) {
io_u->engine_data = NULL;
free(io);
}
}
static int daos_fio_io_u_init(struct thread_data *td, struct io_u *io_u)
{
struct daos_iou *io;
io = malloc(sizeof(struct daos_iou));
if (!io) {
td_verror(td, ENOMEM, "malloc");
return ENOMEM;
}
io->io_u = io_u;
io_u->engine_data = io;
return 0;
}
static struct io_u * daos_fio_event(struct thread_data *td, int event)
{
struct daos_data *dd = td->io_ops_data;
return dd->io_us[event];
}
static int daos_fio_getevents(struct thread_data *td, unsigned int min,
unsigned int max, const struct timespec *t)
{
struct daos_data *dd = td->io_ops_data;
daos_event_t *evp[max];
unsigned int events = 0;
int i;
int rc;
while (events < min) {
rc = daos_eq_poll(dd->eqh, 0, DAOS_EQ_NOWAIT, max, evp);
if (rc < 0) {
log_err("Event poll failed: %d\n", rc);
td_verror(td, rc, "daos_eq_poll");
return events;
}
for (i = 0; i < rc; i++) {
struct daos_iou *io;
struct io_u *io_u;
io = container_of(evp[i], struct daos_iou, ev);
if (io->complete)
log_err("Completion on already completed I/O\n");
io_u = io->io_u;
if (io->ev.ev_error)
io_u->error = io->ev.ev_error;
else
io_u->resid = 0;
dd->io_us[events] = io_u;
dd->queued--;
daos_event_fini(&io->ev);
io->complete = true;
events++;
}
}
dprint(FD_IO, "dfs eq_pool returning %d (%u/%u)\n", events, min, max);
return events;
}
static enum fio_q_status daos_fio_queue(struct thread_data *td,
struct io_u *io_u)
{
struct daos_data *dd = td->io_ops_data;
struct daos_iou *io = io_u->engine_data;
daos_off_t offset = io_u->offset;
int rc;
if (dd->queued == td->o.iodepth)
return FIO_Q_BUSY;
io->sgl.sg_nr = 1;
io->sgl.sg_nr_out = 0;
d_iov_set(&io->iov, io_u->xfer_buf, io_u->xfer_buflen);
io->sgl.sg_iovs = &io->iov;
io->size = io_u->xfer_buflen;
io->complete = false;
rc = daos_event_init(&io->ev, dd->eqh, NULL);
if (rc) {
log_err("Event init failed: %d\n", rc);
io_u->error = rc;
return FIO_Q_COMPLETED;
}
switch (io_u->ddir) {
case DDIR_WRITE:
rc = dfs_write(dfs, dd->obj, &io->sgl, offset, &io->ev);
if (rc) {
log_err("dfs_write failed: %d\n", rc);
io_u->error = rc;
return FIO_Q_COMPLETED;
}
break;
case DDIR_READ:
rc = dfs_read(dfs, dd->obj, &io->sgl, offset, &io->size,
&io->ev);
if (rc) {
log_err("dfs_read failed: %d\n", rc);
io_u->error = rc;
return FIO_Q_COMPLETED;
}
break;
case DDIR_SYNC:
io_u->error = 0;
return FIO_Q_COMPLETED;
default:
dprint(FD_IO, "Invalid IO type: %d\n", io_u->ddir);
io_u->error = -DER_INVAL;
return FIO_Q_COMPLETED;
}
dd->queued++;
return FIO_Q_QUEUED;
}
static int daos_fio_prep(struct thread_data fio_unused *td, struct io_u *io_u)
{
return 0;
}
/* ioengine_ops for get_ioengine() */
FIO_STATIC struct ioengine_ops ioengine = {
.name = "dfs",
.version = FIO_IOOPS_VERSION,
.flags = FIO_DISKLESSIO | FIO_NODISKUTIL,
.setup = daos_fio_setup,
.init = daos_fio_init,
.prep = daos_fio_prep,
.cleanup = daos_fio_cleanup,
.open_file = daos_fio_open,
.invalidate = daos_fio_invalidate,
.get_file_size = daos_fio_get_file_size,
.close_file = daos_fio_close,
.unlink_file = daos_fio_unlink,
.queue = daos_fio_queue,
.getevents = daos_fio_getevents,
.event = daos_fio_event,
.io_u_init = daos_fio_io_u_init,
.io_u_free = daos_fio_io_u_free,
.option_struct_size = sizeof(struct daos_fio_options),
.options = options,
};
static void fio_init fio_dfs_register(void)
{
register_ioengine(&ioengine);
}
static void fio_exit fio_dfs_unregister(void)
{
unregister_ioengine(&ioengine);
}