| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * nvme structure declarations and helper functions for the |
| * io_uring_cmd engine. |
| */ |
| |
| #include "nvme.h" |
| #include "../crc/crc-t10dif.h" |
| #include "../crc/crc64.h" |
| |
| static inline __u64 get_slba(struct nvme_data *data, __u64 offset) |
| { |
| if (data->lba_ext) |
| return offset / data->lba_ext; |
| |
| return offset >> data->lba_shift; |
| } |
| |
| static inline __u32 get_nlb(struct nvme_data *data, __u64 len) |
| { |
| if (data->lba_ext) |
| return len / data->lba_ext - 1; |
| |
| return (len >> data->lba_shift) - 1; |
| } |
| |
/*
 * Generate 16-bit-guard protection information (PI) for every logical
 * block of a write. Always computes pi_data->interval (the byte offset
 * of the PI field within each data/metadata interval) — even for
 * non-writes — since the verify path reuses that value. For writes it
 * then fills guard (CRC16 T10-DIF), application tag and reference tag
 * fields per block, depending on the enabled PRCHK flags in @opts.
 */
static void fio_nvme_generate_pi_16b_guard(struct nvme_data *data,
					   struct io_u *io_u,
					   struct nvme_cmd_ext_io_opts *opts)
{
	struct nvme_pi_data *pi_data = io_u->engine_data;
	struct nvme_16b_guard_pif *pi;
	unsigned char *buf = io_u->xfer_buf;
	unsigned char *md_buf = io_u->mmap_data;
	__u64 slba = get_slba(data, io_u->offset);
	__u32 nlb = get_nlb(data, io_u->xfer_buflen) + 1; /* 1-based block count */
	__u32 lba_num = 0;
	__u16 guard = 0;

	/*
	 * pi_loc set means PI is placed at the start of the metadata
	 * region; otherwise it occupies the last bytes of the interval.
	 */
	if (data->pi_loc) {
		if (data->lba_ext)
			pi_data->interval = data->lba_ext - data->ms;
		else
			pi_data->interval = 0;
	} else {
		if (data->lba_ext)
			pi_data->interval = data->lba_ext - sizeof(struct nvme_16b_guard_pif);
		else
			pi_data->interval = data->ms - sizeof(struct nvme_16b_guard_pif);
	}

	/* Only writes need PI generated; interval above is still needed. */
	if (io_u->ddir != DDIR_WRITE)
		return;

	while (lba_num < nlb) {
		/* PI sits in the data block (extended LBA) or metadata buffer */
		if (data->lba_ext)
			pi = (struct nvme_16b_guard_pif *)(buf + pi_data->interval);
		else
			pi = (struct nvme_16b_guard_pif *)(md_buf + pi_data->interval);

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
			if (data->lba_ext) {
				guard = fio_crc_t10dif(0, buf, pi_data->interval);
			} else {
				/* CRC covers data, then any metadata before the PI */
				guard = fio_crc_t10dif(0, buf, data->lba_size);
				guard = fio_crc_t10dif(guard, md_buf, pi_data->interval);
			}
			pi->guard = cpu_to_be16(guard);
		}

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
			pi->apptag = cpu_to_be16(pi_data->apptag);

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
			switch (data->pi_type) {
			case NVME_NS_DPS_PI_TYPE1:
			case NVME_NS_DPS_PI_TYPE2:
				/* Type 1/2: reftag is the low 32 bits of the LBA */
				pi->srtag = cpu_to_be32((__u32)slba + lba_num);
				break;
			case NVME_NS_DPS_PI_TYPE3:
				/* Type 3: reference tag is not checked */
				break;
			}
		}
		/* Advance to the next interval */
		if (data->lba_ext) {
			buf += data->lba_ext;
		} else {
			buf += data->lba_size;
			md_buf += data->ms;
		}
		lba_num++;
	}
}
| |
| static int fio_nvme_verify_pi_16b_guard(struct nvme_data *data, |
| struct io_u *io_u) |
| { |
| struct nvme_pi_data *pi_data = io_u->engine_data; |
| struct nvme_16b_guard_pif *pi; |
| struct fio_file *f = io_u->file; |
| unsigned char *buf = io_u->xfer_buf; |
| unsigned char *md_buf = io_u->mmap_data; |
| __u64 slba = get_slba(data, io_u->offset); |
| __u32 nlb = get_nlb(data, io_u->xfer_buflen) + 1; |
| __u32 lba_num = 0; |
| __u16 unmask_app, unmask_app_exp, guard = 0; |
| |
| while (lba_num < nlb) { |
| if (data->lba_ext) |
| pi = (struct nvme_16b_guard_pif *)(buf + pi_data->interval); |
| else |
| pi = (struct nvme_16b_guard_pif *)(md_buf + pi_data->interval); |
| |
| if (data->pi_type == NVME_NS_DPS_PI_TYPE3) { |
| if (pi->apptag == NVME_PI_APP_DISABLE && |
| pi->srtag == NVME_PI_REF_DISABLE) |
| goto next; |
| } else if (data->pi_type == NVME_NS_DPS_PI_TYPE1 || |
| data->pi_type == NVME_NS_DPS_PI_TYPE2) { |
| if (pi->apptag == NVME_PI_APP_DISABLE) |
| goto next; |
| } |
| |
| if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) { |
| if (data->lba_ext) { |
| guard = fio_crc_t10dif(0, buf, pi_data->interval); |
| } else { |
| guard = fio_crc_t10dif(0, buf, data->lba_size); |
| guard = fio_crc_t10dif(guard, md_buf, pi_data->interval); |
| } |
| if (be16_to_cpu(pi->guard) != guard) { |
| log_err("%s: Guard compare error: LBA: %llu Expected=%x, Actual=%x\n", |
| f->file_name, (unsigned long long)slba, |
| guard, be16_to_cpu(pi->guard)); |
| return -EIO; |
| } |
| } |
| |
| if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_APP) { |
| unmask_app = be16_to_cpu(pi->apptag) & pi_data->apptag_mask; |
| unmask_app_exp = pi_data->apptag & pi_data->apptag_mask; |
| if (unmask_app != unmask_app_exp) { |
| log_err("%s: APPTAG compare error: LBA: %llu Expected=%x, Actual=%x\n", |
| f->file_name, (unsigned long long)slba, |
| unmask_app_exp, unmask_app); |
| return -EIO; |
| } |
| } |
| |
| if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_REF) { |
| switch (data->pi_type) { |
| case NVME_NS_DPS_PI_TYPE1: |
| case NVME_NS_DPS_PI_TYPE2: |
| if (be32_to_cpu(pi->srtag) != |
| ((__u32)slba + lba_num)) { |
| log_err("%s: REFTAG compare error: LBA: %llu Expected=%x, Actual=%x\n", |
| f->file_name, (unsigned long long)slba, |
| (__u32)slba + lba_num, |
| be32_to_cpu(pi->srtag)); |
| return -EIO; |
| } |
| break; |
| case NVME_NS_DPS_PI_TYPE3: |
| break; |
| } |
| } |
| next: |
| if (data->lba_ext) { |
| buf += data->lba_ext; |
| } else { |
| buf += data->lba_size; |
| md_buf += data->ms; |
| } |
| lba_num++; |
| } |
| |
| return 0; |
| } |
| |
/*
 * Generate 64-bit-guard protection information (PI) for every logical
 * block of a write. Always computes pi_data->interval (the byte offset
 * of the PI field within each data/metadata interval) — even for
 * non-writes — since the verify path reuses that value. For writes it
 * fills guard (CRC64-NVMe), application tag and 48-bit reference tag
 * fields per block, depending on the enabled PRCHK flags in @opts.
 */
static void fio_nvme_generate_pi_64b_guard(struct nvme_data *data,
					   struct io_u *io_u,
					   struct nvme_cmd_ext_io_opts *opts)
{
	struct nvme_pi_data *pi_data = io_u->engine_data;
	struct nvme_64b_guard_pif *pi;
	unsigned char *buf = io_u->xfer_buf;
	unsigned char *md_buf = io_u->mmap_data;
	uint64_t guard = 0;
	__u64 slba = get_slba(data, io_u->offset);
	__u32 nlb = get_nlb(data, io_u->xfer_buflen) + 1; /* 1-based block count */
	__u32 lba_num = 0;

	/*
	 * pi_loc set means PI is placed at the start of the metadata
	 * region; otherwise it occupies the last bytes of the interval.
	 */
	if (data->pi_loc) {
		if (data->lba_ext)
			pi_data->interval = data->lba_ext - data->ms;
		else
			pi_data->interval = 0;
	} else {
		if (data->lba_ext)
			pi_data->interval = data->lba_ext - sizeof(struct nvme_64b_guard_pif);
		else
			pi_data->interval = data->ms - sizeof(struct nvme_64b_guard_pif);
	}

	/* Only writes need PI generated; interval above is still needed. */
	if (io_u->ddir != DDIR_WRITE)
		return;

	while (lba_num < nlb) {
		/* PI sits in the data block (extended LBA) or metadata buffer */
		if (data->lba_ext)
			pi = (struct nvme_64b_guard_pif *)(buf + pi_data->interval);
		else
			pi = (struct nvme_64b_guard_pif *)(md_buf + pi_data->interval);

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
			if (data->lba_ext) {
				guard = fio_crc64_nvme(0, buf, pi_data->interval);
			} else {
				/* CRC covers data, then any metadata before the PI */
				guard = fio_crc64_nvme(0, buf, data->lba_size);
				guard = fio_crc64_nvme(guard, md_buf, pi_data->interval);
			}
			pi->guard = cpu_to_be64(guard);
		}

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
			pi->apptag = cpu_to_be16(pi_data->apptag);

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
			switch (data->pi_type) {
			case NVME_NS_DPS_PI_TYPE1:
			case NVME_NS_DPS_PI_TYPE2:
				/* Type 1/2: 48-bit reftag holds the LBA */
				put_unaligned_be48(slba + lba_num, pi->srtag);
				break;
			case NVME_NS_DPS_PI_TYPE3:
				/* Type 3: reference tag is not checked */
				break;
			}
		}
		/* Advance to the next interval */
		if (data->lba_ext) {
			buf += data->lba_ext;
		} else {
			buf += data->lba_size;
			md_buf += data->ms;
		}
		lba_num++;
	}
}
| |
| static int fio_nvme_verify_pi_64b_guard(struct nvme_data *data, |
| struct io_u *io_u) |
| { |
| struct nvme_pi_data *pi_data = io_u->engine_data; |
| struct nvme_64b_guard_pif *pi; |
| struct fio_file *f = io_u->file; |
| unsigned char *buf = io_u->xfer_buf; |
| unsigned char *md_buf = io_u->mmap_data; |
| __u64 slba = get_slba(data, io_u->offset); |
| __u64 ref, ref_exp, guard = 0; |
| __u32 nlb = get_nlb(data, io_u->xfer_buflen) + 1; |
| __u32 lba_num = 0; |
| __u16 unmask_app, unmask_app_exp; |
| |
| while (lba_num < nlb) { |
| if (data->lba_ext) |
| pi = (struct nvme_64b_guard_pif *)(buf + pi_data->interval); |
| else |
| pi = (struct nvme_64b_guard_pif *)(md_buf + pi_data->interval); |
| |
| if (data->pi_type == NVME_NS_DPS_PI_TYPE3) { |
| if (pi->apptag == NVME_PI_APP_DISABLE && |
| fio_nvme_pi_ref_escape(pi->srtag)) |
| goto next; |
| } else if (data->pi_type == NVME_NS_DPS_PI_TYPE1 || |
| data->pi_type == NVME_NS_DPS_PI_TYPE2) { |
| if (pi->apptag == NVME_PI_APP_DISABLE) |
| goto next; |
| } |
| |
| if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) { |
| if (data->lba_ext) { |
| guard = fio_crc64_nvme(0, buf, pi_data->interval); |
| } else { |
| guard = fio_crc64_nvme(0, buf, data->lba_size); |
| guard = fio_crc64_nvme(guard, md_buf, pi_data->interval); |
| } |
| if (be64_to_cpu((uint64_t)pi->guard) != guard) { |
| log_err("%s: Guard compare error: LBA: %llu Expected=%llx, Actual=%llx\n", |
| f->file_name, (unsigned long long)slba, |
| guard, be64_to_cpu((uint64_t)pi->guard)); |
| return -EIO; |
| } |
| } |
| |
| if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_APP) { |
| unmask_app = be16_to_cpu(pi->apptag) & pi_data->apptag_mask; |
| unmask_app_exp = pi_data->apptag & pi_data->apptag_mask; |
| if (unmask_app != unmask_app_exp) { |
| log_err("%s: APPTAG compare error: LBA: %llu Expected=%x, Actual=%x\n", |
| f->file_name, (unsigned long long)slba, |
| unmask_app_exp, unmask_app); |
| return -EIO; |
| } |
| } |
| |
| if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_REF) { |
| switch (data->pi_type) { |
| case NVME_NS_DPS_PI_TYPE1: |
| case NVME_NS_DPS_PI_TYPE2: |
| ref = get_unaligned_be48(pi->srtag); |
| ref_exp = (slba + lba_num) & ((1ULL << 48) - 1); |
| if (ref != ref_exp) { |
| log_err("%s: REFTAG compare error: LBA: %llu Expected=%llx, Actual=%llx\n", |
| f->file_name, (unsigned long long)slba, |
| ref_exp, ref); |
| return -EIO; |
| } |
| break; |
| case NVME_NS_DPS_PI_TYPE3: |
| break; |
| } |
| } |
| next: |
| if (data->lba_ext) { |
| buf += data->lba_ext; |
| } else { |
| buf += data->lba_size; |
| md_buf += data->ms; |
| } |
| lba_num++; |
| } |
| |
| return 0; |
| } |
| void fio_nvme_uring_cmd_trim_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u, |
| struct nvme_dsm *dsm) |
| { |
| struct nvme_data *data = FILE_ENG_DATA(io_u->file); |
| struct trim_range *range; |
| uint8_t *buf_point; |
| int i; |
| |
| cmd->opcode = nvme_cmd_dsm; |
| cmd->nsid = data->nsid; |
| cmd->cdw11 = NVME_ATTRIBUTE_DEALLOCATE; |
| cmd->addr = (__u64) (uintptr_t) (&dsm->range[0]); |
| |
| if (dsm->nr_ranges == 1) { |
| dsm->range[0].slba = get_slba(data, io_u->offset); |
| /* nlb is a 1-based value for deallocate */ |
| dsm->range[0].nlb = get_nlb(data, io_u->xfer_buflen) + 1; |
| cmd->cdw10 = 0; |
| cmd->data_len = sizeof(struct nvme_dsm_range); |
| } else { |
| buf_point = io_u->xfer_buf; |
| for (i = 0; i < io_u->number_trim; i++) { |
| range = (struct trim_range *)buf_point; |
| dsm->range[i].slba = get_slba(data, range->start); |
| /* nlb is a 1-based value for deallocate */ |
| dsm->range[i].nlb = get_nlb(data, range->len) + 1; |
| buf_point += sizeof(struct trim_range); |
| } |
| cmd->cdw10 = io_u->number_trim - 1; |
| cmd->data_len = io_u->number_trim * sizeof(struct nvme_dsm_range); |
| } |
| } |
| |
| int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u, |
| struct iovec *iov, struct nvme_dsm *dsm) |
| { |
| struct nvme_data *data = FILE_ENG_DATA(io_u->file); |
| __u64 slba; |
| __u32 nlb; |
| |
| memset(cmd, 0, sizeof(struct nvme_uring_cmd)); |
| |
| switch (io_u->ddir) { |
| case DDIR_READ: |
| cmd->opcode = nvme_cmd_read; |
| break; |
| case DDIR_WRITE: |
| cmd->opcode = nvme_cmd_write; |
| break; |
| case DDIR_TRIM: |
| fio_nvme_uring_cmd_trim_prep(cmd, io_u, dsm); |
| return 0; |
| default: |
| return -ENOTSUP; |
| } |
| |
| slba = get_slba(data, io_u->offset); |
| nlb = get_nlb(data, io_u->xfer_buflen); |
| |
| /* cdw10 and cdw11 represent starting lba */ |
| cmd->cdw10 = slba & 0xffffffff; |
| cmd->cdw11 = slba >> 32; |
| /* cdw12 represent number of lba's for read/write */ |
| cmd->cdw12 = nlb | (io_u->dtype << 20); |
| cmd->cdw13 = io_u->dspec << 16; |
| if (iov) { |
| iov->iov_base = io_u->xfer_buf; |
| iov->iov_len = io_u->xfer_buflen; |
| cmd->addr = (__u64)(uintptr_t)iov; |
| cmd->data_len = 1; |
| } else { |
| cmd->addr = (__u64)(uintptr_t)io_u->xfer_buf; |
| cmd->data_len = io_u->xfer_buflen; |
| } |
| if (data->lba_shift && data->ms) { |
| cmd->metadata = (__u64)(uintptr_t)io_u->mmap_data; |
| cmd->metadata_len = (nlb + 1) * data->ms; |
| } |
| cmd->nsid = data->nsid; |
| return 0; |
| } |
| |
/*
 * Fill the protection-information related dwords of an I/O command and,
 * when the controller does not insert PI itself (PRACT clear), generate
 * the per-block PI in the buffers via the guard-type specific helper.
 */
void fio_nvme_pi_fill(struct nvme_uring_cmd *cmd, struct io_u *io_u,
		      struct nvme_cmd_ext_io_opts *opts)
{
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);
	__u64 slba;

	slba = get_slba(data, io_u->offset);
	/* PRINFO/PRACT flags live in the upper bits of cdw12 */
	cmd->cdw12 |= opts->io_flags;

	/* Host generates PI only when PRACT is not delegated to the device */
	if (data->pi_type && !(opts->io_flags & NVME_IO_PRINFO_PRACT)) {
		if (data->guard_type == NVME_NVM_NS_16B_GUARD)
			fio_nvme_generate_pi_16b_guard(data, io_u, opts);
		else if (data->guard_type == NVME_NVM_NS_64B_GUARD)
			fio_nvme_generate_pi_64b_guard(data, io_u, opts);
	}

	switch (data->pi_type) {
	case NVME_NS_DPS_PI_TYPE1:
	case NVME_NS_DPS_PI_TYPE2:
		switch (data->guard_type) {
		case NVME_NVM_NS_16B_GUARD:
			/* 32-bit initial reference tag in cdw14 */
			if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF)
				cmd->cdw14 = (__u32)slba;
			break;
		case NVME_NVM_NS_64B_GUARD:
			/* 48-bit reftag: low 32 bits in cdw14, high 16 in cdw3 */
			if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
				cmd->cdw14 = (__u32)slba;
				cmd->cdw3 = ((slba >> 32) & 0xffff);
			}
			break;
		default:
			break;
		}
		/* cdw15: apptag mask (high 16 bits) and apptag (low 16 bits) */
		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
			cmd->cdw15 = (opts->apptag_mask << 16 | opts->apptag);
		break;
	case NVME_NS_DPS_PI_TYPE3:
		/* Type 3 has no reftag check; only the apptag is relevant */
		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
			cmd->cdw15 = (opts->apptag_mask << 16 | opts->apptag);
		break;
	case NVME_NS_DPS_PI_NONE:
		break;
	}
}
| |
| int fio_nvme_pi_verify(struct nvme_data *data, struct io_u *io_u) |
| { |
| int ret = 0; |
| |
| switch (data->guard_type) { |
| case NVME_NVM_NS_16B_GUARD: |
| ret = fio_nvme_verify_pi_16b_guard(data, io_u); |
| break; |
| case NVME_NVM_NS_64B_GUARD: |
| ret = fio_nvme_verify_pi_64b_guard(data, io_u); |
| break; |
| default: |
| break; |
| } |
| |
| return ret; |
| } |
| |
| static int nvme_identify(int fd, __u32 nsid, enum nvme_identify_cns cns, |
| enum nvme_csi csi, void *data) |
| { |
| struct nvme_passthru_cmd cmd = { |
| .opcode = nvme_admin_identify, |
| .nsid = nsid, |
| .addr = (__u64)(uintptr_t)data, |
| .data_len = NVME_IDENTIFY_DATA_SIZE, |
| .cdw10 = cns, |
| .cdw11 = csi << NVME_IDENTIFY_CSI_SHIFT, |
| .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT, |
| }; |
| |
| return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd); |
| } |
| |
/*
 * Probe the NVMe character device backing @f and populate @data with
 * the namespace geometry: LBA size/shift, metadata size, extended-LBA
 * size, and end-to-end protection attributes (PI type, guard type, PI
 * size and location). On success *nlba receives the namespace size in
 * logical blocks. Returns 0 on success, 1 for a non-char-device file,
 * or a negative errno / ioctl status on failure.
 */
int fio_nvme_get_info(struct fio_file *f, __u64 *nlba, __u32 pi_act,
		      struct nvme_data *data)
{
	struct nvme_id_ns ns;
	struct nvme_id_ctrl ctrl;
	struct nvme_nvm_id_ns nvm_ns;
	int namespace_id;
	int fd, err;
	__u32 format_idx, elbaf;

	if (f->filetype != FIO_TYPE_CHAR) {
		log_err("ioengine io_uring_cmd only works with nvme ns "
			"generic char devices (/dev/ngXnY)\n");
		return 1;
	}

	fd = open(f->file_name, O_RDONLY);
	if (fd < 0)
		return -errno;

	namespace_id = ioctl(fd, NVME_IOCTL_ID);
	if (namespace_id < 0) {
		err = -errno;
		log_err("%s: failed to fetch namespace-id\n", f->file_name);
		goto out;
	}

	/* Controller identify is needed later for the ELBAS capability bit */
	err = nvme_identify(fd, 0, NVME_IDENTIFY_CNS_CTRL, NVME_CSI_NVM, &ctrl);
	if (err) {
		log_err("%s: failed to fetch identify ctrl\n", f->file_name);
		goto out;
	}

	/*
	 * Identify namespace to get namespace-id, namespace size in LBA's
	 * and LBA data size.
	 */
	err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_NS,
				NVME_CSI_NVM, &ns);
	if (err) {
		log_err("%s: failed to fetch identify namespace\n",
			f->file_name);
		goto out;
	}

	data->nsid = namespace_id;

	/*
	 * 16 or 64 as maximum number of supported LBA formats.
	 * From flbas bit 0-3 indicates lsb and bit 5-6 indicates msb
	 * of the format index used to format the namespace.
	 */
	if (ns.nlbaf < 16)
		format_idx = ns.flbas & 0xf;
	else
		format_idx = (ns.flbas & 0xf) + (((ns.flbas >> 5) & 0x3) << 4);

	data->lba_size = 1 << ns.lbaf[format_idx].ds;
	data->ms = le16_to_cpu(ns.lbaf[format_idx].ms);

	/* Check for end to end data protection support */
	if (data->ms && (ns.dps & NVME_NS_DPS_PI_MASK))
		data->pi_type = (ns.dps & NVME_NS_DPS_PI_MASK);

	/* Without PI, skip all guard-type probing */
	if (!data->pi_type)
		goto check_elba;

	if (ctrl.ctratt & NVME_CTRL_CTRATT_ELBAS) {
		/* Extended LBA formats: fetch the NVM-specific identify data */
		err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_CSI_NS,
					NVME_CSI_NVM, &nvm_ns);
		if (err) {
			log_err("%s: failed to fetch identify nvm namespace\n",
				f->file_name);
			goto out;
		}

		elbaf = le32_to_cpu(nvm_ns.elbaf[format_idx]);

		/* Currently we don't support storage tags */
		if (elbaf & NVME_ID_NS_NVM_STS_MASK) {
			log_err("%s: Storage tag not supported\n",
				f->file_name);
			err = -ENOTSUP;
			goto out;
		}

		data->guard_type = (elbaf >> NVME_ID_NS_NVM_GUARD_SHIFT) &
				NVME_ID_NS_NVM_GUARD_MASK;

		/* No 32 bit guard, as storage tag is mandatory for it */
		switch (data->guard_type) {
		case NVME_NVM_NS_16B_GUARD:
			data->pi_size = sizeof(struct nvme_16b_guard_pif);
			break;
		case NVME_NVM_NS_64B_GUARD:
			data->pi_size = sizeof(struct nvme_64b_guard_pif);
			break;
		default:
			break;
		}
	} else {
		/* No ELBAS support: legacy 16-bit guard is the only option */
		data->guard_type = NVME_NVM_NS_16B_GUARD;
		data->pi_size = sizeof(struct nvme_16b_guard_pif);
	}

	/*
	 * when PRACT bit is set to 1, and metadata size is equal to protection
	 * information size, controller inserts and removes PI for write and
	 * read commands respectively.
	 */
	if (pi_act && data->ms == data->pi_size)
		data->ms = 0;

	/* PI first: protection info at the start of the metadata region */
	data->pi_loc = (ns.dps & NVME_NS_DPS_PI_FIRST);

check_elba:
	/*
	 * Bit 4 for flbas indicates if metadata is transferred at the end of
	 * logical block creating an extended LBA.
	 */
	if (data->ms && ((ns.flbas >> 4) & 0x1))
		data->lba_ext = data->lba_size + data->ms;
	else
		data->lba_shift = ilog2(data->lba_size);

	*nlba = ns.nsze;

out:
	close(fd);
	return err;
}
| |
/*
 * Determine the zoned-block-device model of @f by probing the ZNS
 * command set via identify. Any identify failure is treated as "device
 * is not zoned" (ZBD_NONE) rather than as an error, which is why this
 * function always returns 0 on valid char devices; only a non-char
 * filetype or open() failure yields a non-zero return.
 */
int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
			     enum zbd_zoned_model *model)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	struct nvme_id_ns ns;
	struct nvme_passthru_cmd cmd;
	int fd, ret = 0;

	if (f->filetype != FIO_TYPE_CHAR)
		return -EINVAL;

	/* File is not yet opened */
	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	/* Using nvme_id_ns for data as sizes are same */
	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_CTRL,
				NVME_CSI_ZNS, &ns);
	if (ret) {
		/* No ZNS controller data: not a zoned device */
		*model = ZBD_NONE;
		goto out;
	}

	memset(&cmd, 0, sizeof(struct nvme_passthru_cmd));

	/* Using nvme_id_ns for data as sizes are same */
	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
				NVME_CSI_ZNS, &ns);
	if (ret) {
		/* Namespace is not ZNS-attached: not a zoned device */
		*model = ZBD_NONE;
		goto out;
	}

	*model = ZBD_HOST_MANAGED;
out:
	close(fd);
	return 0;
}
| |
| static int nvme_report_zones(int fd, __u32 nsid, __u64 slba, __u32 zras_feat, |
| __u32 data_len, void *data) |
| { |
| struct nvme_passthru_cmd cmd = { |
| .opcode = nvme_zns_cmd_mgmt_recv, |
| .nsid = nsid, |
| .addr = (__u64)(uintptr_t)data, |
| .data_len = data_len, |
| .cdw10 = slba & 0xffffffff, |
| .cdw11 = slba >> 32, |
| .cdw12 = (data_len >> 2) - 1, |
| .cdw13 = NVME_ZNS_ZRA_REPORT_ZONES | zras_feat, |
| .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT, |
| }; |
| |
| return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd); |
| } |
| |
/*
 * Fetch the zone layout of @f starting at @offset and translate it into
 * fio's zbd_zone entries in @zbdz (at most @nr_zones). Zones are pulled
 * from the device in chunks of up to 1024 descriptors. Returns the
 * number of zones reported, or a negative error code.
 */
int fio_nvme_report_zones(struct thread_data *td, struct fio_file *f,
			  uint64_t offset, struct zbd_zone *zbdz,
			  unsigned int nr_zones)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	struct nvme_zone_report *zr;
	struct nvme_zns_id_ns zns_ns;
	struct nvme_id_ns ns;
	unsigned int i = 0, j, zones_fetched = 0;
	unsigned int max_zones, zones_chunks = 1024;
	int fd, ret = 0;
	__u32 zr_len;
	__u64 zlen;

	/* File is not yet opened */
	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	zones_fetched = 0;
	/* Report buffer: header plus one descriptor per zone in a chunk */
	zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
	zr = calloc(1, zr_len);
	if (!zr) {
		close(fd);
		return -ENOMEM;
	}

	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_NS,
				NVME_CSI_NVM, &ns);
	if (ret) {
		log_err("%s: nvme_identify_ns failed, err=%d\n", f->file_name,
			ret);
		goto out;
	}

	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
				NVME_CSI_ZNS, &zns_ns);
	if (ret) {
		log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
			f->file_name, ret);
		goto out;
	}
	/* Zone size in bytes, from the active LBA format's zsze (in LBAs) */
	zlen = zns_ns.lbafe[ns.flbas & 0x0f].zsze << data->lba_shift;

	/* Clamp the request to the zones that actually fit in the file */
	max_zones = (f->real_file_size - offset) / zlen;
	if (max_zones < nr_zones)
		nr_zones = max_zones;

	if (nr_zones < zones_chunks)
		zones_chunks = nr_zones;

	while (zones_fetched < nr_zones) {
		/* Shrink the final chunk to the zones still outstanding */
		if (zones_fetched + zones_chunks >= nr_zones) {
			zones_chunks = nr_zones - zones_fetched;
			zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
		}
		ret = nvme_report_zones(fd, data->nsid, offset >> data->lba_shift,
					NVME_ZNS_ZRAS_FEAT_ERZ, zr_len, (void *)zr);
		if (ret) {
			log_err("%s: nvme_zns_report_zones failed, err=%d\n",
				f->file_name, ret);
			goto out;
		}

		/* Transform the zone-report */
		for (j = 0; j < zr->nr_zones; j++, i++) {
			struct nvme_zns_desc *desc = (struct nvme_zns_desc *)&(zr->entries[j]);

			zbdz[i].start = desc->zslba << data->lba_shift;
			zbdz[i].len = zlen;
			zbdz[i].wp = desc->wp << data->lba_shift;
			zbdz[i].capacity = desc->zcap << data->lba_shift;

			/* Zone Type is stored in first 4 bits. */
			switch (desc->zt & 0x0f) {
			case NVME_ZONE_TYPE_SEQWRITE_REQ:
				zbdz[i].type = ZBD_ZONE_TYPE_SWR;
				break;
			default:
				log_err("%s: invalid type for zone at offset %llu.\n",
					f->file_name, (unsigned long long) desc->zslba);
				ret = -EIO;
				goto out;
			}

			/* Zone State is stored in last 4 bits. */
			switch (desc->zs >> 4) {
			case NVME_ZNS_ZS_EMPTY:
				zbdz[i].cond = ZBD_ZONE_COND_EMPTY;
				break;
			case NVME_ZNS_ZS_IMPL_OPEN:
				zbdz[i].cond = ZBD_ZONE_COND_IMP_OPEN;
				break;
			case NVME_ZNS_ZS_EXPL_OPEN:
				zbdz[i].cond = ZBD_ZONE_COND_EXP_OPEN;
				break;
			case NVME_ZNS_ZS_CLOSED:
				zbdz[i].cond = ZBD_ZONE_COND_CLOSED;
				break;
			case NVME_ZNS_ZS_FULL:
				zbdz[i].cond = ZBD_ZONE_COND_FULL;
				break;
			case NVME_ZNS_ZS_READ_ONLY:
			case NVME_ZNS_ZS_OFFLINE:
			default:
				/* Treat all these conditions as offline (don't use!) */
				zbdz[i].cond = ZBD_ZONE_COND_OFFLINE;
				zbdz[i].wp = zbdz[i].start;
			}
		}
		zones_fetched += zr->nr_zones;
		offset += zr->nr_zones * zlen;
	}

	ret = zones_fetched;
out:
	free(zr);
	close(fd);

	return ret;
}
| |
/*
 * Reset the write pointer of every zone covered by [offset, offset+length)
 * by issuing one Zone Management Send (reset) per zone. Uses the file's
 * already-open fd when available, otherwise opens a temporary one.
 * Returns the negated status of the last ioctl.
 * NOTE(review): errors from all but the final zone's ioctl are
 * overwritten by later iterations — presumably intentional best-effort;
 * confirm before relying on the return value per zone.
 */
int fio_nvme_reset_wp(struct thread_data *td, struct fio_file *f,
		      uint64_t offset, uint64_t length)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	unsigned int nr_zones;
	unsigned long long zslba;
	int i, fd, ret = 0;

	/* If the file is not yet opened, open it for this function. */
	fd = f->fd;
	if (fd < 0) {
		fd = open(f->file_name, O_RDWR | O_LARGEFILE);
		if (fd < 0)
			return -errno;
	}

	zslba = offset >> data->lba_shift;
	/* Round up so a partial trailing zone is still reset */
	nr_zones = (length + td->o.zone_size - 1) / td->o.zone_size;

	for (i = 0; i < nr_zones; i++, zslba += (td->o.zone_size >> data->lba_shift)) {
		struct nvme_passthru_cmd cmd = {
			.opcode = nvme_zns_cmd_mgmt_send,
			.nsid = data->nsid,
			/* zone slba split across cdw10 (low) and cdw11 (high) */
			.cdw10 = zslba & 0xffffffff,
			.cdw11 = zslba >> 32,
			.cdw13 = NVME_ZNS_ZSA_RESET,
			.addr = (__u64)(uintptr_t)NULL,
			.data_len = 0,
			.timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
		};

		ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
	}

	/* Only close the fd if this function opened it */
	if (f->fd < 0)
		close(fd);
	return -ret;
}
| |
| int fio_nvme_get_max_open_zones(struct thread_data *td, struct fio_file *f, |
| unsigned int *max_open_zones) |
| { |
| struct nvme_data *data = FILE_ENG_DATA(f); |
| struct nvme_zns_id_ns zns_ns; |
| int fd, ret = 0; |
| |
| fd = open(f->file_name, O_RDONLY | O_LARGEFILE); |
| if (fd < 0) |
| return -errno; |
| |
| ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS, |
| NVME_CSI_ZNS, &zns_ns); |
| if (ret) { |
| log_err("%s: nvme_zns_identify_ns failed, err=%d\n", |
| f->file_name, ret); |
| goto out; |
| } |
| |
| *max_open_zones = zns_ns.mor + 1; |
| out: |
| close(fd); |
| return ret; |
| } |
| |
| static inline int nvme_fdp_reclaim_unit_handle_status(int fd, __u32 nsid, |
| __u32 data_len, void *data) |
| { |
| struct nvme_passthru_cmd cmd = { |
| .opcode = nvme_cmd_io_mgmt_recv, |
| .nsid = nsid, |
| .addr = (__u64)(uintptr_t)data, |
| .data_len = data_len, |
| .cdw10 = 1, |
| .cdw11 = (data_len >> 2) - 1, |
| }; |
| |
| return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd); |
| } |
| |
| int fio_nvme_iomgmt_ruhs(struct thread_data *td, struct fio_file *f, |
| struct nvme_fdp_ruh_status *ruhs, __u32 bytes) |
| { |
| struct nvme_data *data = FILE_ENG_DATA(f); |
| int fd, ret; |
| |
| fd = open(f->file_name, O_RDONLY | O_LARGEFILE); |
| if (fd < 0) |
| return -errno; |
| |
| ret = nvme_fdp_reclaim_unit_handle_status(fd, data->nsid, bytes, ruhs); |
| if (ret) { |
| log_err("%s: nvme_fdp_reclaim_unit_handle_status failed, err=%d\n", |
| f->file_name, ret); |
| errno = ENOTSUP; |
| } else |
| errno = 0; |
| |
| ret = -errno; |
| close(fd); |
| return ret; |
| } |