Spdk/examples/nvme/fio_plugin/fio_plugin.c
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) Intel Corporation. All rights reserved.
* Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
*/
#include "spdk/stdinc.h"
#include "spdk/nvme.h"
#include "spdk/nvme_zns.h"
#include "spdk/vmd.h"
#include "spdk/env.h"
#include "spdk/string.h"
#include "spdk/log.h"
#include "spdk/endian.h"
#include "spdk/dif.h"
#include "spdk/util.h"
#include "config-host.h"
#include "fio.h"
#include "optgroup.h"
#ifdef for_each_rw_ddir
#define FIO_HAS_ZBD (FIO_IOOPS_VERSION >= 26)
#else
#define FIO_HAS_ZBD (0)
#endif
/* FreeBSD is missing CLOCK_MONOTONIC_RAW,
* so an alternative is provided. */
#ifndef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC
#endif
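/* Alignment used for all I/O data and metadata buffers allocated by this plugin. */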
#define NVME_IO_ALIGN 4096
static bool g_spdk_env_initialized;
static bool g_log_flag_error;
static int g_spdk_enable_sgl = 0;
static uint32_t g_spdk_sge_size = 4096;
static uint32_t g_spdk_bit_bucket_data_len = 0;
static uint32_t g_spdk_pract_flag;
static uint32_t g_spdk_prchk_flags;
static uint32_t g_spdk_md_per_io_size = 4096;
static uint16_t g_spdk_apptag;
static uint16_t g_spdk_apptag_mask;
struct spdk_fio_options {
void *pad; /* off1 in the option descriptions below must not be 0, so pad the start of the struct */
int enable_wrr;
int arbitration_burst;
int low_weight;
int medium_weight;
int high_weight;
int wrr_priority;
int mem_size;
int shm_id;
int enable_sgl;
int sge_size;
int bit_bucket_data_len;
char *hostnqn;
int pi_act;
char *pi_chk;
int md_per_io_size;
int apptag;
int apptag_mask;
char *digest_enable;
int enable_vmd;
int initial_zone_reset;
int zone_append;
int print_qid_mappings;
char *log_flags;
};
struct spdk_fio_request {
struct io_u *io;
/** Offset in current iovec, fio only uses 1 vector */
uint32_t iov_offset;
/** Amount of data used for Bit Bucket SGL */
uint32_t bit_bucket_data_len;
/** Context for NVMe PI */
struct spdk_dif_ctx dif_ctx;
/** Separate metadata buffer pointer */
void *md_buf;
struct spdk_fio_thread *fio_thread;
struct spdk_fio_qpair *fio_qpair;
};
struct spdk_fio_ctrlr {
struct spdk_nvme_transport_id tr_id;
struct spdk_nvme_ctrlr_opts opts;
struct spdk_nvme_ctrlr *ctrlr;
TAILQ_ENTRY(spdk_fio_ctrlr) link;
};
static TAILQ_HEAD(, spdk_fio_ctrlr) g_ctrlrs = TAILQ_HEAD_INITIALIZER(g_ctrlrs);
static int g_td_count;
static pthread_t g_ctrlr_thread_id = 0;
static pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool g_error;
struct spdk_fio_qpair {
struct fio_file *f;
struct spdk_nvme_qpair *qpair;
struct spdk_nvme_ns *ns;
uint32_t io_flags;
bool zone_append_enabled;
bool nvme_pi_enabled;
/* True for DIF and false for DIX, and this is valid only if nvme_pi_enabled is true. */
bool extended_lba;
/* True for protection info transferred at start of metadata,
* false for protection info transferred at end of metadata, and
* this is valid only if nvme_pi_enabled is true.
*/
bool md_start;
TAILQ_ENTRY(spdk_fio_qpair) link;
struct spdk_fio_ctrlr *fio_ctrlr;
};
struct spdk_fio_thread {
struct thread_data *td;
TAILQ_HEAD(, spdk_fio_qpair) fio_qpair;
struct spdk_fio_qpair *fio_qpair_current; /* the current fio_qpair to be handled. */
struct io_u **iocq; /* io completion queue */
unsigned int iocq_count; /* number of iocq entries filled by last getevents */
unsigned int iocq_size; /* number of iocq entries allocated */
struct fio_file *current_f; /* fio_file given by user */
};
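/* Background thread that periodically processes admin-queue completions
* (e.g. keep-alives and async event requests) for every attached controller.
*/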
static void *
spdk_fio_poll_ctrlrs(void *arg)
{
struct spdk_fio_ctrlr *fio_ctrlr;
int oldstate;
int rc;
/* Loop until the thread is cancelled */
while (true) {
rc = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
if (rc != 0) {
SPDK_ERRLOG("Unable to set cancel state disabled on g_init_thread (%d): %s\n",
rc, spdk_strerror(rc));
}
pthread_mutex_lock(&g_mutex);
TAILQ_FOREACH(fio_ctrlr, &g_ctrlrs, link) {
spdk_nvme_ctrlr_process_admin_completions(fio_ctrlr->ctrlr);
}
pthread_mutex_unlock(&g_mutex);
rc = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
if (rc != 0) {
SPDK_ERRLOG("Unable to set cancel state enabled on g_init_thread (%d): %s\n",
rc, spdk_strerror(rc));
}
/* This is a pthread cancellation point and cannot be removed. */
sleep(1);
}
return NULL;
}
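/* Called by spdk_nvme_probe() for each discovered controller; applies the
* per-job connection options and returns true to request attachment.
*/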
static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
struct spdk_nvme_ctrlr_opts *opts)
{
struct thread_data *td = cb_ctx;
struct spdk_fio_options *fio_options = td->eo;
if (fio_options->hostnqn) {
snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", fio_options->hostnqn);
}
if (fio_options->enable_wrr) {
opts->arb_mechanism = SPDK_NVME_CC_AMS_WRR;
opts->arbitration_burst = fio_options->arbitration_burst;
opts->low_priority_weight = fio_options->low_weight;
opts->medium_priority_weight = fio_options->medium_weight;
opts->high_priority_weight = fio_options->high_weight;
}
if (fio_options->digest_enable) {
if (strcasecmp(fio_options->digest_enable, "HEADER") == 0) {
opts->header_digest = true;
} else if (strcasecmp(fio_options->digest_enable, "DATA") == 0) {
opts->data_digest = true;
} else if (strcasecmp(fio_options->digest_enable, "BOTH") == 0) {
opts->header_digest = true;
opts->data_digest = true;
}
}
return true;
}
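/* Look up an already-attached controller by transport ID. Callers hold g_mutex. */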
static struct spdk_fio_ctrlr *
get_fio_ctrlr(const struct spdk_nvme_transport_id *trid)
{
struct spdk_fio_ctrlr *fio_ctrlr;
TAILQ_FOREACH(fio_ctrlr, &g_ctrlrs, link) {
if (spdk_nvme_transport_id_compare(trid, &fio_ctrlr->tr_id) == 0) {
return fio_ctrlr;
}
}
return NULL;
}
/**
* Returns the fio_qpair that matches the given fio_file and has an associated namespace
*/
static struct spdk_fio_qpair *
get_fio_qpair(struct spdk_fio_thread *fio_thread, struct fio_file *f)
{
struct spdk_fio_qpair *fio_qpair;
TAILQ_FOREACH(fio_qpair, &fio_thread->fio_qpair, link) {
if ((fio_qpair->f == f) && fio_qpair->ns) {
return fio_qpair;
}
}
return NULL;
}
#if FIO_HAS_ZBD
/**
* Callback function to use while processing completions until completion-indicator turns non-zero
*/
static void
pcu_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
{
int *completed = ctx;
*completed = spdk_nvme_cpl_is_error(cpl) ? -1 : 1;
}
/**
* Process Completions Until the given 'completed' indicator turns non-zero or an error occurs
*/
static int32_t
pcu(struct spdk_nvme_qpair *qpair, int *completed)
{
int32_t ret;
while (!*completed) {
ret = spdk_nvme_qpair_process_completions(qpair, 1);
if (ret < 0) {
log_err("spdk/nvme: process_compl(): ret: %d\n", ret);
return ret;
}
}
return 0;
}
#endif
static inline uint32_t
_nvme_get_host_buffer_sector_size(struct spdk_nvme_ns *ns, uint32_t io_flags)
{
bool md_excluded_from_xfer = false;
uint32_t md_size;
uint32_t ns_flags;
ns_flags = spdk_nvme_ns_get_flags(ns);
md_size = spdk_nvme_ns_get_md_size(ns);
/* For the extended LBA format, if the metadata size is 8 bytes and PRACT is
* enabled (the controller inserts/strips PI), the host does not transfer the
* metadata, so subtract the metadata size from the block size.
*/
md_excluded_from_xfer = ((io_flags & SPDK_NVME_IO_FLAGS_PRACT) &&
(ns_flags & SPDK_NVME_NS_EXTENDED_LBA_SUPPORTED) &&
(ns_flags & SPDK_NVME_NS_DPS_PI_SUPPORTED) &&
(md_size == 8));
return md_excluded_from_xfer ? spdk_nvme_ns_get_sector_size(ns) :
spdk_nvme_ns_get_extended_sector_size(ns);
}
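/* Called by spdk_nvme_probe() once a controller is attached. Parses the
* optional "ns=" suffix from the fio file name, registers the controller
* and namespace, and records per-qpair PI and zoned-namespace settings.
*/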
static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
struct thread_data *td = cb_ctx;
struct spdk_fio_thread *fio_thread = td->io_ops_data;
struct spdk_fio_ctrlr *fio_ctrlr;
struct spdk_fio_qpair *fio_qpair;
struct spdk_nvme_ns *ns;
const struct spdk_nvme_ns_data *nsdata;
struct fio_file *f = fio_thread->current_f;
uint32_t ns_id;
char *p;
long int tmp;
uint32_t block_size;
struct spdk_fio_options *fio_options = td->eo;
p = strstr(f->file_name, "ns=");
if (p != NULL) {
tmp = spdk_strtol(p + 3, 10);
if (tmp <= 0) {
SPDK_ERRLOG("namespace id should be >=1, but was invalid: %ld\n", tmp);
g_error = true;
return;
}
ns_id = (uint32_t)tmp;
} else {
ns_id = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
if (ns_id == 0) {
/* The ctrlr has no active namespaces and we didn't specify any so nothing to do. */
return;
}
}
pthread_mutex_lock(&g_mutex);
fio_ctrlr = get_fio_ctrlr(trid);
/* it is a new ctrlr and needs to be added */
if (!fio_ctrlr) {
/* Create an fio_ctrlr and add it to the list */
fio_ctrlr = calloc(1, sizeof(*fio_ctrlr));
if (!fio_ctrlr) {
SPDK_ERRLOG("Cannot allocate space for fio_ctrlr\n");
g_error = true;
pthread_mutex_unlock(&g_mutex);
return;
}
fio_ctrlr->opts = *opts;
fio_ctrlr->ctrlr = ctrlr;
fio_ctrlr->tr_id = *trid;
TAILQ_INSERT_TAIL(&g_ctrlrs, fio_ctrlr, link);
}
pthread_mutex_unlock(&g_mutex);
ns = spdk_nvme_ctrlr_get_ns(fio_ctrlr->ctrlr, ns_id);
if (ns == NULL) {
SPDK_ERRLOG("Cannot get namespace by ns_id=%d\n", ns_id);
g_error = true;
return;
}
if (!spdk_nvme_ns_is_active(ns)) {
SPDK_ERRLOG("Inactive namespace by ns_id=%d\n", ns_id);
g_error = true;
return;
}
nsdata = spdk_nvme_ns_get_data(ns);
TAILQ_FOREACH(fio_qpair, &fio_thread->fio_qpair, link) {
if ((fio_qpair->f == f) ||
((spdk_nvme_transport_id_compare(trid, &fio_qpair->fio_ctrlr->tr_id) == 0) &&
(spdk_nvme_ns_get_id(fio_qpair->ns) == ns_id))) {
/* Not an error; avoid a duplicate connection */
return;
}
}
/* create a new qpair */
fio_qpair = calloc(1, sizeof(*fio_qpair));
if (!fio_qpair) {
g_error = true;
SPDK_ERRLOG("Cannot allocate space for fio_qpair\n");
return;
}
f->engine_data = fio_qpair;
fio_qpair->ns = ns;
fio_qpair->f = f;
fio_qpair->fio_ctrlr = fio_ctrlr;
TAILQ_INSERT_TAIL(&fio_thread->fio_qpair, fio_qpair, link);
if (spdk_nvme_ns_get_flags(ns) & SPDK_NVME_NS_DPS_PI_SUPPORTED) {
assert(spdk_nvme_ns_get_pi_type(ns) != SPDK_NVME_FMT_NVM_PROTECTION_DISABLE);
fio_qpair->io_flags = g_spdk_pract_flag | g_spdk_prchk_flags;
fio_qpair->nvme_pi_enabled = true;
fio_qpair->md_start = nsdata->dps.md_start;
fio_qpair->extended_lba = spdk_nvme_ns_supports_extended_lba(ns);
fprintf(stdout, "PI type%u enabled with %s\n", spdk_nvme_ns_get_pi_type(ns),
fio_qpair->extended_lba ? "extended lba" : "separate metadata");
}
block_size = _nvme_get_host_buffer_sector_size(ns, fio_qpair->io_flags);
if (td->o.bs[DDIR_READ] % block_size != 0 || td->o.bs[DDIR_WRITE] % block_size != 0) {
if (spdk_nvme_ns_supports_extended_lba(ns)) {
SPDK_ERRLOG("--bs has to be a multiple of (LBA data size + Metadata size)\n");
} else {
SPDK_ERRLOG("--bs has to be a multiple of LBA data size\n");
}
g_error = true;
return;
}
if (fio_options->zone_append && spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS) {
if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED) {
SPDK_DEBUGLOG(fio_nvme, "Using zone appends instead of writes on: '%s'\n",
f->file_name);
fio_qpair->zone_append_enabled = true;
} else {
SPDK_WARNLOG("Falling back to writes on: '%s' - ns lacks zone append cmd\n",
f->file_name);
}
}
if (fio_options->initial_zone_reset == 1 && spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS) {
#if FIO_HAS_ZBD
struct spdk_nvme_qpair *tmp_qpair;
int completed = 0, err;
/* qpair has not been allocated yet (it gets allocated in spdk_fio_open()).
* Create a temporary qpair in order to perform the initial zone reset.
*/
assert(!fio_qpair->qpair);
tmp_qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0);
if (!tmp_qpair) {
SPDK_ERRLOG("Cannot allocate a temporary qpair\n");
g_error = true;
return;
}
err = spdk_nvme_zns_reset_zone(ns, tmp_qpair, 0x0, true, pcu_cb, &completed);
if (err || pcu(tmp_qpair, &completed) || completed < 0) {
log_err("spdk/nvme: warn: initial_zone_reset: err: %d, cpl: %d\n",
err, completed);
}
spdk_nvme_ctrlr_free_io_qpair(tmp_qpair);
#else
log_err("spdk/nvme: ZBD/ZNS is not supported\n");
#endif
}
f->real_file_size = spdk_nvme_ns_get_size(fio_qpair->ns);
if (f->real_file_size <= 0) {
g_error = true;
SPDK_ERRLOG("Cannot get namespace size by ns=%p\n", ns);
return;
}
f->filetype = FIO_TYPE_BLOCK;
fio_file_set_size_known(f);
}
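/* Translate the pi_chk option string into SPDK_NVME_IO_FLAGS_PRCHK_* flags. */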
static void
parse_prchk_flags(const char *prchk_str)
{
if (!prchk_str) {
return;
}
if (strstr(prchk_str, "GUARD") != NULL) {
g_spdk_prchk_flags = SPDK_NVME_IO_FLAGS_PRCHK_GUARD;
}
if (strstr(prchk_str, "REFTAG") != NULL) {
g_spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG;
}
if (strstr(prchk_str, "APPTAG") != NULL) {
g_spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_APPTAG;
}
}
static void
parse_pract_flag(int pract)
{
if (pract == 1) {
g_spdk_pract_flag = SPDK_NVME_IO_FLAGS_PRACT;
} else {
g_spdk_pract_flag = 0;
}
}
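/* Returns true only if both stdout and stderr are redirected to /dev/null. */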
static bool
fio_redirected_to_dev_null(void)
{
char path[PATH_MAX] = "";
ssize_t ret;
ret = readlink("/proc/self/fd/1", path, sizeof(path));
if (ret == -1 || strcmp(path, "/dev/null") != 0) {
return false;
}
ret = readlink("/proc/self/fd/2", path, sizeof(path));
if (ret == -1 || strcmp(path, "/dev/null") != 0) {
return false;
}
return true;
}
/* Called once at initialization. This is responsible for gathering the size of
* each "file", which in our case is of the form
* 'key=value [key=value] ... ns=value'
* For example, for a local PCIe NVMe device: 'trtype=PCIe traddr=0000.04.00.0 ns=1'.
* For a remote device exported by an NVMe-oF target:
* 'trtype=RDMA adrfam=IPv4 traddr=192.168.100.8 trsvcid=4420 ns=1'. */
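/* Illustrative invocation (a sketch; the plugin path and PCI address below are
* examples, adjust for your build and system):
*   LD_PRELOAD=<spdk>/build/fio/spdk_nvme fio --name=job1 --ioengine=spdk \
*     --thread=1 --filename='trtype=PCIe traddr=0000.04.00.0 ns=1' \
*     --rw=randread --bs=4k --iodepth=32 --time_based=1 --runtime=10
*/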
static int
spdk_fio_setup(struct thread_data *td)
{
struct spdk_fio_thread *fio_thread;
struct spdk_fio_options *fio_options = td->eo;
struct spdk_env_opts opts;
struct fio_file *f;
char *p;
int rc = 0;
struct spdk_nvme_transport_id trid;
struct spdk_fio_ctrlr *fio_ctrlr;
char *trid_info;
unsigned int i;
/*
* If we're running in a daemonized FIO instance, it's possible
* fd 1/2 were re-used for something important by FIO. Newer fio
* versions are careful to redirect those to /dev/null, but if we're
* not, we'll abort early, so we don't accidentally write messages to
* an important file, etc.
*/
if (is_backend && !fio_redirected_to_dev_null()) {
char buf[1024];
snprintf(buf, sizeof(buf),
"SPDK FIO plugin is in daemon mode, but stdout/stderr "
"aren't redirected to /dev/null. Aborting.");
fio_server_text_output(FIO_LOG_ERR, buf, sizeof(buf));
return -1;
}
if (!td->o.use_thread) {
log_err("spdk: must set thread=1 when using spdk plugin\n");
return 1;
}
if (g_log_flag_error) {
/* The first thread found an error when parsing log flags, so
* just return error immediately for all of the other threads.
*/
return 1;
}
pthread_mutex_lock(&g_mutex);
fio_thread = calloc(1, sizeof(*fio_thread));
assert(fio_thread != NULL);
td->io_ops_data = fio_thread;
fio_thread->td = td;
fio_thread->iocq_size = td->o.iodepth;
fio_thread->iocq = calloc(fio_thread->iocq_size, sizeof(struct io_u *));
assert(fio_thread->iocq != NULL);
TAILQ_INIT(&fio_thread->fio_qpair);
if (!g_spdk_env_initialized) {
spdk_env_opts_init(&opts);
opts.name = "fio";
opts.mem_size = fio_options->mem_size;
opts.shm_id = fio_options->shm_id;
g_spdk_enable_sgl = fio_options->enable_sgl;
g_spdk_sge_size = fio_options->sge_size;
g_spdk_bit_bucket_data_len = fio_options->bit_bucket_data_len;
parse_pract_flag(fio_options->pi_act);
g_spdk_md_per_io_size = spdk_max(fio_options->md_per_io_size, 4096);
g_spdk_apptag = (uint16_t)fio_options->apptag;
g_spdk_apptag_mask = (uint16_t)fio_options->apptag_mask;
parse_prchk_flags(fio_options->pi_chk);
if (spdk_env_init(&opts) < 0) {
SPDK_ERRLOG("Unable to initialize SPDK env\n");
free(fio_thread->iocq);
free(fio_thread);
fio_thread = NULL;
pthread_mutex_unlock(&g_mutex);
return 1;
}
if (fio_options->log_flags) {
char *tok = strtok(fio_options->log_flags, ",");
do {
rc = spdk_log_set_flag(tok);
if (rc < 0) {
SPDK_ERRLOG("unknown log flag %s\n", tok);
g_log_flag_error = true;
return 1;
}
} while ((tok = strtok(NULL, ",")) != NULL);
#ifdef DEBUG
spdk_log_set_print_level(SPDK_LOG_DEBUG);
#endif
}
g_spdk_env_initialized = true;
spdk_unaffinitize_thread();
/* Spawn a thread to continue polling the controllers */
rc = pthread_create(&g_ctrlr_thread_id, NULL, &spdk_fio_poll_ctrlrs, NULL);
if (rc != 0) {
SPDK_ERRLOG("Unable to spawn a thread to poll admin queues. They won't be polled.\n");
}
if (fio_options->enable_vmd && spdk_vmd_init()) {
SPDK_ERRLOG("Failed to initialize VMD. Some NVMe devices can be unavailable.\n");
}
}
pthread_mutex_unlock(&g_mutex);
for_each_file(td, f, i) {
memset(&trid, 0, sizeof(trid));
trid.trtype = SPDK_NVME_TRANSPORT_PCIE;
p = strstr(f->file_name, " ns=");
if (p != NULL) {
trid_info = strndup(f->file_name, p - f->file_name);
} else {
trid_info = strndup(f->file_name, strlen(f->file_name));
}
if (!trid_info) {
SPDK_ERRLOG("Failed to allocate space for trid_info\n");
continue;
}
rc = spdk_nvme_transport_id_parse(&trid, trid_info);
if (rc < 0) {
SPDK_ERRLOG("Failed to parse given str: %s\n", trid_info);
free(trid_info);
continue;
}
free(trid_info);
if (trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
struct spdk_pci_addr pci_addr;
if (spdk_pci_addr_parse(&pci_addr, trid.traddr) < 0) {
SPDK_ERRLOG("Invalid traddr=%s\n", trid.traddr);
continue;
}
spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &pci_addr);
} else {
if (trid.subnqn[0] == '\0') {
snprintf(trid.subnqn, sizeof(trid.subnqn), "%s",
SPDK_NVMF_DISCOVERY_NQN);
}
}
fio_thread->current_f = f;
pthread_mutex_lock(&g_mutex);
fio_ctrlr = get_fio_ctrlr(&trid);
pthread_mutex_unlock(&g_mutex);
if (fio_ctrlr) {
attach_cb(td, &trid, fio_ctrlr->ctrlr, &fio_ctrlr->opts);
} else {
/* Enumerate all of the controllers */
if (spdk_nvme_probe(&trid, td, probe_cb, attach_cb, NULL) != 0) {
SPDK_ERRLOG("spdk_nvme_probe() failed\n");
continue;
}
}
if (g_error) {
log_err("Failed to initialize spdk fio plugin\n");
rc = 1;
break;
}
}
pthread_mutex_lock(&g_mutex);
g_td_count++;
pthread_mutex_unlock(&g_mutex);
return rc;
}
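/* fio "open file" hook: allocates the NVMe I/O qpair for the namespace that
* attach_cb() associated with this file.
*/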
static int
spdk_fio_open(struct thread_data *td, struct fio_file *f)
{
struct spdk_fio_qpair *fio_qpair = f->engine_data;
struct spdk_fio_ctrlr *fio_ctrlr = fio_qpair->fio_ctrlr;
struct spdk_fio_options *fio_options = td->eo;
struct spdk_nvme_io_qpair_opts qpopts;
assert(fio_qpair->qpair == NULL);
spdk_nvme_ctrlr_get_default_io_qpair_opts(fio_ctrlr->ctrlr, &qpopts, sizeof(qpopts));
qpopts.delay_cmd_submit = true;
if (fio_options->enable_wrr) {
qpopts.qprio = fio_options->wrr_priority;
}
fio_qpair->qpair = spdk_nvme_ctrlr_alloc_io_qpair(fio_ctrlr->ctrlr, &qpopts, sizeof(qpopts));
if (!fio_qpair->qpair) {
SPDK_ERRLOG("Cannot allocate nvme io_qpair any more\n");
g_error = true;
free(fio_qpair);
return -1;
}
if (fio_options->print_qid_mappings == 1) {
log_info("job %s: %s qid %d\n", td->o.name, f->file_name,
spdk_nvme_qpair_get_id(fio_qpair->qpair));
}
return 0;
}
static int
spdk_fio_close(struct thread_data *td, struct fio_file *f)
{
struct spdk_fio_qpair *fio_qpair = f->engine_data;
assert(fio_qpair->qpair != NULL);
spdk_nvme_ctrlr_free_io_qpair(fio_qpair->qpair);
fio_qpair->qpair = NULL;
return 0;
}
static int
spdk_fio_iomem_alloc(struct thread_data *td, size_t total_mem)
{
td->orig_buffer = spdk_dma_zmalloc(total_mem, NVME_IO_ALIGN, NULL);
return td->orig_buffer == NULL;
}
static void
spdk_fio_iomem_free(struct thread_data *td)
{
spdk_dma_free(td->orig_buffer);
}
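/* Per-io_u setup: allocate the request context plus a separate metadata
* buffer used for DIX and for PI verification on reads.
*/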
static int
spdk_fio_io_u_init(struct thread_data *td, struct io_u *io_u)
{
struct spdk_fio_thread *fio_thread = td->io_ops_data;
struct spdk_fio_request *fio_req;
io_u->engine_data = NULL;
fio_req = calloc(1, sizeof(*fio_req));
if (fio_req == NULL) {
return 1;
}
fio_req->md_buf = spdk_dma_zmalloc(g_spdk_md_per_io_size, NVME_IO_ALIGN, NULL);
if (fio_req->md_buf == NULL) {
fprintf(stderr, "Allocate %u metadata failed\n", g_spdk_md_per_io_size);
free(fio_req);
return 1;
}
fio_req->io = io_u;
fio_req->fio_thread = fio_thread;
io_u->engine_data = fio_req;
return 0;
}
static void
spdk_fio_io_u_free(struct thread_data *td, struct io_u *io_u)
{
struct spdk_fio_request *fio_req = io_u->engine_data;
if (fio_req) {
assert(fio_req->io == io_u);
spdk_dma_free(fio_req->md_buf);
free(fio_req);
io_u->engine_data = NULL;
}
}
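/* Convert a byte offset into the starting LBA (zslba) of the zone that
* contains it.
*/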
static inline uint64_t
fio_offset_to_zslba(unsigned long long offset, struct spdk_nvme_ns *ns)
{
return (offset / spdk_nvme_zns_ns_get_zone_size(ns)) * spdk_nvme_zns_ns_get_zone_size_sectors(ns);
}
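/* Initialize the DIF context for an extended-LBA namespace (metadata
* interleaved with data) and, for writes, generate protection information
* in-place in the data buffer.
*/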
static int
fio_extended_lba_setup_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u)
{
struct spdk_nvme_ns *ns = fio_qpair->ns;
struct spdk_fio_request *fio_req = io_u->engine_data;
uint32_t md_size, extended_lba_size, lba_count;
uint64_t lba;
struct iovec iov;
int rc;
/* Set appmask and apptag when PRACT is enabled */
if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) {
fio_req->dif_ctx.apptag_mask = g_spdk_apptag_mask;
fio_req->dif_ctx.app_tag = g_spdk_apptag;
return 0;
}
extended_lba_size = spdk_nvme_ns_get_extended_sector_size(ns);
md_size = spdk_nvme_ns_get_md_size(ns);
lba = io_u->offset / extended_lba_size;
lba_count = io_u->xfer_buflen / extended_lba_size;
rc = spdk_dif_ctx_init(&fio_req->dif_ctx, extended_lba_size, md_size,
true, fio_qpair->md_start,
(enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns),
fio_qpair->io_flags, lba, g_spdk_apptag_mask, g_spdk_apptag, 0, 0);
if (rc != 0) {
fprintf(stderr, "Initialization of DIF context failed\n");
return rc;
}
if (io_u->ddir != DDIR_WRITE) {
return 0;
}
iov.iov_base = io_u->buf;
iov.iov_len = io_u->xfer_buflen;
rc = spdk_dif_generate(&iov, 1, lba_count, &fio_req->dif_ctx);
if (rc != 0) {
fprintf(stderr, "Generation of DIF failed\n");
}
return rc;
}
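/* Initialize the DIF context for a separate-metadata (DIX) namespace and,
* for writes, generate protection information into the metadata buffer.
*/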
static int
fio_separate_md_setup_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u)
{
struct spdk_nvme_ns *ns = fio_qpair->ns;
struct spdk_fio_request *fio_req = io_u->engine_data;
uint32_t md_size, block_size, lba_count;
uint64_t lba;
struct iovec iov, md_iov;
int rc;
/* Set appmask and apptag when PRACT is enabled */
if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) {
fio_req->dif_ctx.apptag_mask = g_spdk_apptag_mask;
fio_req->dif_ctx.app_tag = g_spdk_apptag;
return 0;
}
block_size = spdk_nvme_ns_get_sector_size(ns);
md_size = spdk_nvme_ns_get_md_size(ns);
lba = io_u->offset / block_size;
lba_count = io_u->xfer_buflen / block_size;
rc = spdk_dif_ctx_init(&fio_req->dif_ctx, block_size, md_size,
false, fio_qpair->md_start,
(enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns),
fio_qpair->io_flags, lba, g_spdk_apptag_mask, g_spdk_apptag, 0, 0);
if (rc != 0) {
fprintf(stderr, "Initialization of DIF context failed\n");
return rc;
}
if (io_u->ddir != DDIR_WRITE) {
return 0;
}
iov.iov_base = io_u->buf;
iov.iov_len = io_u->xfer_buflen;
md_iov.iov_base = fio_req->md_buf;
md_iov.iov_len = spdk_min(md_size * lba_count, g_spdk_md_per_io_size);
rc = spdk_dix_generate(&iov, 1, &md_iov, lba_count, &fio_req->dif_ctx);
if (rc < 0) {
fprintf(stderr, "Generation of DIX failed\n");
}
return rc;
}
static int
fio_extended_lba_verify_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u)
{
struct spdk_nvme_ns *ns = fio_qpair->ns;
struct spdk_fio_request *fio_req = io_u->engine_data;
uint32_t lba_count;
struct iovec iov;
struct spdk_dif_error err_blk = {};
int rc;
/* Do nothing when PRACT is enabled */
if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) {
return 0;
}
iov.iov_base = io_u->buf;
iov.iov_len = io_u->xfer_buflen;
lba_count = io_u->xfer_buflen / spdk_nvme_ns_get_extended_sector_size(ns);
rc = spdk_dif_verify(&iov, 1, lba_count, &fio_req->dif_ctx, &err_blk);
if (rc != 0) {
fprintf(stderr, "DIF error detected. type=%d, offset=%" PRIu32 "\n",
err_blk.err_type, err_blk.err_offset);
}
return rc;
}
static int
fio_separate_md_verify_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u)
{
struct spdk_nvme_ns *ns = fio_qpair->ns;
struct spdk_fio_request *fio_req = io_u->engine_data;
uint32_t md_size, lba_count;
struct iovec iov, md_iov;
struct spdk_dif_error err_blk = {};
int rc;
/* Do nothing when PRACT is enabled */
if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) {
return 0;
}
iov.iov_base = io_u->buf;
iov.iov_len = io_u->xfer_buflen;
lba_count = io_u->xfer_buflen / spdk_nvme_ns_get_sector_size(ns);
md_size = spdk_nvme_ns_get_md_size(ns);
md_iov.iov_base = fio_req->md_buf;
md_iov.iov_len = spdk_min(md_size * lba_count, g_spdk_md_per_io_size);
rc = spdk_dix_verify(&iov, 1, &md_iov, lba_count, &fio_req->dif_ctx, &err_blk);
if (rc != 0) {
fprintf(stderr, "DIX error detected. type=%d, offset=%" PRIu32 "\n",
err_blk.err_type, err_blk.err_offset);
}
return rc;
}
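/* I/O completion callback: verifies protection information on reads,
* records any error on the io_u, and pushes it onto the completion queue
* consumed by spdk_fio_event().
*/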
static void
spdk_fio_completion_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
{
struct spdk_fio_request *fio_req = ctx;
struct spdk_fio_thread *fio_thread = fio_req->fio_thread;
struct spdk_fio_qpair *fio_qpair = fio_req->fio_qpair;
int rc;
if (fio_qpair->nvme_pi_enabled && fio_req->io->ddir == DDIR_READ) {
if (fio_qpair->extended_lba) {
rc = fio_extended_lba_verify_pi(fio_qpair, fio_req->io);
} else {
rc = fio_separate_md_verify_pi(fio_qpair, fio_req->io);
}
if (rc != 0) {
fio_req->io->error = abs(rc);
}
}
if (spdk_nvme_cpl_is_error(cpl)) {
fio_req->io->error = EIO;
}
assert(fio_thread->iocq_count < fio_thread->iocq_size);
fio_thread->iocq[fio_thread->iocq_count++] = fio_req->io;
}
static void
spdk_nvme_io_reset_sgl(void *ref, uint32_t sgl_offset)
{
struct spdk_fio_request *fio_req = (struct spdk_fio_request *)ref;
fio_req->iov_offset = sgl_offset;
fio_req->bit_bucket_data_len = 0;
}
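/* SGL "next SGE" callback: hands out up to sge_size bytes at a time and,
* for reads, substitutes the first bit_bucket_data_len bytes with a bit
* bucket descriptor (address UINT64_MAX) so the data is discarded.
*/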
static int
spdk_nvme_io_next_sge(void *ref, void **address, uint32_t *length)
{
struct spdk_fio_request *fio_req = (struct spdk_fio_request *)ref;
struct io_u *io_u = fio_req->io;
uint32_t iov_len;
uint32_t bit_bucket_len;
*address = io_u->buf;
if (fio_req->iov_offset) {
assert(fio_req->iov_offset <= io_u->xfer_buflen);
*address += fio_req->iov_offset;
}
iov_len = io_u->xfer_buflen - fio_req->iov_offset;
if (iov_len > g_spdk_sge_size) {
iov_len = g_spdk_sge_size;
}
if ((fio_req->bit_bucket_data_len < g_spdk_bit_bucket_data_len) && (io_u->ddir == DDIR_READ)) {
assert(g_spdk_bit_bucket_data_len < io_u->xfer_buflen);
*address = (void *)UINT64_MAX;
bit_bucket_len = g_spdk_bit_bucket_data_len - fio_req->bit_bucket_data_len;
if (iov_len > bit_bucket_len) {
iov_len = bit_bucket_len;
}
fio_req->bit_bucket_data_len += iov_len;
}
fio_req->iov_offset += iov_len;
*length = iov_len;
return 0;
}
#if FIO_IOOPS_VERSION >= 24
typedef enum fio_q_status fio_q_status_t;
#else
typedef int fio_q_status_t;
#endif
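/* fio queue hook: translate an io_u into the matching NVMe command
* (read/write, contiguous or SGL, regular write or zone append), set up
* protection information if enabled, and submit it on the file's qpair.
*/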
static fio_q_status_t
spdk_fio_queue(struct thread_data *td, struct io_u *io_u)
{
int rc = 1;
struct spdk_fio_thread *fio_thread = td->io_ops_data;
struct spdk_fio_request *fio_req = io_u->engine_data;
struct spdk_fio_qpair *fio_qpair;
struct spdk_nvme_ns *ns = NULL;
void *md_buf = NULL;
struct spdk_dif_ctx *dif_ctx = &fio_req->dif_ctx;
uint32_t block_size;
uint64_t lba;
uint32_t lba_count;
fio_qpair = get_fio_qpair(fio_thread, io_u->file);
if (fio_qpair == NULL) {
return -ENXIO;
}
ns = fio_qpair->ns;
if (fio_qpair->nvme_pi_enabled && !fio_qpair->extended_lba) {
md_buf = fio_req->md_buf;
}
fio_req->fio_qpair = fio_qpair;
block_size = _nvme_get_host_buffer_sector_size(ns, fio_qpair->io_flags);
lba = io_u->offset / block_size;
lba_count = io_u->xfer_buflen / block_size;
/* TODO: consider situations where fio randomizes and verifies io_u */
if (fio_qpair->nvme_pi_enabled) {
if (fio_qpair->extended_lba) {
rc = fio_extended_lba_setup_pi(fio_qpair, io_u);
} else {
rc = fio_separate_md_setup_pi(fio_qpair, io_u);
}
if (rc < 0) {
io_u->error = -rc;
return FIO_Q_COMPLETED;
}
}
switch (io_u->ddir) {
case DDIR_READ:
if (!g_spdk_enable_sgl) {
rc = spdk_nvme_ns_cmd_read_with_md(ns, fio_qpair->qpair, io_u->buf, md_buf, lba, lba_count,
spdk_fio_completion_cb, fio_req,
fio_qpair->io_flags, dif_ctx->apptag_mask, dif_ctx->app_tag);
} else {
rc = spdk_nvme_ns_cmd_readv_with_md(ns, fio_qpair->qpair, lba,
lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags,
spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, md_buf,
dif_ctx->apptag_mask, dif_ctx->app_tag);
}
break;
case DDIR_WRITE:
if (!g_spdk_enable_sgl) {
if (!fio_qpair->zone_append_enabled) {
rc = spdk_nvme_ns_cmd_write_with_md(ns, fio_qpair->qpair, io_u->buf, md_buf, lba,
lba_count,
spdk_fio_completion_cb, fio_req,
fio_qpair->io_flags, dif_ctx->apptag_mask, dif_ctx->app_tag);
} else {
uint64_t zslba = fio_offset_to_zslba(io_u->offset, ns);
rc = spdk_nvme_zns_zone_append_with_md(ns, fio_qpair->qpair, io_u->buf, md_buf, zslba,
lba_count,
spdk_fio_completion_cb, fio_req,
fio_qpair->io_flags, dif_ctx->apptag_mask, dif_ctx->app_tag);
}
} else {
if (!fio_qpair->zone_append_enabled) {
rc = spdk_nvme_ns_cmd_writev_with_md(ns, fio_qpair->qpair, lba,
lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags,
spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, md_buf,
dif_ctx->apptag_mask, dif_ctx->app_tag);
} else {
uint64_t zslba = fio_offset_to_zslba(io_u->offset, ns);
rc = spdk_nvme_zns_zone_appendv_with_md(ns, fio_qpair->qpair, zslba,
lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags,
spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, md_buf,
dif_ctx->apptag_mask, dif_ctx->app_tag);
}
}
break;
default:
assert(false);
break;
}
/* NVMe read/write functions return -ENOMEM if there are no free requests. */
if (rc == -ENOMEM) {
return FIO_Q_BUSY;
}
if (rc != 0) {
io_u->error = abs(rc);
return FIO_Q_COMPLETED;
}
return FIO_Q_QUEUED;
}
static struct io_u *
spdk_fio_event(struct thread_data *td, int event)
{
struct spdk_fio_thread *fio_thread = td->io_ops_data;
assert(event >= 0);
assert((unsigned)event < fio_thread->iocq_count);
return fio_thread->iocq[event];
}
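/* fio getevents hook: round-robin across this thread's qpairs, processing
* completions until at least 'min' are gathered or the timeout expires.
* Polling resumes at the qpair after the one that satisfied the last call.
*/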
static int
spdk_fio_getevents(struct thread_data *td, unsigned int min,
unsigned int max, const struct timespec *t)
{
struct spdk_fio_thread *fio_thread = td->io_ops_data;
struct spdk_fio_qpair *fio_qpair = NULL;
struct timespec t0, t1;
uint64_t timeout = 0;
if (t) {
timeout = t->tv_sec * 1000000000L + t->tv_nsec;
clock_gettime(CLOCK_MONOTONIC_RAW, &t0);
}
fio_thread->iocq_count = 0;
/* fetch the next qpair */
if (fio_thread->fio_qpair_current) {
fio_qpair = TAILQ_NEXT(fio_thread->fio_qpair_current, link);
}
for (;;) {
if (fio_qpair == NULL) {
fio_qpair = TAILQ_FIRST(&fio_thread->fio_qpair);
}
while (fio_qpair != NULL) {
/*
* We can be called while spdk_fio_open()s are still
* ongoing, in which case ->qpair may still be NULL.
*/
if (fio_qpair->qpair == NULL) {
fio_qpair = TAILQ_NEXT(fio_qpair, link);
continue;
}
spdk_nvme_qpair_process_completions(fio_qpair->qpair, max - fio_thread->iocq_count);
if (fio_thread->iocq_count >= min) {
/* reset the current handling qpair */
fio_thread->fio_qpair_current = fio_qpair;
return fio_thread->iocq_count;
}
fio_qpair = TAILQ_NEXT(fio_qpair, link);
}
if (t) {
uint64_t elapse;
clock_gettime(CLOCK_MONOTONIC_RAW, &t1);
elapse = ((t1.tv_sec - t0.tv_sec) * 1000000000L)
+ t1.tv_nsec - t0.tv_nsec;
if (elapse > timeout) {
break;
}
}
}
/* reset the current handling qpair */
fio_thread->fio_qpair_current = fio_qpair;
return fio_thread->iocq_count;
}
static int
spdk_fio_invalidate(struct thread_data *td, struct fio_file *f)
{
/* TODO: This should probably send a flush to the device, but for now just return successful. */
return 0;
}
#if FIO_HAS_ZBD
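/* Map the namespace's command set to fio's zoned model (ZBD_NONE for
* conventional NVM, ZBD_HOST_MANAGED for ZNS).
*/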
static int
spdk_fio_get_zoned_model(struct thread_data *td, struct fio_file *f, enum zbd_zoned_model *model)
{
struct spdk_fio_thread *fio_thread = td->io_ops_data;
struct spdk_fio_qpair *fio_qpair = NULL;
const struct spdk_nvme_zns_ns_data *zns_data = NULL;
if (f->filetype != FIO_TYPE_BLOCK) {
log_info("spdk/nvme: unsupported filetype: %d\n", f->filetype);
return -EINVAL;
}
fio_qpair = get_fio_qpair(fio_thread, f);
if (!fio_qpair) {
log_err("spdk/nvme: no ns/qpair or file_name: '%s'\n", f->file_name);
return -ENODEV;
}
switch (spdk_nvme_ns_get_csi(fio_qpair->ns)) {
case SPDK_NVME_CSI_NVM:
*model = ZBD_NONE;
return 0;
case SPDK_NVME_CSI_KV:
log_err("spdk/nvme: KV namespace is currently not supported\n");
return -ENOSYS;
case SPDK_NVME_CSI_ZNS:
zns_data = spdk_nvme_zns_ns_get_data(fio_qpair->ns);
if (!zns_data) {
log_err("spdk/nvme: file_name: '%s', ZNS is not enabled\n", f->file_name);
return -EINVAL;
}
*model = ZBD_HOST_MANAGED;
return 0;
}
return -EINVAL;
}
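/* fio report_zones hook: fetch an NVMe zone report on a temporary qpair and
* translate it into fio's zbd_zone entries.
*/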
static int
spdk_fio_report_zones(struct thread_data *td, struct fio_file *f, uint64_t offset,
struct zbd_zone *zbdz, unsigned int nr_zones)
{
struct spdk_fio_thread *fio_thread = td->io_ops_data;
struct spdk_fio_qpair *fio_qpair = NULL;
const struct spdk_nvme_zns_ns_data *zns = NULL;
struct spdk_nvme_zns_zone_report *report;
struct spdk_nvme_qpair *tmp_qpair;
uint32_t report_nzones = 0, report_nzones_max, report_nbytes, mdts_nbytes;
uint64_t zsze_nbytes, ns_nzones, lba_nbytes;
int completed = 0, err;
fio_qpair = get_fio_qpair(fio_thread, f);
if (!fio_qpair) {
log_err("spdk/nvme: no ns/qpair or file_name: '%s'\n", f->file_name);
return -ENODEV;
}
zns = spdk_nvme_zns_ns_get_data(fio_qpair->ns);
if (!zns) {
log_err("spdk/nvme: file_name: '%s', zns is not enabled\n", f->file_name);
return -EINVAL;
}
/* qpair has not been allocated yet (it gets allocated in spdk_fio_open()).
* Create a temporary qpair in order to perform report zones.
*/
assert(!fio_qpair->qpair);
tmp_qpair = spdk_nvme_ctrlr_alloc_io_qpair(fio_qpair->fio_ctrlr->ctrlr, NULL, 0);
if (!tmp_qpair) {
log_err("spdk/nvme: cannot allocate a temporary qpair\n");
return -EIO;
}
/** Retrieve device parameters */
mdts_nbytes = spdk_nvme_ns_get_max_io_xfer_size(fio_qpair->ns);
lba_nbytes = spdk_nvme_ns_get_sector_size(fio_qpair->ns);
zsze_nbytes = spdk_nvme_zns_ns_get_zone_size(fio_qpair->ns);
ns_nzones = spdk_nvme_zns_ns_get_num_zones(fio_qpair->ns);
/** Size the report buffer to the smallest of: what fits within the MDTS, the caller's zbdz storage, and the number of zones in the namespace */
report_nzones_max = (mdts_nbytes - sizeof(*report)) / sizeof(report->descs[0]);
report_nzones_max = spdk_min(spdk_min(report_nzones_max, nr_zones), ns_nzones);
report_nbytes = sizeof(report->descs[0]) * report_nzones_max + sizeof(*report);
report = calloc(1, report_nbytes);
if (!report) {
log_err("spdk/nvme: failed report_zones(): ENOMEM\n");
err = -ENOMEM;
goto exit;
}
err = spdk_nvme_zns_report_zones(fio_qpair->ns, tmp_qpair, report, report_nbytes,
offset / lba_nbytes, SPDK_NVME_ZRA_LIST_ALL, true, pcu_cb,
&completed);
if (err || pcu(tmp_qpair, &completed) || completed < 0) {
log_err("spdk/nvme: report_zones(): err: %d, cpl: %d\n", err, completed);
err = err ? err : -EIO;
goto exit;
}
assert(report->nr_zones <= report_nzones_max);
report_nzones = report->nr_zones;
for (uint64_t idx = 0; idx < report->nr_zones; ++idx) {
struct spdk_nvme_zns_zone_desc *zdesc = &report->descs[idx];
zbdz[idx].start = zdesc->zslba * lba_nbytes;
zbdz[idx].len = zsze_nbytes;
zbdz[idx].capacity = zdesc->zcap * lba_nbytes;
zbdz[idx].wp = zdesc->wp * lba_nbytes;
switch (zdesc->zt) {
case SPDK_NVME_ZONE_TYPE_SEQWR:
zbdz[idx].type = ZBD_ZONE_TYPE_SWR;
break;
default:
log_err("spdk/nvme: %s: inv. zone-type: 0x%x\n", f->file_name, zdesc->zt);
err = -EIO;
goto exit;
}
switch (zdesc->zs) {
case SPDK_NVME_ZONE_STATE_EMPTY:
zbdz[idx].cond = ZBD_ZONE_COND_EMPTY;
break;
case SPDK_NVME_ZONE_STATE_IOPEN:
zbdz[idx].cond = ZBD_ZONE_COND_IMP_OPEN;
break;
case SPDK_NVME_ZONE_STATE_EOPEN:
zbdz[idx].cond = ZBD_ZONE_COND_EXP_OPEN;
break;
case SPDK_NVME_ZONE_STATE_CLOSED:
zbdz[idx].cond = ZBD_ZONE_COND_CLOSED;
break;
case SPDK_NVME_ZONE_STATE_RONLY:
zbdz[idx].cond = ZBD_ZONE_COND_READONLY;
break;
case SPDK_NVME_ZONE_STATE_FULL:
zbdz[idx].cond = ZBD_ZONE_COND_FULL;
break;
case SPDK_NVME_ZONE_STATE_OFFLINE:
zbdz[idx].cond = ZBD_ZONE_COND_OFFLINE;
break;
default:
log_err("spdk/nvme: %s: inv. zone-state: 0x%x\n", f->file_name, zdesc->zs);
err = -EIO;
goto exit;
}
}
exit:
spdk_nvme_ctrlr_free_io_qpair(tmp_qpair);
free(report);
return err ? err : (int)report_nzones;
}
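/* fio reset_wp hook: reset the write pointer of every zone in
* [offset, offset + length) using the file's qpair.
*/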
static int
spdk_fio_reset_wp(struct thread_data *td, struct fio_file *f, uint64_t offset, uint64_t length)
{
struct spdk_fio_thread *fio_thread = td->io_ops_data;
struct spdk_fio_qpair *fio_qpair = NULL;
const struct spdk_nvme_zns_ns_data *zns = NULL;
uint64_t zsze_nbytes, lba_nbytes;
int err = 0;
fio_qpair = get_fio_qpair(fio_thread, f);
if (!fio_qpair) {
log_err("spdk/nvme: no ns/qpair or file_name: '%s'\n", f->file_name);
return -ENODEV;
}
zns = spdk_nvme_zns_ns_get_data(fio_qpair->ns);
if (!zns) {
log_err("spdk/nvme: file_name: '%s', zns is not enabled\n", f->file_name);
return -EINVAL;
}
zsze_nbytes = spdk_nvme_zns_ns_get_zone_size(fio_qpair->ns);
lba_nbytes = spdk_nvme_ns_get_sector_size(fio_qpair->ns);
/** check the assumption that offset is valid zone-start lba */
if (offset % zsze_nbytes) {
log_err("spdk/nvme: offset: %zu is not a valid zslba\n", offset);
return -EINVAL;
}
for (uint64_t cur = offset; cur < offset + length; cur += zsze_nbytes) {
int completed = 0;
err = spdk_nvme_zns_reset_zone(fio_qpair->ns, fio_qpair->qpair, cur / lba_nbytes,
false, pcu_cb, &completed);
if (err || pcu(fio_qpair->qpair, &completed) || completed < 0) {
log_err("spdk/nvme: zns_reset_zone(): err: %d, cpl: %d\n", err, completed);
err = err ? err : -EIO;
break;
}
}
return err;
}
#endif
#if FIO_IOOPS_VERSION >= 30
static int
spdk_fio_get_max_open_zones(struct thread_data *td, struct fio_file *f,
unsigned int *max_open_zones)
{
struct spdk_fio_thread *fio_thread = td->io_ops_data;
struct spdk_fio_qpair *fio_qpair = NULL;
fio_qpair = get_fio_qpair(fio_thread, f);
if (!fio_qpair) {
log_err("spdk/nvme: no ns/qpair or file_name: '%s'\n", f->file_name);
return -ENODEV;
}
*max_open_zones = spdk_nvme_zns_ns_get_max_open_zones(fio_qpair->ns);
return 0;
}
#endif
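/* Per-thread teardown. The last thread to exit also detaches all
* controllers and, if enabled, finalizes VMD.
*/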
static void
spdk_fio_cleanup(struct thread_data *td)
{
struct spdk_fio_thread *fio_thread = td->io_ops_data;
struct spdk_fio_qpair *fio_qpair, *fio_qpair_tmp;
struct spdk_fio_options *fio_options = td->eo;
TAILQ_FOREACH_SAFE(fio_qpair, &fio_thread->fio_qpair, link, fio_qpair_tmp) {
TAILQ_REMOVE(&fio_thread->fio_qpair, fio_qpair, link);
free(fio_qpair);
}
free(fio_thread->iocq);
free(fio_thread);
pthread_mutex_lock(&g_mutex);
g_td_count--;
if (g_td_count == 0) {
struct spdk_fio_ctrlr *fio_ctrlr, *fio_ctrlr_tmp;
struct spdk_nvme_detach_ctx *detach_ctx = NULL;
TAILQ_FOREACH_SAFE(fio_ctrlr, &g_ctrlrs, link, fio_ctrlr_tmp) {
TAILQ_REMOVE(&g_ctrlrs, fio_ctrlr, link);
spdk_nvme_detach_async(fio_ctrlr->ctrlr, &detach_ctx);
free(fio_ctrlr);
}
if (detach_ctx) {
spdk_nvme_detach_poll(detach_ctx);
}
if (fio_options->enable_vmd) {
spdk_vmd_fini();
}
}
pthread_mutex_unlock(&g_mutex);
if (TAILQ_EMPTY(&g_ctrlrs)) {
if (pthread_cancel(g_ctrlr_thread_id) == 0) {
pthread_join(g_ctrlr_thread_id, NULL);
}
}
}
/* This function enables addition of SPDK parameters to the fio config
* Adding new parameters by defining them here and defining a callback
* function to read the parameter value. */
static struct fio_option options[] = {
{
.name = "enable_wrr",
.lname = "Enable weighted round robin (WRR) for IO submission queues",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, enable_wrr),
.def = "0",
.help = "Enable weighted round robin (WRR) for IO submission queues",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "arbitration_burst",
.lname = "Arbitration Burst",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, arbitration_burst),
.def = "0",
.help = "Arbitration Burst used for WRR (valid range from 0 - 7)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "low_weight",
.lname = "low_weight for WRR",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, low_weight),
.def = "0",
.help = "low_weight used for WRR (valid range from 0 - 255)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "medium_weight",
.lname = "medium_weight for WRR",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, medium_weight),
.def = "0",
.help = "medium weight used for WRR (valid range from 0 - 255)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "high_weight",
.lname = "high_weight for WRR",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, high_weight),
.def = "0",
.help = "high weight used for WRR (valid range from 0 - 255)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "wrr_priority",
.lname = "priority used for WRR",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, wrr_priority),
.def = "0",
.help = "priority used for WRR (valid range from 0-3)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "mem_size_mb",
.lname = "Memory size in MB",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, mem_size),
.def = "0",
.help = "Memory Size for SPDK (MB)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "shm_id",
.lname = "shared memory ID",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, shm_id),
.def = "-1",
.help = "Shared Memory ID",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "enable_sgl",
.lname = "SGL used for I/O commands",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, enable_sgl),
.def = "0",
.help = "SGL Used for I/O Commands (enable_sgl=1 or enable_sgl=0)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "sge_size",
.lname = "SGL size used for I/O commands",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, sge_size),
.def = "4096",
.help = "SGL size in bytes for I/O Commands (default 4096)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "bit_bucket_data_len",
.lname = "Amount of data used for Bit Bucket",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, bit_bucket_data_len),
.def = "0",
.help = "Bit Bucket Data Length for READ commands (disabled by default)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "hostnqn",
.lname = "Host NQN to use when connecting to controllers.",
.type = FIO_OPT_STR_STORE,
.off1 = offsetof(struct spdk_fio_options, hostnqn),
.help = "Host NQN",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "pi_act",
.lname = "Protection Information Action",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, pi_act),
.def = "1",
.help = "Protection Information Action bit (pi_act=1 or pi_act=0)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "pi_chk",
.lname = "Protection Information Check(GUARD|REFTAG|APPTAG)",
.type = FIO_OPT_STR_STORE,
.off1 = offsetof(struct spdk_fio_options, pi_chk),
.def = NULL,
.help = "Control of Protection Information Checking (pi_chk=GUARD|REFTAG|APPTAG)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "md_per_io_size",
.lname = "Separate Metadata Buffer Size per I/O",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, md_per_io_size),
.def = "4096",
.help = "Size of separate metadata buffer per I/O (Default: 4096)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "apptag",
.lname = "Application Tag used in Protection Information",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, apptag),
.def = "0x1234",
.help = "Application Tag used in Protection Information field (Default: 0x1234)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "apptag_mask",
.lname = "Application Tag Mask",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, apptag_mask),
.def = "0xffff",
.help = "Application Tag Mask used with Application Tag (Default: 0xffff)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "digest_enable",
.lname = "PDU digest choice for NVMe/TCP Transport(NONE|HEADER|DATA|BOTH)",
.type = FIO_OPT_STR_STORE,
.off1 = offsetof(struct spdk_fio_options, digest_enable),
.def = NULL,
.help = "Control the NVMe/TCP control(digest_enable=NONE|HEADER|DATA|BOTH)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "enable_vmd",
.lname = "Enable VMD enumeration",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, enable_vmd),
.def = "0",
.help = "Enable VMD enumeration (enable_vmd=1 or enable_vmd=0)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "initial_zone_reset",
.lname = "Reset Zones on initialization",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, initial_zone_reset),
.def = "0",
.help = "Reset Zones on initialization (0=disable, 1=Reset All Zones)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "zone_append",
.lname = "Use zone append instead of write",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, zone_append),
.def = "0",
.help = "Use zone append instead of write (1=zone append, 0=write)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "print_qid_mappings",
.lname = "Print job-to-qid mappings",
.type = FIO_OPT_INT,
.off1 = offsetof(struct spdk_fio_options, print_qid_mappings),
.def = "0",
.help = "Print job-to-qid mappings (0=disable, 1=enable)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = "log_flags",
.lname = "log_flags",
.type = FIO_OPT_STR_STORE,
.off1 = offsetof(struct spdk_fio_options, log_flags),
.help = "Enable log flags (comma-separated list)",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_INVALID,
},
{
.name = NULL,
},
};
/* FIO imports this structure using dlsym */
struct ioengine_ops ioengine = {
.name = "spdk",
.version = FIO_IOOPS_VERSION,
.queue = spdk_fio_queue,
.getevents = spdk_fio_getevents,
.event = spdk_fio_event,
.cleanup = spdk_fio_cleanup,
.open_file = spdk_fio_open,
.close_file = spdk_fio_close,
.invalidate = spdk_fio_invalidate,
.iomem_alloc = spdk_fio_iomem_alloc,
.iomem_free = spdk_fio_iomem_free,
.setup = spdk_fio_setup,
.io_u_init = spdk_fio_io_u_init,
.io_u_free = spdk_fio_io_u_free,
#if FIO_HAS_ZBD
.get_zoned_model = spdk_fio_get_zoned_model,
.report_zones = spdk_fio_report_zones,
.reset_wp = spdk_fio_reset_wp,
#endif
#if FIO_IOOPS_VERSION >= 30
.get_max_open_zones = spdk_fio_get_max_open_zones,
#endif
.flags = FIO_RAWIO | FIO_NOEXTEND | FIO_NODISKUTIL | FIO_MEMALIGN,
.options = options,
.option_struct_size = sizeof(struct spdk_fio_options),
};
static void fio_init
fio_spdk_register(void)
{
register_ioengine(&ioengine);
}
static void fio_exit
fio_spdk_unregister(void)
{
if (g_spdk_env_initialized) {
spdk_env_fini();
}
unregister_ioengine(&ioengine);
}
SPDK_LOG_REGISTER_COMPONENT(fio_nvme)