Spdk/module/event/subsystems/nvmf/nvmf_tgt.c
Yuri 38a30da5bd nvmf: accepts new parameter to set poll groups mask
Parses and verifies hexadecimal cpu bit mask specified by the user.
Added verification of the cpu core range, making sure the poll group cores are
assigned within the range of cpu cores allocated for the application.

RPC nvmf_set_config now takes an argument to configure ‘poll groups’,
a new parameter for NVMf subsystem. This parameter sets a CPU mask
to spawn threads which run an event loop for a ‘poll group’.

Change-Id: Ied9081c2213715ec94de00a8b37153730b8ac2ed
Signed-off-by: Yuri <yuriy.kirichok@hpe.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5611
Community-CI: Mellanox Build Bot
Reviewed-by: Matt Dumm <matt.dumm@hpe.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2021-07-28 08:06:00 +00:00

511 lines
14 KiB
C

/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "event_nvmf.h"
#include "spdk/bdev.h"
#include "spdk/thread.h"
#include "spdk/log.h"
#include "spdk/nvme.h"
#include "spdk/nvmf_cmd.h"
#include "spdk_internal/usdt.h"
/*
 * States of the NVMe-oF target state machine driven by nvmf_tgt_advance_state().
 * The numeric order matters: nvmf_shutdown_cb() treats every state that
 * compares below NVMF_TGT_RUNNING as "still initializing".
 */
enum nvmf_tgt_state {
/* Starting state assigned by nvmf_subsystem_init(). */
NVMF_TGT_INIT_NONE = 0,
/* Create the target via nvmf_tgt_create_target(). */
NVMF_TGT_INIT_CREATE_TARGET,
/* Create the poll group threads and a poll group on each of them. */
NVMF_TGT_INIT_CREATE_POLL_GROUPS,
/* Start the subsystems one after another. */
NVMF_TGT_INIT_START_SUBSYSTEMS,
/* Initialization finished; spdk_subsystem_init_next(0) is called. */
NVMF_TGT_RUNNING,
/* Stop the subsystems one after another. */
NVMF_TGT_FINI_STOP_SUBSYSTEMS,
/* Ask every poll group thread to destroy its group. */
NVMF_TGT_FINI_DESTROY_POLL_GROUPS,
/* Destroy the target itself. */
NVMF_TGT_FINI_FREE_RESOURCES,
/* Teardown finished; spdk_subsystem_fini_next() is called. */
NVMF_TGT_STOPPED,
/* Initialization failed; reported through spdk_subsystem_init_next() with a non-zero code. */
NVMF_TGT_ERROR,
};
/* Bookkeeping entry pairing one poll group with the SPDK thread it was created on. */
struct nvmf_tgt_poll_group {
/* Poll group returned by spdk_nvmf_poll_group_create(). */
struct spdk_nvmf_poll_group *group;
/* Thread that created the group; the destroy request is sent back to it. */
struct spdk_thread *thread;
/* Linkage in the g_poll_groups list. */
TAILQ_ENTRY(nvmf_tgt_poll_group) link;
};
/*
 * Target-wide configuration with its defaults.
 * acceptor_poll_rate is copied into the target options by nvmf_tgt_create_target();
 * admin_passthru.identify_ctrlr decides whether the custom Identify Controller
 * handler is installed during NVMF_TGT_INIT_CREATE_POLL_GROUPS.
 * NOTE(review): not static, so presumably updated from another file (e.g. an RPC
 * handler) before initialization -- confirm.
 */
struct spdk_nvmf_tgt_conf g_spdk_nvmf_tgt_conf = {
.acceptor_poll_rate = ACCEPT_TIMEOUT_US,
.admin_passthru.identify_ctrlr = false
};
/* Optional CPU mask selecting the cores that get a poll group; NULL means every application core. */
struct spdk_cpuset *g_poll_groups_mask = NULL;
/* The target instance created by nvmf_tgt_create_target(). */
struct spdk_nvmf_tgt *g_spdk_nvmf_tgt = NULL;
/* Copied into spdk_nvmf_target_opts.max_subsystems when the target is created. */
uint32_t g_spdk_nvmf_tgt_max_subsystems = 0;
/* Copied element by element into spdk_nvmf_target_opts.crdt when the target is created. */
uint16_t g_spdk_nvmf_tgt_crdt[3] = {0, 0, 0};
/* Current state of the state machine in nvmf_tgt_advance_state(). */
static enum nvmf_tgt_state g_tgt_state;
/* Thread that started poll group creation; creation completions are sent to it. */
static struct spdk_thread *g_tgt_init_thread = NULL;
/* Thread that started poll group destruction; destruction completions are sent to it. */
static struct spdk_thread *g_tgt_fini_thread = NULL;
/* Poll groups that finished creation, in completion order. */
static TAILQ_HEAD(, nvmf_tgt_poll_group) g_poll_groups = TAILQ_HEAD_INITIALIZER(g_poll_groups);
/* Number of live poll groups: counts up during creation and down during destruction. */
static size_t g_num_poll_groups = 0;
/* Forward declaration: the callbacks below re-enter the state machine. */
static void nvmf_tgt_advance_state(void);
/*
 * Begin shutting the target down.
 *
 * While the target is still initializing the request is re-queued on the
 * current thread. From NVMF_TGT_RUNNING the full teardown starts with stopping
 * the subsystems; from NVMF_TGT_ERROR teardown jumps straight to freeing
 * resources. Any other state means shutdown is already under way and the
 * request is ignored.
 *
 * arg1 is unused.
 */
static void
nvmf_shutdown_cb(void *arg1)
{
	if (g_tgt_state < NVMF_TGT_RUNNING) {
		/* Initialization has not completed yet: try again later. */
		spdk_thread_send_msg(spdk_get_thread(), nvmf_shutdown_cb, NULL);
		return;
	}

	switch (g_tgt_state) {
	case NVMF_TGT_RUNNING:
		g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
		break;
	case NVMF_TGT_ERROR:
		/* Initialization failed (e.g. configuration error). */
		g_tgt_state = NVMF_TGT_FINI_FREE_RESOURCES;
		break;
	default:
		/* A shutdown is already in progress. */
		return;
	}

	nvmf_tgt_advance_state();
}
/* Subsystem "fini" hook (registered in g_spdk_subsystem_nvmf): starts shutdown through nvmf_shutdown_cb(). */
static void
nvmf_subsystem_fini(void)
{
nvmf_shutdown_cb(NULL);
}
/*
 * Account for one destroyed poll group. It is sent as a message to
 * g_tgt_fini_thread; once the last group is gone the state machine moves on
 * to freeing the target resources.
 *
 * ctx is unused.
 */
static void
_nvmf_tgt_destroy_poll_group_done(void *ctx)
{
	assert(g_num_poll_groups > 0);

	g_num_poll_groups--;
	if (g_num_poll_groups != 0) {
		/* Other poll groups are still being torn down. */
		return;
	}

	g_tgt_state = NVMF_TGT_FINI_FREE_RESOURCES;
	nvmf_tgt_advance_state();
}
/*
 * Completion callback handed to spdk_nvmf_poll_group_destroy().
 *
 * Releases the bookkeeping entry (cb_arg), notifies the fini thread and then
 * asks the current thread to exit. status is ignored.
 */
static void
nvmf_tgt_destroy_poll_group_done(void *cb_arg, int status)
{
	struct spdk_thread *current = spdk_get_thread();

	free(cb_arg);
	spdk_thread_send_msg(g_tgt_fini_thread, _nvmf_tgt_destroy_poll_group_done, NULL);
	spdk_thread_exit(current);
}
/*
 * Message handler sent to a poll group's own thread: destroys the group held
 * by the bookkeeping entry in ctx. The entry itself is released in the
 * completion callback.
 */
static void
nvmf_tgt_destroy_poll_group(void *ctx)
{
	struct nvmf_tgt_poll_group *entry = ctx;
	struct spdk_nvmf_poll_group *group = entry->group;

	spdk_nvmf_poll_group_destroy(group, nvmf_tgt_destroy_poll_group_done, entry);
}
static void
nvmf_tgt_destroy_poll_groups(void)
{
struct nvmf_tgt_poll_group *pg, *tpg;
g_tgt_fini_thread = spdk_get_thread();
assert(g_tgt_fini_thread != NULL);
TAILQ_FOREACH_SAFE(pg, &g_poll_groups, link, tpg) {
TAILQ_REMOVE(&g_poll_groups, pg, link);
spdk_thread_send_msg(pg->thread, nvmf_tgt_destroy_poll_group, pg);
}
}
/*
 * Number of poll groups to expect: the number of CPUs set in
 * g_poll_groups_mask when a mask is configured, otherwise the core count
 * reported by spdk_env_get_core_count().
 */
static uint32_t
nvmf_get_cpuset_count(void)
{
	if (g_poll_groups_mask == NULL) {
		return spdk_env_get_core_count();
	}

	return spdk_cpuset_count(g_poll_groups_mask);
}
/*
 * Message handler sent to g_tgt_init_thread: records a newly created poll
 * group (ctx) and, once the expected number of groups exists, advances the
 * state machine to starting the subsystems.
 */
static void
nvmf_tgt_create_poll_group_done(void *ctx)
{
	struct nvmf_tgt_poll_group *entry = ctx;

	TAILQ_INSERT_TAIL(&g_poll_groups, entry, link);

	assert(g_num_poll_groups < nvmf_get_cpuset_count());

	g_num_poll_groups++;
	if (g_num_poll_groups != nvmf_get_cpuset_count()) {
		/* Still waiting for the remaining poll groups. */
		return;
	}

	g_tgt_state = NVMF_TGT_INIT_START_SUBSYSTEMS;
	nvmf_tgt_advance_state();
}
/*
 * Message handler sent to g_tgt_init_thread when a poll group could not be
 * created. Only the first failure is reported, so the error state is entered
 * (and the failure propagated) exactly once.
 *
 * ctx is unused.
 */
static void
nvmf_tgt_create_poll_group_failed(void *ctx)
{
	if (g_tgt_state == NVMF_TGT_ERROR) {
		return;
	}

	g_tgt_state = NVMF_TGT_ERROR;
	nvmf_tgt_advance_state();
}

/*
 * Message handler run on a freshly created poll group thread: allocates the
 * bookkeeping entry, creates the poll group on this thread and reports the
 * result to g_tgt_init_thread.
 *
 * Failures (out of memory, or spdk_nvmf_poll_group_create() returning NULL)
 * are forwarded to the init thread instead of touching g_tgt_state here, so
 * the state machine is only ever driven from one thread and a NULL group is
 * never queued for later destruction.
 *
 * ctx is unused.
 */
static void
nvmf_tgt_create_poll_group(void *ctx)
{
	struct nvmf_tgt_poll_group *pg;

	pg = calloc(1, sizeof(*pg));
	if (!pg) {
		SPDK_ERRLOG("Not enough memory to allocate poll groups\n");
		spdk_thread_send_msg(g_tgt_init_thread, nvmf_tgt_create_poll_group_failed, NULL);
		return;
	}

	pg->thread = spdk_get_thread();
	pg->group = spdk_nvmf_poll_group_create(g_spdk_nvmf_tgt);
	if (!pg->group) {
		SPDK_ERRLOG("Failed to create nvmf poll group\n");
		free(pg);
		spdk_thread_send_msg(g_tgt_init_thread, nvmf_tgt_create_poll_group_failed, NULL);
		return;
	}

	spdk_thread_send_msg(g_tgt_init_thread, nvmf_tgt_create_poll_group_done, pg);
}
/*
 * Start poll group creation.
 *
 * Remembers the calling thread as the init thread, then for every application
 * core selected by g_poll_groups_mask (or every core when no mask is set)
 * creates a thread named "nvmf_tgt_poll_group_<core>" and sends it a message
 * to create its poll group.
 */
static void
nvmf_tgt_create_poll_groups(void)
{
	struct spdk_thread *pg_thread;
	char name[32];
	uint32_t core;

	g_tgt_init_thread = spdk_get_thread();
	assert(g_tgt_init_thread != NULL);

	SPDK_ENV_FOREACH_CORE(core) {
		if (g_poll_groups_mask == NULL || spdk_cpuset_get_cpu(g_poll_groups_mask, core)) {
			snprintf(name, sizeof(name), "nvmf_tgt_poll_group_%u", core);

			pg_thread = spdk_thread_create(name, g_poll_groups_mask);
			assert(pg_thread != NULL);

			spdk_thread_send_msg(pg_thread, nvmf_tgt_create_poll_group, NULL);
		}
	}
}
/*
 * Completion callback for spdk_nvmf_subsystem_start().
 *
 * Starts the subsystem following the one that just completed. When there is
 * none left the target enters NVMF_TGT_RUNNING. If the next start request is
 * rejected, teardown begins with NVMF_TGT_FINI_STOP_SUBSYSTEMS.
 *
 * cb_arg and status are ignored.
 */
static void
nvmf_tgt_subsystem_started(struct spdk_nvmf_subsystem *subsystem,
			   void *cb_arg, int status)
{
	struct spdk_nvmf_subsystem *next = spdk_nvmf_subsystem_get_next(subsystem);

	if (next == NULL) {
		/* Every subsystem has been started. */
		g_tgt_state = NVMF_TGT_RUNNING;
		nvmf_tgt_advance_state();
		return;
	}

	if (spdk_nvmf_subsystem_start(next, nvmf_tgt_subsystem_started, NULL) != 0) {
		g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
		SPDK_ERRLOG("Unable to start NVMe-oF subsystem. Stopping app.\n");
		nvmf_tgt_advance_state();
	}
}
/*
 * Completion callback for spdk_nvmf_subsystem_stop().
 *
 * Walks forward from the subsystem that just completed and requests a stop of
 * the next one. A subsystem whose stop request is rejected is logged and
 * skipped. When no subsystem is left, the state machine moves on to
 * destroying the poll groups.
 *
 * cb_arg and status are ignored.
 */
static void
nvmf_tgt_subsystem_stopped(struct spdk_nvmf_subsystem *subsystem,
			   void *cb_arg, int status)
{
	struct spdk_nvmf_subsystem *next = spdk_nvmf_subsystem_get_next(subsystem);

	while (next != NULL) {
		if (spdk_nvmf_subsystem_stop(next, nvmf_tgt_subsystem_stopped, NULL) == 0) {
			/* Stop is in flight; this callback runs again on completion. */
			return;
		}

		SPDK_ERRLOG("Unable to stop NVMe-oF subsystem. Trying others.\n");
		next = spdk_nvmf_subsystem_get_next(next);
	}

	g_tgt_state = NVMF_TGT_FINI_DESTROY_POLL_GROUPS;
	nvmf_tgt_advance_state();
}
/*
 * Completion callback passed to spdk_nvmf_tgt_destroy(): marks the target as
 * stopped and lets the state machine finish. ctx and status are ignored.
 */
static void
nvmf_tgt_destroy_done(void *ctx, int status)
{
g_tgt_state = NVMF_TGT_STOPPED;
nvmf_tgt_advance_state();
}
/*
 * Create the discovery subsystem on g_spdk_nvmf_tgt and allow any host to
 * connect to it.
 *
 * Returns 0 on success, -1 if the subsystem could not be created.
 */
static int
nvmf_add_discovery_subsystem(void)
{
	struct spdk_nvmf_subsystem *discovery;

	discovery = spdk_nvmf_subsystem_create(g_spdk_nvmf_tgt, SPDK_NVMF_DISCOVERY_NQN,
					       SPDK_NVMF_SUBTYPE_DISCOVERY, 0);
	if (discovery != NULL) {
		spdk_nvmf_subsystem_set_allow_any_host(discovery, true);
		return 0;
	}

	SPDK_ERRLOG("Failed creating discovery nvmf library subsystem\n");
	return -1;
}
static int
nvmf_tgt_create_target(void)
{
struct spdk_nvmf_target_opts opts = {
.name = "nvmf_tgt"
};
opts.max_subsystems = g_spdk_nvmf_tgt_max_subsystems;
opts.acceptor_poll_rate = g_spdk_nvmf_tgt_conf.acceptor_poll_rate;
opts.crdt[0] = g_spdk_nvmf_tgt_crdt[0];
opts.crdt[1] = g_spdk_nvmf_tgt_crdt[1];
opts.crdt[2] = g_spdk_nvmf_tgt_crdt[2];
g_spdk_nvmf_tgt = spdk_nvmf_tgt_create(&opts);
if (!g_spdk_nvmf_tgt) {
SPDK_ERRLOG("spdk_nvmf_tgt_create() failed\n");
return -1;
}
if (nvmf_add_discovery_subsystem() != 0) {
SPDK_ERRLOG("nvmf_add_discovery_subsystem failed\n");
return -1;
}
return 0;
}
/*
 * Post-process an Identify Controller response that was passed through to the
 * NVMe drive.
 *
 * The NVMe-oF controller's own identify data is generated, the drive's serial
 * number, model number, firmware revision and IEEE OUI are copied over it, and
 * the merged result is written back into the request's data buffer. If the
 * NVMe-oF identify data cannot be generated, the response status is set to a
 * generic internal device error and the buffer is left untouched.
 */
static void
fixup_identify_ctrlr(struct spdk_nvmf_request *req)
{
	uint32_t length;
	int rc;
	struct spdk_nvme_ctrlr_data *nvme_cdata;
	struct spdk_nvme_ctrlr_data nvmf_cdata = {};
	struct spdk_nvmf_ctrlr *ctrlr = spdk_nvmf_request_get_ctrlr(req);
	struct spdk_nvme_cpl *rsp = spdk_nvmf_request_get_response(req);

	/* This is the identify data from the NVMe drive */
	/* NOTE(review): assumes the buffer holds at least the leading SN/MN/FR/IEEE fields -- confirm. */
	spdk_nvmf_request_get_data(req, (void **)&nvme_cdata, &length);

	/* Get the NVMF identify data */
	rc = spdk_nvmf_ctrlr_identify_ctrlr(ctrlr, &nvmf_cdata);
	if (rc != SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
		rsp->status.sct = SPDK_NVME_SCT_GENERIC;
		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		return;
	}

	/* Fixup NVMF identify data with NVMe identify data */

	/* Serial Number (SN) */
	memcpy(&nvmf_cdata.sn[0], &nvme_cdata->sn[0], sizeof(nvmf_cdata.sn));
	/* Model Number (MN) */
	memcpy(&nvmf_cdata.mn[0], &nvme_cdata->mn[0], sizeof(nvmf_cdata.mn));
	/* Firmware Revision (FR) */
	memcpy(&nvmf_cdata.fr[0], &nvme_cdata->fr[0], sizeof(nvmf_cdata.fr));
	/* IEEE OUI Identifier (IEEE) */
	memcpy(&nvmf_cdata.ieee[0], &nvme_cdata->ieee[0], sizeof(nvmf_cdata.ieee));
	/*
	 * NOTE(review): FRU Globally Unique Identifier (FGUID) is mentioned here
	 * but never copied -- confirm whether that is intentional.
	 */

	/*
	 * Copy the fixed up data back to the response. The request buffer may be
	 * larger than the identify structure; never read past the end of the
	 * local nvmf_cdata object.
	 */
	if (length > sizeof(nvmf_cdata)) {
		length = sizeof(nvmf_cdata);
	}
	memcpy(nvme_cdata, &nvmf_cdata, length);
}
/*
 * Custom handler for the Identify admin command.
 *
 * Only Identify Controller requests on a subsystem whose maximum NSID is 1 are
 * handled, and only when the bdev behind namespace 1 supports NVMe admin
 * passthrough. Such requests are forwarded to the bdev with
 * fixup_identify_ctrlr() as the completion fix-up.
 *
 * Returns -1 whenever the request is not taken over (the original code
 * annotates this as "continue"); otherwise returns the result of
 * spdk_nvmf_bdev_ctrlr_nvme_passthru_admin().
 */
static int
nvmf_custom_identify_hdlr(struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = spdk_nvmf_request_get_cmd(req);
	struct spdk_nvmf_subsystem *subsys;
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc;
	struct spdk_io_channel *ch;

	/* Anything other than Identify Controller is not ours. */
	if (cmd->cdw10_bits.identify.cns != SPDK_NVME_IDENTIFY_CTRLR) {
		return -1;
	}

	/* Only process this request if the subsystem has exactly one namespace. */
	subsys = spdk_nvmf_request_get_subsystem(req);
	if (subsys == NULL || spdk_nvmf_subsystem_get_max_nsid(subsys) != 1) {
		return -1;
	}

	/* Namespace 1 must be backed by a bdev ... */
	if (spdk_nvmf_request_get_bdev(1, req, &bdev, &desc, &ch) != 0) {
		return -1;
	}

	/* ... and that bdev must accept NVMe admin commands. */
	if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_ADMIN)) {
		return -1;
	}

	return spdk_nvmf_bdev_ctrlr_nvme_passthru_admin(bdev, desc, ch, req, fixup_identify_ctrlr);
}
static void
nvmf_tgt_advance_state(void)
{
enum nvmf_tgt_state prev_state;
int rc = -1;
int ret;
do {
SPDK_DTRACE_PROBE1(nvmf_tgt_state, g_tgt_state);
prev_state = g_tgt_state;
switch (g_tgt_state) {
case NVMF_TGT_INIT_NONE: {
g_tgt_state = NVMF_TGT_INIT_CREATE_TARGET;
break;
}
case NVMF_TGT_INIT_CREATE_TARGET:
ret = nvmf_tgt_create_target();
g_tgt_state = (ret == 0) ? NVMF_TGT_INIT_CREATE_POLL_GROUPS : NVMF_TGT_ERROR;
break;
case NVMF_TGT_INIT_CREATE_POLL_GROUPS:
if (g_spdk_nvmf_tgt_conf.admin_passthru.identify_ctrlr) {
SPDK_NOTICELOG("Custom identify ctrlr handler enabled\n");
spdk_nvmf_set_custom_admin_cmd_hdlr(SPDK_NVME_OPC_IDENTIFY, nvmf_custom_identify_hdlr);
}
/* Create poll group threads, and send a message to each thread
* and create a poll group.
*/
nvmf_tgt_create_poll_groups();
break;
case NVMF_TGT_INIT_START_SUBSYSTEMS: {
struct spdk_nvmf_subsystem *subsystem;
subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt);
if (subsystem) {
ret = spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL);
if (ret) {
SPDK_ERRLOG("Unable to start NVMe-oF subsystem. Stopping app.\n");
g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
}
} else {
g_tgt_state = NVMF_TGT_RUNNING;
}
break;
}
case NVMF_TGT_RUNNING:
spdk_subsystem_init_next(0);
break;
case NVMF_TGT_FINI_STOP_SUBSYSTEMS: {
struct spdk_nvmf_subsystem *subsystem;
subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt);
if (subsystem) {
ret = spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL);
if (ret) {
nvmf_tgt_subsystem_stopped(subsystem, NULL, 0);
}
} else {
g_tgt_state = NVMF_TGT_FINI_DESTROY_POLL_GROUPS;
}
break;
}
case NVMF_TGT_FINI_DESTROY_POLL_GROUPS:
/* Send a message to each poll group thread, and terminate the thread */
nvmf_tgt_destroy_poll_groups();
break;
case NVMF_TGT_FINI_FREE_RESOURCES:
spdk_nvmf_tgt_destroy(g_spdk_nvmf_tgt, nvmf_tgt_destroy_done, NULL);
break;
case NVMF_TGT_STOPPED:
spdk_subsystem_fini_next();
return;
case NVMF_TGT_ERROR:
spdk_subsystem_init_next(rc);
return;
}
} while (g_tgt_state != prev_state);
}
/* Subsystem "init" hook (registered in g_spdk_subsystem_nvmf): resets the state machine and starts it. */
static void
nvmf_subsystem_init(void)
{
g_tgt_state = NVMF_TGT_INIT_NONE;
nvmf_tgt_advance_state();
}
static void
nvmf_subsystem_write_config_json(struct spdk_json_write_ctx *w)
{
spdk_json_write_array_begin(w);
spdk_json_write_object_begin(w);
spdk_json_write_named_string(w, "method", "nvmf_set_config");
spdk_json_write_named_object_begin(w, "params");
spdk_json_write_named_uint32(w, "acceptor_poll_rate", g_spdk_nvmf_tgt_conf.acceptor_poll_rate);
spdk_json_write_named_object_begin(w, "admin_cmd_passthru");
spdk_json_write_named_bool(w, "identify_ctrlr",
g_spdk_nvmf_tgt_conf.admin_passthru.identify_ctrlr);
spdk_json_write_object_end(w);
if (g_poll_groups_mask) {
spdk_json_write_named_string(w, "poll_groups_mask", spdk_cpuset_fmt(g_poll_groups_mask));
}
spdk_json_write_object_end(w);
spdk_json_write_object_end(w);
spdk_nvmf_tgt_write_config_json(w, g_spdk_nvmf_tgt);
spdk_json_write_array_end(w);
}
/* Descriptor of the "nvmf" subsystem: wires the init, fini and config-dump hooks defined in this file. */
static struct spdk_subsystem g_spdk_subsystem_nvmf = {
.name = "nvmf",
.init = nvmf_subsystem_init,
.fini = nvmf_subsystem_fini,
.write_config_json = nvmf_subsystem_write_config_json,
};
/* Register the subsystem and declare that it depends on the "bdev" and "sock" subsystems. */
SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_nvmf)
SPDK_SUBSYSTEM_DEPEND(nvmf, bdev)
SPDK_SUBSYSTEM_DEPEND(nvmf, sock)