From 77ab6f2839fdd4cdb5969aec456895931d02cd91 Mon Sep 17 00:00:00 2001
From: Ben Walker
Date: Mon, 29 Jun 2020 10:56:47 -0700
Subject: [PATCH] nvmf: Eliminate spdk_nvmf_tgt_accept()

The poller is now created internally to the library whenever a target
is constructed. Applications are not expected to poll for connections
any longer.

Change-Id: I523eb6adcc042c1ba2ed41b1cb41256b8bf63772
Signed-off-by: Ben Walker
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3583
Tested-by: SPDK CI Jenkins
Reviewed-by: Shuhei Matsumoto
Reviewed-by: Aleksey Marchuk
Reviewed-by: Changpeng Liu
---
 CHANGELOG.md                            |  8 ++---
 doc/nvmf_tgt_pg.md                      | 24 +++----------
 examples/nvmf/nvmf/nvmf.c               | 30 +++-------------
 include/spdk/nvmf.h                     |  8 +----
 lib/nvmf/nvmf.c                         | 46 +++++++++++++++++--------
 lib/nvmf/nvmf_internal.h                |  2 ++
 lib/nvmf/spdk_nvmf.map                  |  1 -
 module/event/subsystems/nvmf/conf.c     |  1 +
 module/event/subsystems/nvmf/nvmf_tgt.c | 34 ++----------------
 9 files changed, 51 insertions(+), 103 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 247e9582d..75573e295 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -117,10 +117,10 @@ implementations of the transport interface will have to implement that function
 The NVMe-oF target no longer supports connecting scheduling configuration and instead
 always uses what was previously called "transport" scheduling.
 
-`spdk_nvmf_tgt_accept` no longer takes a function pointer as an argument. New connections
-are automatically assigned to poll groups by the underlying transport. Further,
-`spdk_nvmf_transport_ops` has changed such that the accept function pointer no longer
-takes a function pointer as an argument. Instead, transports should call
+`spdk_nvmf_tgt_accept` no longer exists. The accept process now occurs automatically after
+the creation of an nvmf target and queue pairs are assigned to poll groups by the underlying
+transport. Further, `spdk_nvmf_transport_ops` has changed such that the accept function
+pointer no longer takes a function pointer as an argument. Instead, transports should call
 `spdk_nvmf_tgt_new_qpair` whenever they previously would have called that callback.
 
 The NVMe-oF target now supports aborting any submitted NVM or Admin command. Previously,
diff --git a/doc/nvmf_tgt_pg.md b/doc/nvmf_tgt_pg.md
index 2287b7e31..939817b72 100644
--- a/doc/nvmf_tgt_pg.md
+++ b/doc/nvmf_tgt_pg.md
@@ -83,7 +83,8 @@ Namespaces are bdevs. See @ref bdev for more information about the SPDK bdev
 layer. A bdev may be obtained by calling spdk_bdev_get_by_name().
 
 Once a subsystem exists and the target is listening on an address, new
-connections may be accepted by polling spdk_nvmf_tgt_accept().
+connections will be automatically assigned to poll groups as they are
+detected.
 
 All I/O to a subsystem is driven by a poll group, which polls for incoming
 network I/O. Poll groups may be created by calling
@@ -91,13 +92,6 @@ spdk_nvmf_poll_group_create(). They automatically request to begin polling
 upon creation on the thread from which they were created. Most importantly, *a
 poll group may only be accessed from the thread on which it was created.*
 
-When spdk_nvmf_tgt_accept() detects a new connection, it chooses an optimal
-poll group by first calling spdk_nvmf_get_optimal_poll_group(), which calls down
-into the transport, and then assigns the qpair to the optimal poll group by
-calling spdk_nvmf_poll_group_add(). This all happens within the NVMe-oF target
-library and the NVMe-oF target application is not required to do anything other
-than continue to periodically poll spdk_nvmf_tgt_accept().
-
 ## Access Control
 
 Access control is performed at the subsystem level by adding allowed listen
@@ -110,9 +104,7 @@ and hosts may only be added to inactive or paused subsystems.
 
 A discovery subsystem, as defined by the NVMe-oF specification, is
 automatically created for each NVMe-oF target constructed. Connections to the
-discovery subsystem are handled in the same way as any other subsystem - new
-qpairs are created in response to spdk_nvmf_tgt_accept() and they must be
-assigned to a poll group.
+discovery subsystem are handled in the same way as any other subsystem.
 
 ## Transports
 
@@ -131,15 +123,7 @@ fabrics simultaneously.
 
 The SPDK NVMe-oF target library does not strictly dictate threading model, but
 poll groups do all of their polling and I/O processing on the thread they are
 created on. Given that, it almost always makes sense to create one poll group
-per thread used in the application. New qpairs created in response to
-spdk_nvmf_tgt_accept() can be handed out round-robin to the poll groups. This
-is how the SPDK NVMe-oF target application currently functions.
-
-More advanced algorithms for distributing qpairs to poll groups are possible.
-For instance, a NUMA-aware algorithm would be an improvement over basic
-round-robin, where NUMA-aware means assigning qpairs to poll groups running on
-CPU cores that are on the same NUMA node as the network adapter and storage
-device. Load-aware algorithms also may have benefits.
+per thread used in the application.
 
 ## Scaling Across CPU Cores
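For readers of the updated guide, a minimal sketch of the new application flow,
assuming illustrative values (the target name and poll rate are examples, not
requirements; only the spdk_nvmf_* symbols come from this patch):

	/* Create the target; the library registers the accept poller itself. */
	struct spdk_nvmf_target_opts tgt_opts = {
		.name = "example_tgt",		/* illustrative name */
		.max_subsystems = 1024,
		.acceptor_poll_rate = 10000,	/* microseconds; 0 selects the library default */
	};
	struct spdk_nvmf_tgt *tgt = spdk_nvmf_tgt_create(&tgt_opts);

	/* One poll group per application thread; each may only be accessed
	 * from the thread that created it. */
	struct spdk_nvmf_poll_group *pg = spdk_nvmf_poll_group_create(tgt);

	/* There is no accept loop to drive anymore: new qpairs are assigned
	 * to poll groups by the library as the transports detect them. */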
diff --git a/examples/nvmf/nvmf/nvmf.c b/examples/nvmf/nvmf/nvmf.c
index 3b14e5a9c..6ee7b44f8 100644
--- a/examples/nvmf/nvmf/nvmf.c
+++ b/examples/nvmf/nvmf/nvmf.c
@@ -54,11 +54,9 @@ enum nvmf_target_state {
 	NVMF_INIT_TARGET,
 	NVMF_INIT_POLL_GROUPS,
 	NVMF_INIT_START_SUBSYSTEMS,
-	NVMF_INIT_START_ACCEPTOR,
 	NVMF_RUNNING,
 	NVMF_FINI_STOP_SUBSYSTEMS,
 	NVMF_FINI_POLL_GROUPS,
-	NVMF_FINI_STOP_ACCEPTOR,
 	NVMF_FINI_TARGET,
 	NVMF_FINI_SUBSYSTEM,
 };
@@ -99,7 +97,7 @@ static struct spdk_thread *g_fini_thread = NULL;
 static struct nvmf_target g_nvmf_tgt = {
 	.max_subsystems = NVMF_DEFAULT_SUBSYSTEMS,
 };
-static struct spdk_poller *g_acceptor_poller = NULL;
+
 static struct nvmf_target_poll_group *g_next_pg = NULL;
 static pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER;
 static bool g_reactors_exit = false;
@@ -528,16 +526,6 @@ nvmf_tgt_stop_subsystems(struct nvmf_target *nvmf_tgt)
 	}
 }
 
-static int
-nvmf_tgt_acceptor_poll(void *arg)
-{
-	struct nvmf_target *nvmf_tgt = arg;
-
-	spdk_nvmf_tgt_accept(nvmf_tgt->tgt);
-
-	return -1;
-}
-
 static void
 nvmf_tgt_subsystem_start_next(struct spdk_nvmf_subsystem *subsystem,
 			      void *cb_arg, int status)
@@ -558,7 +546,7 @@ nvmf_tgt_subsystem_start_next(struct spdk_nvmf_subsystem *subsystem,
 
 	fprintf(stdout, "all subsystems of target started\n");
 
-	g_target_state = NVMF_INIT_START_ACCEPTOR;
+	g_target_state = NVMF_RUNNING;
 	nvmf_target_advance_state();
 }
 
@@ -587,7 +575,7 @@ nvmf_tgt_start_subsystems(struct nvmf_target *nvmf_tgt)
 			g_target_state = NVMF_FINI_STOP_SUBSYSTEMS;
 		}
 	} else {
-		g_target_state = NVMF_INIT_START_ACCEPTOR;
+		g_target_state = NVMF_RUNNING;
 	}
 }
 
@@ -670,7 +658,7 @@ _nvmf_tgt_destroy_poll_groups_done(void *ctx)
 	if (--g_num_poll_groups == 0) {
 		fprintf(stdout, "destroy targets's poll groups done\n");
 
-		g_target_state = NVMF_FINI_STOP_ACCEPTOR;
+		g_target_state = NVMF_FINI_TARGET;
 		nvmf_target_advance_state();
 	}
 }
@@ -779,12 +767,6 @@ nvmf_target_advance_state(void)
 	case NVMF_INIT_START_SUBSYSTEMS:
 		nvmf_tgt_start_subsystems(&g_nvmf_tgt);
 		break;
-	case NVMF_INIT_START_ACCEPTOR:
-		g_acceptor_poller = SPDK_POLLER_REGISTER(nvmf_tgt_acceptor_poll, &g_nvmf_tgt,
-				    g_acceptor_poll_rate);
-		fprintf(stdout, "Acceptor running\n");
-		g_target_state = NVMF_RUNNING;
-		break;
 	case NVMF_RUNNING:
 		fprintf(stdout, "nvmf target is running\n");
 		if (g_migrate_pg_period_us != 0) {
@@ -799,10 +781,6 @@ nvmf_target_advance_state(void)
 	case NVMF_FINI_POLL_GROUPS:
 		nvmf_poll_groups_destroy();
 		break;
-	case NVMF_FINI_STOP_ACCEPTOR:
-		spdk_poller_unregister(&g_acceptor_poller);
-		g_target_state = NVMF_FINI_TARGET;
-		break;
 	case NVMF_FINI_TARGET:
 		nvmf_destroy_nvmf_tgt();
 		break;
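Net effect on the example application's lifecycle: the two acceptor states
disappear and the remaining states chain directly, per the enum and the
transitions in the hunks above:

	NVMF_INIT_TARGET -> NVMF_INIT_POLL_GROUPS ->
	    NVMF_INIT_START_SUBSYSTEMS -> NVMF_RUNNING

	NVMF_FINI_STOP_SUBSYSTEMS -> NVMF_FINI_POLL_GROUPS ->
	    NVMF_FINI_TARGET -> NVMF_FINI_SUBSYSTEM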
diff --git a/include/spdk/nvmf.h b/include/spdk/nvmf.h
index 99be1fcfe..980dd08fe 100644
--- a/include/spdk/nvmf.h
+++ b/include/spdk/nvmf.h
@@ -68,6 +68,7 @@ struct spdk_nvmf_transport;
 struct spdk_nvmf_target_opts {
 	char name[NVMF_TGT_NAME_MAX_LENGTH];
 	uint32_t max_subsystems;
+	uint32_t acceptor_poll_rate;
 };
 
 struct spdk_nvmf_transport_opts {
@@ -228,13 +229,6 @@ int spdk_nvmf_tgt_listen(struct spdk_nvmf_tgt *tgt,
 int spdk_nvmf_tgt_stop_listen(struct spdk_nvmf_tgt *tgt,
 			      struct spdk_nvme_transport_id *trid);
 
-/**
- * Poll the target for incoming connections.
- *
- * \param tgt The target associated with the listen address.
- */
-uint32_t spdk_nvmf_tgt_accept(struct spdk_nvmf_tgt *tgt);
-
 /**
  * Create a poll group.
  *
diff --git a/lib/nvmf/nvmf.c b/lib/nvmf/nvmf.c
index ccd3c6943..dc8826a01 100644
--- a/lib/nvmf/nvmf.c
+++ b/lib/nvmf/nvmf.c
@@ -50,6 +50,7 @@ SPDK_LOG_REGISTER_COMPONENT("nvmf", SPDK_LOG_NVMF)
 
 #define SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS 1024
+#define SPDK_NVMF_DEFAULT_ACCEPT_POLL_RATE_US 10000
 
 static TAILQ_HEAD(, spdk_nvmf_tgt) g_nvmf_tgts = TAILQ_HEAD_INITIALIZER(g_nvmf_tgts);
 
@@ -231,10 +232,25 @@ nvmf_tgt_destroy_poll_group_qpairs(struct spdk_nvmf_poll_group *group)
 	_nvmf_tgt_disconnect_next_qpair(ctx);
 }
 
+static int
+nvmf_tgt_accept(void *ctx)
+{
+	struct spdk_nvmf_tgt *tgt = ctx;
+	struct spdk_nvmf_transport *transport, *tmp;
+	int count = 0;
+
+	TAILQ_FOREACH_SAFE(transport, &tgt->transports, link, tmp) {
+		count += nvmf_transport_accept(transport);
+	}
+
+	return count;
+}
+
 struct spdk_nvmf_tgt *
 spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
 {
 	struct spdk_nvmf_tgt *tgt, *tmp_tgt;
+	uint32_t acceptor_poll_rate;
 
 	if (strnlen(opts->name, NVMF_TGT_NAME_MAX_LENGTH) == NVMF_TGT_NAME_MAX_LENGTH) {
 		SPDK_ERRLOG("Provided target name exceeds the max length of %u.\n", NVMF_TGT_NAME_MAX_LENGTH);
@@ -261,6 +277,12 @@ spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
 		tgt->max_subsystems = opts->max_subsystems;
 	}
 
+	if (!opts || !opts->acceptor_poll_rate) {
+		acceptor_poll_rate = SPDK_NVMF_DEFAULT_ACCEPT_POLL_RATE_US;
+	} else {
+		acceptor_poll_rate = opts->acceptor_poll_rate;
+	}
+
 	tgt->discovery_genctr = 0;
 	TAILQ_INIT(&tgt->transports);
 	TAILQ_INIT(&tgt->poll_groups);
@@ -273,7 +295,12 @@ spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
 
 	pthread_mutex_init(&tgt->mutex, NULL);
 
-	TAILQ_INSERT_HEAD(&g_nvmf_tgts, tgt, link);
+	tgt->accept_poller = SPDK_POLLER_REGISTER(nvmf_tgt_accept, tgt, acceptor_poll_rate);
+	if (!tgt->accept_poller) {
+		free(tgt->subsystems);
+		free(tgt);
+		return NULL;
+	}
 
 	spdk_io_device_register(tgt,
 				nvmf_tgt_create_poll_group,
@@ -281,6 +308,8 @@ spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
 				sizeof(struct spdk_nvmf_poll_group),
 				tgt->name);
 
+	TAILQ_INSERT_HEAD(&g_nvmf_tgts, tgt, link);
+
 	return tgt;
 }
 
@@ -326,6 +355,8 @@ spdk_nvmf_tgt_destroy(struct spdk_nvmf_tgt *tgt,
 	tgt->destroy_cb_fn = cb_fn;
 	tgt->destroy_cb_arg = cb_arg;
 
+	spdk_poller_unregister(&tgt->accept_poller);
+
 	TAILQ_REMOVE(&g_nvmf_tgts, tgt, link);
 
 	spdk_io_device_unregister(tgt, nvmf_tgt_destroy_cb);
@@ -774,19 +805,6 @@ spdk_nvmf_tgt_new_qpair(struct spdk_nvmf_tgt *tgt, struct spdk_nvmf_qpair *qpair
 	spdk_thread_send_msg(group->thread, _nvmf_poll_group_add, ctx);
 }
 
-uint32_t
-spdk_nvmf_tgt_accept(struct spdk_nvmf_tgt *tgt)
-{
-	struct spdk_nvmf_transport *transport, *tmp;
-	uint32_t count = 0;
-
-	TAILQ_FOREACH_SAFE(transport, &tgt->transports, link, tmp) {
-		count += nvmf_transport_accept(transport);
-	}
-
-	return count;
-}
-
 struct spdk_nvmf_poll_group *
 spdk_nvmf_poll_group_create(struct spdk_nvmf_tgt *tgt)
 {
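Two points worth noting in the hunks above. First, the accept poller is
registered in spdk_nvmf_tgt_create() before the io_device is registered, and
unregistered first in spdk_nvmf_tgt_destroy(), so no accept can race with
target teardown. Second, a zero acceptor_poll_rate falls back to the 10ms
default, so existing callers that zero-initialize their opts keep working
unchanged; a caller that wants a faster cadence only sets the new field. A
hedged sketch (the target name and rate are illustrative):

	struct spdk_nvmf_target_opts opts = {
		.name = "tgt0",		/* illustrative */
	};

	/* Poll for new connections every 1ms instead of the
	 * SPDK_NVMF_DEFAULT_ACCEPT_POLL_RATE_US (10000us) default. */
	opts.acceptor_poll_rate = 1000;

	struct spdk_nvmf_tgt *tgt = spdk_nvmf_tgt_create(&opts);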
diff --git a/lib/nvmf/nvmf_internal.h b/lib/nvmf/nvmf_internal.h
index ac48a34a5..179ae6761 100644
--- a/lib/nvmf/nvmf_internal.h
+++ b/lib/nvmf/nvmf_internal.h
@@ -67,6 +67,8 @@ struct spdk_nvmf_tgt {
 
 	uint64_t discovery_genctr;
 
+	struct spdk_poller *accept_poller;
+
 	uint32_t max_subsystems;
 
 	/* Array of subsystem pointers of size max_subsystems indexed by sid */
diff --git a/lib/nvmf/spdk_nvmf.map b/lib/nvmf/spdk_nvmf.map
index c6b9e601f..b6fc9ca39 100644
--- a/lib/nvmf/spdk_nvmf.map
+++ b/lib/nvmf/spdk_nvmf.map
@@ -11,7 +11,6 @@
 	spdk_nvmf_tgt_write_config_json;
 	spdk_nvmf_tgt_listen;
 	spdk_nvmf_tgt_stop_listen;
-	spdk_nvmf_tgt_accept;
 	spdk_nvmf_poll_group_create;
 	spdk_nvmf_get_optimal_poll_group;
 	spdk_nvmf_poll_group_destroy;
diff --git a/module/event/subsystems/nvmf/conf.c b/module/event/subsystems/nvmf/conf.c
index b92a92acc..1324f3699 100644
--- a/module/event/subsystems/nvmf/conf.c
+++ b/module/event/subsystems/nvmf/conf.c
@@ -199,6 +199,7 @@ nvmf_parse_nvmf_tgt(void)
 	}
 
 	opts.max_subsystems = g_spdk_nvmf_tgt_max_subsystems;
+	opts.acceptor_poll_rate = g_spdk_nvmf_tgt_conf->acceptor_poll_rate;
 	g_spdk_nvmf_tgt = spdk_nvmf_tgt_create(&opts);
 
 	g_spdk_nvmf_tgt_max_subsystems = 0;
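The acceptor_poll_rate wired through here comes from the event subsystem's
existing configuration. Assuming the usual legacy INI-style config path for
this module (section and key shown as an assumption; the value is in
microseconds), it corresponds to a setting along these lines:

	[Nvmf]
	  # Interval between polls for new connections, in microseconds
	  AcceptorPollRate 10000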
diff --git a/module/event/subsystems/nvmf/nvmf_tgt.c b/module/event/subsystems/nvmf/nvmf_tgt.c
index 7bf4ef357..0f516a545 100644
--- a/module/event/subsystems/nvmf/nvmf_tgt.c
+++ b/module/event/subsystems/nvmf/nvmf_tgt.c
@@ -46,11 +46,9 @@ enum nvmf_tgt_state {
 	NVMF_TGT_INIT_PARSE_CONFIG,
 	NVMF_TGT_INIT_CREATE_POLL_GROUPS,
 	NVMF_TGT_INIT_START_SUBSYSTEMS,
-	NVMF_TGT_INIT_START_ACCEPTOR,
 	NVMF_TGT_RUNNING,
 	NVMF_TGT_FINI_STOP_SUBSYSTEMS,
 	NVMF_TGT_FINI_DESTROY_POLL_GROUPS,
-	NVMF_TGT_FINI_STOP_ACCEPTOR,
 	NVMF_TGT_FINI_FREE_RESOURCES,
 	NVMF_TGT_STOPPED,
 	NVMF_TGT_ERROR,
@@ -72,8 +70,6 @@ static struct spdk_thread *g_tgt_fini_thread = NULL;
 static TAILQ_HEAD(, nvmf_tgt_poll_group) g_poll_groups = TAILQ_HEAD_INITIALIZER(g_poll_groups);
 static size_t g_num_poll_groups = 0;
 
-static struct spdk_poller *g_acceptor_poller = NULL;
-
 static void nvmf_tgt_advance_state(void);
 
 static void
@@ -103,28 +99,13 @@ nvmf_subsystem_fini(void)
 	nvmf_shutdown_cb(NULL);
 }
 
-static int
-acceptor_poll(void *arg)
-{
-	struct spdk_nvmf_tgt *tgt = arg;
-	uint32_t count;
-
-	count = spdk_nvmf_tgt_accept(tgt);
-
-	if (count > 0) {
-		return SPDK_POLLER_BUSY;
-	} else {
-		return SPDK_POLLER_IDLE;
-	}
-}
-
 static void
 _nvmf_tgt_destroy_poll_group_done(void *ctx)
 {
 	assert(g_num_poll_groups > 0);
 
 	if (--g_num_poll_groups == 0) {
-		g_tgt_state = NVMF_TGT_FINI_STOP_ACCEPTOR;
+		g_tgt_state = NVMF_TGT_FINI_FREE_RESOURCES;
 		nvmf_tgt_advance_state();
 	}
 }
@@ -236,7 +217,7 @@ nvmf_tgt_subsystem_started(struct spdk_nvmf_subsystem *subsystem,
 		return;
 	}
 
-	g_tgt_state = NVMF_TGT_INIT_START_ACCEPTOR;
+	g_tgt_state = NVMF_TGT_RUNNING;
 	nvmf_tgt_advance_state();
 }
 
@@ -406,15 +387,10 @@ nvmf_tgt_advance_state(void)
 				g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
 			}
 		} else {
-			g_tgt_state = NVMF_TGT_INIT_START_ACCEPTOR;
+			g_tgt_state = NVMF_TGT_RUNNING;
 		}
 		break;
 	}
-	case NVMF_TGT_INIT_START_ACCEPTOR:
-		g_acceptor_poller = SPDK_POLLER_REGISTER(acceptor_poll, g_spdk_nvmf_tgt,
-				    g_spdk_nvmf_tgt_conf->acceptor_poll_rate);
-		g_tgt_state = NVMF_TGT_RUNNING;
-		break;
 	case NVMF_TGT_RUNNING:
 		spdk_subsystem_init_next(0);
 		break;
@@ -437,10 +413,6 @@ nvmf_tgt_advance_state(void)
 		/* Send a message to each poll group thread, and terminate the thread */
 		nvmf_tgt_destroy_poll_groups();
 		break;
-	case NVMF_TGT_FINI_STOP_ACCEPTOR:
-		spdk_poller_unregister(&g_acceptor_poller);
-		g_tgt_state = NVMF_TGT_FINI_FREE_RESOURCES;
-		break;
 	case NVMF_TGT_FINI_FREE_RESOURCES:
 		spdk_nvmf_tgt_destroy(g_spdk_nvmf_tgt, nvmf_tgt_destroy_done, NULL);
 		break;
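For transport authors, the CHANGELOG note above translates to the following
shape for an accept implementation: hand each detected connection to the
target with spdk_nvmf_tgt_new_qpair() and report how many were found, so the
library's poller can return busy or idle accordingly. A minimal sketch;
example_poll_for_connection() is a hypothetical stand-in for the transport's
own listener polling:

	static int
	example_transport_accept(struct spdk_nvmf_transport *transport)
	{
		struct spdk_nvmf_qpair *qpair;
		int count = 0;

		/* example_poll_for_connection() is hypothetical: it represents
		 * whatever mechanism the transport uses to detect a pending
		 * connection, returning NULL when nothing is waiting. */
		while ((qpair = example_poll_for_connection(transport)) != NULL) {
			/* The library picks the optimal poll group and performs
			 * spdk_nvmf_poll_group_add() on that group's thread. */
			spdk_nvmf_tgt_new_qpair(transport->tgt, qpair);
			count++;
		}

		return count;
	}

The returned count feeds the new nvmf_tgt_accept() poller in lib/nvmf/nvmf.c
above, which sums the counts across all transports on each tick.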