From 77ab6f2839fdd4cdb5969aec456895931d02cd91 Mon Sep 17 00:00:00 2001
From: Ben Walker
Date: Mon, 29 Jun 2020 10:56:47 -0700
Subject: [PATCH] nvmf: Eliminate spdk_nvmf_tgt_accept()

The poller is now created internally to the library whenever a target
is constructed. Applications are not expected to poll for connections
any longer.

Change-Id: I523eb6adcc042c1ba2ed41b1cb41256b8bf63772
Signed-off-by: Ben Walker
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3583
Tested-by: SPDK CI Jenkins
Reviewed-by: Shuhei Matsumoto
Reviewed-by: Aleksey Marchuk
Reviewed-by: Changpeng Liu
---
 CHANGELOG.md                            |  8 ++---
 doc/nvmf_tgt_pg.md                      | 24 +++----------
 examples/nvmf/nvmf/nvmf.c               | 30 +++-------------
 include/spdk/nvmf.h                     |  8 +----
 lib/nvmf/nvmf.c                         | 46 +++++++++++++++++--------
 lib/nvmf/nvmf_internal.h                |  2 ++
 lib/nvmf/spdk_nvmf.map                  |  1 -
 module/event/subsystems/nvmf/conf.c     |  1 +
 module/event/subsystems/nvmf/nvmf_tgt.c | 34 ++----------------
 9 files changed, 51 insertions(+), 103 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 247e9582d..75573e295 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -117,10 +117,10 @@ implementations of the transport interface will have to implement that function
 The NVMe-oF target no longer supports connecting scheduling configuration and instead
 always uses what was previously called "transport" scheduling.
 
-`spdk_nvmf_tgt_accept` no longer takes a function pointer as an argument. New connections
-are automatically assigned to poll groups by the underlying transport. Further,
-`spdk_nvmf_transport_ops` has changed such that the accept function pointer no longer
-takes a function pointer as an argument. Instead, transports should call
+`spdk_nvmf_tgt_accept` no longer exists. The accept process now occurs automatically after
+the creation of an nvmf target and queue pairs are assigned to poll groups by the underlying
+transport. Further, `spdk_nvmf_transport_ops` has changed such that the accept function
+pointer no longer takes a function pointer as an argument. Instead, transports should call
 `spdk_nvmf_tgt_new_qpair` whenever they previously would have called that callback.
 
 The NVMe-oF target now supports aborting any submitted NVM or Admin command. Previously,
diff --git a/doc/nvmf_tgt_pg.md b/doc/nvmf_tgt_pg.md
index 2287b7e31..939817b72 100644
--- a/doc/nvmf_tgt_pg.md
+++ b/doc/nvmf_tgt_pg.md
@@ -83,7 +83,8 @@ Namespaces are bdevs. See @ref bdev for more information about the SPDK bdev
 layer. A bdev may be obtained by calling spdk_bdev_get_by_name().
 
 Once a subsystem exists and the target is listening on an address, new
-connections may be accepted by polling spdk_nvmf_tgt_accept().
+connections will be automatically assigned to poll groups as they are
+detected.
 
 All I/O to a subsystem is driven by a poll group, which polls for incoming
 network I/O. Poll groups may be created by calling
@@ -91,13 +92,6 @@ spdk_nvmf_poll_group_create(). They automatically request to begin polling
 upon creation on the thread from which they were created. Most importantly, *a
 poll group may only be accessed from the thread on which it was created.*
 
-When spdk_nvmf_tgt_accept() detects a new connection, it chooses an optimal
-poll group by first calling spdk_nvmf_get_optimal_poll_group(), which calls down
-into the transport, and then assigns the qpair to the optimal poll group by
-calling spdk_nvmf_poll_group_add(). This all happens within the NVMe-oF target
-library and the NVMe-oF target application is not required to do anything other
-than continue to periodically poll spdk_nvmf_tgt_accept().
-
 ## Access Control
 
 Access control is performed at the subsystem level by adding allowed listen
@@ -110,9 +104,7 @@ and hosts may only be added to inactive or paused subsystems.
 
 A discovery subsystem, as defined by the NVMe-oF specification, is
 automatically created for each NVMe-oF target constructed. Connections to the
-discovery subsystem are handled in the same way as any other subsystem - new
-qpairs are created in response to spdk_nvmf_tgt_accept() and they must be
-assigned to a poll group.
+discovery subsystem are handled in the same way as any other subsystem.
 
 ## Transports
 
@@ -131,15 +123,7 @@ fabrics simultaneously.
 
 The SPDK NVMe-oF target library does not strictly dictate threading model, but
 poll groups do all of their polling and I/O processing on the thread they are
 created on. Given that, it almost always makes sense to create one poll group
-per thread used in the application. New qpairs created in response to
-spdk_nvmf_tgt_accept() can be handed out round-robin to the poll groups. This
-is how the SPDK NVMe-oF target application currently functions.
-
-More advanced algorithms for distributing qpairs to poll groups are possible.
-For instance, a NUMA-aware algorithm would be an improvement over basic
-round-robin, where NUMA-aware means assigning qpairs to poll groups running on
-CPU cores that are on the same NUMA node as the network adapter and storage
-device. Load-aware algorithms also may have benefits.
+per thread used in the application.
 
 ## Scaling Across CPU Cores
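For readers of the updated guide, a minimal sketch of the new application flow,
assuming illustrative values (the target name and poll rate are examples, not
requirements; only the spdk_nvmf_* symbols come from this patch):

	/* Create the target; the library registers the accept poller itself. */
	struct spdk_nvmf_target_opts tgt_opts = {
		.name = "example_tgt",		/* illustrative name */
		.max_subsystems = 1024,
		.acceptor_poll_rate = 10000,	/* microseconds; 0 selects the library default */
	};
	struct spdk_nvmf_tgt *tgt = spdk_nvmf_tgt_create(&tgt_opts);

	/* One poll group per application thread; each may only be accessed
	 * from the thread that created it. */
	struct spdk_nvmf_poll_group *pg = spdk_nvmf_poll_group_create(tgt);

	/* There is no accept loop to drive anymore: new qpairs are assigned
	 * to poll groups by the library as the transports detect them. */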
diff --git a/examples/nvmf/nvmf/nvmf.c b/examples/nvmf/nvmf/nvmf.c
index 3b14e5a9c..6ee7b44f8 100644
--- a/examples/nvmf/nvmf/nvmf.c
+++ b/examples/nvmf/nvmf/nvmf.c
@@ -54,11 +54,9 @@ enum nvmf_target_state {
 	NVMF_INIT_TARGET,
 	NVMF_INIT_POLL_GROUPS,
 	NVMF_INIT_START_SUBSYSTEMS,
-	NVMF_INIT_START_ACCEPTOR,
 	NVMF_RUNNING,
 	NVMF_FINI_STOP_SUBSYSTEMS,
 	NVMF_FINI_POLL_GROUPS,
-	NVMF_FINI_STOP_ACCEPTOR,
 	NVMF_FINI_TARGET,
 	NVMF_FINI_SUBSYSTEM,
 };
@@ -99,7 +97,7 @@ static struct spdk_thread *g_fini_thread = NULL;
 static struct nvmf_target g_nvmf_tgt = {
 	.max_subsystems = NVMF_DEFAULT_SUBSYSTEMS,
 };
-static struct spdk_poller *g_acceptor_poller = NULL;
+
 static struct nvmf_target_poll_group *g_next_pg = NULL;
 static pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER;
 static bool g_reactors_exit = false;
@@ -528,16 +526,6 @@ nvmf_tgt_stop_subsystems(struct nvmf_target *nvmf_tgt)
 	}
 }
 
-static int
-nvmf_tgt_acceptor_poll(void *arg)
-{
-	struct nvmf_target *nvmf_tgt = arg;
-
-	spdk_nvmf_tgt_accept(nvmf_tgt->tgt);
-
-	return -1;
-}
-
 static void
 nvmf_tgt_subsystem_start_next(struct spdk_nvmf_subsystem *subsystem,
 			      void *cb_arg, int status)
@@ -558,7 +546,7 @@ nvmf_tgt_subsystem_start_next(struct spdk_nvmf_subsystem *subsystem,
 
 	fprintf(stdout, "all subsystems of target started\n");
 
-	g_target_state = NVMF_INIT_START_ACCEPTOR;
+	g_target_state = NVMF_RUNNING;
 	nvmf_target_advance_state();
 }
 
@@ -587,7 +575,7 @@ nvmf_tgt_start_subsystems(struct nvmf_target *nvmf_tgt)
 			g_target_state = NVMF_FINI_STOP_SUBSYSTEMS;
 		}
 	} else {
-		g_target_state = NVMF_INIT_START_ACCEPTOR;
+		g_target_state = NVMF_RUNNING;
 	}
 }
 
@@ -670,7 +658,7 @@ _nvmf_tgt_destroy_poll_groups_done(void *ctx)
 	if (--g_num_poll_groups == 0) {
 		fprintf(stdout, "destroy targets's poll groups done\n");
 
-		g_target_state = NVMF_FINI_STOP_ACCEPTOR;
+		g_target_state = NVMF_FINI_TARGET;
 		nvmf_target_advance_state();
 	}
 }
@@ -779,12 +767,6 @@ nvmf_target_advance_state(void)
 	case NVMF_INIT_START_SUBSYSTEMS:
 		nvmf_tgt_start_subsystems(&g_nvmf_tgt);
 		break;
-	case NVMF_INIT_START_ACCEPTOR:
-		g_acceptor_poller = SPDK_POLLER_REGISTER(nvmf_tgt_acceptor_poll, &g_nvmf_tgt,
-				    g_acceptor_poll_rate);
-		fprintf(stdout, "Acceptor running\n");
-		g_target_state = NVMF_RUNNING;
-		break;
 	case NVMF_RUNNING:
 		fprintf(stdout, "nvmf target is running\n");
 		if (g_migrate_pg_period_us != 0) {
@@ -799,10 +781,6 @@ nvmf_target_advance_state(void)
 	case NVMF_FINI_POLL_GROUPS:
 		nvmf_poll_groups_destroy();
 		break;
-	case NVMF_FINI_STOP_ACCEPTOR:
-		spdk_poller_unregister(&g_acceptor_poller);
-		g_target_state = NVMF_FINI_TARGET;
-		break;
 	case NVMF_FINI_TARGET:
 		nvmf_destroy_nvmf_tgt();
 		break;
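Net effect on the example application's lifecycle: the two acceptor states
disappear and the remaining states chain directly, per the enum and the
transitions in the hunks above:

	NVMF_INIT_TARGET -> NVMF_INIT_POLL_GROUPS ->
	    NVMF_INIT_START_SUBSYSTEMS -> NVMF_RUNNING

	NVMF_FINI_STOP_SUBSYSTEMS -> NVMF_FINI_POLL_GROUPS ->
	    NVMF_FINI_TARGET -> NVMF_FINI_SUBSYSTEM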
diff --git a/include/spdk/nvmf.h b/include/spdk/nvmf.h
index 99be1fcfe..980dd08fe 100644
--- a/include/spdk/nvmf.h
+++ b/include/spdk/nvmf.h
@@ -68,6 +68,7 @@ struct spdk_nvmf_transport;
 struct spdk_nvmf_target_opts {
 	char name[NVMF_TGT_NAME_MAX_LENGTH];
 	uint32_t max_subsystems;
+	uint32_t acceptor_poll_rate;
 };
 
 struct spdk_nvmf_transport_opts {
@@ -228,13 +229,6 @@ int spdk_nvmf_tgt_listen(struct spdk_nvmf_tgt *tgt,
 int spdk_nvmf_tgt_stop_listen(struct spdk_nvmf_tgt *tgt,
 			      struct spdk_nvme_transport_id *trid);
 
-/**
- * Poll the target for incoming connections.
- *
- * \param tgt The target associated with the listen address.
- */
-uint32_t spdk_nvmf_tgt_accept(struct spdk_nvmf_tgt *tgt);
-
 /**
  * Create a poll group.
  *
diff --git a/lib/nvmf/nvmf.c b/lib/nvmf/nvmf.c
index ccd3c6943..dc8826a01 100644
--- a/lib/nvmf/nvmf.c
+++ b/lib/nvmf/nvmf.c
@@ -50,6 +50,7 @@ SPDK_LOG_REGISTER_COMPONENT("nvmf", SPDK_LOG_NVMF)
 
 #define SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS 1024
+#define SPDK_NVMF_DEFAULT_ACCEPT_POLL_RATE_US 10000
 
 static TAILQ_HEAD(, spdk_nvmf_tgt) g_nvmf_tgts = TAILQ_HEAD_INITIALIZER(g_nvmf_tgts);
 
@@ -231,10 +232,25 @@ nvmf_tgt_destroy_poll_group_qpairs(struct spdk_nvmf_poll_group *group)
 	_nvmf_tgt_disconnect_next_qpair(ctx);
 }
 
+static int
+nvmf_tgt_accept(void *ctx)
+{
+	struct spdk_nvmf_tgt *tgt = ctx;
+	struct spdk_nvmf_transport *transport, *tmp;
+	int count = 0;
+
+	TAILQ_FOREACH_SAFE(transport, &tgt->transports, link, tmp) {
+		count += nvmf_transport_accept(transport);
+	}
+
+	return count;
+}
+
 struct spdk_nvmf_tgt *
 spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
 {
 	struct spdk_nvmf_tgt *tgt, *tmp_tgt;
+	uint32_t acceptor_poll_rate;
 
 	if (strnlen(opts->name, NVMF_TGT_NAME_MAX_LENGTH) == NVMF_TGT_NAME_MAX_LENGTH) {
 		SPDK_ERRLOG("Provided target name exceeds the max length of %u.\n", NVMF_TGT_NAME_MAX_LENGTH);
@@ -261,6 +277,12 @@ spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
 		tgt->max_subsystems = opts->max_subsystems;
 	}
 
+	if (!opts || !opts->acceptor_poll_rate) {
+		acceptor_poll_rate = SPDK_NVMF_DEFAULT_ACCEPT_POLL_RATE_US;
+	} else {
+		acceptor_poll_rate = opts->acceptor_poll_rate;
+	}
+
 	tgt->discovery_genctr = 0;
 	TAILQ_INIT(&tgt->transports);
 	TAILQ_INIT(&tgt->poll_groups);
@@ -273,7 +295,12 @@ spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
 
 	pthread_mutex_init(&tgt->mutex, NULL);
 
-	TAILQ_INSERT_HEAD(&g_nvmf_tgts, tgt, link);
+	tgt->accept_poller = SPDK_POLLER_REGISTER(nvmf_tgt_accept, tgt, acceptor_poll_rate);
+	if (!tgt->accept_poller) {
+		free(tgt->subsystems);
+		free(tgt);
+		return NULL;
+	}
 
 	spdk_io_device_register(tgt,
 				nvmf_tgt_create_poll_group,
@@ -281,6 +308,8 @@ spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
 				sizeof(struct spdk_nvmf_poll_group),
 				tgt->name);
 
+	TAILQ_INSERT_HEAD(&g_nvmf_tgts, tgt, link);
+
 	return tgt;
 }
 
@@ -326,6 +355,8 @@ spdk_nvmf_tgt_destroy(struct spdk_nvmf_tgt *tgt,
 	tgt->destroy_cb_fn = cb_fn;
 	tgt->destroy_cb_arg = cb_arg;
 
+	spdk_poller_unregister(&tgt->accept_poller);
+
 	TAILQ_REMOVE(&g_nvmf_tgts, tgt, link);
 
 	spdk_io_device_unregister(tgt, nvmf_tgt_destroy_cb);
@@ -774,19 +805,6 @@ spdk_nvmf_tgt_new_qpair(struct spdk_nvmf_tgt *tgt, struct spdk_nvmf_qpair *qpair
 	spdk_thread_send_msg(group->thread, _nvmf_poll_group_add, ctx);
 }
 
-uint32_t
-spdk_nvmf_tgt_accept(struct spdk_nvmf_tgt *tgt)
-{
-	struct spdk_nvmf_transport *transport, *tmp;
-	uint32_t count = 0;
-
-	TAILQ_FOREACH_SAFE(transport, &tgt->transports, link, tmp) {
-		count += nvmf_transport_accept(transport);
-	}
-
-	return count;
-}
-
 struct spdk_nvmf_poll_group *
 spdk_nvmf_poll_group_create(struct spdk_nvmf_tgt *tgt)
 {
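Two points worth noting in the hunks above. First, the accept poller is
registered in spdk_nvmf_tgt_create() before the io_device is registered, and
unregistered first in spdk_nvmf_tgt_destroy(), so no accept can race with
target teardown. Second, a zero acceptor_poll_rate falls back to the 10ms
default, so existing callers that zero-initialize their opts keep working
unchanged; a caller that wants a faster cadence only sets the new field. A
hedged sketch (the target name and rate are illustrative):

	struct spdk_nvmf_target_opts opts = {
		.name = "tgt0",		/* illustrative */
	};

	/* Poll for new connections every 1ms instead of the
	 * SPDK_NVMF_DEFAULT_ACCEPT_POLL_RATE_US (10000us) default. */
	opts.acceptor_poll_rate = 1000;

	struct spdk_nvmf_tgt *tgt = spdk_nvmf_tgt_create(&opts);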
diff --git a/lib/nvmf/nvmf_internal.h b/lib/nvmf/nvmf_internal.h
index ac48a34a5..179ae6761 100644
--- a/lib/nvmf/nvmf_internal.h
+++ b/lib/nvmf/nvmf_internal.h
@@ -67,6 +67,8 @@ struct spdk_nvmf_tgt {
 
 	uint64_t discovery_genctr;
 
+	struct spdk_poller *accept_poller;
+
 	uint32_t max_subsystems;
 
 	/* Array of subsystem pointers of size max_subsystems indexed by sid */
diff --git a/lib/nvmf/spdk_nvmf.map b/lib/nvmf/spdk_nvmf.map
index c6b9e601f..b6fc9ca39 100644
--- a/lib/nvmf/spdk_nvmf.map
+++ b/lib/nvmf/spdk_nvmf.map
@@ -11,7 +11,6 @@
 	spdk_nvmf_tgt_write_config_json;
 	spdk_nvmf_tgt_listen;
 	spdk_nvmf_tgt_stop_listen;
-	spdk_nvmf_tgt_accept;
 	spdk_nvmf_poll_group_create;
 	spdk_nvmf_get_optimal_poll_group;
 	spdk_nvmf_poll_group_destroy;
diff --git a/module/event/subsystems/nvmf/conf.c b/module/event/subsystems/nvmf/conf.c
index b92a92acc..1324f3699 100644
--- a/module/event/subsystems/nvmf/conf.c
+++ b/module/event/subsystems/nvmf/conf.c
@@ -199,6 +199,7 @@ nvmf_parse_nvmf_tgt(void)
 	}
 
 	opts.max_subsystems = g_spdk_nvmf_tgt_max_subsystems;
+	opts.acceptor_poll_rate = g_spdk_nvmf_tgt_conf->acceptor_poll_rate;
 	g_spdk_nvmf_tgt = spdk_nvmf_tgt_create(&opts);
 
 	g_spdk_nvmf_tgt_max_subsystems = 0;
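The acceptor_poll_rate wired through here comes from the event subsystem's
existing configuration. Assuming the usual legacy INI-style config path for
this module (section and key shown as an assumption; the value is in
microseconds), it corresponds to a setting along these lines:

	[Nvmf]
	  # Interval between polls for new connections, in microseconds
	  AcceptorPollRate 10000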
diff --git a/module/event/subsystems/nvmf/nvmf_tgt.c b/module/event/subsystems/nvmf/nvmf_tgt.c
index 7bf4ef357..0f516a545 100644
--- a/module/event/subsystems/nvmf/nvmf_tgt.c
+++ b/module/event/subsystems/nvmf/nvmf_tgt.c
@@ -46,11 +46,9 @@ enum nvmf_tgt_state {
 	NVMF_TGT_INIT_PARSE_CONFIG,
 	NVMF_TGT_INIT_CREATE_POLL_GROUPS,
 	NVMF_TGT_INIT_START_SUBSYSTEMS,
-	NVMF_TGT_INIT_START_ACCEPTOR,
 	NVMF_TGT_RUNNING,
 	NVMF_TGT_FINI_STOP_SUBSYSTEMS,
 	NVMF_TGT_FINI_DESTROY_POLL_GROUPS,
-	NVMF_TGT_FINI_STOP_ACCEPTOR,
 	NVMF_TGT_FINI_FREE_RESOURCES,
 	NVMF_TGT_STOPPED,
 	NVMF_TGT_ERROR,
@@ -72,8 +70,6 @@ static struct spdk_thread *g_tgt_fini_thread = NULL;
 static TAILQ_HEAD(, nvmf_tgt_poll_group) g_poll_groups = TAILQ_HEAD_INITIALIZER(g_poll_groups);
 static size_t g_num_poll_groups = 0;
 
-static struct spdk_poller *g_acceptor_poller = NULL;
-
 static void nvmf_tgt_advance_state(void);
 
 static void
@@ -103,28 +99,13 @@ nvmf_subsystem_fini(void)
 	nvmf_shutdown_cb(NULL);
 }
 
-static int
-acceptor_poll(void *arg)
-{
-	struct spdk_nvmf_tgt *tgt = arg;
-	uint32_t count;
-
-	count = spdk_nvmf_tgt_accept(tgt);
-
-	if (count > 0) {
-		return SPDK_POLLER_BUSY;
-	} else {
-		return SPDK_POLLER_IDLE;
-	}
-}
-
 static void
 _nvmf_tgt_destroy_poll_group_done(void *ctx)
 {
 	assert(g_num_poll_groups > 0);
 
 	if (--g_num_poll_groups == 0) {
-		g_tgt_state = NVMF_TGT_FINI_STOP_ACCEPTOR;
+		g_tgt_state = NVMF_TGT_FINI_FREE_RESOURCES;
 		nvmf_tgt_advance_state();
 	}
 }
@@ -236,7 +217,7 @@ nvmf_tgt_subsystem_started(struct spdk_nvmf_subsystem *subsystem,
 		return;
 	}
 
-	g_tgt_state = NVMF_TGT_INIT_START_ACCEPTOR;
+	g_tgt_state = NVMF_TGT_RUNNING;
 	nvmf_tgt_advance_state();
 }
 
@@ -406,15 +387,10 @@ nvmf_tgt_advance_state(void)
 				g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
 			}
 		} else {
-			g_tgt_state = NVMF_TGT_INIT_START_ACCEPTOR;
+			g_tgt_state = NVMF_TGT_RUNNING;
 		}
 		break;
 	}
-	case NVMF_TGT_INIT_START_ACCEPTOR:
-		g_acceptor_poller = SPDK_POLLER_REGISTER(acceptor_poll, g_spdk_nvmf_tgt,
-				    g_spdk_nvmf_tgt_conf->acceptor_poll_rate);
-		g_tgt_state = NVMF_TGT_RUNNING;
-		break;
 	case NVMF_TGT_RUNNING:
 		spdk_subsystem_init_next(0);
 		break;
@@ -437,10 +413,6 @@ nvmf_tgt_advance_state(void)
 		/* Send a message to each poll group thread, and terminate the thread */
 		nvmf_tgt_destroy_poll_groups();
 		break;
-	case NVMF_TGT_FINI_STOP_ACCEPTOR:
-		spdk_poller_unregister(&g_acceptor_poller);
-		g_tgt_state = NVMF_TGT_FINI_FREE_RESOURCES;
-		break;
 	case NVMF_TGT_FINI_FREE_RESOURCES:
 		spdk_nvmf_tgt_destroy(g_spdk_nvmf_tgt, nvmf_tgt_destroy_done, NULL);
 		break;
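For transport authors, the CHANGELOG note above translates to the following
shape for an accept implementation: hand each detected connection to the
target with spdk_nvmf_tgt_new_qpair() and report how many were found, so the
library's poller can return busy or idle accordingly. A minimal sketch;
example_poll_for_connection() is a hypothetical stand-in for the transport's
own listener polling:

	static int
	example_transport_accept(struct spdk_nvmf_transport *transport)
	{
		struct spdk_nvmf_qpair *qpair;
		int count = 0;

		/* example_poll_for_connection() is hypothetical: it represents
		 * whatever mechanism the transport uses to detect a pending
		 * connection, returning NULL when nothing is waiting. */
		while ((qpair = example_poll_for_connection(transport)) != NULL) {
			/* The library picks the optimal poll group and performs
			 * spdk_nvmf_poll_group_add() on that group's thread. */
			spdk_nvmf_tgt_new_qpair(transport->tgt, qpair);
			count++;
		}

		return count;
	}

The returned count feeds the new nvmf_tgt_accept() poller in lib/nvmf/nvmf.c
above, which sums the counts across all transports on each tick.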