bdev: prepopulate per-thread bdev_io cache

This helps prevent starvation in the case where a thread is started but remains idle, while other threads consume all of the spdk_bdev_io buffers in the global pool. This starvation issue is fairly theoretical at this point, but future patches will be adding the ability for callers to be notified when an spdk_bdev_io becomes available if the pool is exhausted. We will add tests to stress pool exhaustion at which point this patch will become much more important.

While here, increase the minimum bdev_io_pool_size to account for the mgmt_ch getting destroyed and then immediately created again on the master core. In this case there is a window where both channels exist at once - the one being destroyed won't free its cached spdk_bdev_ios until the deferred spdk_put_io_channel event executes.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I3a2fc80bc2bfd78b098bcbfce456d7a433cd64e9
Reviewed-on: https://review.gerrithub.io/415039
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
2018-06-13 01:35:15 -07:00 · 2018-06-13 01:35:15 -07:00 · ed6827edbf
commit ed6827edbf
parent b10f6b1c8c
1 changed files with 21 additions and 2 deletions
--- a/lib/bdev/bdev.c
+++ b/lib/bdev/bdev.c
@@ -264,10 +264,19 @@ spdk_bdev_get_opts(struct spdk_bdev_opts *opts)
 int
 spdk_bdev_set_opts(struct spdk_bdev_opts *opts)
 {
-	if (opts->bdev_io_pool_size < opts->bdev_io_cache_size * spdk_thread_get_count()) {
+	uint32_t min_pool_size;
+
+	/*
+	 * Add 1 to the thread count to account for the extra mgmt_ch that gets created during subsystem
+	 *  initialization.  A second mgmt_ch will be created on the same thread when the application starts
+	 *  but before the deferred put_io_channel event is executed for the first mgmt_ch.
+	 */
+	min_pool_size = opts->bdev_io_cache_size * (spdk_thread_get_count() + 1);
+	if (opts->bdev_io_pool_size < min_pool_size) {
 		SPDK_ERRLOG("bdev_io_pool_size %" PRIu32 " is not compatible with bdev_io_cache_size %" PRIu32
 			    " and %" PRIu32 " threads\n", opts->bdev_io_pool_size, opts->bdev_io_cache_size,
 			    spdk_thread_get_count());
+		SPDK_ERRLOG("bdev_io_pool_size must be at least %" PRIu32 "\n", min_pool_size);
 		return -1;
 	}

@@ -512,14 +521,24 @@ static int
 spdk_bdev_mgmt_channel_create(void *io_device, void *ctx_buf)
 {
 	struct spdk_bdev_mgmt_channel *ch = ctx_buf;
+	struct spdk_bdev_io *bdev_io;
+	uint32_t i;

 	STAILQ_INIT(&ch->need_buf_small);
 	STAILQ_INIT(&ch->need_buf_large);

 	STAILQ_INIT(&ch->per_thread_cache);
-	ch->per_thread_cache_count = 0;
 	ch->bdev_io_cache_size = g_bdev_opts.bdev_io_cache_size;

+	/* Pre-populate bdev_io cache to ensure this thread cannot be starved. */
+	ch->per_thread_cache_count = 0;
+	for (i = 0; i < ch->bdev_io_cache_size; i++) {
+		bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool);
+		assert(bdev_io != NULL);
+		ch->per_thread_cache_count++;
+		STAILQ_INSERT_TAIL(&ch->per_thread_cache, bdev_io, internal.buf_link);
+	}
+
 	TAILQ_INIT(&ch->shared_resources);

 	return 0;