bdev/nvme: Disconnect and then free I/O qpair when deleting ctrlr_channel

For RDMA transport, current synchronous qpair disconnect occupied CPU for
a second when qpair disconnect gets timeout.

To remove this limitation, we will do the following:
- make spdk_nvme_ctrlr_disconnect_io_qpair() asynchronous,
- spdk_nvme_qpair_process_completions() returns -ENXIO only if the
  qpair is actually disconnected.

Even at this patch, spdk_nvme_poll_group_process_completions() invokes
disconnected_qpair_cb only if a qpair is actually disconnected. This
behavior will be maintained.

To use the upcoming asynchronous qpair disconnect easily, when
deleting a ctrlr_channel, disconnect the qpair, and then free the qpair
and release a reference to the poll group when the qpair is actually
disconnected.

We need to delete a nvme_qpair asynchronously after the corresponding
nvme_ctrlr_channel is deleted and defer the deletion of the corresponding
nvme_ctrlr until the nvme_qpair is deleted. To satisfy this requirement,
utilize the reference count of the nvme_ctrlr.

disconnected_qpair_cb() may call spdk_nvme_ctrlr_free_io_qpair() and
spdk_io_device_unregister() successively. The spdk_io_device_unregister()
will execute spdk_nvme_detach_async() from its callback.
spdk_nvme_ctrlr_free_io_qpair() has to complete earlier than
spdk_nvme_detach_async() starts. spdk_nvme_ctrlr_free_io_qpair() is
executed after unwinding stack. spdk_nvme_detach_async() is executed
after sending a message. Sending message is later than unwinding stack.
Hence the requirement is satisfied naturally.

spdk_io_device_unregister() for the nvme_ctrlr is required to be called
on the nvme_ctrlr->thread. To satisfy this requirement, redirect
nvme_ctrlr_unregister() to the nvme_ctrlr->thread. This change is too
small to stand as an independent patch. So include the change in this
patch.

Change-Id: Id8c01966c40b1dae9c4ef17f1b0b3f60a0bd17d5
Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10765
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Shuhei Matsumoto 2022-03-09 15:48:11 +09:00 committed by Tomasz Zawadzki
parent 1285481917
commit 068ca77ab2

View File

@ -515,8 +515,10 @@ nvme_ctrlr_unregister_cb(void *io_device)
}
static void
nvme_ctrlr_unregister(struct nvme_ctrlr *nvme_ctrlr)
nvme_ctrlr_unregister(void *ctx)
{
struct nvme_ctrlr *nvme_ctrlr = ctx;
spdk_io_device_unregister(nvme_ctrlr, nvme_ctrlr_unregister_cb);
}
@ -558,7 +560,7 @@ nvme_ctrlr_release(struct nvme_ctrlr *nvme_ctrlr)
pthread_mutex_unlock(&nvme_ctrlr->mutex);
nvme_ctrlr_unregister(nvme_ctrlr);
spdk_thread_exec_msg(nvme_ctrlr->thread, nvme_ctrlr_unregister, nvme_ctrlr);
}
static struct nvme_io_path *
@ -1095,6 +1097,8 @@ nvme_poll_group_get_qpair(struct nvme_poll_group *group, struct spdk_nvme_qpair
return nvme_qpair;
}
static void nvme_qpair_delete(struct nvme_qpair *nvme_qpair);
static void
bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
{
@ -1115,7 +1119,12 @@ bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_
_bdev_nvme_clear_io_path_cache(nvme_qpair);
bdev_nvme_reset(nvme_qpair->ctrlr);
if (nvme_qpair->ctrlr_ch != NULL) {
bdev_nvme_reset(nvme_qpair->ctrlr);
} else {
/* In this case, ctrlr_channel is already deleted. */
nvme_qpair_delete(nvme_qpair);
}
}
}
@ -2105,6 +2114,10 @@ nvme_qpair_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ctrlr_channel *ctrl
ctrlr_ch->qpair = nvme_qpair;
pthread_mutex_lock(&nvme_qpair->ctrlr->mutex);
nvme_qpair->ctrlr->ref++;
pthread_mutex_unlock(&nvme_qpair->ctrlr->mutex);
return 0;
}
@ -2128,6 +2141,8 @@ nvme_qpair_delete(struct nvme_qpair *nvme_qpair)
spdk_put_io_channel(spdk_io_channel_from_ctx(nvme_qpair->group));
nvme_ctrlr_release(nvme_qpair->ctrlr);
free(nvme_qpair);
}
@ -2143,11 +2158,16 @@ bdev_nvme_destroy_ctrlr_channel_cb(void *io_device, void *ctx_buf)
_bdev_nvme_clear_io_path_cache(nvme_qpair);
if (nvme_qpair->qpair != NULL) {
spdk_nvme_ctrlr_free_io_qpair(nvme_qpair->qpair);
nvme_qpair->qpair = NULL;
}
spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
nvme_qpair_delete(nvme_qpair);
/* We cannot release a reference to the poll group now.
* The qpair may be disconnected asynchronously later.
* We need to poll it until it is actually disconnected.
*/
nvme_qpair->ctrlr_ch = NULL;
} else {
nvme_qpair_delete(nvme_qpair);
}
}
static void