From 068ca77ab2ab62436d415edab4883753a2ebd0b0 Mon Sep 17 00:00:00 2001 From: Shuhei Matsumoto Date: Wed, 9 Mar 2022 15:48:11 +0900 Subject: [PATCH] bdev/nvme: Disconnect and then free I/O qpair when deleting ctrlr_channel For RDMA transport, current synchronous qpair disconnect occupied CPU for a second when qpair disconnect gets timeout. To remove this limitation, we will do the following: - make spdk_nvme_ctrlr_disconnect_io_qpair() asynchronous, - spdk_nvme_qpair_process_completions() returns -ENXIO only if the qpair is actually disconnected. Even at this patch, spdk_nvme_poll_group_process_completions() invokes disconnected_qpair_cb only if a qpair is actually disconnected. This behavior will be maintained. To use the upcoming asynchronous qpair disconnect easily, when deleting a ctrlr_channel, disconnect the qpair, and then free the qpair and release a reference to the poll group when the qpair is actually disconnected. We need to delete a nvme_qpair asynchronously after the corresponding nvme_ctrlr_channel is deleted and defer the deletion of the corresponding nvme_ctrlr until the nvme_qpair is deleted. To satisfy this requirement, utilize the reference count of the nvme_ctrlr. disconnected_qpair_cb() may call spdk_nvme_ctrlr_free_io_qpair() and spdk_io_device_unregister() successively. The spdk_io_device_unregister() will execute spdk_nvme_detach_async() from its callback. spdk_nvme_ctrlr_free_io_qpair() has to complete earlier than spdk_nvme_detach_async() starts. spdk_nvme_ctrlr_free_io_qpair() is executed after unwinding stack. spdk_nvme_detach_async() is executed after sending a message. Sending message is later than unwinding stack. Hence the requirement is satisfied naturally. spdk_io_device_unregister() for the nvme_ctrlr is required to be called on the nvme_ctrlr->thread. To satisfy this requirement, redirect nvme_ctrlr_unregister() to the nvme_ctrlr->thread. This change is too small to stand as an independent patch. So include the change in this patch. Change-Id: Id8c01966c40b1dae9c4ef17f1b0b3f60a0bd17d5 Signed-off-by: Shuhei Matsumoto Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10765 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Ben Walker Reviewed-by: Jim Harris --- module/bdev/nvme/bdev_nvme.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index 9f519989c..cec5060b9 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -515,8 +515,10 @@ nvme_ctrlr_unregister_cb(void *io_device) } static void -nvme_ctrlr_unregister(struct nvme_ctrlr *nvme_ctrlr) +nvme_ctrlr_unregister(void *ctx) { + struct nvme_ctrlr *nvme_ctrlr = ctx; + spdk_io_device_unregister(nvme_ctrlr, nvme_ctrlr_unregister_cb); } @@ -558,7 +560,7 @@ nvme_ctrlr_release(struct nvme_ctrlr *nvme_ctrlr) pthread_mutex_unlock(&nvme_ctrlr->mutex); - nvme_ctrlr_unregister(nvme_ctrlr); + spdk_thread_exec_msg(nvme_ctrlr->thread, nvme_ctrlr_unregister, nvme_ctrlr); } static struct nvme_io_path * @@ -1095,6 +1097,8 @@ nvme_poll_group_get_qpair(struct nvme_poll_group *group, struct spdk_nvme_qpair return nvme_qpair; } +static void nvme_qpair_delete(struct nvme_qpair *nvme_qpair); + static void bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx) { @@ -1115,7 +1119,12 @@ bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ _bdev_nvme_clear_io_path_cache(nvme_qpair); - bdev_nvme_reset(nvme_qpair->ctrlr); + if (nvme_qpair->ctrlr_ch != NULL) { + bdev_nvme_reset(nvme_qpair->ctrlr); + } else { + /* In this case, ctrlr_channel is already deleted. */ + nvme_qpair_delete(nvme_qpair); + } } } @@ -2105,6 +2114,10 @@ nvme_qpair_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ctrlr_channel *ctrl ctrlr_ch->qpair = nvme_qpair; + pthread_mutex_lock(&nvme_qpair->ctrlr->mutex); + nvme_qpair->ctrlr->ref++; + pthread_mutex_unlock(&nvme_qpair->ctrlr->mutex); + return 0; } @@ -2128,6 +2141,8 @@ nvme_qpair_delete(struct nvme_qpair *nvme_qpair) spdk_put_io_channel(spdk_io_channel_from_ctx(nvme_qpair->group)); + nvme_ctrlr_release(nvme_qpair->ctrlr); + free(nvme_qpair); } @@ -2143,11 +2158,16 @@ bdev_nvme_destroy_ctrlr_channel_cb(void *io_device, void *ctx_buf) _bdev_nvme_clear_io_path_cache(nvme_qpair); if (nvme_qpair->qpair != NULL) { - spdk_nvme_ctrlr_free_io_qpair(nvme_qpair->qpair); - nvme_qpair->qpair = NULL; - } + spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair); - nvme_qpair_delete(nvme_qpair); + /* We cannot release a reference to the poll group now. + * The qpair may be disconnected asynchronously later. + * We need to poll it until it is actually disconnected. + */ + nvme_qpair->ctrlr_ch = NULL; + } else { + nvme_qpair_delete(nvme_qpair); + } } static void