From c7f0c88d5d1e057a3f2ecc755e2a630ae80bb822 Mon Sep 17 00:00:00 2001 From: Shuhei Matsumoto Date: Sat, 27 Nov 2021 03:22:08 +0900 Subject: [PATCH] nvme: Add three APIs for disconnect, start re-enable, and poll re-enable ctrlr The NVMe bdev module will support two features, delayed reconnect and delete after multiple failures of reconnect, to improve error recovery. The two recently added APIs, spdk_nvme_ctrlr_reset_async() and spdk_nvme_ctrlr_reset_poll_async(), were not good enough. spdk_nvme_ctrlr_reset_ctx was not necessary. It had only a pointer to ctrlr. Using a pointer to ctrlr directly saves us from undesirable malloc error processing. Separate spdk_nvme_ctrlr_reset_async() into spdk_nvme_ctrlr_disconnect() and spdk_nvme_ctrlr_reconnect_async(). spdk_nvme_ctrlr_disconnect() disconnects ctrlr including disconnecting adminq. spdk_nvme_ctrlr_reconnect_async() moves the ctrlr state to INIT. Then rename spdk_nvme_ctrlr_reset_poll_async() to spdk_nvme_ctrlr_reconnect_poll_async(). Finally, deprecate spdk_nvme_ctrlr_reset_async() and spdk_nvme_ctrlr_reset_poll_async(). The following patches will change the NVMe bdev module to use these new APIs. 
Signed-off-by: Shuhei Matsumoto Change-Id: Id1d6858dcdc5fc2e9db0a6ebf3f79cab4f9bbcb7 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10091 Community-CI: Broadcom CI Tested-by: SPDK CI Jenkins Reviewed-by: Aleksey Marchuk Reviewed-by: Ben Walker Reviewed-by: Jim Harris --- CHANGELOG.md | 7 +++++++ deprecation.md | 6 ++++++ include/spdk/nvme.h | 35 +++++++++++++++++++++++++++++++++++ lib/nvme/Makefile | 2 +- lib/nvme/nvme_ctrlr.c | 36 ++++++++++++++++++++++++++++++------ lib/nvme/spdk_nvme.map | 3 +++ 6 files changed, 82 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a19bddc0..a77c970c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,13 @@ removed in SPDK 22.04, and the parameter `transport_retry_count` is added and us An new parameter `bdev_retry_count` is added to the RPC `bdev_nvme_set_options`. +### nvme + +New APIs, `spdk_nvme_ctrlr_disconnect`, `spdk_nvme_ctrlr_reconnect_async`, and +`spdk_nvme_ctrlr_reconnect_poll_async`, have been added to improve error recovery, and +the existing APIs, `spdk_nvme_ctrlr_reset_async` and `spdk_nvme_ctrlr_reset_poll_async`, +have been deprecated. + ## v21.10 Structure `spdk_nvmf_target_opts` has been extended with new member `discovery_filter` which allows to specify diff --git a/deprecation.md b/deprecation.md index a7e7c2236..5c028ee5a 100644 --- a/deprecation.md +++ b/deprecation.md @@ -18,3 +18,9 @@ ABI cannot be removed without providing deprecation notice for at least single S Deprecated `spdk_bdev_module_finish_done()` API, which will be removed in SPDK 22.01. Bdev modules should use `spdk_bdev_module_fini_done()` instead. + +### nvme + +Deprecated `spdk_nvme_ctrlr_reset_async` and `spdk_nvme_ctrlr_reset_poll_async` APIs, +which will be removed in SPDK 22.01. `spdk_nvme_ctrlr_disconnect`, `spdk_nvme_ctrlr_reconnect_async`, +and `spdk_nvme_ctrlr_reconnect_poll_async` should be used instead. 
diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h index ddadbb121..09de8b933 100644 --- a/include/spdk/nvme.h +++ b/include/spdk/nvme.h @@ -1079,6 +1079,8 @@ struct spdk_nvme_ctrlr_reset_ctx; /** * Create a context object that can be polled to perform a full hardware reset of the NVMe controller. + * (Deprecated, please use spdk_nvme_ctrlr_disconnect(), spdk_nvme_ctrlr_reconnect_async(), and + * spdk_nvme_ctrlr_reconnect_poll_async() instead.) * * The function will set the controller reset context on success, user must call * spdk_nvme_ctrlr_reset_poll_async() until it returns a value other than -EAGAIN. @@ -1097,6 +1099,8 @@ int spdk_nvme_ctrlr_reset_async(struct spdk_nvme_ctrlr *ctrlr, /** * Proceed with resetting controller associated with the controller reset context. + * (Deprecated, please use spdk_nvme_ctrlr_disconnect(), spdk_nvme_ctrlr_reconnect_async(), and + * spdk_nvme_ctrlr_reconnect_poll_async() instead.) * * The controller reset context is one returned from a previous call to * spdk_nvme_ctrlr_reset_async(). Users must call this function on the @@ -1111,6 +1115,37 @@ int spdk_nvme_ctrlr_reset_async(struct spdk_nvme_ctrlr *ctrlr, */ int spdk_nvme_ctrlr_reset_poll_async(struct spdk_nvme_ctrlr_reset_ctx *ctrlr_reset_ctx); +/** + * Disconnect the given NVMe controller. + * + * This function is used as the first operation of a full reset sequence of the given NVMe + * controller. The NVMe controller is ready to reconnect after completing this function. + * + * \param ctrlr Opaque handle to NVMe controller. + * + * \return 0 on success, -EBUSY if controller is already resetting, or -ENXIO if controller + * has been removed. + */ +int spdk_nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Start re-enabling the given NVMe controller in a full reset sequence + * + * \param ctrlr Opaque handle to NVMe controller. 
+ */ +void spdk_nvme_ctrlr_reconnect_async(struct spdk_nvme_ctrlr *ctrlr); + +/** + * Proceed with re-enabling the given NVMe controller. + * + * Users must call this function in a full reset sequence until it returns a value other + * than -EAGAIN. + * + * \return 0 if the given NVMe controller is enabled, or -EAGAIN if there are still + * pending operations to enable it. + */ +int spdk_nvme_ctrlr_reconnect_poll_async(struct spdk_nvme_ctrlr *ctrlr); + /** * Perform a NVMe subsystem reset. * diff --git a/lib/nvme/Makefile b/lib/nvme/Makefile index 01f01c98c..81af5e544 100644 --- a/lib/nvme/Makefile +++ b/lib/nvme/Makefile @@ -35,7 +35,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk SO_VER := 7 -SO_MINOR := 0 +SO_MINOR := 1 C_SRCS = nvme_ctrlr_cmd.c nvme_ctrlr.c nvme_fabric.c nvme_ns_cmd.c nvme_ns.c nvme_pcie_common.c nvme_pcie.c nvme_qpair.c nvme.c nvme_quirks.c nvme_transport.c \ nvme_ctrlr_ocssd_cmd.c nvme_ns_ocssd_cmd.c nvme_tcp.c nvme_opal.c nvme_io_msg.c nvme_poll_group.c nvme_zns.c diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c index 8a48b4a35..85c71409f 100644 --- a/lib/nvme/nvme_ctrlr.c +++ b/lib/nvme/nvme_ctrlr.c @@ -1610,8 +1610,8 @@ nvme_ctrlr_abort_queued_aborts(struct spdk_nvme_ctrlr *ctrlr) } } -static int -nvme_ctrlr_reset_pre(struct spdk_nvme_ctrlr *ctrlr) +int +spdk_nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr) { struct spdk_nvme_qpair *qpair; @@ -1657,10 +1657,34 @@ nvme_ctrlr_reset_pre(struct spdk_nvme_ctrlr *ctrlr) spdk_bit_array_free(&ctrlr->free_io_qids); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return 0; +} + +void +spdk_nvme_ctrlr_reconnect_async(struct spdk_nvme_ctrlr *ctrlr) +{ + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + /* Set the state back to INIT to cause a full hardware reset. */ nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); - /* Return without releasing ctrlr_lock. 
ctrlr_lock will be released when spdk_nvme_ctrlr_reset_poll_async() returns 0. */ + /* Return without releasing ctrlr_lock. ctrlr_lock will be released when + * spdk_nvme_ctrlr_reconnect_poll_async() returns 0. + */ +} + +static int +nvme_ctrlr_reset_pre(struct spdk_nvme_ctrlr *ctrlr) +{ + int rc; + + rc = spdk_nvme_ctrlr_disconnect(ctrlr); + if (rc != 0) { + return rc; + } + + spdk_nvme_ctrlr_reconnect_async(ctrlr); return 0; } @@ -1668,8 +1692,8 @@ nvme_ctrlr_reset_pre(struct spdk_nvme_ctrlr *ctrlr) * This function will be called when the controller is being reinitialized. * Note: the ctrlr_lock must be held when calling this function. */ -static int -nvme_ctrlr_reinit_on_reset(struct spdk_nvme_ctrlr *ctrlr) +int +spdk_nvme_ctrlr_reconnect_poll_async(struct spdk_nvme_ctrlr *ctrlr) { struct spdk_nvme_qpair *qpair; int rc = 0, rc_tmp = 0; @@ -1742,7 +1766,7 @@ nvme_ctrlr_reset_poll_async(struct spdk_nvme_ctrlr_reset_ctx *ctrlr_reset_ctx) { struct spdk_nvme_ctrlr *ctrlr = ctrlr_reset_ctx->ctrlr; - return nvme_ctrlr_reinit_on_reset(ctrlr); + return spdk_nvme_ctrlr_reconnect_poll_async(ctrlr); } int diff --git a/lib/nvme/spdk_nvme.map b/lib/nvme/spdk_nvme.map index 9d4d57805..2f9af62f0 100644 --- a/lib/nvme/spdk_nvme.map +++ b/lib/nvme/spdk_nvme.map @@ -40,6 +40,9 @@ spdk_nvme_ctrlr_prepare_for_reset; spdk_nvme_ctrlr_reset_async; spdk_nvme_ctrlr_reset_poll_async; + spdk_nvme_ctrlr_disconnect; + spdk_nvme_ctrlr_reconnect_async; + spdk_nvme_ctrlr_reconnect_poll_async; spdk_nvme_ctrlr_fail; spdk_nvme_ctrlr_is_failed; spdk_nvme_ctrlr_get_data;