diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h index f9382c539..f5d120226 100644 --- a/include/spdk/nvme.h +++ b/include/spdk/nvme.h @@ -1041,6 +1041,25 @@ struct spdk_nvme_qpair *spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *c const struct spdk_nvme_io_qpair_opts *opts, size_t opts_size); +/** + * Attempt to reconnect the given qpair. + * + * This function is intended to be called on qpairs that have already been connected, + * but have since entered a failed state as indicated by a return value of -ENXIO from + * either spdk_nvme_qpair_process_completions() or one of the spdk_nvme_ns_cmd_*() functions. + * + * \param qpair The qpair to reconnect. Must be a non-NULL I/O qpair (not the admin queue). + * + * \return 0 on success, or if the qpair was already connected. + * -EAGAIN if the driver was unable to reconnect during this call, + * but the controller is still connected and is either resetting or enabled. + * -ENODEV if the controller is removed. In this case, the controller cannot be recovered + * and the application will have to destroy it and the associated qpairs. + * -ENXIO if the controller is in a failed state but is not yet resetting. In this case, + * the application should call spdk_nvme_ctrlr_reset() to reset the entire controller. + */ +int spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair); + /** * Free an I/O queue pair that was allocated by spdk_nvme_ctrlr_alloc_io_qpair(). 
* diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c index ecc7dd8e9..6bbdeaf5b 100644 --- a/lib/nvme/nvme_ctrlr.c +++ b/lib/nvme/nvme_ctrlr.c @@ -379,6 +379,52 @@ spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr, return qpair; } +int +spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair) +{ + struct spdk_nvme_ctrlr *ctrlr; + int rc; + + assert(qpair != NULL); + assert(nvme_qpair_is_admin_queue(qpair) == false); + assert(qpair->ctrlr != NULL); + + ctrlr = qpair->ctrlr; + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + + if (ctrlr->is_removed) { + rc = -ENODEV; + goto out; + } + + if (ctrlr->is_resetting) { + rc = -EAGAIN; + goto out; + } + + if (ctrlr->is_failed) { + rc = -ENXIO; + goto out; + } + + if (!qpair->transport_qp_is_failed) { + rc = 0; + goto out; + } + + rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); + if (rc) { + nvme_qpair_set_state(qpair, NVME_QPAIR_DISABLED); + rc = -EAGAIN; + goto out; + } + nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED); + +out: + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + return rc; +} + int spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair) { diff --git a/test/unit/lib/nvme/nvme_ctrlr.c/nvme_ctrlr_ut.c b/test/unit/lib/nvme/nvme_ctrlr.c/nvme_ctrlr_ut.c index 3e6346103..307ff0e38 100644 --- a/test/unit/lib/nvme/nvme_ctrlr.c/nvme_ctrlr_ut.c +++ b/test/unit/lib/nvme/nvme_ctrlr.c/nvme_ctrlr_ut.c @@ -168,12 +168,6 @@ nvme_transport_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_ return 0; } -int -nvme_transport_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) -{ - return 0; -} - void nvme_transport_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) { @@ -1424,6 +1418,69 @@ test_alloc_io_qpair_wrr_2(void) cleanup_qpairs(&ctrlr); } +bool g_connect_qpair_called = false; +int g_connect_qpair_return_code = 0; +int nvme_transport_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair 
*qpair) +{ + g_connect_qpair_called = true; + return g_connect_qpair_return_code; +} + +static void +test_spdk_nvme_ctrlr_reconnect_io_qpair(void) +{ + struct spdk_nvme_ctrlr ctrlr = {}; + struct spdk_nvme_qpair qpair = {}; + int rc; + + /* Various states of controller disconnect, one flag set at a time. */ + qpair.id = 1; + qpair.ctrlr = &ctrlr; + ctrlr.is_removed = 1; + ctrlr.is_failed = 0; + ctrlr.is_resetting = 0; + rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair); + CU_ASSERT(rc == -ENODEV) + + ctrlr.is_removed = 0; + ctrlr.is_failed = 1; + rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair); + CU_ASSERT(rc == -ENXIO) + + ctrlr.is_failed = 0; + ctrlr.is_resetting = 1; + rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair); + CU_ASSERT(rc == -EAGAIN) + + /* Confirm precedence for controller states: removed > resetting > failed */ + ctrlr.is_removed = 1; + ctrlr.is_failed = 1; /* is_resetting is still 1: all three flags set */ + rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair); + CU_ASSERT(rc == -ENODEV) + + ctrlr.is_removed = 0; /* resetting and failed remain set */ + rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair); + CU_ASSERT(rc == -EAGAIN) + + ctrlr.is_resetting = 0; /* only failed remains set */ + rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair); + CU_ASSERT(rc == -ENXIO) + + /* No controller flags set and qpair not failed. Make sure we don't call down to the transport */ + ctrlr.is_failed = 0; + qpair.transport_qp_is_failed = false; + g_connect_qpair_called = false; + rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair); + CU_ASSERT(g_connect_qpair_called == false); + CU_ASSERT(rc == 0) + + /* Transport qpair is failed. 
Make sure we call down to the transport */ + qpair.transport_qp_is_failed = true; /* g_connect_qpair_called is still false here */ + rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair); + CU_ASSERT(g_connect_qpair_called == true); + CU_ASSERT(rc == 0) /* NOTE(review): connect-failure path (-EAGAIN) is not tested */ +} + static void test_nvme_ctrlr_fail(void) { @@ -1885,6 +1942,8 @@ int main(int argc, char **argv) #endif || CU_add_test(suite, "test nvme ctrlr function test_nvme_ctrlr_test_active_ns", test_nvme_ctrlr_test_active_ns) == NULL + || CU_add_test(suite, "test_spdk_nvme_ctrlr_reconnect_io_qpair", + test_spdk_nvme_ctrlr_reconnect_io_qpair) == NULL ) { CU_cleanup_registry(); return CU_get_error();