diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h index 13982ae14..3ab81878b 100644 --- a/include/spdk/nvme.h +++ b/include/spdk/nvme.h @@ -1091,6 +1091,8 @@ struct spdk_nvme_qpair *spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *c * This function is intended to be called on qpairs that have already been connected, * but have since entered a failed state as indicated by a return value of -ENXIO from * either spdk_nvme_qpair_process_completions or one of the spdk_nvme_ns_cmd_* functions. + * This function must be called from the same thread as spdk_nvme_qpair_process_completions + * and the spdk_nvme_ns_cmd_* functions. * * \param qpair The qpair to reconnect. * diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c index 0780093ba..e89c1c2d0 100644 --- a/lib/nvme/nvme_ctrlr.c +++ b/lib/nvme/nvme_ctrlr.c @@ -412,13 +412,18 @@ spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair) goto out; } + /* We have to confirm that any old memory is cleaned up. */ + nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); + rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair); if (rc) { nvme_qpair_set_state(qpair, NVME_QPAIR_DISABLED); + qpair->transport_qp_is_failed = true; rc = -EAGAIN; goto out; } nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED); + qpair->transport_qp_is_failed = false; out: nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); @@ -1073,7 +1078,7 @@ spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr) /* Disable all queues before disabling the controller hardware. */ TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) { nvme_qpair_set_state(qpair, NVME_QPAIR_DISABLED); - nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); + qpair->transport_qp_is_failed = true; } nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_DISABLED); nvme_qpair_complete_error_reqs(ctrlr->adminq); @@ -1102,7 +1107,14 @@ spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr) } } - if (rc == 0) { + /* + * For PCIe controllers, the memory locations of the tranpsort qpair + * don't change when the controller is reset. They simply need to be + * re-enabled with admin commands to the controller. For fabric + * controllers we need to disconnect and reconnect the qpair on its + * own thread outside of the context of the reset. + */ + if (rc == 0 && ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { /* Reinitialize qpairs */ TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) { if (nvme_transport_ctrlr_connect_qpair(ctrlr, qpair) != 0) { @@ -1111,6 +1123,7 @@ spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr) continue; } nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED); + qpair->transport_qp_is_failed = false; } } diff --git a/lib/nvme/nvme_qpair.c b/lib/nvme/nvme_qpair.c index 778f9d4db..63508c03f 100644 --- a/lib/nvme/nvme_qpair.c +++ b/lib/nvme/nvme_qpair.c @@ -458,7 +458,7 @@ spdk_nvme_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_ * qpair is not enabled, likely because a controller reset is * in progress. */ - return 0; + return -ENXIO; } /* error injection for those queued error requests */ diff --git a/test/unit/lib/nvme/nvme_qpair.c/nvme_qpair_ut.c b/test/unit/lib/nvme/nvme_qpair.c/nvme_qpair_ut.c index 665fd1f38..e1be64a7c 100644 --- a/test/unit/lib/nvme/nvme_qpair.c/nvme_qpair_ut.c +++ b/test/unit/lib/nvme/nvme_qpair.c/nvme_qpair_ut.c @@ -243,18 +243,18 @@ static void test_nvme_qpair_process_completions(void) ctrlr.is_removed = false; ctrlr.is_resetting = true; rc = spdk_nvme_qpair_process_completions(&qpair, 0); - CU_ASSERT(rc == 0); + CU_ASSERT(rc == -ENXIO); CU_ASSERT(g_called_transport_process_completions == false); /* We also need to make sure we didn't abort the requests. */ CU_ASSERT(!STAILQ_EMPTY(&qpair.queued_req)); CU_ASSERT(g_num_cb_passed == 0); CU_ASSERT(g_num_cb_failed == 0); - /* The case where we aren't resetting, but are enablign the qpair is the same as above. */ + /* The case where we aren't resetting, but are enabling the qpair is the same as above. */ ctrlr.is_resetting = false; qpair.state = NVME_QPAIR_ENABLING; rc = spdk_nvme_qpair_process_completions(&qpair, 0); - CU_ASSERT(rc == 0); + CU_ASSERT(rc == -ENXIO); CU_ASSERT(g_called_transport_process_completions == false); CU_ASSERT(!STAILQ_EMPTY(&qpair.queued_req)); CU_ASSERT(g_num_cb_passed == 0);