nvme: add function to reconnect qpairs.

While it is unlikely that a single qpair will fail on its own, it is
important to make it possible to reconnect a single qpair.

This function is also handy at the application layer when going through
a reconnect workflow. If we get -ENXIO from a qpair when we poll, we
can turn around and call this function. If we get -ENXIO from this
function, then we know the whole controller has failed and we need to
do a reset.

Change-Id: I6a8ea0ce27fce2f5fc0a5b3db05834acd68e6a39
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/471417
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Alexey Marchuk <alexeymar@mellanox.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
Seth Howell 2019-10-15 13:37:38 -07:00 committed by Jim Harris
parent efc0a86426
commit e45b619c3d
3 changed files with 130 additions and 6 deletions

View File

@ -1041,6 +1041,25 @@ struct spdk_nvme_qpair *spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *c
const struct spdk_nvme_io_qpair_opts *opts,
size_t opts_size);
/**
 * Attempt to reconnect the given I/O qpair.
 *
 * This function is intended to be called on qpairs that have already been connected,
 * but have since entered a failed state as indicated by a return value of -ENXIO from
 * either spdk_nvme_qpair_process_completions or one of the spdk_nvme_ns_cmd_* functions.
 *
 * The controller state is examined before the qpair itself, in the order
 * removed, resetting, failed. The first condition that matches determines the
 * return value, and no reconnect is attempted in any of those cases.
 *
 * \param qpair The qpair to reconnect. Must not be NULL and must not be the admin queue.
 *
 * \return 0 on success, or if the qpair was already connected.
 * -EAGAIN if the driver was unable to reconnect during this call,
 * but the controller is still connected and is either resetting or enabled.
 * -ENODEV if the controller is removed. In this case, the controller cannot be recovered
 * and the application will have to destroy it and the associated qpairs.
 * -ENXIO if the controller is in a failed state but is not yet resetting. In this case,
 * the application should call spdk_nvme_ctrlr_reset to reset the entire controller.
 */
int spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair);
/**
* Free an I/O queue pair that was allocated by spdk_nvme_ctrlr_alloc_io_qpair().
*

View File

@ -379,6 +379,52 @@ spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
return qpair;
}
/*
 * Try to bring a failed I/O qpair back to the connected state.
 *
 * The whole decision is made while holding the controller lock. Controller
 * level conditions win over the qpair state, checked in this order:
 *   removed   -> -ENODEV
 *   resetting -> -EAGAIN
 *   failed    -> -ENXIO
 * If none of those apply and the transport qpair is not marked failed, there
 * is nothing to do and 0 is returned. Otherwise the transport is asked to
 * connect the qpair: a transport error disables the qpair and is reported as
 * -EAGAIN, while success marks the qpair connected and returns 0.
 */
int
spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr *owner;
	int status = 0;

	assert(qpair != NULL);
	assert(nvme_qpair_is_admin_queue(qpair) == false);
	assert(qpair->ctrlr != NULL);

	owner = qpair->ctrlr;

	nvme_robust_mutex_lock(&owner->ctrlr_lock);

	if (owner->is_removed) {
		status = -ENODEV;
	} else if (owner->is_resetting) {
		status = -EAGAIN;
	} else if (owner->is_failed) {
		status = -ENXIO;
	} else if (qpair->transport_qp_is_failed) {
		/* Only a qpair flagged as failed is handed to the transport. */
		if (nvme_transport_ctrlr_connect_qpair(owner, qpair) != 0) {
			nvme_qpair_set_state(qpair, NVME_QPAIR_DISABLED);
			/* The transport's own error code is not propagated. */
			status = -EAGAIN;
		} else {
			nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
		}
	}

	nvme_robust_mutex_unlock(&owner->ctrlr_lock);

	return status;
}
int
spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
{

View File

@ -168,12 +168,6 @@ nvme_transport_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_
return 0;
}
/* Stand-in for the transport connect: ignores both arguments and always returns 0. */
int
nvme_transport_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	(void)ctrlr;
	(void)qpair;

	return 0;
}
void
nvme_transport_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
@ -1424,6 +1418,69 @@ test_alloc_io_qpair_wrr_2(void)
cleanup_qpairs(&ctrlr);
}
/* Set to true every time the stubbed transport connect below is invoked. */
bool g_connect_qpair_called = false;
/* Value the stubbed transport connect below hands back to its caller. */
int g_connect_qpair_return_code = 0;

/*
 * Test double for the transport-level qpair connect. It records that it was
 * called and returns whatever g_connect_qpair_return_code currently holds;
 * neither argument is inspected.
 */
int
nvme_transport_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	const int configured_rc = g_connect_qpair_return_code;

	g_connect_qpair_called = true;

	return configured_rc;
}
static void
test_spdk_nvme_ctrlr_reconnect_io_qpair(void)
{
struct spdk_nvme_ctrlr ctrlr = {};
struct spdk_nvme_qpair qpair = {};
int rc;
/* Various states of controller disconnect. */
qpair.id = 1;
qpair.ctrlr = &ctrlr;
ctrlr.is_removed = 1;
ctrlr.is_failed = 0;
ctrlr.is_resetting = 0;
rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
CU_ASSERT(rc == -ENODEV)
ctrlr.is_removed = 0;
ctrlr.is_failed = 1;
rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
CU_ASSERT(rc == -ENXIO)
ctrlr.is_failed = 0;
ctrlr.is_resetting = 1;
rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
CU_ASSERT(rc == -EAGAIN)
/* Confirm precedence for controller states: removed > resetting > failed */
ctrlr.is_removed = 1;
ctrlr.is_failed = 1;
rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
CU_ASSERT(rc == -ENODEV)
ctrlr.is_removed = 0;
rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
CU_ASSERT(rc == -EAGAIN)
ctrlr.is_resetting = 0;
rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
CU_ASSERT(rc == -ENXIO)
/* qpair not failed. Make sure we don't call down to the transport */
ctrlr.is_failed = 0;
qpair.transport_qp_is_failed = false;
g_connect_qpair_called = false;
rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
CU_ASSERT(g_connect_qpair_called == false);
CU_ASSERT(rc == 0)
/* transport qpair is failed. make sure we call down to the transport */
qpair.transport_qp_is_failed = true;
rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
CU_ASSERT(g_connect_qpair_called == true);
CU_ASSERT(rc == 0)
}
static void
test_nvme_ctrlr_fail(void)
{
@ -1885,6 +1942,8 @@ int main(int argc, char **argv)
#endif
|| CU_add_test(suite, "test nvme ctrlr function test_nvme_ctrlr_test_active_ns",
test_nvme_ctrlr_test_active_ns) == NULL
|| CU_add_test(suite, "test_spdk_nvme_ctrlr_reconnect_io_qpair",
test_spdk_nvme_ctrlr_reconnect_io_qpair) == NULL
) {
CU_cleanup_registry();
return CU_get_error();