nvme: add function to reconnect qpairs.
While it is unlikely that a single qpair will fail, it is important to make it possible to reconnect a single qpair. This function is also handy at the application layer when going through a reconnect workflow. If we get -ENXIO from a qpair when we poll, we will turn around and call this function. If we get -ENXIO from this function, then we know the whole controller is failed and we need to do a reset.

Change-Id: I6a8ea0ce27fce2f5fc0a5b3db05834acd68e6a39
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/471417
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Alexey Marchuk <alexeymar@mellanox.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
parent
efc0a86426
commit
e45b619c3d
@ -1041,6 +1041,25 @@ struct spdk_nvme_qpair *spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *c
|
|||||||
const struct spdk_nvme_io_qpair_opts *opts,
|
const struct spdk_nvme_io_qpair_opts *opts,
|
||||||
size_t opts_size);
|
size_t opts_size);
|
||||||
|
|
||||||
|
/**
 * Try to re-establish the connection of a single I/O qpair.
 *
 * Call this on a qpair that was connected earlier but has since failed, which is
 * signalled by a return value of -ENXIO from either spdk_nvme_qpair_process_completions
 * or one of the spdk_nvme_ns_cmd_* functions.
 *
 * \param qpair The qpair to reconnect.
 *
 * \return 0 if the qpair was reconnected, or if it was already connected.
 * -EAGAIN if the reconnect could not be completed during this call while the controller
 * is still connected and is either resetting or enabled.
 * -ENODEV if the controller has been removed. The controller cannot be recovered in this
 * case; the application has to destroy it together with the associated qpairs.
 * -ENXIO if the controller is in a failed state and is not yet resetting. The application
 * should then call spdk_nvme_ctrlr_reset to reset the entire controller.
 */
int spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Free an I/O queue pair that was allocated by spdk_nvme_ctrlr_alloc_io_qpair().
|
* Free an I/O queue pair that was allocated by spdk_nvme_ctrlr_alloc_io_qpair().
|
||||||
*
|
*
|
||||||
|
@ -379,6 +379,52 @@ spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
|
|||||||
return qpair;
|
return qpair;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair)
|
||||||
|
{
|
||||||
|
struct spdk_nvme_ctrlr *ctrlr;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
assert(qpair != NULL);
|
||||||
|
assert(nvme_qpair_is_admin_queue(qpair) == false);
|
||||||
|
assert(qpair->ctrlr != NULL);
|
||||||
|
|
||||||
|
ctrlr = qpair->ctrlr;
|
||||||
|
nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
|
||||||
|
|
||||||
|
if (ctrlr->is_removed) {
|
||||||
|
rc = -ENODEV;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ctrlr->is_resetting) {
|
||||||
|
rc = -EAGAIN;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ctrlr->is_failed) {
|
||||||
|
rc = -ENXIO;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!qpair->transport_qp_is_failed) {
|
||||||
|
rc = 0;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
|
||||||
|
if (rc) {
|
||||||
|
nvme_qpair_set_state(qpair, NVME_QPAIR_DISABLED);
|
||||||
|
rc = -EAGAIN;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
|
||||||
|
|
||||||
|
out:
|
||||||
|
nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
|
spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
|
||||||
{
|
{
|
||||||
|
@ -168,12 +168,6 @@ nvme_transport_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Stub for the transport connect call: does no work and always returns 0. */
int
nvme_transport_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	return 0;
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
nvme_transport_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
|
nvme_transport_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
|
||||||
{
|
{
|
||||||
@ -1424,6 +1418,69 @@ test_alloc_io_qpair_wrr_2(void)
|
|||||||
cleanup_qpairs(&ctrlr);
|
cleanup_qpairs(&ctrlr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Set to true by the stub below every time it is invoked. */
bool g_connect_qpair_called = false;
/* Value the stub below returns to its caller. */
int g_connect_qpair_return_code = 0;

/*
 * Test stub for the transport connect call: records that it was invoked and returns
 * whatever g_connect_qpair_return_code currently holds. Both arguments are ignored.
 */
int
nvme_transport_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	g_connect_qpair_called = true;
	return g_connect_qpair_return_code;
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_spdk_nvme_ctrlr_reconnect_io_qpair(void)
|
||||||
|
{
|
||||||
|
struct spdk_nvme_ctrlr ctrlr = {};
|
||||||
|
struct spdk_nvme_qpair qpair = {};
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
/* Various states of controller disconnect. */
|
||||||
|
qpair.id = 1;
|
||||||
|
qpair.ctrlr = &ctrlr;
|
||||||
|
ctrlr.is_removed = 1;
|
||||||
|
ctrlr.is_failed = 0;
|
||||||
|
ctrlr.is_resetting = 0;
|
||||||
|
rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
|
||||||
|
CU_ASSERT(rc == -ENODEV)
|
||||||
|
|
||||||
|
ctrlr.is_removed = 0;
|
||||||
|
ctrlr.is_failed = 1;
|
||||||
|
rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
|
||||||
|
CU_ASSERT(rc == -ENXIO)
|
||||||
|
|
||||||
|
ctrlr.is_failed = 0;
|
||||||
|
ctrlr.is_resetting = 1;
|
||||||
|
rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
|
||||||
|
CU_ASSERT(rc == -EAGAIN)
|
||||||
|
|
||||||
|
/* Confirm precedence for controller states: removed > resetting > failed */
|
||||||
|
ctrlr.is_removed = 1;
|
||||||
|
ctrlr.is_failed = 1;
|
||||||
|
rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
|
||||||
|
CU_ASSERT(rc == -ENODEV)
|
||||||
|
|
||||||
|
ctrlr.is_removed = 0;
|
||||||
|
rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
|
||||||
|
CU_ASSERT(rc == -EAGAIN)
|
||||||
|
|
||||||
|
ctrlr.is_resetting = 0;
|
||||||
|
rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
|
||||||
|
CU_ASSERT(rc == -ENXIO)
|
||||||
|
|
||||||
|
/* qpair not failed. Make sure we don't call down to the transport */
|
||||||
|
ctrlr.is_failed = 0;
|
||||||
|
qpair.transport_qp_is_failed = false;
|
||||||
|
g_connect_qpair_called = false;
|
||||||
|
rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
|
||||||
|
CU_ASSERT(g_connect_qpair_called == false);
|
||||||
|
CU_ASSERT(rc == 0)
|
||||||
|
|
||||||
|
/* transport qpair is failed. make sure we call down to the transport */
|
||||||
|
qpair.transport_qp_is_failed = true;
|
||||||
|
rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
|
||||||
|
CU_ASSERT(g_connect_qpair_called == true);
|
||||||
|
CU_ASSERT(rc == 0)
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
test_nvme_ctrlr_fail(void)
|
test_nvme_ctrlr_fail(void)
|
||||||
{
|
{
|
||||||
@ -1885,6 +1942,8 @@ int main(int argc, char **argv)
|
|||||||
#endif
|
#endif
|
||||||
|| CU_add_test(suite, "test nvme ctrlr function test_nvme_ctrlr_test_active_ns",
|
|| CU_add_test(suite, "test nvme ctrlr function test_nvme_ctrlr_test_active_ns",
|
||||||
test_nvme_ctrlr_test_active_ns) == NULL
|
test_nvme_ctrlr_test_active_ns) == NULL
|
||||||
|
|| CU_add_test(suite, "test_spdk_nvme_ctrlr_reconnect_io_qpair",
|
||||||
|
test_spdk_nvme_ctrlr_reconnect_io_qpair) == NULL
|
||||||
) {
|
) {
|
||||||
CU_cleanup_registry();
|
CU_cleanup_registry();
|
||||||
return CU_get_error();
|
return CU_get_error();
|
||||||
|
Loading…
Reference in New Issue
Block a user