bdev/nvme: Reset I/O cancels reconnect timer and starts reconnection

Previously, if a reconnect timer was registered when a reset request
came, the reset request failed with -EBUSY. However, this meant the
reset request stayed queued for a long time, until the reconnect timer
expired.

When a reconnect timer is registered, reset is not actually in progress.
Hence, a new reset request can cancel the reconnect timer and can start
reconnection safely.

Add a unit test case to verify this change.

Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Change-Id: Ied8dd0ad822d2fd6829d88cd56cb36bd4fad13f9
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/16823
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
This commit is contained in:
Shuhei Matsumoto 2023-02-16 10:55:40 +09:00 committed by David Ko
parent ec2abc81a2
commit 7ea8a5aae5
2 changed files with 56 additions and 9 deletions

View File

@ -1863,7 +1863,11 @@ bdev_nvme_reconnect_delay_timer_expired(void *ctx)
spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
assert(nvme_ctrlr->reconnect_is_delayed == true);
if (!nvme_ctrlr->reconnect_is_delayed) {
pthread_mutex_unlock(&nvme_ctrlr->mutex);
return SPDK_POLLER_BUSY;
}
nvme_ctrlr->reconnect_is_delayed = false;
if (nvme_ctrlr->destruct) {
@ -2081,6 +2085,21 @@ bdev_nvme_reset_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
bdev_nvme_reset_ctrlr);
}
/*
 * Message callback used when a reset request arrives while a delayed
 * reconnect is pending: cancel the pending reconnect delay timer and begin
 * reconnection right away.
 *
 * ctx is the struct nvme_ctrlr to reconnect.
 */
static void
_bdev_nvme_reconnect(void *ctx)
{
	struct nvme_ctrlr *ctrlr = ctx;

	/* The caller must have marked the controller as resetting before sending this message. */
	assert(ctrlr->resetting == true);
	/* This callback must run on the controller's own thread. */
	assert(ctrlr->thread == spdk_get_thread());

	/* Drop the pending reconnect delay timer; reconnection starts now instead. */
	spdk_poller_unregister(&ctrlr->reconnect_delay_timer);

	/* Resume the admin queue timer poller before starting the reconnect. */
	spdk_poller_resume(ctrlr->adminq_timer_poller);

	bdev_nvme_reconnect_ctrlr(ctrlr);
}
static void
_bdev_nvme_reset(void *ctx)
{
@ -2099,6 +2118,8 @@ _bdev_nvme_reset(void *ctx)
static int
bdev_nvme_reset(struct nvme_ctrlr *nvme_ctrlr)
{
spdk_msg_fn msg_fn;
pthread_mutex_lock(&nvme_ctrlr->mutex);
if (nvme_ctrlr->destruct) {
pthread_mutex_unlock(&nvme_ctrlr->mutex);
@ -2111,20 +2132,22 @@ bdev_nvme_reset(struct nvme_ctrlr *nvme_ctrlr)
return -EBUSY;
}
if (nvme_ctrlr->reconnect_is_delayed) {
pthread_mutex_unlock(&nvme_ctrlr->mutex);
SPDK_NOTICELOG("Reconnect is already scheduled.\n");
return -EBUSY;
}
nvme_ctrlr->resetting = true;
if (nvme_ctrlr->reconnect_is_delayed) {
SPDK_DEBUGLOG(bdev_nvme, "Reconnect is already scheduled.\n");
msg_fn = _bdev_nvme_reconnect;
nvme_ctrlr->reconnect_is_delayed = false;
} else {
msg_fn = _bdev_nvme_reset;
assert(nvme_ctrlr->reset_start_tsc == 0);
}
nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
pthread_mutex_unlock(&nvme_ctrlr->mutex);
spdk_thread_send_msg(nvme_ctrlr->thread, _bdev_nvme_reset, nvme_ctrlr);
spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
return 0;
}

View File

@ -5251,6 +5251,30 @@ test_reconnect_ctrlr(void)
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true);
/* A new reset starts from thread 1. */
set_thread(1);
/* The reset should cancel the reconnect timer and should start from reconnection.
* Then, the reset should fail and a reconnect timer should be registered again.
*/
ctrlr.fail_reset = true;
ctrlr.is_failed = true;
rc = bdev_nvme_reset(nvme_ctrlr);
CU_ASSERT(rc == 0);
CU_ASSERT(nvme_ctrlr->resetting == true);
CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == false);
CU_ASSERT(ctrlr.is_failed == true);
poll_threads();
CU_ASSERT(nvme_ctrlr->resetting == false);
CU_ASSERT(ctrlr.is_failed == false);
CU_ASSERT(ctrlr_ch1->qpair->qpair == NULL);
CU_ASSERT(ctrlr_ch2->qpair->qpair == NULL);
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true);
/* Then a reconnect retry should succeed. */
ctrlr.fail_reset = false;