bdev/nvme: Reconnect ctrlr after it is disconnected at completion poller

spdk_nvme_ctrlr_disconnect() will be made asynchronous in the
following patches, and that change requires some preparation here.

Currently, spdk_nvme_ctrlr_disconnect() disconnects adminq and ctrlr
synchronously.

Once spdk_nvme_ctrlr_disconnect() is made asynchronous,
spdk_nvme_ctrlr_process_admin_completions() will complete the disconnection
of adminq and ctrlr, and will return -ENXIO only after adminq is disconnected.

However, even now spdk_nvme_ctrlr_process_admin_completions() returns
-ENXIO if adminq is disconnected.

Hence, as a preparation, set a callback before calling spdk_nvme_ctrlr_disconnect(),
and invoke that callback whenever it is set and spdk_nvme_ctrlr_process_admin_completions()
returns -ENXIO.
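
The caller side of this pattern looks roughly as follows (a minimal sketch
condensed from the reset paths in the diff below, not a complete function):

    /* Register the callback first, then start the (soon-to-be asynchronous)
     * disconnect. bdev_nvme_poll_adminq() invokes disconnected_cb once adminq
     * reports -ENXIO.
     */
    assert(nvme_ctrlr->disconnected_cb == NULL);
    nvme_ctrlr->disconnected_cb = bdev_nvme_reconnect_ctrlr;

    spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);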

Additionally, fix the return value of bdev_nvme_poll_adminq() in this patch.
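
For reference, the resulting poller flow can be sketched as follows (condensed
from the bdev_nvme_poll_adminq() hunk below; declarations and surrounding code
are abbreviated):

    static int
    bdev_nvme_poll_adminq(void *arg)
    {
            struct nvme_ctrlr *nvme_ctrlr = arg;
            nvme_ctrlr_disconnected_cb disconnected_cb;
            int32_t rc;

            rc = spdk_nvme_ctrlr_process_admin_completions(nvme_ctrlr->ctrlr);
            if (rc < 0) {
                    /* Clear the callback so it fires at most once per disconnect. */
                    disconnected_cb = nvme_ctrlr->disconnected_cb;
                    nvme_ctrlr->disconnected_cb = NULL;

                    if (rc == -ENXIO && disconnected_cb != NULL) {
                            /* adminq is disconnected; continue the reset/reconnect sequence. */
                            disconnected_cb(nvme_ctrlr);
                    } else {
                            /* Any other failure falls back to failover. */
                            bdev_nvme_failover(nvme_ctrlr, false);
                    }
            }

            /* Fixed return value: IDLE only when nothing was processed, BUSY otherwise. */
            return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
    }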

Change-Id: I2559f86bb8cf9a92b5b386ed816c00b08c9832df
Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10950
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>

@@ -1179,12 +1179,20 @@ bdev_nvme_poll_adminq(void *arg)
{
int32_t rc;
struct nvme_ctrlr *nvme_ctrlr = arg;
nvme_ctrlr_disconnected_cb disconnected_cb;
assert(nvme_ctrlr != NULL);
rc = spdk_nvme_ctrlr_process_admin_completions(nvme_ctrlr->ctrlr);
if (rc < 0) {
bdev_nvme_failover(nvme_ctrlr, false);
disconnected_cb = nvme_ctrlr->disconnected_cb;
nvme_ctrlr->disconnected_cb = NULL;
if (rc == -ENXIO && disconnected_cb != NULL) {
disconnected_cb(nvme_ctrlr);
} else {
bdev_nvme_failover(nvme_ctrlr, false);
}
}
return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
@@ -1503,8 +1511,13 @@ _bdev_nvme_reset_complete(struct spdk_io_channel_iter *i, int status)
_bdev_nvme_delete(nvme_ctrlr, false);
break;
case OP_DELAYED_RECONNECT:
/* spdk_nvme_ctrlr_disconnect() may complete asynchronously later by polling adminq.
* Set callback here to start reconnect delay timer after ctrlr is really disconnected.
*/
assert(nvme_ctrlr->disconnected_cb == NULL);
nvme_ctrlr->disconnected_cb = bdev_nvme_start_reconnect_delay_timer;
spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
bdev_nvme_start_reconnect_delay_timer(nvme_ctrlr);
break;
default:
break;
@@ -1626,14 +1639,18 @@ bdev_nvme_reset_ctrlr(struct spdk_io_channel_iter *i, int status)
assert(status == 0);
/* spdk_nvme_ctrlr_disconnect() may complete asynchronously later by polling adminq.
* Set callback here to reconnect after ctrlr is really disconnected.
*/
assert(nvme_ctrlr->disconnected_cb == NULL);
nvme_ctrlr->disconnected_cb = bdev_nvme_reconnect_ctrlr;
/* Disconnect fails if ctrlr is already resetting or removed. Both cases are
* not possible. Reset is controlled and the callback to hot remove is called
* when ctrlr is hot removed.
*/
rc = spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
assert(rc == 0);
bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
}
static void


@@ -102,6 +102,7 @@ struct nvme_path_id {
};
typedef void (*bdev_nvme_reset_cb)(void *cb_arg, bool success);
typedef void (*nvme_ctrlr_disconnected_cb)(struct nvme_ctrlr *nvme_ctrlr);
struct nvme_ctrlr {
/**
@@ -137,6 +138,8 @@ struct nvme_ctrlr {
uint64_t reset_start_tsc;
struct spdk_poller *reconnect_delay_timer;
nvme_ctrlr_disconnected_cb disconnected_cb;
/** linked list pointer for device list */
TAILQ_ENTRY(nvme_ctrlr) tailq;
struct nvme_bdev_ctrlr *nbdev_ctrlr;


@@ -1350,6 +1350,11 @@ test_reset_ctrlr(void)
poll_thread_times(1, 1);
poll_thread_times(0, 1);
CU_ASSERT(ctrlr.is_failed == false);
CU_ASSERT(ctrlr.adminq.is_connected == false);
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_thread_times(0, 2);
CU_ASSERT(ctrlr.adminq.is_connected == true);
poll_thread_times(0, 1);
CU_ASSERT(ctrlr_ch1->qpair->qpair != NULL);
@@ -1431,6 +1436,8 @@ test_race_between_reset_and_destruct_ctrlr(void)
CU_ASSERT(nvme_ctrlr->destruct == true);
CU_ASSERT(nvme_ctrlr->resetting == true);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
/* Reset completed but ctrlr is not still destructed yet. */
@@ -1524,6 +1531,8 @@ test_failover_ctrlr(void)
CU_ASSERT(nvme_ctrlr->resetting == true);
CU_ASSERT(curr_trid->is_failed == true);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
curr_trid = TAILQ_FIRST(&nvme_ctrlr->trids);
@@ -1566,6 +1575,8 @@ test_failover_ctrlr(void)
CU_ASSERT(next_trid == nvme_ctrlr->active_path_id);
CU_ASSERT(spdk_nvme_transport_id_compare(&next_trid->trid, &trid2) == 0);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->resetting == false);
@@ -1652,6 +1663,8 @@ test_race_between_failover_and_add_secondary_trid(void)
rc = bdev_nvme_reset(nvme_ctrlr);
CU_ASSERT(rc == 0);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(path_id1->is_failed == true);
@@ -1672,6 +1685,8 @@ test_race_between_failover_and_add_secondary_trid(void)
SPDK_CU_ASSERT_FATAL(path_id3 != NULL);
CU_ASSERT(spdk_nvme_transport_id_compare(&path_id3->trid, &trid3) == 0);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
spdk_put_io_channel(ch1);
@@ -1779,6 +1794,8 @@ test_pending_reset(void)
bdev_nvme_submit_request(ch1, second_bdev_io);
CU_ASSERT(TAILQ_FIRST(&ctrlr_ch1->pending_resets) == second_bdev_io);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->resetting == false);
@@ -1804,6 +1821,8 @@ test_pending_reset(void)
ctrlr->fail_reset = true;
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->resetting == false);
@@ -2362,6 +2381,8 @@ test_add_remove_trid(void)
}
CU_ASSERT(spdk_nvme_transport_id_compare(&nvme_ctrlr->active_path_id->trid, &path3.trid) == 0);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->resetting == false);
@@ -2616,6 +2637,10 @@ test_abort(void)
CU_ASSERT(write_io->internal.in_submit_request == false);
CU_ASSERT(write_io->internal.status == SPDK_BDEV_IO_STATUS_ABORTED);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
spdk_put_io_channel(ch1);
set_thread(1);
@@ -2987,6 +3012,11 @@ test_reconnect_qpair(void)
poll_thread_times(1, 2);
poll_thread_times(0, 1);
CU_ASSERT(ctrlr->is_failed == false);
CU_ASSERT(ctrlr->adminq.is_connected == false);
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_thread_times(0, 2);
CU_ASSERT(ctrlr->adminq.is_connected == true);
poll_thread_times(0, 1);
poll_thread_times(1, 1);
@@ -3019,7 +3049,8 @@ test_reconnect_qpair(void)
CU_ASSERT(nvme_qpair2->qpair == NULL);
CU_ASSERT(ctrlr->is_failed == true);
poll_thread_times(0, 2);
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_thread_times(0, 3);
poll_thread_times(1, 1);
poll_thread_times(0, 1);
CU_ASSERT(ctrlr->is_failed == true);
@@ -3820,8 +3851,13 @@ test_reset_bdev_ctrlr(void)
poll_thread_times(0, 1);
CU_ASSERT(nvme_ctrlr1->resetting == true);
CU_ASSERT(ctrlr1->is_failed == false);
CU_ASSERT(ctrlr1->adminq.is_connected == false);
CU_ASSERT(curr_path1->is_failed == true);
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_thread_times(0, 2);
CU_ASSERT(ctrlr1->adminq.is_connected == true);
poll_thread_times(0, 1);
CU_ASSERT(io_path11->qpair->qpair != NULL);
CU_ASSERT(io_path21->qpair->qpair == NULL);
@@ -3852,8 +3888,13 @@ test_reset_bdev_ctrlr(void)
poll_thread_times(0, 2);
CU_ASSERT(nvme_ctrlr2->resetting == true);
CU_ASSERT(ctrlr2->is_failed == false);
CU_ASSERT(ctrlr2->adminq.is_connected == false);
CU_ASSERT(curr_path2->is_failed == true);
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_thread_times(0, 2);
CU_ASSERT(ctrlr2->adminq.is_connected == true);
poll_thread_times(0, 1);
CU_ASSERT(io_path12->qpair->qpair != NULL);
CU_ASSERT(io_path22->qpair->qpair == NULL);
@@ -3901,6 +3942,10 @@ test_reset_bdev_ctrlr(void)
CU_ASSERT(nvme_ctrlr1->reset_cb_arg == first_bio);
CU_ASSERT(TAILQ_FIRST(&io_path21->qpair->ctrlr_ch->pending_resets) == second_bdev_io);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(ctrlr1->is_failed == false);
@@ -4580,7 +4625,8 @@ test_concurrent_read_ana_log_page(void)
CU_ASSERT(rc == 0);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
@@ -4595,6 +4641,10 @@ test_concurrent_read_ana_log_page(void)
CU_ASSERT(nvme_ctrlr->ana_log_page_updating == false);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
set_thread(0);
rc = bdev_nvme_delete("nvme0", &g_any_path);
@@ -5193,12 +5243,14 @@ test_retry_io_if_ctrlr_is_resetting(void)
CU_ASSERT(bdev_io1 == TAILQ_FIRST(&nbdev_ch->retry_io_list));
CU_ASSERT(bdev_io2 == TAILQ_NEXT(bdev_io1, module_link));
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_qpair->qpair != NULL);
CU_ASSERT(nvme_ctrlr->resetting == false);
spdk_delay_us(999999);
spdk_delay_us(999999 - g_opts.nvme_adminq_poll_period_us);
poll_thread_times(0, 1);
@@ -5332,11 +5384,13 @@ test_retry_admin_passthru_if_ctrlr_is_resetting(void)
CU_ASSERT(admin_io->internal.in_submit_request == true);
CU_ASSERT(admin_io == TAILQ_FIRST(&nbdev_ch->retry_io_list));
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->resetting == false);
spdk_delay_us(1000000);
spdk_delay_us(1000000 - g_opts.nvme_adminq_poll_period_us);
poll_thread_times(0, 1);
CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);
@@ -5417,12 +5471,20 @@ test_reconnect_ctrlr(void)
CU_ASSERT(nvme_ctrlr->resetting == true);
CU_ASSERT(ctrlr.is_failed == true);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->resetting == false);
CU_ASSERT(ctrlr.is_failed == false);
CU_ASSERT(ctrlr_ch1->qpair->qpair == NULL);
CU_ASSERT(ctrlr_ch2->qpair->qpair == NULL);
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);
CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == false);
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true);
@@ -5451,12 +5513,20 @@ test_reconnect_ctrlr(void)
CU_ASSERT(nvme_ctrlr->resetting == true);
CU_ASSERT(ctrlr.is_failed == true);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->resetting == false);
CU_ASSERT(ctrlr.is_failed == false);
CU_ASSERT(ctrlr_ch1->qpair->qpair == NULL);
CU_ASSERT(ctrlr_ch2->qpair->qpair == NULL);
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);
CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == false);
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true);
@@ -5467,6 +5537,8 @@ test_reconnect_ctrlr(void)
CU_ASSERT(nvme_ctrlr->resetting == true);
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->resetting == false);
@@ -5561,16 +5633,24 @@ test_retry_failover_ctrlr(void)
rc = bdev_nvme_reset(nvme_ctrlr);
CU_ASSERT(rc == 0);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->resetting == false);
CU_ASSERT(ctrlr.is_failed == false);
CU_ASSERT(ctrlr_ch->qpair->qpair == NULL);
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true);
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);
CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == false);
CU_ASSERT(path_id1->is_failed == true);
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true);
path_id2 = ut_get_path_id_by_trid(nvme_ctrlr, &trid2);
SPDK_CU_ASSERT_FATAL(path_id2 != NULL);
CU_ASSERT(path_id2->is_failed == false);
@@ -5600,6 +5680,8 @@ test_retry_failover_ctrlr(void)
CU_ASSERT(nvme_ctrlr->resetting == true);
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(path_id3->is_failed == false);
@@ -5710,15 +5792,22 @@ test_fail_path(void)
CU_ASSERT(nvme_ctrlr->resetting == true);
CU_ASSERT(ctrlr->is_failed == true);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->resetting == false);
CU_ASSERT(ctrlr->is_failed == false);
CU_ASSERT(ctrlr_ch->qpair->qpair == NULL);
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);
CU_ASSERT(nvme_ctrlr->reset_start_tsc != 0);
CU_ASSERT(nvme_ctrlr->fast_io_fail_timedout == false);
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
/* I/O should be queued. */
bdev_io->internal.in_submit_request = true;
@@ -5739,10 +5828,15 @@ test_fail_path(void)
CU_ASSERT(nvme_ctrlr->resetting == false);
CU_ASSERT(ctrlr->is_failed == false);
CU_ASSERT(ctrlr_ch->qpair->qpair == NULL);
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);
CU_ASSERT(bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr) == false);
CU_ASSERT(nvme_ctrlr->fast_io_fail_timedout == false);
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
/* After two seconds, ctrlr_fail_timeout_sec should expire. */
spdk_delay_us(SPDK_SEC_TO_USEC);
poll_threads();
@@ -5750,14 +5844,22 @@ test_fail_path(void)
CU_ASSERT(nvme_ctrlr->resetting == false);
CU_ASSERT(ctrlr->is_failed == false);
CU_ASSERT(ctrlr_ch->qpair->qpair == NULL);
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);
CU_ASSERT(bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr) == false);
CU_ASSERT(nvme_ctrlr->fast_io_fail_timedout == true);
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
/* Then within a second, pending I/O should be failed. */
spdk_delay_us(SPDK_SEC_TO_USEC);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(bdev_io->internal.in_submit_request == false);
CU_ASSERT(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_FAILED);
CU_ASSERT(TAILQ_EMPTY(&nbdev_ch->retry_io_list));
@@ -5776,6 +5878,9 @@ test_fail_path(void)
spdk_delay_us(SPDK_SEC_TO_USEC);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr == nvme_ctrlr_get_by_name("nvme0"));
CU_ASSERT(bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr) == true);
CU_ASSERT(nvme_ctrlr->destruct == true);