bdev/nvme: Fail reset sequence immediately if ctrlr is already removed.
After a controller was hot-removed, if a reset sequence started to the controller, spdk_nvme_ctrlr_disconnect() failed and caused core dump in debug mode. When implemented, how to cause the failure and how to process the failure were not clear. Hence assert was added to detect the failure. We know how we cause the failure now. Let's handle the failure appropriately. If spdk_nvme_ctrlr_disconnect() fails, we are on the nvme_ctrlr->thread. Hence call bdev_nvme_reset_complete() with failure immediately. Even if spdk_nvme_ctrlr_disconnect() completes synchronously, the completion callback is executed asynchronously when polling an adminq. Hence set the completion callback only if spdk_nvme_ctrlr_disconnect() succeeds. Fixes issue #2632 Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com> Change-Id: I11f61853aba9eca2515592f964a291e59def7247 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/13892 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com> Reviewed-by: Krzysztof Karas <krzysztof.karas@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
6439abc0d8
commit
2826ef13ce
@ -1533,10 +1533,21 @@ bdev_nvme_check_fast_io_fail_timeout(struct nvme_ctrlr *nvme_ctrlr)
|
||||
}
|
||||
}
|
||||
|
||||
static void bdev_nvme_reset_complete(struct nvme_ctrlr *nvme_ctrlr, bool success);
|
||||
|
||||
static void
|
||||
nvme_ctrlr_disconnect(struct nvme_ctrlr *nvme_ctrlr, nvme_ctrlr_disconnected_cb cb_fn)
|
||||
{
|
||||
int rc __attribute__((unused));
|
||||
int rc;
|
||||
|
||||
rc = spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
|
||||
if (rc != 0) {
|
||||
/* Disconnect fails if ctrlr is already resetting or removed. In this case,
|
||||
* fail the reset sequence immediately.
|
||||
*/
|
||||
bdev_nvme_reset_complete(nvme_ctrlr, false);
|
||||
return;
|
||||
}
|
||||
|
||||
/* spdk_nvme_ctrlr_disconnect() may complete asynchronously later by polling adminq.
|
||||
* Set callback here to execute the specified operation after ctrlr is really disconnected.
|
||||
@ -1544,13 +1555,6 @@ nvme_ctrlr_disconnect(struct nvme_ctrlr *nvme_ctrlr, nvme_ctrlr_disconnected_cb
|
||||
assert(nvme_ctrlr->disconnected_cb == NULL);
|
||||
nvme_ctrlr->disconnected_cb = cb_fn;
|
||||
|
||||
/* Disconnect fails if ctrlr is already resetting or removed. Both cases are
|
||||
* not possible. Reset is controlled and the callback to hot remove is called
|
||||
* when ctrlr is hot removed.
|
||||
*/
|
||||
rc = spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
|
||||
assert(rc == 0);
|
||||
|
||||
/* During disconnection, reduce the period to poll adminq more often. */
|
||||
bdev_nvme_change_adminq_poll_period(nvme_ctrlr, 0);
|
||||
}
|
||||
|
@ -233,6 +233,7 @@ struct spdk_nvme_ctrlr {
|
||||
bool attached;
|
||||
bool is_failed;
|
||||
bool fail_reset;
|
||||
bool is_removed;
|
||||
struct spdk_nvme_transport_id trid;
|
||||
TAILQ_HEAD(, spdk_nvme_qpair) active_io_qpairs;
|
||||
TAILQ_ENTRY(spdk_nvme_ctrlr) tailq;
|
||||
@ -764,6 +765,10 @@ spdk_nvme_ctrlr_reconnect_async(struct spdk_nvme_ctrlr *ctrlr)
|
||||
int
|
||||
spdk_nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr)
|
||||
{
|
||||
if (ctrlr->is_removed) {
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
ctrlr->adminq.is_connected = false;
|
||||
ctrlr->is_failed = false;
|
||||
|
||||
@ -1303,6 +1308,17 @@ test_create_ctrlr(void)
|
||||
CU_ASSERT(nvme_ctrlr_get_by_name("nvme0") == NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
ut_check_hotplug_on_reset(void *cb_arg, bool success)
|
||||
{
|
||||
bool *detect_remove = cb_arg;
|
||||
|
||||
CU_ASSERT(success == false);
|
||||
SPDK_CU_ASSERT_FATAL(detect_remove != NULL);
|
||||
|
||||
*detect_remove = true;
|
||||
}
|
||||
|
||||
static void
|
||||
test_reset_ctrlr(void)
|
||||
{
|
||||
@ -1312,6 +1328,7 @@ test_reset_ctrlr(void)
|
||||
struct nvme_path_id *curr_trid;
|
||||
struct spdk_io_channel *ch1, *ch2;
|
||||
struct nvme_ctrlr_channel *ctrlr_ch1, *ctrlr_ch2;
|
||||
bool detect_remove;
|
||||
int rc;
|
||||
|
||||
ut_init_trid(&trid);
|
||||
@ -1406,6 +1423,24 @@ test_reset_ctrlr(void)
|
||||
CU_ASSERT(nvme_ctrlr->resetting == false);
|
||||
CU_ASSERT(curr_trid->is_failed == false);
|
||||
|
||||
/* Case 4: ctrlr is already removed. */
|
||||
ctrlr.is_removed = true;
|
||||
|
||||
rc = bdev_nvme_reset(nvme_ctrlr);
|
||||
CU_ASSERT(rc == 0);
|
||||
|
||||
detect_remove = false;
|
||||
nvme_ctrlr->reset_cb_fn = ut_check_hotplug_on_reset;
|
||||
nvme_ctrlr->reset_cb_arg = &detect_remove;
|
||||
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT(nvme_ctrlr->reset_cb_fn == NULL);
|
||||
CU_ASSERT(nvme_ctrlr->reset_cb_arg == NULL);
|
||||
CU_ASSERT(detect_remove == true);
|
||||
|
||||
ctrlr.is_removed = false;
|
||||
|
||||
spdk_put_io_channel(ch2);
|
||||
|
||||
set_thread(0);
|
||||
|
Loading…
Reference in New Issue
Block a user