From e4cc49bc951812fd9d8dad7f763424b71fb357e1 Mon Sep 17 00:00:00 2001 From: Seth Howell Date: Fri, 12 Jun 2020 16:35:47 -0700 Subject: [PATCH] bdev/nvme: add failover option to bdev_nvme_reset When we fail to process admin completions on a controller attempt to failover to a previously registered trid Signed-off-by: Seth Howell Change-Id: I547bd010f4b339b2af7f2b33027cddad4b4926bc Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3045 Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Aleksey Marchuk Reviewed-by: Jim Harris Reviewed-by: Ben Walker --- module/bdev/nvme/bdev_nvme.c | 48 ++++++++++++++++++++++++++++++------ module/bdev/nvme/common.h | 1 + 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index e06ef5c01..e8acdfb34 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -162,9 +162,10 @@ static int bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel static int bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len); -static int bdev_nvme_reset(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_io *bio); static int bdev_nvme_abort(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort); +static int bdev_nvme_reset(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_io *bio, + bool failover); typedef void (*populate_namespace_fn)(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_ns *nvme_ns, struct nvme_async_probe_ctx *ctx); @@ -272,7 +273,7 @@ bdev_nvme_poll_adminq(void *arg) rc = spdk_nvme_ctrlr_process_admin_completions(nvme_bdev_ctrlr->ctrlr); if (rc < 0) { - bdev_nvme_reset(nvme_bdev_ctrlr, NULL); + bdev_nvme_reset(nvme_bdev_ctrlr, NULL, true); } return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY; @@ -338,6 +339,7 @@ _bdev_nvme_reset_complete(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, int rc) pthread_mutex_lock(&g_bdev_nvme_mutex); nvme_bdev_ctrlr->resetting = false; + nvme_bdev_ctrlr->failover_in_progress = false; pthread_mutex_unlock(&g_bdev_nvme_mutex); /* Make sure we clear any pending resets before returning. */ spdk_for_each_channel(nvme_bdev_ctrlr, @@ -447,10 +449,12 @@ _bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i) } static int -bdev_nvme_reset(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_io *bio) +bdev_nvme_reset(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_io *bio, bool failover) { struct spdk_io_channel *ch; struct nvme_io_channel *nvme_ch; + struct nvme_bdev_ctrlr_trid *next_trid = NULL, *tmp_trid = NULL; + int rc = 0; pthread_mutex_lock(&g_bdev_nvme_mutex); if (nvme_bdev_ctrlr->destruct) { @@ -462,9 +466,25 @@ bdev_nvme_reset(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_io *bi return 0; } + if (failover) { + tmp_trid = TAILQ_FIRST(&nvme_bdev_ctrlr->trids); + assert(tmp_trid); + assert(&tmp_trid->trid == nvme_bdev_ctrlr->connected_trid); + next_trid = TAILQ_NEXT(tmp_trid, link); + if (!next_trid) { + failover = false; + } + } + if (!nvme_bdev_ctrlr->resetting) { nvme_bdev_ctrlr->resetting = true; + if (failover) { + nvme_bdev_ctrlr->failover_in_progress = true; + } } else { + if (next_trid && !nvme_bdev_ctrlr->failover_in_progress) { + rc = -EAGAIN; + } pthread_mutex_unlock(&g_bdev_nvme_mutex); SPDK_NOTICELOG("Unable to perform reset, already in progress.\n"); /* @@ -479,7 +499,19 @@ bdev_nvme_reset(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, struct nvme_bdev_io *bi TAILQ_INSERT_TAIL(&nvme_ch->pending_resets, spdk_bdev_io_from_ctx(bio), module_link); spdk_put_io_channel(ch); } - return 0; + return rc; + } + + if (failover) { + spdk_nvme_ctrlr_fail(nvme_bdev_ctrlr->ctrlr); + nvme_bdev_ctrlr->connected_trid = &next_trid->trid; + rc = spdk_nvme_ctrlr_set_trid(nvme_bdev_ctrlr->ctrlr, &next_trid->trid); + assert(rc == 0); + /** Shuffle the old trid to the end of the list and use the new one. + * Allows for round robin through multiple connections. + */ + TAILQ_REMOVE(&nvme_bdev_ctrlr->trids, tmp_trid, link); + TAILQ_INSERT_TAIL(&nvme_bdev_ctrlr->trids, tmp_trid, link); } pthread_mutex_unlock(&g_bdev_nvme_mutex); @@ -593,7 +625,7 @@ _bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_ bdev_io->u.bdev.num_blocks); case SPDK_BDEV_IO_TYPE_RESET: - return bdev_nvme_reset(nbdev->nvme_bdev_ctrlr, nbdev_io); + return bdev_nvme_reset(nbdev->nvme_bdev_ctrlr, nbdev_io, false); case SPDK_BDEV_IO_TYPE_FLUSH: return bdev_nvme_flush(nbdev, @@ -1138,7 +1170,7 @@ nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl) SPDK_WARNLOG("Abort failed. Resetting controller.\n"); nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr)); assert(nvme_bdev_ctrlr != NULL); - bdev_nvme_reset(nvme_bdev_ctrlr, NULL); + bdev_nvme_reset(nvme_bdev_ctrlr, NULL, false); } } @@ -1157,7 +1189,7 @@ timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr, SPDK_ERRLOG("Controller Fatal Status, reset required\n"); nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr)); assert(nvme_bdev_ctrlr != NULL); - bdev_nvme_reset(nvme_bdev_ctrlr, NULL); + bdev_nvme_reset(nvme_bdev_ctrlr, NULL, false); return; } @@ -1177,7 +1209,7 @@ timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr, case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET: nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(spdk_nvme_ctrlr_get_transport_id(ctrlr)); assert(nvme_bdev_ctrlr != NULL); - bdev_nvme_reset(nvme_bdev_ctrlr, NULL); + bdev_nvme_reset(nvme_bdev_ctrlr, NULL, false); break; case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE: SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "No action for nvme controller timeout.\n"); diff --git a/module/bdev/nvme/common.h b/module/bdev/nvme/common.h index e7f6567ca..d2bc6adf3 100644 --- a/module/bdev/nvme/common.h +++ b/module/bdev/nvme/common.h @@ -85,6 +85,7 @@ struct nvme_bdev_ctrlr { char *name; int ref; bool resetting; + bool failover_in_progress; bool destruct; /** * PI check flags. This flags is set to NVMe controllers created only