nvme: fix the aer request sent to disabled controller

The purpose of this patch is to fix the following issue: https://github.com/spdk/spdk/issues/568. The root cause of the issue is in nvme_rdma_fail_qpair, where we recycle all outstanding rdma_reqs. One of them is an aer req whose callback is nvme_ctrlr_async_event_cb. In this function, we call nvme_ctrlr_construct_and_submit_aer again; however, the nvme controller is already in the shutdown state (ctrlr->vcprop.cc.bits.en is set to 0). Change-Id: I422f0fe5faf472e9a1cb6bbd174e806e6405b95c Signed-off-by: Ziye Yang <ziye.yang@intel.com> Reviewed-on: https://review.gerrithub.io/c/440014 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com>
2019-01-12 02:04:52 +08:00 · 2019-01-12 02:04:52 +08:00 · 3608464f04
commit 3608464f04
parent da992ebcc4
2 changed files with 9 additions and 0 deletions
--- a/lib/nvme/nvme_ctrlr.c
+++ b/lib/nvme/nvme_ctrlr.c
@ -535,6 +535,7 @@ nvme_ctrlr_shutdown(struct spdk_nvme_ctrlr *ctrlr)
 		if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) {
 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "shutdown complete in %u milliseconds\n",
 				      ms_waited);
+			ctrlr->is_shutdown = true;
 			return;
 		}

@ -1520,6 +1521,11 @@ nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl)
 		active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl);
 	}

+	/* If the ctrlr is already shutdown, we should not send aer again */
+	if (ctrlr->is_shutdown) {
+		return;
+	}
+
 	/*
 	 * Repost another asynchronous event request to replace the one
 	 *  that just completed.
@ -2198,6 +2204,7 @@ nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr)
 	ctrlr->free_io_qids = NULL;
 	ctrlr->is_resetting = false;
 	ctrlr->is_failed = false;
+	ctrlr->is_shutdown = false;

 	TAILQ_INIT(&ctrlr->active_io_qpairs);
 	STAILQ_INIT(&ctrlr->queued_aborts);
--- a/lib/nvme/nvme_internal.h
+++ b/lib/nvme/nvme_internal.h
@ -602,6 +602,8 @@ struct spdk_nvme_ctrlr {

 	bool				is_failed;

+	bool				is_shutdown;
+
 	bool				timeout_enabled;

 	uint16_t			max_sges;