From 3fcee8ddcc827d210e531b7500cf6a55eead3f2e Mon Sep 17 00:00:00 2001
From: Vasuki Manikarnike
Date: Fri, 21 Oct 2022 15:57:41 -0700
Subject: [PATCH] lib/nvme: Do not submit queued aborts if adminq is in failed state.

With RDMA, the admin poller can experience a remote disconnect when
processing completions. The admin qpair will be disconnected to handle
this. The disconnect code path will manually complete queued aborts.
However, the completion callback for the abort will attempt to resubmit
other queued aborts from the queue, which will result in a very large
stack and can eventually cause a segfault.

The fix is to not resubmit queued aborts if the admin qpair is in any
kind of failed state.

Change-Id: I4a6f959232c8a1bd30c87ca50459014e556cbaa0
Signed-off-by: Vasuki Manikarnike
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15114
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins
Reviewed-by: Shuhei Matsumoto
Reviewed-by: Aleksey Marchuk
Reviewed-by: Michael Haeuptle
---
 lib/nvme/nvme_ctrlr_cmd.c                               | 7 +++++++
 test/unit/lib/nvme/nvme_ctrlr_cmd.c/nvme_ctrlr_cmd_ut.c | 3 +++
 2 files changed, 10 insertions(+)

diff --git a/lib/nvme/nvme_ctrlr_cmd.c b/lib/nvme/nvme_ctrlr_cmd.c
index 093fddab2..6056bc5e3 100644
--- a/lib/nvme/nvme_ctrlr_cmd.c
+++ b/lib/nvme/nvme_ctrlr_cmd.c
@@ -4,6 +4,7 @@
  */
 
 #include "nvme_internal.h"
+#include "spdk/nvme.h"
 
 int
 spdk_nvme_ctrlr_io_cmd_raw_no_payload_build(struct spdk_nvme_ctrlr *ctrlr,
@@ -546,6 +547,12 @@ nvme_ctrlr_retry_queued_abort(struct spdk_nvme_ctrlr *ctrlr)
 	int rc;
 
 	if (ctrlr->is_resetting || ctrlr->is_destructed || ctrlr->is_failed) {
+		/* Don't resubmit aborts if ctrlr is failing */
+		return;
+	}
+
+	if (spdk_nvme_ctrlr_get_admin_qp_failure_reason(ctrlr) != SPDK_NVME_QPAIR_FAILURE_NONE) {
+		/* Don't resubmit aborts if admin qpair is failed */
 		return;
 	}
 
diff --git a/test/unit/lib/nvme/nvme_ctrlr_cmd.c/nvme_ctrlr_cmd_ut.c b/test/unit/lib/nvme/nvme_ctrlr_cmd.c/nvme_ctrlr_cmd_ut.c
index 0bc02d670..05be242b2 100644
--- a/test/unit/lib/nvme/nvme_ctrlr_cmd.c/nvme_ctrlr_cmd_ut.c
+++ b/test/unit/lib/nvme/nvme_ctrlr_cmd.c/nvme_ctrlr_cmd_ut.c
@@ -51,6 +51,9 @@ DEFINE_STUB(nvme_transport_qpair_iterate_requests, int,
 DEFINE_STUB(nvme_qpair_abort_queued_reqs_with_cbarg, uint32_t,
 	    (struct spdk_nvme_qpair *qpair, void *cmd_cb_arg), 0);
 
+DEFINE_STUB(spdk_nvme_ctrlr_get_admin_qp_failure_reason, spdk_nvme_qp_failure_reason,
+	    (struct spdk_nvme_ctrlr *ctrlr), 0);
+
 static int
 nvme_ns_cmp(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2)
 {