From 20cf90801e5ae5ea6b942e99381603094ba5b838 Mon Sep 17 00:00:00 2001
From: Shuhei Matsumoto
Date: Tue, 22 Feb 2022 12:28:09 +0900
Subject: [PATCH] nvme_rdma: Handle stale connection asynchronously

Fold delayed disconnect/connect retries, attempted a finite number of
times, into the state machine for asynchronous qpair connection.

We do not need to call back into the common transport layer, but we do
need to do the following: clear rqpair->cq before starting
disconnection if the qpair uses a poll group, and clear
qpair->transport_failure_reason after the qpair is disconnected.

Additionally, place the new state STALE_CONN before INITIALIZING
because the CQ is not ready for the admin qpair to use while the state
is STALE_CONN.

Signed-off-by: Shuhei Matsumoto
Change-Id: Ibc779a2b772be9506ffd8226d5f64d6d12102ff2
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11690
Community-CI: Broadcom CI
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins
Reviewed-by: Ben Walker
Reviewed-by: Aleksey Marchuk
---
 lib/nvme/nvme_rdma.c | 81 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 80 insertions(+), 1 deletion(-)

diff --git a/lib/nvme/nvme_rdma.c b/lib/nvme/nvme_rdma.c
index 6499388a5..88f7ff5fe 100644
--- a/lib/nvme/nvme_rdma.c
+++ b/lib/nvme/nvme_rdma.c
@@ -74,6 +74,13 @@
 /* The default size for a shared rdma completion queue. */
 #define DEFAULT_NVME_RDMA_CQ_SIZE 4096
 
+/*
+ * In the special case of a stale connection we don't expose a mechanism
+ * for the user to retry the connection so we need to handle it internally.
+ */
+#define NVME_RDMA_STALE_CONN_RETRY_MAX      5
+#define NVME_RDMA_STALE_CONN_RETRY_DELAY_US 10000
+
 /*
  * Maximum value of transport_retry_count used by RDMA controller
  */
@@ -186,6 +193,7 @@ union nvme_rdma_mr {
 
 enum nvme_rdma_qpair_state {
         NVME_RDMA_QPAIR_STATE_INVALID = 0,
+        NVME_RDMA_QPAIR_STATE_STALE_CONN,
         NVME_RDMA_QPAIR_STATE_INITIALIZING,
         NVME_RDMA_QPAIR_STATE_FABRIC_CONNECT_SEND,
         NVME_RDMA_QPAIR_STATE_FABRIC_CONNECT_POLL,
@@ -259,6 +267,8 @@ struct nvme_rdma_qpair {
 
         /* Used by poll group to keep the qpair around until it is ready to remove it. */
         bool defer_deletion_to_pg;
+
+        uint8_t stale_conn_retry_count;
 };
 
 enum NVME_RDMA_COMPLETION_FLAGS {
@@ -1225,10 +1235,14 @@ nvme_rdma_resolve_addr(struct nvme_rdma_qpair *rqpair,
                                nvme_rdma_addr_resolved);
 }
 
+static int nvme_rdma_stale_conn_retry(struct nvme_rdma_qpair *rqpair);
+
 static int
 nvme_rdma_connect_established(struct nvme_rdma_qpair *rqpair, int ret)
 {
-        if (ret) {
+        if (ret == -ESTALE) {
+                return nvme_rdma_stale_conn_retry(rqpair);
+        } else if (ret) {
                 SPDK_ERRLOG("RDMA connect error %d\n", ret);
                 return ret;
         }
@@ -1406,6 +1420,18 @@ nvme_rdma_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qp
         return 0;
 }
 
+static int
+nvme_rdma_stale_conn_reconnect(struct nvme_rdma_qpair *rqpair)
+{
+        struct spdk_nvme_qpair *qpair = &rqpair->qpair;
+
+        if (spdk_get_ticks() < rqpair->evt_timeout_ticks) {
+                return -EAGAIN;
+        }
+
+        return nvme_rdma_ctrlr_connect_qpair(qpair->ctrlr, qpair);
+}
+
 static int
 nvme_rdma_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr,
                                    struct spdk_nvme_qpair *qpair)
@@ -1425,6 +1451,7 @@ nvme_rdma_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr,
                 break;
 
         case NVME_RDMA_QPAIR_STATE_INITIALIZING:
+        case NVME_RDMA_QPAIR_STATE_EXITING:
                 if (!nvme_qpair_is_admin_queue(qpair)) {
                         nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
                 }
@@ -1442,6 +1469,12 @@
 
                 return rc;
 
+        case NVME_RDMA_QPAIR_STATE_STALE_CONN:
+                rc = nvme_rdma_stale_conn_reconnect(rqpair);
+                if (rc == 0) {
+                        rc = -EAGAIN;
+                }
+                break;
         case NVME_RDMA_QPAIR_STATE_FABRIC_CONNECT_SEND:
                 rc = nvme_fabric_qpair_connect_async(qpair, rqpair->num_entries + 1);
                 if (rc == 0) {
@@ -2068,6 +2101,52 @@ nvme_rdma_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme
         }
 }
 
+static int
+nvme_rdma_stale_conn_disconnected(struct nvme_rdma_qpair *rqpair, int ret)
+{
+        struct spdk_nvme_qpair *qpair = &rqpair->qpair;
+
+        if (ret) {
+                SPDK_DEBUGLOG(nvme, "Target did not respond to qpair disconnect.\n");
+        }
+
+        nvme_rdma_qpair_destroy(rqpair);
+
+        qpair->last_transport_failure_reason = qpair->transport_failure_reason;
+        qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_NONE;
+
+        rqpair->state = NVME_RDMA_QPAIR_STATE_STALE_CONN;
+        rqpair->evt_timeout_ticks = (NVME_RDMA_STALE_CONN_RETRY_DELAY_US * spdk_get_ticks_hz()) /
+                                    SPDK_SEC_TO_USEC + spdk_get_ticks();
+
+        return 0;
+}
+
+static int
+nvme_rdma_stale_conn_retry(struct nvme_rdma_qpair *rqpair)
+{
+        struct spdk_nvme_qpair *qpair = &rqpair->qpair;
+
+        if (rqpair->stale_conn_retry_count >= NVME_RDMA_STALE_CONN_RETRY_MAX) {
+                SPDK_ERRLOG("Retry failed %d times, give up stale connection to qpair (cntlid:%u, qid:%u).\n",
+                            NVME_RDMA_STALE_CONN_RETRY_MAX, qpair->ctrlr->cntlid, qpair->id);
+                return -ESTALE;
+        }
+
+        rqpair->stale_conn_retry_count++;
+
+        SPDK_NOTICELOG("%d times, retry stale connection to qpair (cntlid:%u, qid:%u).\n",
+                       rqpair->stale_conn_retry_count, qpair->ctrlr->cntlid, qpair->id);
+
+        if (qpair->poll_group) {
+                rqpair->cq = NULL;
+        }
+
+        _nvme_rdma_ctrlr_disconnect_qpair(qpair->ctrlr, qpair, nvme_rdma_stale_conn_disconnected);
+
+        return 0;
+}
+
 static void nvme_rdma_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);
 
 static int
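
A note on the timing logic for reviewers: SPDK timestamps are raw tick
counts, so nvme_rdma_stale_conn_disconnected() converts the 10 ms retry
delay (NVME_RDMA_STALE_CONN_RETRY_DELAY_US) into ticks before adding it
to the current time, and nvme_rdma_stale_conn_reconnect() returns
-EAGAIN until that deadline passes. Below is a minimal, self-contained
sketch of the same deadline arithmetic and bounded-retry check in plain
C; the fixed 2.4 GHz tick rate, the helper names, and main() are
illustrative stand-ins for spdk_get_ticks()/spdk_get_ticks_hz() and the
patch's functions, not part of the change itself.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define SEC_TO_USEC    1000000ULL /* stand-in for SPDK_SEC_TO_USEC */
#define RETRY_DELAY_US 10000ULL   /* mirrors NVME_RDMA_STALE_CONN_RETRY_DELAY_US */
#define RETRY_MAX      5          /* mirrors NVME_RDMA_STALE_CONN_RETRY_MAX */

/* Arm the retry delay the way nvme_rdma_stale_conn_disconnected() does:
 * convert the delay from microseconds to ticks, then offset from "now".
 */
static uint64_t
arm_retry_deadline(uint64_t now_ticks, uint64_t ticks_hz)
{
        return (RETRY_DELAY_US * ticks_hz) / SEC_TO_USEC + now_ticks;
}

/* Condensed view of the STALE_CONN handling: give up after RETRY_MAX
 * attempts (as nvme_rdma_stale_conn_retry() does) and report -EAGAIN
 * until the deadline expires (as nvme_rdma_stale_conn_reconnect() does).
 */
static int
poll_stale_conn(uint64_t now_ticks, uint64_t deadline_ticks, uint8_t retry_count)
{
        if (retry_count >= RETRY_MAX) {
                return -ESTALE; /* bounded retries exhausted; fail the connect */
        }
        if (now_ticks < deadline_ticks) {
                return -EAGAIN; /* delay not expired; caller polls again later */
        }
        return 0; /* safe to start the next reconnect attempt */
}

int
main(void)
{
        const uint64_t hz = 2400000000ULL; /* hypothetical 2.4 GHz tick clock */
        uint64_t deadline = arm_retry_deadline(0, hz);

        /* 10 ms at 2.4 GHz is 24,000,000 ticks. */
        printf("deadline: %llu ticks\n", (unsigned long long)deadline);
        printf("poll at t=0: %d (-EAGAIN)\n", poll_stale_conn(0, deadline, 0));
        printf("poll at deadline: %d (ok)\n", poll_stale_conn(deadline, deadline, 0));
        printf("poll after %d retries: %d (-ESTALE)\n", RETRY_MAX,
               poll_stale_conn(deadline, deadline, RETRY_MAX));
        return 0;
}

The multiply-before-divide order in arm_retry_deadline() matters:
dividing RETRY_DELAY_US by SEC_TO_USEC first would truncate to zero for
any sub-second delay, which is why the patch computes
(delay_us * ticks_hz) / SPDK_SEC_TO_USEC in that order as well.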