From 6b87dd80236ef3971ed5d261f6d7d8278d8738c4 Mon Sep 17 00:00:00 2001 From: Seth Howell Date: Fri, 1 Nov 2019 15:31:32 -0700 Subject: [PATCH] nvme_rdma: Detect stale connection failures. This is the first step in properly reconnecting after a hard power off event. Change-Id: I9739bffacd66ec6d9f8f1d376bf42291c84f90f2 Signed-off-by: Seth Howell Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/473061 Tested-by: SPDK CI Jenkins Reviewed-by: Ben Walker Reviewed-by: Shuhei Matsumoto Reviewed-by: Jim Harris --- lib/nvme/nvme_rdma.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lib/nvme/nvme_rdma.c b/lib/nvme/nvme_rdma.c index ceea974b0..fd50c64e0 100644 --- a/lib/nvme/nvme_rdma.c +++ b/lib/nvme/nvme_rdma.c @@ -362,6 +362,21 @@ nvme_rdma_validate_cm_event(enum rdma_cm_event_type expected_evt_type, return 0; } + switch (expected_evt_type) { + case RDMA_CM_EVENT_ESTABLISHED: + /* + * There is an enum ib_cm_rej_reason in the kernel headers that sets 10 as + * IB_CM_REJ_STALE_CONN. I can't find the corresponding userspace but we get + * the same values here. + */ + if (reaped_evt->event == RDMA_CM_EVENT_REJECTED && reaped_evt->status == 10) { + rc = -ESTALE; + } + break; + default: + break; + } + SPDK_ERRLOG("Expected %s but received %s (%d) from CM event channel (status = %d)\n", nvme_rdma_cm_event_str_get(expected_evt_type), nvme_rdma_cm_event_str_get(reaped_evt->event), reaped_evt->event,