From 961cd6ab7e1090bdabbdbb67b37c02409f02173c Mon Sep 17 00:00:00 2001 From: Seth Howell Date: Fri, 1 Feb 2019 14:03:05 -0700 Subject: [PATCH] rdma: register a poller to destroy defunct qpairs Not all RDMA drivers fail back the dummy recv and send operations that we send to them when destroying a qpair. We still need to free the resources from these qpairs to avoid eating up all of the system memory after multiple connect and disconnect events. Since we won't be getting any more completions, the best heuristic we can use is waiting a long time and then freeing the resources. qpair_fini is only called from the proper polling thread so we can safely call process_pending to flush the qpair before closing it out. Change-Id: I61e6931d7316d1e78bad26657bb671aa451e29f4 Signed-off-by: Seth Howell Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/443057 Tested-by: SPDK CI Jenkins Reviewed-by: Ben Walker Reviewed-by: Jim Harris Reviewed-by: Shuhei Matsumoto --- lib/nvmf/rdma.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/lib/nvmf/rdma.c b/lib/nvmf/rdma.c index 3cf994fa4..722571dd1 100644 --- a/lib/nvmf/rdma.c +++ b/lib/nvmf/rdma.c @@ -64,6 +64,9 @@ struct spdk_nvme_rdma_hooks g_nvmf_hooks = {}; #define DEFAULT_NVMF_RDMA_CQ_SIZE 4096 #define MAX_WR_PER_QP(queue_depth) (queue_depth * 3 + 2) +/* Timeout for destroying defunct rqpairs */ +#define NVMF_RDMA_QPAIR_DESTROY_TIMEOUT_US 4000000 + enum spdk_nvmf_rdma_request_state { /* The request is not currently in use */ RDMA_REQUEST_STATE_FREE = 0, @@ -343,6 +346,12 @@ struct spdk_nvmf_rdma_qpair { struct spdk_nvmf_rdma_wr drain_send_wr; struct spdk_nvmf_rdma_wr drain_recv_wr; + /* Poller registered in case the qpair doesn't properly + * complete the qpair destruct process and becomes defunct. + */ + + struct spdk_poller *destruct_poller; + /* There are several ways a disconnect can start on a qpair * and they are not all mutually exclusive. It is important * that we only initialize one of these paths. @@ -584,6 +593,8 @@ spdk_nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair) { spdk_trace_record(TRACE_RDMA_QP_DESTROY, 0, 0, (uintptr_t)rqpair->cm_id, 0); + spdk_poller_unregister(&rqpair->destruct_poller); + if (rqpair->qd != 0) { nvmf_rdma_dump_qpair_contents(rqpair); SPDK_WARNLOG("Destroying qpair when queue depth is %d\n", rqpair->qd); @@ -2658,6 +2669,19 @@ spdk_nvmf_rdma_request_complete(struct spdk_nvmf_request *req) return 0; } +static int +spdk_nvmf_rdma_destroy_defunct_qpair(void *ctx) +{ + struct spdk_nvmf_rdma_qpair *rqpair = ctx; + struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport, + struct spdk_nvmf_rdma_transport, transport); + + spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair, true); + spdk_nvmf_rdma_qpair_destroy(rqpair); + + return 0; +} + static void spdk_nvmf_rdma_close_qpair(struct spdk_nvmf_qpair *qpair) { @@ -2707,6 +2731,9 @@ spdk_nvmf_rdma_close_qpair(struct spdk_nvmf_qpair *qpair) return; } rqpair->current_send_depth++; + + rqpair->destruct_poller = spdk_poller_register(spdk_nvmf_rdma_destroy_defunct_qpair, (void *)rqpair, + NVMF_RDMA_QPAIR_DESTROY_TIMEOUT_US); } #ifdef DEBUG