nvme_rdma: Fix null pointer access and memory leaks for rqpair->reqs and rsps

Supporting SRQ caused two kinds of memory leaks. Fix both in this patch.

1. rqpair->rsps was leaked and null pointer access occurred

An error was detected during the nightly nvmf_delete_subsystem test.
The NVMe perf tool crashed with SIGABRT.

The reason of the crash was

nvme_rdma.c:2504:2: runtime error: member access within null pointer of type 'struct nvme_rdma_rsps'

This was caused by clearing rqpair->rsps before freeing rqpair->rsps.
rqpair->rsps should have been held until rqpair->rsps is freed. However,
when we support SRQ, rqpair->rsps was cleared when releasing rqpair->poller
by mistake. rqpair->rsps should be cleared only if SRQ is enabled because
in this case rqpair uses rsps of rqpair->poller.

2. rqpair->reqs and rsps are leaked for admin qpair at controller reset

To avoid unnecessary alloc and free for rqpair->rsps when enabling SRQ,
nvme_rdma_create_reqs() and nvme_rdma_create_rsps() were moved to
nvme_rdma_connect_established().

On the other hand, nvme_rdma_free_reqs() and nvme_rdma_free_rsps() were
called by nvme_rdma_ctrlr_delete_io_qpair().

However, at controller reset, admin qpair was just disconnected and
reconnected. In this case, nvme_rdma_create_reqs() and
nvme_rdma_create_rsps() were called again without calling
nvme_rdma_free_reqs() and nvme_rdma_free_rsps().

Hence, memory leak occurred.

To fix the memory leak, move nvme_rdma_free_reqs() and nvme_rdma_free_rsps()
from nvme_rdma_ctrlr_delete_io_qpair() to nvme_rdma_qpair_destroy().

One of the fixes fot the issue #2874

Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Change-Id: I167ba908cff73d7a0be2248affce4c54f233da51
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/16384
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
Shuhei Matsumoto 2023-01-20 20:49:53 +09:00 committed by Tomasz Zawadzki
parent 95aa1a7337
commit 9aabfb59d9

View File

@ -723,7 +723,9 @@ nvme_rdma_qpair_set_poller(struct spdk_nvme_qpair *qpair)
rqpair->cq = poller->cq;
rqpair->srq = poller->srq;
rqpair->rsps = poller->rsps;
if (rqpair->srq) {
rqpair->rsps = poller->rsps;
}
rqpair->poller = poller;
return 0;
}
@ -982,6 +984,7 @@ nvme_rdma_create_reqs(struct nvme_rdma_qpair *rqpair)
uint16_t i;
int rc;
assert(!rqpair->rdma_reqs);
rqpair->rdma_reqs = spdk_zmalloc(rqpair->num_entries * sizeof(struct spdk_nvme_rdma_req), 0, NULL,
SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
if (rqpair->rdma_reqs == NULL) {
@ -989,6 +992,7 @@ nvme_rdma_create_reqs(struct nvme_rdma_qpair *rqpair)
goto fail;
}
assert(!rqpair->cmds);
rqpair->cmds = spdk_zmalloc(rqpair->num_entries * sizeof(*rqpair->cmds), 0, NULL,
SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
if (!rqpair->cmds) {
@ -996,7 +1000,6 @@ nvme_rdma_create_reqs(struct nvme_rdma_qpair *rqpair)
goto fail;
}
TAILQ_INIT(&rqpair->free_reqs);
TAILQ_INIT(&rqpair->outstanding_reqs);
for (i = 0; i < rqpair->num_entries; i++) {
@ -1145,6 +1148,7 @@ nvme_rdma_connect_established(struct nvme_rdma_qpair *rqpair, int ret)
return ret;
}
assert(!rqpair->mr_map);
rqpair->mr_map = spdk_rdma_create_mem_map(rqpair->rdma_qp->qp->pd, &g_nvme_hooks,
SPDK_RDMA_MEMORY_MAP_ROLE_INITIATOR);
if (!rqpair->mr_map) {
@ -1166,6 +1170,7 @@ nvme_rdma_connect_established(struct nvme_rdma_qpair *rqpair, int ret)
opts.srq = NULL;
opts.mr_map = rqpair->mr_map;
assert(!rqpair->rsps);
rqpair->rsps = nvme_rdma_create_rsps(&opts);
if (!rqpair->rsps) {
SPDK_ERRLOG("Unable to create rqpair RDMA responses\n");
@ -1899,12 +1904,18 @@ nvme_rdma_qpair_destroy(struct nvme_rdma_qpair *rqpair)
rqpair->poller = NULL;
rqpair->cq = NULL;
rqpair->srq = NULL;
rqpair->rsps = NULL;
if (rqpair->srq) {
rqpair->srq = NULL;
rqpair->rsps = NULL;
}
} else if (rqpair->cq) {
ibv_destroy_cq(rqpair->cq);
rqpair->cq = NULL;
}
nvme_rdma_free_reqs(rqpair);
nvme_rdma_free_rsps(rqpair->rsps);
rqpair->rsps = NULL;
}
static void nvme_rdma_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);
@ -2115,8 +2126,6 @@ nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_
nvme_rdma_put_memory_domain(rqpair->memory_domain);
nvme_rdma_free_reqs(rqpair);
nvme_rdma_free_rsps(rqpair->rsps);
spdk_free(rqpair);
return 0;