nvme/rdma: Fix searching for qpair by qp_num

Poll group holds lists of qpairs in different states and
when we got rdma completion with error, we iterate these
lists to find a qpair which qp_num matches. qp_num
is stored inside of ibv_qp which belongs to spdk_rdma_qp
structure. When nvme_rdma_qpair is disconnected, pointer
to spdk_rdma_qp is cleaned but qpair may still exist in
poll group list and when we start searhing for qpair by
qp_num we may dereference NULL pointer.

This patch adds a check that pointer to spdk_rdma_qp
is valid before dereferencing it. To minimize boilerplate code,
wrap all check in macro. Add unit test to verify this fix.

Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com>
Change-Id: I1925f93efb633fd5c176323d3bbd3641a1a632a9
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9050
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Alexey Marchuk 2021-08-03 11:52:18 +03:00 committed by Tomasz Zawadzki
parent 97385af196
commit 00277bc5d5
2 changed files with 61 additions and 11 deletions

View File

@ -104,6 +104,9 @@
#define WC_PER_QPAIR(queue_depth) (queue_depth * 2) #define WC_PER_QPAIR(queue_depth) (queue_depth * 2)
#define NVME_RDMA_POLL_GROUP_CHECK_QPN(_rqpair, qpn) \
((_rqpair)->rdma_qp && (_rqpair)->rdma_qp->qp->qp_num == (qpn)) \
struct nvme_rdma_memory_domain { struct nvme_rdma_memory_domain {
TAILQ_ENTRY(nvme_rdma_memory_domain) link; TAILQ_ENTRY(nvme_rdma_memory_domain) link;
uint32_t ref; uint32_t ref;
@ -2504,22 +2507,21 @@ nvme_rdma_poll_group_get_qpair_by_id(struct nvme_rdma_poll_group *group, uint32_
STAILQ_FOREACH(qpair, &group->group.disconnected_qpairs, poll_group_stailq) { STAILQ_FOREACH(qpair, &group->group.disconnected_qpairs, poll_group_stailq) {
rqpair = nvme_rdma_qpair(qpair); rqpair = nvme_rdma_qpair(qpair);
if (rqpair->rdma_qp->qp->qp_num == qp_num) { if (NVME_RDMA_POLL_GROUP_CHECK_QPN(rqpair, qp_num)) {
return rqpair; return rqpair;
} }
} }
STAILQ_FOREACH(qpair, &group->group.connected_qpairs, poll_group_stailq) { STAILQ_FOREACH(qpair, &group->group.connected_qpairs, poll_group_stailq) {
rqpair = nvme_rdma_qpair(qpair); rqpair = nvme_rdma_qpair(qpair);
if (rqpair->rdma_qp->qp->qp_num == qp_num) { if (NVME_RDMA_POLL_GROUP_CHECK_QPN(rqpair, qp_num)) {
return rqpair; return rqpair;
} }
} }
STAILQ_FOREACH(rqpair_tracker, &group->destroyed_qpairs, link) { STAILQ_FOREACH(rqpair_tracker, &group->destroyed_qpairs, link) {
rqpair = rqpair_tracker->destroyed_qpair_tracker; if (NVME_RDMA_POLL_GROUP_CHECK_QPN(rqpair_tracker->destroyed_qpair_tracker, qp_num)) {
if (rqpair->rdma_qp->qp->qp_num == qp_num) { return rqpair_tracker->destroyed_qpair_tracker;
return rqpair;
} }
} }

View File

@ -1237,15 +1237,15 @@ test_rdma_ctrlr_get_memory_domain(void)
static void static void
test_rdma_get_memory_translation(void) test_rdma_get_memory_translation(void)
{ {
struct ibv_qp qp = { .pd = (struct ibv_pd *)0xfeedbeef }; struct ibv_qp qp = {.pd = (struct ibv_pd *) 0xfeedbeef};
struct spdk_rdma_qp rdma_qp = { .qp = &qp }; struct spdk_rdma_qp rdma_qp = {.qp = &qp};
struct nvme_rdma_qpair rqpair = { .rdma_qp = &rdma_qp }; struct nvme_rdma_qpair rqpair = {.rdma_qp = &rdma_qp};
struct spdk_nvme_ns_cmd_ext_io_opts io_opts = { struct spdk_nvme_ns_cmd_ext_io_opts io_opts = {
.memory_domain = (struct spdk_memory_domain *)0xdeaddead .memory_domain = (struct spdk_memory_domain *) 0xdeaddead
}; };
struct nvme_request req = { .payload = { .opts = &io_opts } }; struct nvme_request req = {.payload = {.opts = &io_opts}};
struct nvme_rdma_memory_translation_ctx ctx = { struct nvme_rdma_memory_translation_ctx ctx = {
.addr = (void *)0xBAADF00D, .addr = (void *) 0xBAADF00D,
.length = 0x100 .length = 0x100
}; };
int rc; int rc;
@ -1286,6 +1286,53 @@ test_rdma_get_memory_translation(void)
nvme_rdma_put_memory_domain(rqpair.memory_domain); nvme_rdma_put_memory_domain(rqpair.memory_domain);
} }
static void
test_nvme_rdma_poll_group_get_qpair_by_id(void)
{
const uint32_t test_qp_num = 123;
struct nvme_rdma_poll_group group = {};
struct nvme_rdma_destroyed_qpair tracker = {};
struct nvme_rdma_qpair rqpair = {};
struct spdk_rdma_qp rdma_qp = {};
struct ibv_qp qp = { .qp_num = test_qp_num };
STAILQ_INIT(&group.group.disconnected_qpairs);
STAILQ_INIT(&group.group.connected_qpairs);
STAILQ_INIT(&group.destroyed_qpairs);
rqpair.qpair.trtype = SPDK_NVME_TRANSPORT_RDMA;
tracker.destroyed_qpair_tracker = &rqpair;
/* Test 1 - Simulate case when nvme_rdma_qpair is disconnected but still in one of lists.
* nvme_rdma_poll_group_get_qpair_by_id must return NULL */
STAILQ_INSERT_HEAD(&group.group.disconnected_qpairs, &rqpair.qpair, poll_group_stailq);
CU_ASSERT(nvme_rdma_poll_group_get_qpair_by_id(&group, test_qp_num) == NULL);
STAILQ_REMOVE_HEAD(&group.group.disconnected_qpairs, poll_group_stailq);
STAILQ_INSERT_HEAD(&group.group.connected_qpairs, &rqpair.qpair, poll_group_stailq);
CU_ASSERT(nvme_rdma_poll_group_get_qpair_by_id(&group, test_qp_num) == NULL);
STAILQ_REMOVE_HEAD(&group.group.connected_qpairs, poll_group_stailq);
STAILQ_INSERT_HEAD(&group.destroyed_qpairs, &tracker, link);
CU_ASSERT(nvme_rdma_poll_group_get_qpair_by_id(&group, test_qp_num) == NULL);
STAILQ_REMOVE_HEAD(&group.destroyed_qpairs, link);
/* Test 2 - nvme_rdma_qpair with valid rdma_qp/ibv_qp and qp_num */
rdma_qp.qp = &qp;
rqpair.rdma_qp = &rdma_qp;
STAILQ_INSERT_HEAD(&group.group.disconnected_qpairs, &rqpair.qpair, poll_group_stailq);
CU_ASSERT(nvme_rdma_poll_group_get_qpair_by_id(&group, test_qp_num) == &rqpair);
STAILQ_REMOVE_HEAD(&group.group.disconnected_qpairs, poll_group_stailq);
STAILQ_INSERT_HEAD(&group.group.connected_qpairs, &rqpair.qpair, poll_group_stailq);
CU_ASSERT(nvme_rdma_poll_group_get_qpair_by_id(&group, test_qp_num) == &rqpair);
STAILQ_REMOVE_HEAD(&group.group.connected_qpairs, poll_group_stailq);
STAILQ_INSERT_HEAD(&group.destroyed_qpairs, &tracker, link);
CU_ASSERT(nvme_rdma_poll_group_get_qpair_by_id(&group, test_qp_num) == &rqpair);
STAILQ_REMOVE_HEAD(&group.destroyed_qpairs, link);
}
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
CU_pSuite suite = NULL; CU_pSuite suite = NULL;
@ -1317,6 +1364,7 @@ int main(int argc, char **argv)
CU_ADD_TEST(suite, test_nvme_rdma_memory_domain); CU_ADD_TEST(suite, test_nvme_rdma_memory_domain);
CU_ADD_TEST(suite, test_rdma_ctrlr_get_memory_domain); CU_ADD_TEST(suite, test_rdma_ctrlr_get_memory_domain);
CU_ADD_TEST(suite, test_rdma_get_memory_translation); CU_ADD_TEST(suite, test_rdma_get_memory_translation);
CU_ADD_TEST(suite, test_nvme_rdma_poll_group_get_qpair_by_id);
CU_basic_set_mode(CU_BRM_VERBOSE); CU_basic_set_mode(CU_BRM_VERBOSE);
CU_basic_run_tests(); CU_basic_run_tests();