nvmf: clean sgroup->queued in _nvmf_qpair_destroy when ctrlr is NULL

Let us consider the following process:
  1. one fabric connect request A comes but the subsystem is paused
     due to adding/removing ns or other operations, so this request A
     will be put into sgroup->queued until the subsystem becomes active;
  2. the subsystem is paused for a long time until the connect timeout,
     related qpair is destroyed, the sgroup->queued will not be cleaned
     because qpair's ctrlr is NULL;
  3. if a new request B comes, it is more likely to be allocated to the
     same memory as the previous fabric command request. And it will be
     put into sgroup->queued again, where has already exists the exactly
     same pointer with request B.

This leads to the pointer hanging problem and it will cause infinitely
loop when traversing sgroup->queued!

So this patch avoids the ptr-hanging problem by checking and cleaning
all sgroups queued req whose qpair is the being destroyed qpair in
_nvmf_qpair_destroy when ctrlr is NULL.

This problem is already described in issue #2133.

Signed-off-by: Peng Lian<peng.lian@smartx.com>
Change-Id: I909d673b5050f21fa193914cc4ffe6634232fa7d
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/17147
Community-CI: Mellanox Build Bot
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
This commit is contained in:
Peng Lian 2023-03-17 03:02:24 -04:00 committed by Konrad Sztyber
parent c64ce716e4
commit b13ee3005d

View File

@ -1168,14 +1168,29 @@ spdk_nvmf_poll_group_remove(struct spdk_nvmf_qpair *qpair)
qpair->group = NULL; qpair->group = NULL;
} }
static void
_nvmf_qpair_sgroup_req_clean(struct spdk_nvmf_subsystem_poll_group *sgroup,
const struct spdk_nvmf_qpair *qpair)
{
struct spdk_nvmf_request *req, *tmp;
TAILQ_FOREACH_SAFE(req, &sgroup->queued, link, tmp) {
if (req->qpair == qpair) {
TAILQ_REMOVE(&sgroup->queued, req, link);
if (nvmf_transport_req_free(req)) {
SPDK_ERRLOG("Transport request free error!\n");
}
}
}
}
static void static void
_nvmf_qpair_destroy(void *ctx, int status) _nvmf_qpair_destroy(void *ctx, int status)
{ {
struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx; struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
struct spdk_nvmf_qpair *qpair = qpair_ctx->qpair; struct spdk_nvmf_qpair *qpair = qpair_ctx->qpair;
struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr; struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
struct spdk_nvmf_request *req, *tmp;
struct spdk_nvmf_subsystem_poll_group *sgroup; struct spdk_nvmf_subsystem_poll_group *sgroup;
uint32_t sid;
assert(qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING); assert(qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING);
qpair_ctx->qid = qpair->qid; qpair_ctx->qid = qpair->qid;
@ -1196,13 +1211,12 @@ _nvmf_qpair_destroy(void *ctx, int status)
if (ctrlr) { if (ctrlr) {
sgroup = &qpair->group->sgroups[ctrlr->subsys->id]; sgroup = &qpair->group->sgroups[ctrlr->subsys->id];
TAILQ_FOREACH_SAFE(req, &sgroup->queued, link, tmp) { _nvmf_qpair_sgroup_req_clean(sgroup, qpair);
if (req->qpair == qpair) { } else {
TAILQ_REMOVE(&sgroup->queued, req, link); for (sid = 0; sid < qpair->group->num_sgroups; sid++) {
if (nvmf_transport_req_free(req)) { sgroup = &qpair->group->sgroups[sid];
SPDK_ERRLOG("Transport request free error!\n"); assert(sgroup != NULL);
} _nvmf_qpair_sgroup_req_clean(sgroup, qpair);
}
} }
} }