From ba6f6c5eb05da594d6b0e04f9e8801f6e873a21d Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Wed, 7 Apr 2021 21:04:14 +0000 Subject: [PATCH] nvmf: wait for qpair disconnect cbs when removing subsys nvmf_poll_group_remove_subsystem_msg() disconnects all qpairs associated with controllers in the specified subsystem. If it finds any controllers that need to be disconnected, it sends a message to the running thread to execute the same function again later. But when it runs again later, the qpair may no longer be in the poll group, but there could still be outstanding messages being sent between threads. For example, _nvmf_qpair_destroy() needs to send a message to the ctrlr->thread to clear the qpair mask bit. All of this could result in the nvmf target starting to destroy poll groups prematurely. Destroy poll groups results in the nvmf spdk_threads exiting. If there are still messages being processed from the STOP_SUBSYSTEMS target state, we can get use-after-free errors since processing of those messages could access freed memory associated with the exited thread. Fixes issue #1850. Signed-off-by: Jim Harris Change-Id: I1e63b9addb2956495a69b5108a41e029f6f9a85d Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/7275 Community-CI: Broadcom CI Tested-by: SPDK CI Jenkins Reviewed-by: Changpeng Liu Reviewed-by: Aleksey Marchuk Reviewed-by: Reviewed-by: Shuhei Matsumoto --- lib/nvmf/nvmf.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/lib/nvmf/nvmf.c b/lib/nvmf/nvmf.c index 8cfe16e7e..05aaff048 100644 --- a/lib/nvmf/nvmf.c +++ b/lib/nvmf/nvmf.c @@ -76,6 +76,7 @@ struct nvmf_qpair_disconnect_many_ctx { struct spdk_nvmf_poll_group *group; spdk_nvmf_poll_group_mod_done cpl_fn; void *cpl_ctx; + uint32_t count; }; static void @@ -1360,6 +1361,18 @@ fini: } } +static void +remove_subsystem_qpair_cb(void *ctx) +{ + struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx; + + assert(qpair_ctx->count > 0); + qpair_ctx->count--; + if (qpair_ctx->count == 0) { + _nvmf_poll_group_remove_subsystem_cb(ctx, 0); + } +} + static void nvmf_poll_group_remove_subsystem_msg(void *ctx) { @@ -1368,29 +1381,30 @@ nvmf_poll_group_remove_subsystem_msg(void *ctx) struct spdk_nvmf_poll_group *group; struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx; int rc = 0; - bool have_qpairs = false; group = qpair_ctx->group; subsystem = qpair_ctx->subsystem; + /* Initialize count to 1. This acts like a ref count, to ensure that if spdk_nvmf_qpair_disconnect + * immediately invokes the callback (i.e. the qpairs is already in process of being disconnected) + * that we don't prematurely call _nvmf_poll_group_remove_subsystem_cb() before we've + * iterated the full list of qpairs. + */ + qpair_ctx->count = 1; TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, qpair_tmp) { if ((qpair->ctrlr != NULL) && (qpair->ctrlr->subsys == subsystem)) { - /* Use another variable to check if there were any qpairs disconnected in this call since - * we can loop over all qpairs and iterator will be NULL in the end */ - have_qpairs = true; - rc = spdk_nvmf_qpair_disconnect(qpair, NULL, NULL); + qpair_ctx->count++; + rc = spdk_nvmf_qpair_disconnect(qpair, remove_subsystem_qpair_cb, ctx); if (rc) { break; } } } + qpair_ctx->count--; - if (!have_qpairs || rc) { + if (qpair_ctx->count == 0 || rc) { _nvmf_poll_group_remove_subsystem_cb(ctx, rc); - return; } - - spdk_thread_send_msg(spdk_get_thread(), nvmf_poll_group_remove_subsystem_msg, ctx); } void