From ae8a1c5805d7beb1025f44c499daa75b45ae1f35 Mon Sep 17 00:00:00 2001 From: Alexey Marchuk Date: Wed, 28 Jul 2021 17:21:24 +0300 Subject: [PATCH] nvmf: Update controller desctruction process There is a race condition between controller destruction and subsystem state change, e.g. admin qpair may already be freed when a namespace is added or removed. As result in function poll_group_update_subsystem we may get heap-use-after-free error Another problem is that some qpair's live time may exceed controller's life time. To avoid it, start controller destruction process when the last qpair finished the disconnect process (previously controller started the descruction process before the last qpair starts to disconnect and it could lead to raise conditions) Fixes #2055 Change-Id: I11612979eb914a5fdcc7b9e3c812bf1e450b6120 Signed-off-by: Alexey Marchuk Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8963 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Matt Dumm Reviewed-by: Jim Harris Reviewed-by: Ben Walker --- lib/nvmf/ctrlr.c | 15 ++++++---- lib/nvmf/nvmf.c | 67 ++++++++++++++++++++++++++------------------ lib/nvmf/subsystem.c | 2 +- 3 files changed, 50 insertions(+), 34 deletions(-) diff --git a/lib/nvmf/ctrlr.c b/lib/nvmf/ctrlr.c index e40db404b..c2bf309f8 100644 --- a/lib/nvmf/ctrlr.c +++ b/lib/nvmf/ctrlr.c @@ -921,10 +921,12 @@ nvmf_ctrlr_association_remove(void *ctx) SPDK_DEBUGLOG(nvmf, "Disconnecting host from subsystem %s due to association timeout.\n", ctrlr->subsys->subnqn); - rc = spdk_nvmf_qpair_disconnect(ctrlr->admin_qpair, NULL, NULL); - if (rc < 0) { - SPDK_ERRLOG("Fail to disconnect admin ctrlr qpair\n"); - assert(false); + if (ctrlr->admin_qpair) { + rc = spdk_nvmf_qpair_disconnect(ctrlr->admin_qpair, NULL, NULL); + if (rc < 0) { + SPDK_ERRLOG("Fail to disconnect admin ctrlr qpair\n"); + assert(false); + } } return SPDK_POLLER_BUSY; @@ -2390,6 +2392,7 @@ spdk_nvmf_ctrlr_identify_ns(struct spdk_nvmf_ctrlr *ctrlr, nvmf_bdev_ctrlr_identify_ns(ns, nsdata, ctrlr->dif_insert_or_strip); + assert(ctrlr->admin_qpair); /* Due to bug in the Linux kernel NVMe driver we have to set noiob no larger than mdts */ max_num_blocks = ctrlr->admin_qpair->transport->opts.max_io_size / (1U << nsdata->lbaf[nsdata->flbas.format].lbads); @@ -2440,11 +2443,13 @@ int spdk_nvmf_ctrlr_identify_ctrlr(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_ctrlr_data *cdata) { struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys; - struct spdk_nvmf_transport *transport = ctrlr->admin_qpair->transport; + struct spdk_nvmf_transport *transport; /* * Common fields for discovery and NVM subsystems */ + assert(ctrlr->admin_qpair); + transport = ctrlr->admin_qpair->transport; spdk_strcpy_pad(cdata->fr, FW_VERSION, sizeof(cdata->fr), ' '); assert((transport->opts.max_io_size % 4096) == 0); cdata->mdts = spdk_u32log2(transport->opts.max_io_size / 4096); diff --git a/lib/nvmf/nvmf.c b/lib/nvmf/nvmf.c index 7af3a9da1..c8c239044 100644 --- a/lib/nvmf/nvmf.c +++ b/lib/nvmf/nvmf.c @@ -930,26 +930,6 @@ _nvmf_ctrlr_destruct(void *ctx) nvmf_ctrlr_destruct(ctrlr); } -static void -_nvmf_transport_qpair_fini_complete(void *cb_ctx) -{ - struct nvmf_qpair_disconnect_ctx *qpair_ctx = cb_ctx; - - if (qpair_ctx->cb_fn) { - spdk_thread_send_msg(qpair_ctx->thread, qpair_ctx->cb_fn, qpair_ctx->ctx); - } - free(qpair_ctx); -} - -static void -_nvmf_transport_qpair_fini(void *ctx) -{ - struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx; - - spdk_nvmf_poll_group_remove(qpair_ctx->qpair); - nvmf_transport_qpair_fini(qpair_ctx->qpair, _nvmf_transport_qpair_fini_complete, qpair_ctx); -} - static void _nvmf_ctrlr_free_from_qpair(void *ctx) { @@ -961,11 +941,47 @@ _nvmf_ctrlr_free_from_qpair(void *ctx) count = spdk_bit_array_count_set(ctrlr->qpair_mask); if (count == 0) { assert(!ctrlr->in_destruct); + SPDK_DEBUGLOG(nvmf, "Last qpair %u, destroy ctrlr %hx\n", qpair_ctx->qid, ctrlr->cntlid); ctrlr->in_destruct = true; spdk_thread_send_msg(ctrlr->subsys->thread, _nvmf_ctrlr_destruct, ctrlr); } + free(qpair_ctx); +} - spdk_thread_send_msg(qpair_ctx->thread, _nvmf_transport_qpair_fini, qpair_ctx); +static void +_nvmf_transport_qpair_fini_complete(void *cb_ctx) +{ + struct nvmf_qpair_disconnect_ctx *qpair_ctx = cb_ctx; + struct spdk_nvmf_ctrlr *ctrlr; + /* Store cb args since cb_ctx can be freed in _nvmf_ctrlr_free_from_qpair */ + nvmf_qpair_disconnect_cb cb_fn = qpair_ctx->cb_fn; + void *cb_arg = qpair_ctx->ctx; + struct spdk_thread *cb_thread = qpair_ctx->thread; + + ctrlr = qpair_ctx->ctrlr; + + SPDK_DEBUGLOG(nvmf, "Finish destroying qid %u\n", qpair_ctx->qid); + + if (ctrlr) { + if (qpair_ctx->qid == 0) { + /* Admin qpair is removed, so set the pointer to NULL. + * This operation is safe since we are on ctrlr thread now, admin qpair's thread is the same + * as controller's thread */ + ctrlr->admin_qpair = NULL; + } + /* Free qpair id from controller's bit mask and destroy the controller if it is the last qpair */ + if (ctrlr->thread) { + spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_free_from_qpair, qpair_ctx); + } else { + _nvmf_ctrlr_free_from_qpair(qpair_ctx); + } + } else { + free(qpair_ctx); + } + + if (cb_fn) { + spdk_thread_send_msg(cb_thread, cb_fn, cb_arg); + } } void @@ -1026,14 +1042,9 @@ _nvmf_qpair_destroy(void *ctx, int status) } } - if (!ctrlr || !ctrlr->thread) { - spdk_nvmf_poll_group_remove(qpair); - nvmf_transport_qpair_fini(qpair, _nvmf_transport_qpair_fini_complete, qpair_ctx); - return; - } - qpair_ctx->ctrlr = ctrlr; - spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_free_from_qpair, qpair_ctx); + spdk_nvmf_poll_group_remove(qpair); + nvmf_transport_qpair_fini(qpair, _nvmf_transport_qpair_fini_complete, qpair_ctx); } static void diff --git a/lib/nvmf/subsystem.c b/lib/nvmf/subsystem.c index 873b639b8..5c2aa88a1 100644 --- a/lib/nvmf/subsystem.c +++ b/lib/nvmf/subsystem.c @@ -3088,7 +3088,7 @@ subsystem_listener_update_on_pg(struct spdk_io_channel_iter *i) group = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i)); TAILQ_FOREACH(ctrlr, &listener->subsystem->ctrlrs, link) { - if (ctrlr->admin_qpair->group == group && ctrlr->listener == listener) { + if (ctrlr->admin_qpair && ctrlr->admin_qpair->group == group && ctrlr->listener == listener) { nvmf_ctrlr_async_event_ana_change_notice(ctrlr); } }