From 7003bd0de30b20c297034900c77f0ee483928c13 Mon Sep 17 00:00:00 2001 From: Changpeng Liu Date: Tue, 28 Jun 2022 11:30:03 +0800 Subject: [PATCH] nvmf/vfio-user: take endpoint as input parameter in quiesce_done QEMU may exit due to some exceptions which mean the socket connection may be disconnected at any time, so for asynchronous callbacks especially the subsystem pause/resume callbacks, they all run in asynchronous way, the controller pointer may become invalid before the callbacks are called. Fix #2530. Change-Id: I6d73597d75761e28844e83bfee7f8a446d85fa49 Signed-off-by: Changpeng Liu Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12831 Tested-by: SPDK CI Jenkins Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Reviewed-by: Tomasz Zawadzki Reviewed-by: Jim Harris --- lib/nvmf/vfio_user.c | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/lib/nvmf/vfio_user.c b/lib/nvmf/vfio_user.c index 5af0d7301..1754f7998 100644 --- a/lib/nvmf/vfio_user.c +++ b/lib/nvmf/vfio_user.c @@ -2899,7 +2899,7 @@ init_pci_config_space(vfu_pci_config_space_t *p) } struct ctrlr_quiesce_ctx { - struct nvmf_vfio_user_ctrlr *ctrlr; + struct nvmf_vfio_user_endpoint *endpoint; struct nvmf_vfio_user_poll_group *group; int status; }; @@ -2914,6 +2914,10 @@ _vfio_user_endpoint_resume_done_msg(void *ctx) endpoint->need_resume = false; + if (!vu_ctrlr) { + return; + } + if (!vu_ctrlr->queued_quiesce) { vu_ctrlr->state = VFIO_USER_CTRLR_RUNNING; @@ -2961,10 +2965,15 @@ static void vfio_user_quiesce_done(void *ctx) { struct ctrlr_quiesce_ctx *quiesce_ctx = ctx; - struct nvmf_vfio_user_ctrlr *vu_ctrlr = quiesce_ctx->ctrlr; - struct nvmf_vfio_user_endpoint *endpoint = vu_ctrlr->endpoint; + struct nvmf_vfio_user_endpoint *endpoint = quiesce_ctx->endpoint; + struct nvmf_vfio_user_ctrlr *vu_ctrlr = endpoint->ctrlr; int ret; + if (!vu_ctrlr) { + free(quiesce_ctx); + return; + } + SPDK_DEBUGLOG(nvmf_vfio, "%s device quiesced\n", ctrlr_id(vu_ctrlr)); assert(vu_ctrlr->state == VFIO_USER_CTRLR_PAUSING); @@ -2996,13 +3005,20 @@ vfio_user_pause_done(struct spdk_nvmf_subsystem *subsystem, void *ctx, int status) { struct ctrlr_quiesce_ctx *quiesce_ctx = ctx; + struct nvmf_vfio_user_endpoint *endpoint = quiesce_ctx->endpoint; + struct nvmf_vfio_user_ctrlr *vu_ctrlr = endpoint->ctrlr; + + if (!vu_ctrlr) { + free(quiesce_ctx); + return; + } quiesce_ctx->status = status; SPDK_DEBUGLOG(nvmf_vfio, "%s pause done with status %d\n", - ctrlr_id(quiesce_ctx->ctrlr), status); + ctrlr_id(vu_ctrlr), status); - spdk_thread_send_msg(quiesce_ctx->ctrlr->thread, + spdk_thread_send_msg(vu_ctrlr->thread, vfio_user_quiesce_done, ctx); } @@ -3019,15 +3035,20 @@ static void vfio_user_quiesce_pg(void *ctx) { struct ctrlr_quiesce_ctx *quiesce_ctx = ctx; - struct nvmf_vfio_user_ctrlr *vu_ctrlr = quiesce_ctx->ctrlr; - struct spdk_nvmf_subsystem *subsystem = vu_ctrlr->endpoint->subsystem; + struct nvmf_vfio_user_endpoint *endpoint = quiesce_ctx->endpoint; + struct nvmf_vfio_user_ctrlr *vu_ctrlr = endpoint->ctrlr; struct nvmf_vfio_user_poll_group *vu_group = quiesce_ctx->group; + struct spdk_nvmf_subsystem *subsystem = endpoint->subsystem; int ret; SPDK_DEBUGLOG(nvmf_vfio, "quiesced pg:%p\n", vu_group); - quiesce_ctx->group = TAILQ_NEXT(vu_group, link); + if (!vu_ctrlr) { + free(quiesce_ctx); + return; + } + quiesce_ctx->group = TAILQ_NEXT(vu_group, link); if (quiesce_ctx->group != NULL) { spdk_thread_send_msg(poll_group_to_thread(quiesce_ctx->group), vfio_user_quiesce_pg, quiesce_ctx); @@ -3038,7 +3059,7 @@ vfio_user_quiesce_pg(void *ctx) vfio_user_pause_done, quiesce_ctx); if (ret < 0) { SPDK_ERRLOG("%s: failed to pause, ret=%d\n", - endpoint_id(vu_ctrlr->endpoint), ret); + endpoint_id(endpoint), ret); vu_ctrlr->state = VFIO_USER_CTRLR_RUNNING; fail_ctrlr(vu_ctrlr); free(quiesce_ctx); @@ -3059,7 +3080,7 @@ ctrlr_quiesce(struct nvmf_vfio_user_ctrlr *vu_ctrlr) return; } - quiesce_ctx->ctrlr = vu_ctrlr; + quiesce_ctx->endpoint = vu_ctrlr->endpoint; quiesce_ctx->status = 0; quiesce_ctx->group = TAILQ_FIRST(&vu_ctrlr->transport->poll_groups); @@ -3072,7 +3093,6 @@ vfio_user_dev_quiesce_cb(vfu_ctx_t *vfu_ctx) { struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); struct spdk_nvmf_subsystem *subsystem = endpoint->subsystem; - struct nvmf_vfio_user_ctrlr *vu_ctrlr = endpoint->ctrlr; if (!vu_ctrlr) {