nvmf/vfio-user: set controller state in one thread

The completion callbacks of `spdk_nvmf_subsystem_resume`
and `spdk_nvmf_subsystem_pause` can run on a different
core than the `vfu_ctx` core, which may lead to a
race condition when changing the controller's state.
Here we use a thread message to change the state in
the same thread context.

Change-Id: I53d139adcca6ff72a3b91a2a931f1239f3271fa9
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12558
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: John Levon <levon@movementarian.org>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Changpeng Liu 2022-05-06 10:14:31 +08:00 committed by Tomasz Zawadzki
parent a877ca604d
commit 8ab0975b2a

View File

@@ -2886,23 +2886,22 @@ init_pci_config_space(vfu_pci_config_space_t *p)
p->hdr.intr.ipin = 0x1;
}
/* Carries the result of an spdk_nvmf_subsystem_pause() completion to the
 * controller's own thread: vfio_user_dev_quiesce_done() allocates one of
 * these and sends it via spdk_thread_send_msg() so the state change runs
 * on vu_ctrlr->thread instead of whatever core invoked the callback. */
struct subsystem_pause_ctx {
struct nvmf_vfio_user_ctrlr *ctrlr;
int status;
};
static void
vfio_user_dev_quiesce_done(struct spdk_nvmf_subsystem *subsystem,
void *cb_arg, int status);
static void
vfio_user_endpoint_resume_done(struct spdk_nvmf_subsystem *subsystem,
void *cb_arg, int status)
_vfio_user_endpoint_resume_done_msg(void *ctx)
{
struct nvmf_vfio_user_endpoint *endpoint = cb_arg;
struct nvmf_vfio_user_endpoint *endpoint = ctx;
struct nvmf_vfio_user_ctrlr *vu_ctrlr = endpoint->ctrlr;
int ret;
SPDK_DEBUGLOG(nvmf_vfio, "%s resumed done with status %d\n", endpoint_id(endpoint), status);
if (!vu_ctrlr) {
return;
}
vu_ctrlr->state = VFIO_USER_CTRLR_RUNNING;
/* Basically, once we call `vfu_device_quiesced` the device is unquiesced from
@@ -2925,19 +2924,34 @@ vfio_user_endpoint_resume_done(struct spdk_nvmf_subsystem *subsystem,
}
static void
vfio_user_dev_quiesce_done(struct spdk_nvmf_subsystem *subsystem,
void *cb_arg, int status)
vfio_user_endpoint_resume_done(struct spdk_nvmf_subsystem *subsystem,
void *cb_arg, int status)
{
struct nvmf_vfio_user_ctrlr *vu_ctrlr = cb_arg;
struct nvmf_vfio_user_endpoint *endpoint = cb_arg;
struct nvmf_vfio_user_ctrlr *vu_ctrlr = endpoint->ctrlr;
SPDK_DEBUGLOG(nvmf_vfio, "%s resumed done with status %d\n", endpoint_id(endpoint), status);
if (!vu_ctrlr) {
return;
}
spdk_thread_send_msg(vu_ctrlr->thread, _vfio_user_endpoint_resume_done_msg, endpoint);
}
static void
_vfio_user_dev_quiesce_done_msg(void *ctx)
{
struct subsystem_pause_ctx *subsystem_ctx = ctx;
struct nvmf_vfio_user_ctrlr *vu_ctrlr = subsystem_ctx->ctrlr;
struct nvmf_vfio_user_endpoint *endpoint = vu_ctrlr->endpoint;
int ret;
SPDK_DEBUGLOG(nvmf_vfio, "%s paused done with status %d\n", ctrlr_id(vu_ctrlr), status);
assert(vu_ctrlr->state == VFIO_USER_CTRLR_PAUSING);
vu_ctrlr->state = VFIO_USER_CTRLR_PAUSED;
vfu_device_quiesced(endpoint->vfu_ctx, status);
vfu_device_quiesced(endpoint->vfu_ctx, subsystem_ctx->status);
vu_ctrlr->queued_quiesce = false;
free(subsystem_ctx);
/* `vfu_device_quiesced` can change the migration state,
* so we need to re-check `vu_ctrlr->state`.
@@ -2957,6 +2971,28 @@ vfio_user_dev_quiesce_done(struct spdk_nvmf_subsystem *subsystem,
}
}
/*
 * Completion callback for spdk_nvmf_subsystem_pause().
 *
 * May be invoked on a different core than the one owning vfu_ctx, so it
 * must not touch vu_ctrlr->state directly.  Instead it packages the pause
 * status into a subsystem_pause_ctx and forwards it to the controller's
 * thread, where _vfio_user_dev_quiesce_done_msg() applies the state change.
 */
static void
vfio_user_dev_quiesce_done(struct spdk_nvmf_subsystem *subsystem,
			   void *cb_arg, int status)
{
	struct nvmf_vfio_user_ctrlr *vu_ctrlr = cb_arg;
	struct subsystem_pause_ctx *pause_ctx;

	SPDK_DEBUGLOG(nvmf_vfio, "%s paused done with status %d\n", ctrlr_id(vu_ctrlr), status);

	pause_ctx = calloc(1, sizeof(*pause_ctx));
	if (pause_ctx == NULL) {
		/* Nothing sensible to do without memory for the hand-off;
		 * flag it loudly in debug builds and bail out. */
		SPDK_ERRLOG("Failed to allocate subsystem pause context\n");
		assert(false);
		return;
	}

	pause_ctx->ctrlr = vu_ctrlr;
	pause_ctx->status = status;

	/* Hop to the controller thread; the message handler frees pause_ctx. */
	spdk_thread_send_msg(vu_ctrlr->thread, _vfio_user_dev_quiesce_done_msg, pause_ctx);
}
static int
vfio_user_dev_quiesce_cb(vfu_ctx_t *vfu_ctx)
{