vhost: don't lock global vhost mutex when waiting for device start/stop
This fixes a potential deadlock:

Thread 2
 * stop_device()
 * lock(&g_spdk_vhost_mutex)
 * _spdk_vhost_event_send
 * sem_wait <- waiting for pending I/O on Thread 1 to complete

Thread 1
 * spdk_rpc_construct_vhost_blk_controller
 * lock(&g_spdk_vhost_mutex) <- prevents this thread from completing any I/O

Fixes #437

Change-Id: I50ab7bc6dcd161881650ff30362127e0069a3939
Signed-off-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
Reviewed-on: https://review.gerrithub.io/396577
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
parent 0f25ee9750
commit f08a6eebd3
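
The deadlock described in the commit message is the classic lock-then-wait pattern: one thread holds the global mutex while blocking on a semaphore that only another, mutex-waiting thread can post. Below is a minimal standalone sketch of that pattern (hypothetical names, not SPDK code); built with cc -pthread, it hangs forever, which is exactly the failure mode being fixed.

#include <pthread.h>
#include <semaphore.h>
#include <unistd.h>

/* Simplified stand-ins for g_spdk_vhost_mutex and the event semaphore. */
static pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER;
static sem_t g_done_sem;

/* "Thread 2" from the commit message: holds the global mutex while waiting. */
static void *stopper_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&g_mutex);
	/* Blocks forever: the only thread that could post the semaphore
	 * is itself blocked on g_mutex below. */
	sem_wait(&g_done_sem);
	pthread_mutex_unlock(&g_mutex);
	return NULL;
}

/* "Thread 1" from the commit message: needs the mutex before it can finish
 * the work that would eventually post g_done_sem. */
static void *rpc_thread(void *arg)
{
	(void)arg;
	sleep(1);	/* only to make the bad interleaving deterministic for the demo */
	pthread_mutex_lock(&g_mutex);	/* never returns once the stopper holds it */
	sem_post(&g_done_sem);
	pthread_mutex_unlock(&g_mutex);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	sem_init(&g_done_sem, 0, 0);
	pthread_create(&t2, NULL, stopper_thread, NULL);
	pthread_create(&t1, NULL, rpc_thread, NULL);
	/* Never completes: lock-then-wait deadlock. */
	pthread_join(t2, NULL);
	pthread_join(t1, NULL);
	return 0;
}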
@@ -877,6 +877,19 @@ spdk_vhost_event_async_fn(void *arg1, void *arg2)
 		vdev = NULL;
 	}
 
+	if (vdev != NULL && vdev->lcore >= 0 &&
+	    (uint32_t)vdev->lcore != spdk_env_get_current_core()) {
+		/* if vdev has been relocated to other core, it is no longer thread-safe
+		 * to access its contents here. Even though we're running under global vhost
+		 * mutex, the controller itself (and its pollers) are not. We need to chase
+		 * the vdev thread as many times as necessary.
+		 */
+		ev = spdk_event_allocate(vdev->lcore, spdk_vhost_event_async_fn, arg1, arg2);
+		spdk_event_call(ev);
+		pthread_mutex_unlock(&g_spdk_vhost_mutex);
+		return;
+	}
+
 	ctx->cb_fn(vdev, arg2);
 	pthread_mutex_unlock(&g_spdk_vhost_mutex);
 
@@ -901,14 +914,32 @@ spdk_vhost_event_async_foreach_fn(void *arg1, void *arg2)
 	}
 
 	vdev = spdk_vhost_dev_find_by_id(ctx->vdev_id);
-	if (vdev == ctx->vdev) {
-		ctx->cb_fn(vdev, arg2);
-	} else {
+	if (vdev != ctx->vdev) {
 		/* ctx->vdev is probably a dangling pointer at this point.
 		 * It must have been removed in the meantime, so we just skip
 		 * it in our foreach chain. */
+		goto out_unlock_continue;
 	}
 
+	/* the assert is just for static analyzers, vdev cannot be NULL here */
+	assert(vdev != NULL);
+	if (vdev->lcore >= 0 &&
+	    (uint32_t)vdev->lcore != spdk_env_get_current_core()) {
+		/* if vdev has been relocated to other core, it is no longer thread-safe
+		 * to access its contents here. Even though we're running under global vhost
+		 * mutex, the controller itself (and its pollers) are not. We need to chase
+		 * the vdev thread as many times as necessary.
+		 */
+		ev = spdk_event_allocate(vdev->lcore,
+					 spdk_vhost_event_async_foreach_fn, arg1, arg2);
+		spdk_event_call(ev);
+		pthread_mutex_unlock(&g_spdk_vhost_mutex);
+		return;
+	}
+
+	ctx->cb_fn(vdev, arg2);
+
+out_unlock_continue:
 	vdev = spdk_vhost_dev_next(ctx->vdev_id);
 	spdk_vhost_external_event_foreach_continue(vdev, ctx->cb_fn, arg2);
 	pthread_mutex_unlock(&g_spdk_vhost_mutex);
@@ -917,7 +948,7 @@ spdk_vhost_event_async_foreach_fn(void *arg1, void *arg2)
 }
 
 static int
-spdk_vhost_event_send(struct spdk_vhost_dev *vdev, spdk_vhost_event_fn cb_fn,
+_spdk_vhost_event_send(struct spdk_vhost_dev *vdev, spdk_vhost_event_fn cb_fn,
 		      unsigned timeout_sec, const char *errmsg)
 {
 	struct spdk_vhost_dev_event_ctx ev_ctx = {0};
@@ -936,6 +967,7 @@ spdk_vhost_event_send(struct spdk_vhost_dev *vdev, spdk_vhost_event_fn cb_fn,
 	ev = spdk_event_allocate(vdev->lcore, spdk_vhost_event_cb, &ev_ctx, NULL);
 	assert(ev);
 	spdk_event_call(ev);
+	pthread_mutex_unlock(&g_spdk_vhost_mutex);
 
 	clock_gettime(CLOCK_REALTIME, &timeout);
 	timeout.tv_sec += timeout_sec;
@@ -947,6 +979,7 @@ spdk_vhost_event_send(struct spdk_vhost_dev *vdev, spdk_vhost_event_fn cb_fn,
 	}
 
 	sem_destroy(&ev_ctx.sem);
+	pthread_mutex_lock(&g_spdk_vhost_mutex);
 	return ev_ctx.response;
 }
 
@@ -999,7 +1032,7 @@ stop_device(int vid)
 		return;
 	}
 
-	rc = spdk_vhost_event_send(vdev, vdev->backend->stop_device, 3, "stop device");
+	rc = _spdk_vhost_event_send(vdev, vdev->backend->stop_device, 3, "stop device");
 	if (rc != 0) {
 		SPDK_ERRLOG("Couldn't stop device with vid %d.\n", vid);
 		pthread_mutex_unlock(&g_spdk_vhost_mutex);
@@ -1089,7 +1122,7 @@ start_device(int vid)
 
 	vdev->lcore = spdk_vhost_allocate_reactor(vdev->cpumask);
 	spdk_vhost_dev_mem_register(vdev);
-	rc = spdk_vhost_event_send(vdev, vdev->backend->start_device, 3, "start device");
+	rc = _spdk_vhost_event_send(vdev, vdev->backend->start_device, 3, "start device");
 	if (rc != 0) {
 		spdk_vhost_dev_mem_unregister(vdev);
 		free(vdev->mem);
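
For illustration, here is a condensed standalone sketch (hypothetical names and types, not the actual SPDK function) of the waiting pattern the renamed _spdk_vhost_event_send follows after this change: release the global mutex before sem_timedwait and re-take it before returning, so threads that need the lock to complete I/O can make progress while the caller waits. Build with cc -pthread.

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER;

struct event_ctx {
	sem_t sem;
	int response;
};

/* Stand-in for the reactor running the start/stop callback: it needs the
 * global mutex before it can finish and post the semaphore. */
static void *worker(void *arg)
{
	struct event_ctx *ctx = arg;

	pthread_mutex_lock(&g_mutex);	/* would deadlock with the old code */
	ctx->response = 0;
	pthread_mutex_unlock(&g_mutex);
	sem_post(&ctx->sem);
	return NULL;
}

/* Called with g_mutex held; returns with g_mutex held. */
static int wait_for_event(struct event_ctx *ctx, unsigned timeout_sec)
{
	struct timespec timeout;
	int rc;

	pthread_mutex_unlock(&g_mutex);		/* the essence of the fix */

	clock_gettime(CLOCK_REALTIME, &timeout);
	timeout.tv_sec += timeout_sec;
	rc = sem_timedwait(&ctx->sem, &timeout);

	pthread_mutex_lock(&g_mutex);		/* restore the caller's locking contract */
	return rc == 0 ? ctx->response : -1;
}

int main(void)
{
	struct event_ctx ctx = { .response = -1 };
	pthread_t t;

	sem_init(&ctx.sem, 0, 0);
	pthread_mutex_lock(&g_mutex);
	pthread_create(&t, NULL, worker, &ctx);
	printf("event finished with rc=%d\n", wait_for_event(&ctx, 3));
	pthread_mutex_unlock(&g_mutex);
	pthread_join(t, NULL);
	sem_destroy(&ctx.sem);
	return 0;
}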