vhost: don't lock global vhost mutex when waiting for device start/stop

This fixes a potential deadlock:

Thread 2
 * stop_device()
   * lock(&g_spdk_vhost_mutex)
   * _spdk_vhost_event_send
     * sem_wait <- waiting for pending I/O on Thread 1 to complete

Thread 1
 * spdk_rpc_construct_vhost_blk_controller
   * lock(&g_spdk_vhost_mutex) <- prevents this thread from
                                  completing any I/O

Fixes #437

Change-Id: I50ab7bc6dcd161881650ff30362127e0069a3939
Signed-off-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
Reviewed-on: https://review.gerrithub.io/396577
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Darek Stojaczyk 2018-09-21 11:26:45 +02:00 committed by Jim Harris
parent 0f25ee9750
commit f08a6eebd3

View File

@ -877,6 +877,19 @@ spdk_vhost_event_async_fn(void *arg1, void *arg2)
vdev = NULL;
}
if (vdev != NULL && vdev->lcore >= 0 &&
(uint32_t)vdev->lcore != spdk_env_get_current_core()) {
/* if vdev has been relocated to other core, it is no longer thread-safe
* to access its contents here. Even though we're running under global vhost
* mutex, the controller itself (and its pollers) are not. We need to chase
* the vdev thread as many times as necessary.
*/
ev = spdk_event_allocate(vdev->lcore, spdk_vhost_event_async_fn, arg1, arg2);
spdk_event_call(ev);
pthread_mutex_unlock(&g_spdk_vhost_mutex);
return;
}
ctx->cb_fn(vdev, arg2);
pthread_mutex_unlock(&g_spdk_vhost_mutex);
@ -901,14 +914,32 @@ spdk_vhost_event_async_foreach_fn(void *arg1, void *arg2)
}
vdev = spdk_vhost_dev_find_by_id(ctx->vdev_id);
if (vdev == ctx->vdev) {
ctx->cb_fn(vdev, arg2);
} else {
if (vdev != ctx->vdev) {
/* ctx->vdev is probably a dangling pointer at this point.
* It must have been removed in the meantime, so we just skip
* it in our foreach chain. */
goto out_unlock_continue;
}
/* the assert is just for static analyzers, vdev cannot be NULL here */
assert(vdev != NULL);
if (vdev->lcore >= 0 &&
(uint32_t)vdev->lcore != spdk_env_get_current_core()) {
/* if vdev has been relocated to other core, it is no longer thread-safe
* to access its contents here. Even though we're running under global vhost
* mutex, the controller itself (and its pollers) are not. We need to chase
* the vdev thread as many times as necessary.
*/
ev = spdk_event_allocate(vdev->lcore,
spdk_vhost_event_async_foreach_fn, arg1, arg2);
spdk_event_call(ev);
pthread_mutex_unlock(&g_spdk_vhost_mutex);
return;
}
ctx->cb_fn(vdev, arg2);
out_unlock_continue:
vdev = spdk_vhost_dev_next(ctx->vdev_id);
spdk_vhost_external_event_foreach_continue(vdev, ctx->cb_fn, arg2);
pthread_mutex_unlock(&g_spdk_vhost_mutex);
@ -917,8 +948,8 @@ spdk_vhost_event_async_foreach_fn(void *arg1, void *arg2)
}
static int
spdk_vhost_event_send(struct spdk_vhost_dev *vdev, spdk_vhost_event_fn cb_fn,
unsigned timeout_sec, const char *errmsg)
_spdk_vhost_event_send(struct spdk_vhost_dev *vdev, spdk_vhost_event_fn cb_fn,
unsigned timeout_sec, const char *errmsg)
{
struct spdk_vhost_dev_event_ctx ev_ctx = {0};
struct spdk_event *ev;
@ -936,6 +967,7 @@ spdk_vhost_event_send(struct spdk_vhost_dev *vdev, spdk_vhost_event_fn cb_fn,
ev = spdk_event_allocate(vdev->lcore, spdk_vhost_event_cb, &ev_ctx, NULL);
assert(ev);
spdk_event_call(ev);
pthread_mutex_unlock(&g_spdk_vhost_mutex);
clock_gettime(CLOCK_REALTIME, &timeout);
timeout.tv_sec += timeout_sec;
@ -947,6 +979,7 @@ spdk_vhost_event_send(struct spdk_vhost_dev *vdev, spdk_vhost_event_fn cb_fn,
}
sem_destroy(&ev_ctx.sem);
pthread_mutex_lock(&g_spdk_vhost_mutex);
return ev_ctx.response;
}
@ -999,7 +1032,7 @@ stop_device(int vid)
return;
}
rc = spdk_vhost_event_send(vdev, vdev->backend->stop_device, 3, "stop device");
rc = _spdk_vhost_event_send(vdev, vdev->backend->stop_device, 3, "stop device");
if (rc != 0) {
SPDK_ERRLOG("Couldn't stop device with vid %d.\n", vid);
pthread_mutex_unlock(&g_spdk_vhost_mutex);
@ -1089,7 +1122,7 @@ start_device(int vid)
vdev->lcore = spdk_vhost_allocate_reactor(vdev->cpumask);
spdk_vhost_dev_mem_register(vdev);
rc = spdk_vhost_event_send(vdev, vdev->backend->start_device, 3, "start device");
rc = _spdk_vhost_event_send(vdev, vdev->backend->start_device, 3, "start device");
if (rc != 0) {
spdk_vhost_dev_mem_unregister(vdev);
free(vdev->mem);