From 6d5594147b6be11e972881acf238962845f23b69 Mon Sep 17 00:00:00 2001 From: Liu Xiaodong Date: Wed, 24 Nov 2021 15:05:55 -0500 Subject: [PATCH] vhost: set timeout for session's stop_poller If there is still some inflight IO which prevents vhost_session_stop_done(), stop_poller keeps retrying for up to 4 seconds and then calls vhost_session_stop_done() with -ETIMEDOUT. This avoids blocking the ctrl pthread endlessly when there is no response from the vhost session or its backend bdev, so the SPDK vhost target can still serve all other vhost devices and operations besides the failing one. Change-Id: I2fc78b4da926c936a2e42dc0e66ce1c60001330d Signed-off-by: Liu Xiaodong Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10393 Community-CI: Broadcom CI Tested-by: SPDK CI Jenkins Reviewed-by: Changpeng Liu Reviewed-by: Jim Harris --- lib/vhost/vhost.c | 6 +++++- lib/vhost/vhost_blk.c | 15 +++++++++++---- lib/vhost/vhost_internal.h | 3 +++ lib/vhost/vhost_scsi.c | 16 ++++++++++++---- 4 files changed, 31 insertions(+), 9 deletions(-) diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c index 947ddb4e3..24dffdbfe 100644 --- a/lib/vhost/vhost.c +++ b/lib/vhost/vhost.c @@ -1564,6 +1564,10 @@ vhost_destroy_connection_cb(int vid) if (vsession->started) { rc = _stop_session(vsession); + if (rc != 0) { + pthread_mutex_unlock(&g_vhost_mutex); + return rc; + } } TAILQ_REMOVE(&vsession->vdev->vsessions, vsession, tailq); @@ -1571,7 +1575,7 @@ vhost_destroy_connection_cb(int vid) free(vsession); pthread_mutex_unlock(&g_vhost_mutex); - return rc; + return 0; } void diff --git a/lib/vhost/vhost_blk.c b/lib/vhost/vhost_blk.c index 63a725651..5349a2ab0 100644 --- a/lib/vhost/vhost_blk.c +++ b/lib/vhost/vhost_blk.c @@ -1346,11 +1346,16 @@ destroy_session_poller_cb(void *arg) struct spdk_vhost_session *vsession = &bvsession->vsession; int i; - if (vsession->task_cnt > 0) { - return SPDK_POLLER_BUSY; - } + if (vsession->task_cnt > 0 || spdk_vhost_trylock() != 0) { + assert(vsession->stop_retry_count > 0); + 
vsession->stop_retry_count--; + if (vsession->stop_retry_count == 0) { + SPDK_ERRLOG("%s: Timedout when destroy session (task_cnt %d)\n", vsession->name, + vsession->task_cnt); + spdk_poller_unregister(&bvsession->stop_poller); + vhost_session_stop_done(vsession, -ETIMEDOUT); + } - if (spdk_vhost_trylock() != 0) { return SPDK_POLLER_BUSY; } @@ -1387,6 +1392,8 @@ vhost_blk_stop_cb(struct spdk_vhost_dev *vdev, vhost_blk_session_unregister_interrupts(bvsession); } + /* vhost_session_send_event timeout is 3 seconds, here set retry within 4 seconds */ + bvsession->vsession.stop_retry_count = 4000; bvsession->stop_poller = SPDK_POLLER_REGISTER(destroy_session_poller_cb, bvsession, 1000); return 0; diff --git a/lib/vhost/vhost_internal.h b/lib/vhost/vhost_internal.h index c98b9f617..aadf49765 100644 --- a/lib/vhost/vhost_internal.h +++ b/lib/vhost/vhost_internal.h @@ -166,6 +166,9 @@ struct spdk_vhost_session { /* Interval used for event coalescing checking. */ uint64_t stats_check_interval; + /* Session's stop poller will only try limited times to destroy the session. 
*/ + uint32_t stop_retry_count; + struct spdk_vhost_virtqueue virtqueue[SPDK_VHOST_MAX_VQUEUES]; TAILQ_ENTRY(spdk_vhost_session) tailq; diff --git a/lib/vhost/vhost_scsi.c b/lib/vhost/vhost_scsi.c index 6e9345794..d7e6cff48 100644 --- a/lib/vhost/vhost_scsi.c +++ b/lib/vhost/vhost_scsi.c @@ -1443,11 +1443,16 @@ destroy_session_poller_cb(void *arg) struct spdk_scsi_dev_session_state *state; uint32_t i; - if (vsession->task_cnt > 0) { - return SPDK_POLLER_BUSY; - } + if (vsession->task_cnt > 0 || spdk_vhost_trylock() != 0) { + assert(vsession->stop_retry_count > 0); + vsession->stop_retry_count--; + if (vsession->stop_retry_count == 0) { + SPDK_ERRLOG("%s: Timedout when destroy session (task_cnt %d)\n", vsession->name, + vsession->task_cnt); + spdk_poller_unregister(&svsession->stop_poller); + vhost_session_stop_done(vsession, -ETIMEDOUT); + } - if (spdk_vhost_trylock() != 0) { return SPDK_POLLER_BUSY; } @@ -1506,6 +1511,9 @@ vhost_scsi_stop_cb(struct spdk_vhost_dev *vdev, */ spdk_poller_unregister(&svsession->mgmt_poller); + /* vhost_session_send_event timeout is 3 seconds, here set retry within 4 seconds */ + svsession->vsession.stop_retry_count = 4000; + /* Wait for all pending I/Os to complete, then process all the * remaining hotremove events one last time. */