From 3c2190c214514d7b1d2fb2dd9ae8214dcb36c8c5 Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Wed, 16 Dec 2020 10:50:14 -0700 Subject: [PATCH] nvme: capture ticks for timeout before checking state In some extreme use cases, an SPDK process could get swapped out for a long period of time just after we checked the state but before we called spdk_get_ticks(). So now we will only time out if the timer expired before we checked the state *and* the state did not advance. It's possible we could just move the timeout check to before the ctrlr->state switch, but I was hesitant to change the flow for this case. Fixes issue #1720. Signed-off-by: Jim Harris Change-Id: I95b1db3365b5d2d8a65e528f53c302a724d44460 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5596 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Changpeng Liu Reviewed-by: Tomasz Zawadzki --- lib/nvme/nvme_ctrlr.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c index 95dd3154f..69cf545c9 100644 --- a/lib/nvme/nvme_ctrlr.c +++ b/lib/nvme/nvme_ctrlr.c @@ -2936,15 +2936,18 @@ nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr) union spdk_nvme_cc_register cc; union spdk_nvme_csts_register csts; uint32_t ready_timeout_in_ms; + uint64_t ticks; int rc = 0; + ticks = spdk_get_ticks(); + /* * May need to avoid accessing any register on the target controller * for a while. Return early without touching the FSM. * Check sleep_timeout_tsc > 0 for unit test. */ if ((ctrlr->sleep_timeout_tsc > 0) && - (spdk_get_ticks() <= ctrlr->sleep_timeout_tsc)) { + (ticks <= ctrlr->sleep_timeout_tsc)) { return 0; } ctrlr->sleep_timeout_tsc = 0; @@ -2980,7 +2983,7 @@ nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr) * - directly after a VFIO reset. 
*/ SPDK_DEBUGLOG(nvme, "Adding 2 second delay before initializing the controller\n"); - ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2000 * spdk_get_ticks_hz() / 1000); + ctrlr->sleep_timeout_tsc = ticks + (2000 * spdk_get_ticks_hz() / 1000); } break; @@ -3015,7 +3018,7 @@ nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr) */ if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) { SPDK_DEBUGLOG(nvme, "Applying quirk: delay 2.5 seconds before reading registers\n"); - ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2500 * spdk_get_ticks_hz() / 1000); + ctrlr->sleep_timeout_tsc = ticks + (2500 * spdk_get_ticks_hz() / 1000); } return 0; } else { @@ -3177,8 +3180,14 @@ nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr) } init_timeout: + /* Note: we use the ticks captured when we entered this function. + * This covers environments where the SPDK process gets swapped out after + * we tried to advance the state but before we check the timeout here. + * It is not normal for this to happen, but harmless to handle it in this + * way. + */ if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE && - spdk_get_ticks() > ctrlr->state_timeout_tsc) { + ticks > ctrlr->state_timeout_tsc) { SPDK_ERRLOG("Initialization timed out in state %d\n", ctrlr->state); return -1; }