From 20abbe8abe23655548cdd95d272c3b88cd84bfb0 Mon Sep 17 00:00:00 2001
From: Daniel Verkamp
Date: Tue, 23 Feb 2016 16:36:13 -0700
Subject: [PATCH] nvme: perform resets in parallel during attach

When multiple NVMe controllers are being initialized during
spdk_nvme_probe(), we can overlap the hardware resets of all controllers
to improve startup time.

Rewrite the initialization sequence as a polling function,
nvme_ctrlr_process_init(), that maintains a per-controller state machine
to determine which initialization step is underway.  Each step also has
a timeout to ensure the process will terminate if the hardware is hung.

Currently, only the hardware reset (toggling of CC.EN and waiting for
CSTS.RDY) is done in parallel; the rest of initialization is done
sequentially in nvme_ctrlr_start() as before.  These steps could also be
parallelized in a similar framework if measurements indicate that they
take a significant amount of time.

Change-Id: I02ce5863f1b5c13ad65ccd8be571085528d98bd5
Signed-off-by: Daniel Verkamp
---
 lib/nvme/nvme.c                              |  68 ++++---
 lib/nvme/nvme_ctrlr.c                        | 166 ++++++++++++++----
 lib/nvme/nvme_internal.h                     |  36 ++++
 test/lib/nvme/unit/nvme_c/nvme_ut.c          |   6 +
 .../nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c   |   2 +
 .../nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c |   6 +
 6 files changed, 216 insertions(+), 68 deletions(-)

diff --git a/lib/nvme/nvme.c b/lib/nvme/nvme.c
index 63a316b41..b8923ed7b 100644
--- a/lib/nvme/nvme.c
+++ b/lib/nvme/nvme.c
@@ -292,7 +292,7 @@ spdk_nvme_probe(void *cb_ctx, spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb a
 {
 	int rc, start_rc;
 	struct nvme_enum_ctx enum_ctx;
-	struct spdk_nvme_ctrlr *ctrlr;
+	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;
 
 	nvme_mutex_lock(&g_nvme_driver.lock);
 
@@ -305,38 +305,48 @@ spdk_nvme_probe(void *cb_ctx, spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb a
 	 * but maintain the value of rc to signal errors when we return.
 	 */
 
-	/* TODO: This could be reworked to start all the controllers in parallel. */
+	/* Initialize all new controllers in the init_ctrlrs list in parallel. */
 	while (!TAILQ_EMPTY(&g_nvme_driver.init_ctrlrs)) {
-		/* Remove ctrlr from init_ctrlrs and attempt to start it */
-		ctrlr = TAILQ_FIRST(&g_nvme_driver.init_ctrlrs);
-		TAILQ_REMOVE(&g_nvme_driver.init_ctrlrs, ctrlr, tailq);
-
-		/*
-		 * Drop the driver lock while calling nvme_ctrlr_start() since it needs to acquire
-		 * the driver lock internally.
-		 *
-		 * TODO: Rethink the locking - maybe reset should take the lock so that start() and
-		 * the functions it calls (in particular nvme_ctrlr_set_num_qpairs())
-		 * can assume it is held.
-		 */
-		nvme_mutex_unlock(&g_nvme_driver.lock);
-		start_rc = nvme_ctrlr_start(ctrlr);
-		nvme_mutex_lock(&g_nvme_driver.lock);
-
-		if (start_rc == 0) {
-			TAILQ_INSERT_TAIL(&g_nvme_driver.attached_ctrlrs, ctrlr, tailq);
-
-			/*
-			 * Unlock while calling attach_cb() so the user can call other functions
-			 * that may take the driver lock, like nvme_detach().
+		TAILQ_FOREACH_SAFE(ctrlr, &g_nvme_driver.init_ctrlrs, tailq, ctrlr_tmp) {
+			/* Drop the driver lock while calling nvme_ctrlr_process_init()
+			 * since it needs to acquire the driver lock internally when calling
+			 * nvme_ctrlr_start().
+			 *
+			 * TODO: Rethink the locking - maybe reset should take the lock so that start() and
+			 * the functions it calls (in particular nvme_ctrlr_set_num_qpairs())
+			 * can assume it is held.
 			 */
 			nvme_mutex_unlock(&g_nvme_driver.lock);
-			attach_cb(cb_ctx, ctrlr->devhandle, ctrlr);
+			start_rc = nvme_ctrlr_process_init(ctrlr);
 			nvme_mutex_lock(&g_nvme_driver.lock);
-		} else {
-			nvme_ctrlr_destruct(ctrlr);
-			nvme_free(ctrlr);
-			rc = -1;
+
+			if (start_rc) {
+				/* Controller failed to initialize. */
+				TAILQ_REMOVE(&g_nvme_driver.init_ctrlrs, ctrlr, tailq);
+				nvme_ctrlr_destruct(ctrlr);
+				nvme_free(ctrlr);
+				rc = -1;
+				break;
+			}
+
+			if (ctrlr->state == NVME_CTRLR_STATE_READY) {
+				/*
+				 * Controller has been initialized.
+				 * Move it to the attached_ctrlrs list.
+				 */
+				TAILQ_REMOVE(&g_nvme_driver.init_ctrlrs, ctrlr, tailq);
+				TAILQ_INSERT_TAIL(&g_nvme_driver.attached_ctrlrs, ctrlr, tailq);
+
+				/*
+				 * Unlock while calling attach_cb() so the user can call other functions
+				 * that may take the driver lock, like nvme_detach().
+				 */
+				nvme_mutex_unlock(&g_nvme_driver.lock);
+				attach_cb(cb_ctx, ctrlr->devhandle, ctrlr);
+				nvme_mutex_lock(&g_nvme_driver.lock);
+
+				break;
+			}
 		}
 	}
 
diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c
index 694ce42d8..570530c66 100644
--- a/lib/nvme/nvme_ctrlr.c
+++ b/lib/nvme/nvme_ctrlr.c
@@ -326,18 +326,13 @@ static int
 nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
 {
 	union spdk_nvme_cc_register	cc;
-	union spdk_nvme_csts_register	csts;
 	union spdk_nvme_aqa_register	aqa;
 
 	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
-	csts.raw = nvme_mmio_read_4(ctrlr, csts);
 
-	if (cc.bits.en == 1) {
-		if (csts.bits.rdy == 1) {
-			return 0;
-		} else {
-			return nvme_ctrlr_wait_for_ready(ctrlr, 1);
-		}
+	if (cc.bits.en != 0) {
+		nvme_printf(ctrlr, "%s called with CC.EN = 1\n", __func__);
+		return EINVAL;
 	}
 
 	nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
@@ -361,41 +356,26 @@ nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
 
 	nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);
 
-	return nvme_ctrlr_wait_for_ready(ctrlr, 1);
+	return 0;
 }
 
-static int
-nvme_ctrlr_hw_reset(struct spdk_nvme_ctrlr *ctrlr)
+static void
+nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
+		     uint64_t timeout_in_ms)
 {
-	uint32_t i;
-	int rc;
-	union spdk_nvme_cc_register cc;
-
-	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
-	if (cc.bits.en) {
-		nvme_qpair_disable(&ctrlr->adminq);
-		for (i = 0; i < ctrlr->num_io_queues; i++) {
-			nvme_qpair_disable(&ctrlr->ioq[i]);
-		}
+	ctrlr->state = state;
+	if (timeout_in_ms == NVME_TIMEOUT_INFINITE) {
+		ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE;
 	} else {
-		/*
-		 * The controller was already disabled. We will assume that nothing
-		 * has been changed since cc.en was set to 0,
-		 * meaning that we don't need to do an extra reset, and we can just
-		 * re-enable the controller.
-		 */
+		ctrlr->state_timeout_tsc = nvme_get_tsc() + (timeout_in_ms * nvme_get_tsc_hz()) / 1000;
 	}
-
-	nvme_ctrlr_disable(ctrlr);
-	rc = nvme_ctrlr_enable(ctrlr);
-
-	return rc;
 }
 
 int
 spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
 {
-	int rc;
+	int rc = 0;
+	uint32_t i;
 
 	nvme_mutex_lock(&ctrlr->ctrlr_lock);
 
@@ -412,10 +392,23 @@ spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
 
 	ctrlr->is_resetting = true;
 	nvme_printf(ctrlr, "resetting controller\n");
-	/* nvme_ctrlr_start() issues a reset as its first step */
-	rc = nvme_ctrlr_start(ctrlr);
-	if (rc) {
-		nvme_ctrlr_fail(ctrlr);
+
+	/* Disable all queues before disabling the controller hardware. */
+	nvme_qpair_disable(&ctrlr->adminq);
+	for (i = 0; i < ctrlr->num_io_queues; i++) {
+		nvme_qpair_disable(&ctrlr->ioq[i]);
+	}
+
+	/* Set the state back to INIT to cause a full hardware reset. */
+	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
+
+	while (ctrlr->state != NVME_CTRLR_STATE_READY) {
+		if (nvme_ctrlr_process_init(ctrlr) != 0) {
+			nvme_printf(ctrlr, "%s: controller reinitialization failed\n", __func__);
+			nvme_ctrlr_fail(ctrlr);
+			rc = -1;
+			break;
+		}
 	}
 
 	ctrlr->is_resetting = false;
@@ -699,13 +692,107 @@ nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr)
 	return 0;
 }
 
+/**
+ * This function will be called repeatedly during initialization until the controller is ready.
+ */
 int
-nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr)
+nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
 {
-	if (nvme_ctrlr_hw_reset(ctrlr) != 0) {
+	union spdk_nvme_cc_register cc;
+	union spdk_nvme_csts_register csts;
+	union spdk_nvme_cap_lo_register cap_lo;
+	uint32_t ready_timeout_in_ms;
+	int rc;
+
+	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
+	csts.raw = nvme_mmio_read_4(ctrlr, csts);
+	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo.raw);
+
+	ready_timeout_in_ms = 500 * cap_lo.bits.to;
+
+	/*
+	 * Check if the current initialization step is done or has timed out.
+	 */
+	switch (ctrlr->state) {
+	case NVME_CTRLR_STATE_INIT:
+		/* Begin the hardware initialization by making sure the controller is disabled. */
+		if (cc.bits.en) {
+			/*
+			 * Controller is currently enabled. We need to disable it to cause a reset.
+			 *
+			 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready.
+			 * Wait for the ready bit to be 1 before disabling the controller.
+			 */
+			if (csts.bits.rdy == 0) {
+				nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
+				return 0;
+			}
+
+			/* CC.EN = 1 && CSTS.RDY == 1, so we can immediately disable the controller. */
+			cc.bits.en = 0;
+			nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);
+			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
+			return 0;
+		} else {
+			/*
+			 * Controller is currently disabled. We can jump straight to enabling it.
+			 */
+			nvme_ctrlr_enable(ctrlr);
+			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
+			return 0;
+		}
+		break;
+
+	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
+		if (csts.bits.rdy == 1) {
+			/* CC.EN = 1 && CSTS.RDY = 1, so we can set CC.EN = 0 now. */
+			cc.bits.en = 0;
+			nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);
+			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
+			return 0;
+		}
+		break;
+
+	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
+		if (csts.bits.rdy == 0) {
+			/* CC.EN = 0 && CSTS.RDY = 0, so we can enable the controller now. */
+			nvme_ctrlr_enable(ctrlr);
+			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
+			return 0;
+		}
+		break;
+
+	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
+		if (csts.bits.rdy == 1) {
+			/*
+			 * The controller has been enabled.
+			 * Perform the rest of initialization in nvme_ctrlr_start() serially.
+			 */
+			rc = nvme_ctrlr_start(ctrlr);
+			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
+			return rc;
+		}
+		break;
+
+	default:
+		nvme_assert(0, ("unhandled ctrlr state %d\n", ctrlr->state));
+		nvme_ctrlr_fail(ctrlr);
 		return -1;
 	}
 
+	if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE &&
+	    nvme_get_tsc() > ctrlr->state_timeout_tsc) {
+		nvme_printf(ctrlr, "Initialization timed out in state %d\n", ctrlr->state);
+		nvme_ctrlr_fail(ctrlr);
+		return -1;
+	}
+
+	return 0;
+}
+
+int
+nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr)
+{
 	nvme_qpair_reset(&ctrlr->adminq);
 	nvme_qpair_enable(&ctrlr->adminq);
 
@@ -771,6 +858,7 @@ nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr, void *devhandle)
 	int status;
 	int rc;
 
+	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
 	ctrlr->devhandle = devhandle;
 
 	status = nvme_ctrlr_allocate_bars(ctrlr);
diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h
index 58bcea42e..eeac6689c 100644
--- a/lib/nvme/nvme_internal.h
+++ b/lib/nvme/nvme_internal.h
@@ -287,6 +287,38 @@ struct spdk_nvme_ns {
 	uint16_t			flags;
 };
 
+/**
+ * State of struct spdk_nvme_ctrlr (in particular, during initialization).
+ */
+enum nvme_ctrlr_state {
+	/**
+	 * Controller has not been initialized yet.
+	 */
+	NVME_CTRLR_STATE_INIT,
+
+	/**
+	 * Waiting for CSTS.RDY to transition from 0 to 1 so that CC.EN may be set to 0.
+	 */
+	NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,
+
+	/**
+	 * Waiting for CSTS.RDY to transition from 1 to 0 so that CC.EN may be set to 1.
+	 */
+	NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
+
+	/**
+	 * Waiting for CSTS.RDY to transition from 0 to 1 after enabling the controller.
+	 */
+	NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
+
+	/**
+	 * Controller initialization has completed and the controller is ready.
+	 */
+	NVME_CTRLR_STATE_READY
+};
+
+#define NVME_TIMEOUT_INFINITE	UINT64_MAX
+
 /*
  * One of these per allocated PCI device.
  */
@@ -310,6 +342,9 @@ struct spdk_nvme_ctrlr {
 	/* Cold data (not accessed in normal I/O path) is after this point. */
 
+	enum nvme_ctrlr_state		state;
+	uint64_t			state_timeout_tsc;
+
 	TAILQ_ENTRY(spdk_nvme_ctrlr)	tailq;
 
 	/** All the log pages supported */
@@ -433,6 +468,7 @@ void	nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl);
 
 int	nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr, void *devhandle);
 void	nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr);
+int	nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr);
 int	nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr);
 
 void	nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,
diff --git a/test/lib/nvme/unit/nvme_c/nvme_ut.c b/test/lib/nvme/unit/nvme_c/nvme_ut.c
index d34925aa3..b3a7b9496 100644
--- a/test/lib/nvme/unit/nvme_c/nvme_ut.c
+++ b/test/lib/nvme/unit/nvme_c/nvme_ut.c
@@ -57,6 +57,12 @@ nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
 {
 }
 
+int
+nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
+{
+	return 0;
+}
+
 int
 nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr)
 {
diff --git a/test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c b/test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c
index cbc0b24ac..4f6d200a9 100644
--- a/test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c
+++ b/test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c
@@ -47,6 +47,8 @@ static uint16_t g_pci_subdevice_id;
 
 char outbuf[OUTBUF_SIZE];
 
+uint64_t g_ut_tsc = 0;
+
 __thread int	nvme_thread_ioq_index = -1;
 
 uint16_t
diff --git a/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c b/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c
index 1b97cf1cd..28da9694c 100644
--- a/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c
+++ b/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c
@@ -55,6 +55,12 @@ nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
 {
 }
 
+int
+nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
+{
+	return 0;
+}
+
 int
 nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr)
 {
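
Note on the timeout arithmetic (illustration only, not part of the patch): the per-state timeout passed to nvme_ctrlr_set_state() is derived from CAP.TO, which the controller reports in units of 500 ms, and is converted to a TSC deadline as now + timeout_ms * tsc_hz / 1000. The standalone sketch below mirrors that calculation and the per-poll deadline check in nvme_ctrlr_process_init(); get_ticks() and get_ticks_hz() are hypothetical stand-ins for the driver's nvme_get_tsc() and nvme_get_tsc_hz(), stubbed here with a nanosecond monotonic clock.

/*
 * Illustration only, not part of the patch.  Mirrors the deadline math in
 * nvme_ctrlr_set_state() and the timeout check in nvme_ctrlr_process_init().
 * get_ticks()/get_ticks_hz() are hypothetical stand-ins for nvme_get_tsc()
 * and nvme_get_tsc_hz().
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define TIMEOUT_INFINITE UINT64_MAX	/* local stand-in for NVME_TIMEOUT_INFINITE */

static uint64_t get_ticks(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

static uint64_t get_ticks_hz(void)
{
	return 1000000000ULL;	/* one tick per nanosecond in this stub */
}

int main(void)
{
	uint32_t cap_to = 20;				/* example CAP.TO value */
	uint32_t ready_timeout_in_ms = 500 * cap_to;	/* CAP.TO is in 500 ms units: 10000 ms */
	uint64_t deadline;

	/* Same shape as nvme_ctrlr_set_state(): now + ms * hz / 1000. */
	deadline = get_ticks() + ((uint64_t)ready_timeout_in_ms * get_ticks_hz()) / 1000;

	/* Same shape as the check performed on each call to the init poller. */
	if (deadline != TIMEOUT_INFINITE && get_ticks() > deadline) {
		printf("initialization step timed out\n");
		return 1;
	}

	printf("still within the %u ms ready timeout\n", ready_timeout_in_ms);
	return 0;
}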