From bfc8bc87fb6baf1b1d7b7d968cdc8a8cdc9f114c Mon Sep 17 00:00:00 2001 From: GangCao Date: Mon, 17 Oct 2016 21:03:24 -0400 Subject: [PATCH] nvme: add the per process admin cpl queue for multi-process case Change-Id: Ie67e3414db807160092bb10812a586b7230e0a89 Signed-off-by: GangCao --- lib/nvme/nvme.c | 2 + lib/nvme/nvme_ctrlr.c | 54 ++++++++ lib/nvme/nvme_internal.h | 35 ++++++ lib/nvme/nvme_pcie.c | 117 +++++++++++++++++- test/lib/nvme/unit/nvme_c/nvme_ut.c | 6 + .../nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c | 37 ++++++ .../unit/nvme_ctrlr_cmd_c/nvme_ctrlr_cmd_ut.c | 2 + .../nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c | 6 + .../nvme/unit/nvme_qpair_c/nvme_qpair_ut.c | 3 + 9 files changed, 257 insertions(+), 5 deletions(-) diff --git a/lib/nvme/nvme.c b/lib/nvme/nvme.c index 0c5e36d99..3e54acd25 100644 --- a/lib/nvme/nvme.c +++ b/lib/nvme/nvme.c @@ -107,6 +107,7 @@ nvme_allocate_request(const struct nvme_payload *payload, uint32_t payload_size, req->cb_arg = cb_arg; req->payload = *payload; req->payload_size = payload_size; + req->pid = getpid(); return req; } @@ -142,6 +143,7 @@ nvme_user_copy_cmd_complete(void *arg, const struct spdk_nvme_cpl *cpl) xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc); if (xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST || xfer == SPDK_NVME_DATA_BIDIRECTIONAL) { + assert(req->pid == getpid()); memcpy(req->user_buffer, req->payload.u.contig, req->payload_size); } diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c index 5ed4e620c..b811c5773 100644 --- a/lib/nvme/nvme_ctrlr.c +++ b/lib/nvme/nvme_ctrlr.c @@ -800,6 +800,54 @@ nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr) return 0; } +/** + * This function will be called when a new process is using the controller. + * 1. For the primary process, it is called when constructing the controller. + * 2. For the secondary process, it is called at probing the controller. 
+ */ +int +nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle) +{ + struct spdk_nvme_controller_process *ctrlr_proc; + + /* Initialize the per process properties for this ctrlr */ + ctrlr_proc = spdk_zmalloc(sizeof(struct spdk_nvme_controller_process), 64, NULL); + if (ctrlr_proc == NULL) { + SPDK_ERRLOG("failed to allocate memory to track the process props\n"); + + return -1; + } + + ctrlr_proc->is_primary = spdk_process_is_primary(); + ctrlr_proc->pid = getpid(); + STAILQ_INIT(&ctrlr_proc->active_reqs); + ctrlr_proc->devhandle = devhandle; + + TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq); + + return 0; +} + +/** + * This function will be called when destructing the controller. + * 1. There are no more admin requests on this controller. + * 2. Clean up any leftover resource allocations when their associated processes are gone. + */ +void +nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr) +{ + struct spdk_nvme_controller_process *active_proc, *tmp; + + /* Free all the processes' properties and make sure there are no pending admin I/Os */ + TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) { + TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq); + + assert(STAILQ_EMPTY(&active_proc->active_reqs)); + + spdk_free(active_proc); + } +} + /** * This function will be called repeatedly during initialization until the controller is ready. 
*/ @@ -984,7 +1032,9 @@ nvme_mutex_init_recursive_shared(pthread_mutex_t *mtx) return -1; } if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) || +#ifndef __FreeBSD__ pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) || +#endif pthread_mutex_init(mtx, &attr)) { rc = -1; } @@ -1014,6 +1064,8 @@ nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr) ctrlr->quirks = nvme_get_quirks(&pci_id); } + TAILQ_INIT(&ctrlr->active_procs); + return 0; } @@ -1035,6 +1087,8 @@ nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) pthread_mutex_destroy(&ctrlr->ctrlr_lock); + nvme_ctrlr_free_processes(ctrlr); + ctrlr->transport->ctrlr_destruct(ctrlr); } diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h index d8fbaa688..634a7a0b0 100644 --- a/lib/nvme/nvme_internal.h +++ b/lib/nvme/nvme_internal.h @@ -163,6 +163,16 @@ struct nvme_request { void *cb_arg; STAILQ_ENTRY(nvme_request) stailq; + /** + * The active admin request can be moved to a per process pending + * list based on the saved pid to tell which process it belongs + * to. The cpl saves the original completion information which + * is used in the completion callback. + * NOTE: these below two fields are only used for admin request. + */ + pid_t pid; + struct spdk_nvme_cpl cpl; + /** * The following members should not be reordered with members * above. These members are only needed when splitting @@ -313,6 +323,25 @@ enum nvme_ctrlr_state { #define NVME_TIMEOUT_INFINITE UINT64_MAX +/* + * Used to track properties for all processes accessing the controller. + */ +struct spdk_nvme_controller_process { + /** Whether it is the primary process */ + bool is_primary; + + /** Process ID */ + pid_t pid; + + /** Active admin requests to be completed */ + STAILQ_HEAD(, nvme_request) active_reqs; + + TAILQ_ENTRY(spdk_nvme_controller_process) tailq; + + /** Per process PCI device handle */ + struct spdk_pci_device *devhandle; +}; + /* * One of these per allocated PCI device. 
*/ @@ -395,6 +424,9 @@ struct spdk_nvme_ctrlr { /* Extra sleep time during controller initialization */ uint64_t sleep_timeout_tsc; + + /** Track all the processes that manage this controller */ + TAILQ_HEAD(, spdk_nvme_controller_process) active_procs; }; struct nvme_driver { @@ -475,6 +507,9 @@ int nvme_ctrlr_cmd_fw_image_download(struct spdk_nvme_ctrlr *ctrlr, spdk_nvme_cmd_cb cb_fn, void *cb_arg); void nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl); +int nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle); +void nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr); + int nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr); void nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr); int nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr); diff --git a/lib/nvme/nvme_pcie.c b/lib/nvme/nvme_pcie.c index c28ed6b3f..02ae93c3f 100644 --- a/lib/nvme/nvme_pcie.c +++ b/lib/nvme/nvme_pcie.c @@ -489,6 +489,13 @@ static struct spdk_nvme_ctrlr *nvme_pcie_ctrlr_construct(void *devhandle) return NULL; } + /* Construct the primary process properties */ + rc = nvme_ctrlr_add_process(&pctrlr->ctrlr, pci_dev); + if (rc != 0) { + nvme_ctrlr_destruct(&pctrlr->ctrlr); + return NULL; + } + return &pctrlr->ctrlr; } @@ -678,6 +685,91 @@ nvme_pcie_copy_command(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *sr #endif } +static void +nvme_pcie_qpair_insert_pending_admin_request(struct spdk_nvme_qpair *qpair, + struct nvme_request *req, struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + struct nvme_request *active_req = req; + struct spdk_nvme_controller_process *active_proc, *tmp; + bool pending_on_proc = false; + + /* + * The admin request is from another process. Move to the per + * process list for that process to handle it later. + */ + assert(nvme_qpair_is_admin_queue(qpair)); + assert(active_req->pid != getpid()); + + /* Acquire the recursive lock first if not held already. 
*/ + pthread_mutex_lock(&ctrlr->ctrlr_lock); + + TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) { + if (active_proc->pid == active_req->pid) { + /* Saved the original completion information */ + memcpy(&active_req->cpl, cpl, sizeof(*cpl)); + STAILQ_INSERT_TAIL(&active_proc->active_reqs, active_req, stailq); + pending_on_proc = true; + + break; + } + } + + pthread_mutex_unlock(&ctrlr->ctrlr_lock); + + if (pending_on_proc == false) { + SPDK_ERRLOG("The owning process is not found. Drop the request.\n"); + + nvme_free_request(active_req); + } +} + +static void +nvme_pcie_qpair_complete_pending_admin_request(struct spdk_nvme_qpair *qpair) +{ + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + struct nvme_request *req, *tmp_req; + bool proc_found = false; + pid_t pid = getpid(); + struct spdk_nvme_controller_process *proc; + + /* + * Check whether there is any pending admin request from + * other active processes. + */ + assert(nvme_qpair_is_admin_queue(qpair)); + + /* Acquire the recursive lock if not held already */ + pthread_mutex_lock(&ctrlr->ctrlr_lock); + + TAILQ_FOREACH(proc, &ctrlr->active_procs, tailq) { + if (proc->pid == pid) { + proc_found = true; + + break; + } + } + + pthread_mutex_unlock(&ctrlr->ctrlr_lock); + + if (proc_found == false) { + SPDK_ERRLOG("the active process is not found for this controller."); + assert(proc_found); + } + + STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) { + STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq); + + assert(req->pid == pid); + + if (req->cb_fn) { + req->cb_fn(req->cb_arg, &req->cpl); + } + + nvme_free_request(req); + } +} + static void nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr) { @@ -702,9 +794,10 @@ static void nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr, struct spdk_nvme_cpl *cpl, bool print_on_error) { - struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); - struct nvme_request 
*req; - bool retry, error, was_active; + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_request *req; + bool retry, error, was_active; + bool req_from_current_proc = true; req = tr->req; @@ -729,10 +822,19 @@ nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_trac nvme_pcie_qpair_submit_tracker(qpair, tr); } else { if (was_active && req->cb_fn) { - req->cb_fn(req->cb_arg, cpl); + /* Only check admin requests from different processes. */ + if (nvme_qpair_is_admin_queue(qpair) && req->pid != getpid()) { + req_from_current_proc = false; + nvme_pcie_qpair_insert_pending_admin_request(qpair, req, cpl); + } else { + req->cb_fn(req->cb_arg, cpl); + } + } + + if (req_from_current_proc == true) { + nvme_free_request(req); } - nvme_free_request(req); tr->req = NULL; LIST_REMOVE(tr, list); @@ -1483,6 +1585,11 @@ nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_ spdk_mmio_write_4(pqpair->cq_hdbl, pqpair->cq_head); } + /* Before returning, complete any pending admin request. 
*/ + if (nvme_qpair_is_admin_queue(qpair)) { + nvme_pcie_qpair_complete_pending_admin_request(qpair); + } + return num_completions; } diff --git a/test/lib/nvme/unit/nvme_c/nvme_ut.c b/test/lib/nvme/unit/nvme_c/nvme_ut.c index 8c41346f1..a18b08782 100644 --- a/test/lib/nvme/unit/nvme_c/nvme_ut.c +++ b/test/lib/nvme/unit/nvme_c/nvme_ut.c @@ -55,6 +55,12 @@ nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) { } +int +nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle) +{ + return 0; +} + int nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr) { diff --git a/test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c b/test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c index 4cdbc9949..d01277ac6 100644 --- a/test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c +++ b/test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c @@ -376,6 +376,7 @@ nvme_allocate_request(const struct nvme_payload *payload, uint32_t payload_size, req->cb_fn = cb_fn; req->cb_arg = cb_arg; + req->pid = getpid(); } return req; @@ -525,6 +526,9 @@ test_nvme_ctrlr_init_en_0_rdy_0_ams_rr(void) CU_ASSERT(g_ut_nvme_regs.cc.bits.ams == SPDK_NVME_CC_AMS_RR); CU_ASSERT(ctrlr.opts.arb_mechanism == SPDK_NVME_CC_AMS_RR); + /* + * Complete and destroy the controller + */ g_ut_nvme_regs.csts.bits.shst = SPDK_NVME_SHST_COMPLETE; nvme_ctrlr_destruct(&ctrlr); @@ -546,6 +550,9 @@ test_nvme_ctrlr_init_en_0_rdy_0_ams_rr(void) CU_ASSERT(ctrlr.state == NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1); CU_ASSERT(g_ut_nvme_regs.cc.bits.en == 0); + /* + * Complete and destroy the controller + */ g_ut_nvme_regs.csts.bits.shst = SPDK_NVME_SHST_COMPLETE; nvme_ctrlr_destruct(&ctrlr); @@ -567,6 +574,9 @@ test_nvme_ctrlr_init_en_0_rdy_0_ams_rr(void) CU_ASSERT(ctrlr.state == NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1); CU_ASSERT(g_ut_nvme_regs.cc.bits.en == 0); + /* + * Complete and destroy the controller + */ g_ut_nvme_regs.csts.bits.shst = SPDK_NVME_SHST_COMPLETE; nvme_ctrlr_destruct(&ctrlr); @@ -588,6 +598,9 @@ 
test_nvme_ctrlr_init_en_0_rdy_0_ams_rr(void) CU_ASSERT(ctrlr.state == NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1); CU_ASSERT(g_ut_nvme_regs.cc.bits.en == 0); + /* + * Complete and destroy the controller + */ g_ut_nvme_regs.csts.bits.shst = SPDK_NVME_SHST_COMPLETE; nvme_ctrlr_destruct(&ctrlr); @@ -657,6 +670,9 @@ test_nvme_ctrlr_init_en_0_rdy_0_ams_wrr(void) CU_ASSERT(g_ut_nvme_regs.cc.bits.ams == SPDK_NVME_CC_AMS_RR); CU_ASSERT(ctrlr.opts.arb_mechanism == SPDK_NVME_CC_AMS_RR); + /* + * Complete and destroy the controller + */ g_ut_nvme_regs.csts.bits.shst = SPDK_NVME_SHST_COMPLETE; nvme_ctrlr_destruct(&ctrlr); @@ -680,6 +696,9 @@ test_nvme_ctrlr_init_en_0_rdy_0_ams_wrr(void) CU_ASSERT(g_ut_nvme_regs.cc.bits.ams == SPDK_NVME_CC_AMS_WRR); CU_ASSERT(ctrlr.opts.arb_mechanism == SPDK_NVME_CC_AMS_WRR); + /* + * Complete and destroy the controller + */ g_ut_nvme_regs.csts.bits.shst = SPDK_NVME_SHST_COMPLETE; nvme_ctrlr_destruct(&ctrlr); @@ -701,6 +720,9 @@ test_nvme_ctrlr_init_en_0_rdy_0_ams_wrr(void) CU_ASSERT(ctrlr.state == NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1); CU_ASSERT(g_ut_nvme_regs.cc.bits.en == 0); + /* + * Complete and destroy the controller + */ g_ut_nvme_regs.csts.bits.shst = SPDK_NVME_SHST_COMPLETE; nvme_ctrlr_destruct(&ctrlr); @@ -722,6 +744,9 @@ test_nvme_ctrlr_init_en_0_rdy_0_ams_wrr(void) CU_ASSERT(ctrlr.state == NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1); CU_ASSERT(g_ut_nvme_regs.cc.bits.en == 0); + /* + * Complete and destroy the controller + */ g_ut_nvme_regs.csts.bits.shst = SPDK_NVME_SHST_COMPLETE; nvme_ctrlr_destruct(&ctrlr); @@ -790,6 +815,9 @@ test_nvme_ctrlr_init_en_0_rdy_0_ams_vs(void) CU_ASSERT(g_ut_nvme_regs.cc.bits.ams == SPDK_NVME_CC_AMS_RR); CU_ASSERT(ctrlr.opts.arb_mechanism == SPDK_NVME_CC_AMS_RR); + /* + * Complete and destroy the controller + */ g_ut_nvme_regs.csts.bits.shst = SPDK_NVME_SHST_COMPLETE; nvme_ctrlr_destruct(&ctrlr); @@ -811,6 +839,9 @@ test_nvme_ctrlr_init_en_0_rdy_0_ams_vs(void) CU_ASSERT(ctrlr.state == 
NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1); CU_ASSERT(g_ut_nvme_regs.cc.bits.en == 0); + /* + * Complete and destroy the controller + */ g_ut_nvme_regs.csts.bits.shst = SPDK_NVME_SHST_COMPLETE; nvme_ctrlr_destruct(&ctrlr); @@ -834,6 +865,9 @@ test_nvme_ctrlr_init_en_0_rdy_0_ams_vs(void) CU_ASSERT(g_ut_nvme_regs.cc.bits.ams == SPDK_NVME_CC_AMS_VS); CU_ASSERT(ctrlr.opts.arb_mechanism == SPDK_NVME_CC_AMS_VS); + /* + * Complete and destroy the controller + */ g_ut_nvme_regs.csts.bits.shst = SPDK_NVME_SHST_COMPLETE; nvme_ctrlr_destruct(&ctrlr); @@ -855,6 +889,9 @@ test_nvme_ctrlr_init_en_0_rdy_0_ams_vs(void) CU_ASSERT(ctrlr.state == NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1); CU_ASSERT(g_ut_nvme_regs.cc.bits.en == 0); + /* + * Complete and destroy the controller + */ g_ut_nvme_regs.csts.bits.shst = SPDK_NVME_SHST_COMPLETE; nvme_ctrlr_destruct(&ctrlr); diff --git a/test/lib/nvme/unit/nvme_ctrlr_cmd_c/nvme_ctrlr_cmd_ut.c b/test/lib/nvme/unit/nvme_ctrlr_cmd_c/nvme_ctrlr_cmd_ut.c index ac0a50f33..5ae0f189a 100644 --- a/test/lib/nvme/unit/nvme_ctrlr_cmd_c/nvme_ctrlr_cmd_ut.c +++ b/test/lib/nvme/unit/nvme_ctrlr_cmd_c/nvme_ctrlr_cmd_ut.c @@ -253,6 +253,8 @@ nvme_allocate_request(const struct nvme_payload *payload, uint32_t payload_size, req->cb_fn = cb_fn; req->cb_arg = cb_arg; + req->pid = getpid(); + return req; } diff --git a/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c b/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c index 2a9b2c650..28187a0a9 100644 --- a/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c +++ b/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c @@ -65,6 +65,12 @@ nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) { } +int +nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle) +{ + return 0; +} + int nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr) { diff --git a/test/lib/nvme/unit/nvme_qpair_c/nvme_qpair_ut.c b/test/lib/nvme/unit/nvme_qpair_c/nvme_qpair_ut.c index 6402343c8..861a13644 100644 --- 
a/test/lib/nvme/unit/nvme_qpair_c/nvme_qpair_ut.c +++ b/test/lib/nvme/unit/nvme_qpair_c/nvme_qpair_ut.c @@ -146,6 +146,7 @@ nvme_allocate_request(const struct nvme_payload *payload, uint32_t payload_size, req->cb_arg = cb_arg; req->payload = *payload; req->payload_size = payload_size; + req->pid = getpid(); return req; } @@ -223,6 +224,7 @@ prepare_submit_request_test(struct spdk_nvme_qpair *qpair, ctrlr->transport = &nvme_qpair_ut_transport; ctrlr->free_io_qids = NULL; TAILQ_INIT(&ctrlr->active_io_qpairs); + TAILQ_INIT(&ctrlr->active_procs); nvme_qpair_construct(qpair, 1, 128, ctrlr, 0); ut_fail_vtophys = false; @@ -581,6 +583,7 @@ static void test_nvme_qpair_destroy(void) memset(&ctrlr, 0, sizeof(ctrlr)); TAILQ_INIT(&ctrlr.free_io_qpairs); TAILQ_INIT(&ctrlr.active_io_qpairs); + TAILQ_INIT(&ctrlr.active_procs); nvme_qpair_construct(&qpair, 1, 128, &ctrlr); nvme_qpair_destroy(&qpair);