From a4925ba74493a839437e66eb00462ff06eb92cda Mon Sep 17 00:00:00 2001 From: Seth Howell Date: Fri, 25 Oct 2019 09:24:27 -0700 Subject: [PATCH] nvme: take the lock when disconnecting qpairs. If we disconnect qpairs without taking the lock, we run the risk of trying to double free qpair resources before they have been marked as NULL. For example, polling on one thread and calling nvme_rdma_qpair_disconnect from one thread while doing an nvme_ctrlr_reset on another thread. nvme_ctrlr_reset will call down to nvme_rdma_qpair_disconnect on the same qpair and without any locking it can result in trying to destroy the qpair resources multiple times. Change-Id: I9eef6f2f92961ef8e3f8ece0e4a3d54f3434cff8 Signed-off-by: Seth Howell Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/472413 Tested-by: SPDK CI Jenkins Reviewed-by: Jim Harris Reviewed-by: Shuhei Matsumoto --- lib/nvme/nvme_ctrlr.c | 18 ++++++++++++++++++ lib/nvme/nvme_internal.h | 9 +++++---- lib/nvme/nvme_rdma.c | 11 ++++++++++- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c index 2eecfd837..0780093ba 100644 --- a/lib/nvme/nvme_ctrlr.c +++ b/lib/nvme/nvme_ctrlr.c @@ -425,6 +425,24 @@ out: return rc; } +/* + * This internal function will attempt to take the controller + * lock before calling disconnect on a controller qpair. + * Functions already holding the controller lock should + * call nvme_transport_ctrlr_disconnect_qpair directly. + */ +void +nvme_ctrlr_disconnect_qpair(struct spdk_nvme_qpair *qpair) +{ + struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; + + assert(ctrlr != NULL); + + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); + nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair); + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); +} + int spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair) { diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h index a247442ec..83a5e6244 100644 --- a/lib/nvme/nvme_internal.h +++ b/lib/nvme/nvme_internal.h @@ -858,10 +858,11 @@ int nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register int nvme_ctrlr_get_cmbsz(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cmbsz_register *cmbsz); void nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cap_register *cap, const union spdk_nvme_vs_register *vs); -int nvme_qpair_init(struct spdk_nvme_qpair *qpair, uint16_t id, - struct spdk_nvme_ctrlr *ctrlr, - enum spdk_nvme_qprio qprio, - uint32_t num_requests); +void nvme_ctrlr_disconnect_qpair(struct spdk_nvme_qpair *qpair); +int nvme_qpair_init(struct spdk_nvme_qpair *qpair, uint16_t id, + struct spdk_nvme_ctrlr *ctrlr, + enum spdk_nvme_qprio qprio, + uint32_t num_requests); void nvme_qpair_deinit(struct spdk_nvme_qpair *qpair); void nvme_qpair_complete_error_reqs(struct spdk_nvme_qpair *qpair); int nvme_qpair_submit_request(struct spdk_nvme_qpair *qpair, diff --git a/lib/nvme/nvme_rdma.c b/lib/nvme/nvme_rdma.c index b909b794a..8ad3ffd0f 100644 --- a/lib/nvme/nvme_rdma.c +++ b/lib/nvme/nvme_rdma.c @@ -1911,7 +1911,16 @@ nvme_rdma_qpair_process_completions(struct spdk_nvme_qpair *qpair, return reaped; fail: - nvme_rdma_qpair_disconnect(qpair); + /* + * Since admin queues take the ctrlr_lock before entering this function, + * we can call nvme_rdma_qpair_disconnect. For other qpairs we need + * to call the generic function which will take the lock for us. + */ + if (nvme_qpair_is_admin_queue(qpair)) { + nvme_rdma_qpair_disconnect(qpair); + } else { + nvme_ctrlr_disconnect_qpair(qpair); + } return -ENXIO; }