From 8a0967a22a43bd129eecb0a814e6cfabcdfe7130 Mon Sep 17 00:00:00 2001 From: Shuhei Matsumoto Date: Thu, 16 Sep 2021 14:24:37 +0900 Subject: [PATCH] bdev/nvme: Protect ANA log page from concurrent reads by using an new flag If an I/O failed by ANA error, the corresponding ANA state might be out of date. In the following patches, for this case, read the latest ANA log page and update the ANA state. Such reading ANA log page may be done on multiple threads concurrently including AER ANA change. Hence protect ANA log page by adding an new flag ana_log_page_updating to struct nvme_ctrlr and using it. Signed-off-by: Shuhei Matsumoto Change-Id: I8bb84091d50a5fdc0d9893b585be972dfd31c0f1 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9526 Tested-by: SPDK CI Jenkins Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Reviewed-by: Ben Walker Reviewed-by: Aleksey Marchuk --- module/bdev/nvme/bdev_nvme.c | 45 +++++----- module/bdev/nvme/bdev_nvme.h | 1 + .../lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c | 85 +++++++++++++++++++ 3 files changed, 112 insertions(+), 19 deletions(-) diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index dd1791fe4..a9932c68a 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -532,7 +532,7 @@ nvme_ctrlr_release(struct nvme_ctrlr *nvme_ctrlr) nvme_ctrlr->ref--; if (nvme_ctrlr->ref > 0 || !nvme_ctrlr->destruct || - nvme_ctrlr->resetting) { + nvme_ctrlr->resetting || nvme_ctrlr->ana_log_page_updating) { pthread_mutex_unlock(&nvme_ctrlr->mutex); return; } @@ -1149,7 +1149,8 @@ _bdev_nvme_reset_complete(struct spdk_io_channel_iter *i, int status) path_id->is_failed = !success; - if (nvme_ctrlr->ref == 0 && nvme_ctrlr->destruct) { + if (nvme_ctrlr->ref == 0 && nvme_ctrlr->destruct && + !nvme_ctrlr->ana_log_page_updating) { /* Complete pending destruct after reset completes. */ complete_pending_destruct = true; } @@ -2716,19 +2717,6 @@ nvme_ctrlr_depopulate_namespaces(struct nvme_ctrlr *nvme_ctrlr) } } -static bool -nvme_ctrlr_acquire(struct nvme_ctrlr *nvme_ctrlr) -{ - pthread_mutex_lock(&nvme_ctrlr->mutex); - if (nvme_ctrlr->destruct || nvme_ctrlr->resetting) { - pthread_mutex_unlock(&nvme_ctrlr->mutex); - return false; - } - nvme_ctrlr->ref++; - pthread_mutex_unlock(&nvme_ctrlr->mutex); - return true; -} - static int nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg) @@ -2763,12 +2751,25 @@ nvme_ctrlr_read_ana_log_page_done(void *ctx, const struct spdk_nvme_cpl *cpl) { struct nvme_ctrlr *nvme_ctrlr = ctx; - if (spdk_nvme_cpl_is_success(cpl)) { + if (cpl != NULL && spdk_nvme_cpl_is_success(cpl)) { bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ctrlr_set_ana_states, nvme_ctrlr); } - nvme_ctrlr_release(nvme_ctrlr); + pthread_mutex_lock(&nvme_ctrlr->mutex); + + assert(nvme_ctrlr->ana_log_page_updating == true); + nvme_ctrlr->ana_log_page_updating = false; + + if (nvme_ctrlr->ref > 0 || !nvme_ctrlr->destruct || + nvme_ctrlr->resetting) { + pthread_mutex_unlock(&nvme_ctrlr->mutex); + return; + } + + pthread_mutex_unlock(&nvme_ctrlr->mutex); + + nvme_ctrlr_unregister(nvme_ctrlr); } static void @@ -2780,10 +2781,16 @@ nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr) return; } - if (!nvme_ctrlr_acquire(nvme_ctrlr)) { + pthread_mutex_lock(&nvme_ctrlr->mutex); + if (nvme_ctrlr->destruct || nvme_ctrlr->resetting || + nvme_ctrlr->ana_log_page_updating) { + pthread_mutex_unlock(&nvme_ctrlr->mutex); return; } + nvme_ctrlr->ana_log_page_updating = true; + pthread_mutex_unlock(&nvme_ctrlr->mutex); + rc = spdk_nvme_ctrlr_cmd_get_log_page(nvme_ctrlr->ctrlr, SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS, SPDK_NVME_GLOBAL_NS_TAG, @@ -2792,7 +2799,7 @@ nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr) nvme_ctrlr_read_ana_log_page_done, nvme_ctrlr); if (rc != 0) { - nvme_ctrlr_release(nvme_ctrlr); + nvme_ctrlr_read_ana_log_page_done(nvme_ctrlr, NULL); } } diff --git a/module/bdev/nvme/bdev_nvme.h b/module/bdev/nvme/bdev_nvme.h index 68ac302de..0b2050f5d 100644 --- a/module/bdev/nvme/bdev_nvme.h +++ b/module/bdev/nvme/bdev_nvme.h @@ -104,6 +104,7 @@ struct nvme_ctrlr { uint32_t resetting : 1; uint32_t failover_in_progress : 1; uint32_t destruct : 1; + uint32_t ana_log_page_updating : 1; /** * PI check flags. This flags is set to NVMe controllers created only * through bdev_nvme_attach_controller RPC or .INI config file. Hot added diff --git a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c index c27d2a5ad..f36cde08c 100644 --- a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c +++ b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c @@ -4392,6 +4392,90 @@ test_retry_io_count(void) g_opts.bdev_retry_count = 0; } +static void +test_concurrent_read_ana_log_page(void) +{ + struct spdk_nvme_transport_id trid = {}; + struct spdk_nvme_ctrlr *ctrlr; + struct nvme_ctrlr *nvme_ctrlr; + const int STRING_SIZE = 32; + const char *attached_names[STRING_SIZE]; + int rc; + + memset(attached_names, 0, sizeof(char *) * STRING_SIZE); + ut_init_trid(&trid); + + set_thread(0); + + ctrlr = ut_attach_ctrlr(&trid, 1, true, false); + SPDK_CU_ASSERT_FATAL(ctrlr != NULL); + + ctrlr->ns[0].ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE; + + g_ut_attach_ctrlr_status = 0; + g_ut_attach_bdev_count = 1; + + rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0, + attach_ctrlr_done, NULL, NULL, false); + CU_ASSERT(rc == 0); + + spdk_delay_us(1000); + poll_threads(); + + spdk_delay_us(g_opts.nvme_adminq_poll_period_us); + poll_threads(); + + nvme_ctrlr = nvme_ctrlr_get_by_name("nvme0"); + SPDK_CU_ASSERT_FATAL(nvme_ctrlr != NULL); + + nvme_ctrlr_read_ana_log_page(nvme_ctrlr); + + CU_ASSERT(nvme_ctrlr->ana_log_page_updating == true); + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1); + + /* Following read request should be rejected. */ + nvme_ctrlr_read_ana_log_page(nvme_ctrlr); + + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1); + + set_thread(1); + + nvme_ctrlr_read_ana_log_page(nvme_ctrlr); + + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1); + + /* Reset request while reading ANA log page should not be rejected. */ + rc = bdev_nvme_reset(nvme_ctrlr); + CU_ASSERT(rc == 0); + + poll_threads(); + + spdk_delay_us(g_opts.nvme_adminq_poll_period_us); + poll_threads(); + + CU_ASSERT(nvme_ctrlr->ana_log_page_updating == false); + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0); + + /* Read ANA log page while resetting ctrlr should be rejected. */ + rc = bdev_nvme_reset(nvme_ctrlr); + CU_ASSERT(rc == 0); + + nvme_ctrlr_read_ana_log_page(nvme_ctrlr); + + CU_ASSERT(nvme_ctrlr->ana_log_page_updating == false); + + set_thread(0); + + rc = bdev_nvme_delete("nvme0", &g_any_path); + CU_ASSERT(rc == 0); + + poll_threads(); + spdk_delay_us(1000); + poll_threads(); + + CU_ASSERT(nvme_ctrlr_get_by_name("nvme0") == NULL); +} + int main(int argc, const char **argv) { @@ -4428,6 +4512,7 @@ main(int argc, const char **argv) CU_ADD_TEST(suite, test_retry_io_if_ctrlr_is_resetting); CU_ADD_TEST(suite, test_retry_io_for_io_path_error); CU_ADD_TEST(suite, test_retry_io_count); + CU_ADD_TEST(suite, test_concurrent_read_ana_log_page); CU_basic_set_mode(CU_BRM_VERBOSE);