From 494eb6e58b8e72762d0a2127f2657926298e2523 Mon Sep 17 00:00:00 2001 From: Shuhei Matsumoto Date: Tue, 12 Apr 2022 12:57:10 +0900 Subject: [PATCH] bdev: Fix race among bdev_reset(), bdev_close(), and bdev_unregister() There is a race condition when a bdev is unregistered while reset is submitted from the upper layer very frequently. spdk_io_device_unregister() may fail because it is called while spdk_for_each_channel() is processed. spdk_io_device_unregister io_device bdev_Nvme0n1 (0x7f4be8053aa1) has 1 for_each calls outstanding To avoid this failure, defer calling spdk_io_device_unregister() until reset completes if reset is in progress when unregistration is ready to do, and then reset completion calls spdk_io_device_unregister() later. A bdev cannot be opened if it is already deleting. So we do not need to hold mutex. Signed-off-by: Shuhei Matsumoto Change-Id: Ida1681ba9f3096670ff62274b35bb3e4fd69398a Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12222 Community-CI: Broadcom CI Tested-by: SPDK CI Jenkins Reviewed-by: Aleksey Marchuk Reviewed-by: Ben Walker Reviewed-by: Michael Haeuptle Reviewed-by: Jim Harris Reviewed-by: Paul Luse --- lib/bdev/bdev.c | 15 ++++++ test/unit/lib/bdev/mt/bdev.c/bdev_ut.c | 69 +++++++++++++++++++++++++- 2 files changed, 83 insertions(+), 1 deletion(-) diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 8f6baebf4..8a22ac2c9 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -5760,10 +5760,13 @@ bdev_io_complete(void *ctx) bdev_io->internal.caller_ctx); } +static void bdev_destroy_cb(void *io_device); + static void bdev_reset_complete(struct spdk_io_channel_iter *i, int status) { struct spdk_bdev_io *bdev_io = spdk_io_channel_iter_get_ctx(i); + struct spdk_bdev *bdev = bdev_io->bdev; if (bdev_io->u.reset.ch_ref != NULL) { spdk_put_io_channel(bdev_io->u.reset.ch_ref); @@ -5771,6 +5774,11 @@ bdev_reset_complete(struct spdk_io_channel_iter *i, int status) } bdev_io_complete(bdev_io); + + if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING && + TAILQ_EMPTY(&bdev->internal.open_descs)) { + spdk_io_device_unregister(__bdev_to_io_dev(bdev), bdev_destroy_cb); + } } static void @@ -6243,6 +6251,13 @@ bdev_unregister_unsafe(struct spdk_bdev *bdev) bdev_alias_del(bdev, uuid, bdev_name_del_unsafe); spdk_notify_send("bdev_unregister", spdk_bdev_get_name(bdev)); + + if (bdev->internal.reset_in_progress != NULL) { + /* If reset is in progress, let the completion callback for reset + * unregister the bdev. + */ + rc = -EBUSY; + } } return rc; diff --git a/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c b/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c index 393939b9a..591d3c7c9 100644 --- a/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c @@ -204,7 +204,6 @@ stub_complete_io(void *io_target, uint32_t num_to_complete) ch->avail_cnt++; num_completed++; } - spdk_put_io_channel(_ch); return num_completed; } @@ -1983,6 +1982,73 @@ lock_lba_range_then_submit_io(void) teardown_test(); } +/* spdk_bdev_reset() freezes and unfreezes I/O channels by using spdk_for_each_channel(). + * spdk_bdev_unregister() calls spdk_io_device_unregister() in the end. However + * spdk_io_device_unregister() fails if it is called while executing spdk_for_each_channel(). + * Hence, in this case, spdk_io_device_unregister() is deferred until spdk_bdev_reset() + * completes. Test this behavior. + */ +static void +unregister_during_reset(void) +{ + struct spdk_io_channel *io_ch[2]; + bool done_reset = false, done_unregister = false; + int rc; + + setup_test(); + set_thread(0); + + io_ch[0] = spdk_bdev_get_io_channel(g_desc); + SPDK_CU_ASSERT_FATAL(io_ch[0] != NULL); + + set_thread(1); + + io_ch[1] = spdk_bdev_get_io_channel(g_desc); + SPDK_CU_ASSERT_FATAL(io_ch[1] != NULL); + + set_thread(0); + + CU_ASSERT(g_bdev.bdev.internal.reset_in_progress == NULL); + + rc = spdk_bdev_reset(g_desc, io_ch[0], reset_done, &done_reset); + CU_ASSERT(rc == 0); + + set_thread(0); + + poll_thread_times(0, 1); + + spdk_bdev_close(g_desc); + spdk_bdev_unregister(&g_bdev.bdev, _bdev_unregistered, &done_unregister); + + CU_ASSERT(done_reset == false); + CU_ASSERT(done_unregister == false); + + poll_threads(); + + stub_complete_io(g_bdev.io_target, 0); + + poll_threads(); + + CU_ASSERT(done_reset == true); + CU_ASSERT(done_unregister == false); + + spdk_put_io_channel(io_ch[0]); + + set_thread(1); + + spdk_put_io_channel(io_ch[1]); + + poll_threads(); + + CU_ASSERT(done_unregister == true); + + /* Restore the original g_bdev so that we can use teardown_test(). */ + set_thread(0); + register_bdev(&g_bdev, "ut_bdev", &g_io_device); + spdk_bdev_open_ext("ut_bdev", true, _bdev_event_cb, NULL, &g_desc); + teardown_test(); +} + int main(int argc, char **argv) { @@ -2009,6 +2075,7 @@ main(int argc, char **argv) CU_ADD_TEST(suite, bdev_histograms_mt); CU_ADD_TEST(suite, bdev_set_io_timeout_mt); CU_ADD_TEST(suite, lock_lba_range_then_submit_io); + CU_ADD_TEST(suite, unregister_during_reset); CU_basic_set_mode(CU_BRM_VERBOSE); CU_basic_run_tests();