bdev: Fix race among bdev_reset(), bdev_close(), and bdev_unregister()

There is a race condition when a bdev is unregistered while reset is
submitted from the upper layer very frequently.

spdk_io_device_unregister() may fail because it is called while
spdk_for_each_channel() is processed.

    spdk_io_device_unregister io_device bdev_Nvme0n1 (0x7f4be8053aa1)
    has 1 for_each calls outstanding

To avoid this failure, defer calling spdk_io_device_unregister() until
reset completes if reset is in progress when unregistration is ready
to do, and then reset completion calls spdk_io_device_unregister()
later.

A bdev cannot be opened if it is already deleting. So we do not need
to hold mutex.

Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Change-Id: Ida1681ba9f3096670ff62274b35bb3e4fd69398a
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12222
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Michael Haeuptle <michaelhaeuptle@gmail.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Paul Luse <paul.e.luse@intel.com>
This commit is contained in:
Shuhei Matsumoto 2022-04-12 12:57:10 +09:00 committed by Tomasz Zawadzki
parent 2a6a64485c
commit 494eb6e58b
2 changed files with 83 additions and 1 deletions

View File

@ -5760,10 +5760,13 @@ bdev_io_complete(void *ctx)
bdev_io->internal.caller_ctx);
}
static void bdev_destroy_cb(void *io_device);
static void
bdev_reset_complete(struct spdk_io_channel_iter *i, int status)
{
struct spdk_bdev_io *bdev_io = spdk_io_channel_iter_get_ctx(i);
struct spdk_bdev *bdev = bdev_io->bdev;
if (bdev_io->u.reset.ch_ref != NULL) {
spdk_put_io_channel(bdev_io->u.reset.ch_ref);
@ -5771,6 +5774,11 @@ bdev_reset_complete(struct spdk_io_channel_iter *i, int status)
}
bdev_io_complete(bdev_io);
if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING &&
TAILQ_EMPTY(&bdev->internal.open_descs)) {
spdk_io_device_unregister(__bdev_to_io_dev(bdev), bdev_destroy_cb);
}
}
static void
@ -6243,6 +6251,13 @@ bdev_unregister_unsafe(struct spdk_bdev *bdev)
bdev_alias_del(bdev, uuid, bdev_name_del_unsafe);
spdk_notify_send("bdev_unregister", spdk_bdev_get_name(bdev));
if (bdev->internal.reset_in_progress != NULL) {
/* If reset is in progress, let the completion callback for reset
* unregister the bdev.
*/
rc = -EBUSY;
}
}
return rc;

View File

@ -204,7 +204,6 @@ stub_complete_io(void *io_target, uint32_t num_to_complete)
ch->avail_cnt++;
num_completed++;
}
spdk_put_io_channel(_ch);
return num_completed;
}
@ -1983,6 +1982,73 @@ lock_lba_range_then_submit_io(void)
teardown_test();
}
/* spdk_bdev_reset() freezes and unfreezes I/O channels by using spdk_for_each_channel().
* spdk_bdev_unregister() calls spdk_io_device_unregister() in the end. However
* spdk_io_device_unregister() fails if it is called while executing spdk_for_each_channel().
* Hence, in this case, spdk_io_device_unregister() is deferred until spdk_bdev_reset()
* completes. Test this behavior.
*/
static void
unregister_during_reset(void)
{
struct spdk_io_channel *io_ch[2];
bool done_reset = false, done_unregister = false;
int rc;
setup_test();
set_thread(0);
io_ch[0] = spdk_bdev_get_io_channel(g_desc);
SPDK_CU_ASSERT_FATAL(io_ch[0] != NULL);
set_thread(1);
io_ch[1] = spdk_bdev_get_io_channel(g_desc);
SPDK_CU_ASSERT_FATAL(io_ch[1] != NULL);
set_thread(0);
CU_ASSERT(g_bdev.bdev.internal.reset_in_progress == NULL);
rc = spdk_bdev_reset(g_desc, io_ch[0], reset_done, &done_reset);
CU_ASSERT(rc == 0);
set_thread(0);
poll_thread_times(0, 1);
spdk_bdev_close(g_desc);
spdk_bdev_unregister(&g_bdev.bdev, _bdev_unregistered, &done_unregister);
CU_ASSERT(done_reset == false);
CU_ASSERT(done_unregister == false);
poll_threads();
stub_complete_io(g_bdev.io_target, 0);
poll_threads();
CU_ASSERT(done_reset == true);
CU_ASSERT(done_unregister == false);
spdk_put_io_channel(io_ch[0]);
set_thread(1);
spdk_put_io_channel(io_ch[1]);
poll_threads();
CU_ASSERT(done_unregister == true);
/* Restore the original g_bdev so that we can use teardown_test(). */
set_thread(0);
register_bdev(&g_bdev, "ut_bdev", &g_io_device);
spdk_bdev_open_ext("ut_bdev", true, _bdev_event_cb, NULL, &g_desc);
teardown_test();
}
int
main(int argc, char **argv)
{
@ -2009,6 +2075,7 @@ main(int argc, char **argv)
CU_ADD_TEST(suite, bdev_histograms_mt);
CU_ADD_TEST(suite, bdev_set_io_timeout_mt);
CU_ADD_TEST(suite, lock_lba_range_then_submit_io);
CU_ADD_TEST(suite, unregister_during_reset);
CU_basic_set_mode(CU_BRM_VERBOSE);
CU_basic_run_tests();