bdev: Fix race among bdev_reset(), bdev_close(), and bdev_unregister()
There is a race condition when a bdev is unregistered while resets are being submitted very frequently from the upper layer. spdk_io_device_unregister() may fail because it is called while spdk_for_each_channel() is still being processed, e.g.: "spdk_io_device_unregister io_device bdev_Nvme0n1 (0x7f4be8053aa1) has 1 for_each calls outstanding". To avoid this failure, if a reset is in progress when the bdev is ready to be unregistered, defer the call to spdk_io_device_unregister() until the reset completes; the reset completion then calls spdk_io_device_unregister(). A bdev cannot be opened once it is being deleted, so we do not need to hold the mutex. Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com> Change-Id: Ida1681ba9f3096670ff62274b35bb3e4fd69398a Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12222 Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Michael Haeuptle <michaelhaeuptle@gmail.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Paul Luse <paul.e.luse@intel.com>
This commit is contained in:
parent
2a6a64485c
commit
494eb6e58b
@ -5760,10 +5760,13 @@ bdev_io_complete(void *ctx)
|
||||
bdev_io->internal.caller_ctx);
|
||||
}
|
||||
|
||||
static void bdev_destroy_cb(void *io_device);
|
||||
|
||||
static void
|
||||
bdev_reset_complete(struct spdk_io_channel_iter *i, int status)
|
||||
{
|
||||
struct spdk_bdev_io *bdev_io = spdk_io_channel_iter_get_ctx(i);
|
||||
struct spdk_bdev *bdev = bdev_io->bdev;
|
||||
|
||||
if (bdev_io->u.reset.ch_ref != NULL) {
|
||||
spdk_put_io_channel(bdev_io->u.reset.ch_ref);
|
||||
@ -5771,6 +5774,11 @@ bdev_reset_complete(struct spdk_io_channel_iter *i, int status)
|
||||
}
|
||||
|
||||
bdev_io_complete(bdev_io);
|
||||
|
||||
if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING &&
|
||||
TAILQ_EMPTY(&bdev->internal.open_descs)) {
|
||||
spdk_io_device_unregister(__bdev_to_io_dev(bdev), bdev_destroy_cb);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@ -6243,6 +6251,13 @@ bdev_unregister_unsafe(struct spdk_bdev *bdev)
|
||||
bdev_alias_del(bdev, uuid, bdev_name_del_unsafe);
|
||||
|
||||
spdk_notify_send("bdev_unregister", spdk_bdev_get_name(bdev));
|
||||
|
||||
if (bdev->internal.reset_in_progress != NULL) {
|
||||
/* If reset is in progress, let the completion callback for reset
|
||||
* unregister the bdev.
|
||||
*/
|
||||
rc = -EBUSY;
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
|
@ -204,7 +204,6 @@ stub_complete_io(void *io_target, uint32_t num_to_complete)
|
||||
ch->avail_cnt++;
|
||||
num_completed++;
|
||||
}
|
||||
|
||||
spdk_put_io_channel(_ch);
|
||||
return num_completed;
|
||||
}
|
||||
@ -1983,6 +1982,73 @@ lock_lba_range_then_submit_io(void)
|
||||
teardown_test();
|
||||
}
|
||||
|
||||
/* spdk_bdev_reset() freezes and unfreezes I/O channels by using spdk_for_each_channel().
|
||||
* spdk_bdev_unregister() calls spdk_io_device_unregister() in the end. However
|
||||
* spdk_io_device_unregister() fails if it is called while executing spdk_for_each_channel().
|
||||
* Hence, in this case, spdk_io_device_unregister() is deferred until spdk_bdev_reset()
|
||||
* completes. Test this behavior.
|
||||
*/
|
||||
static void
|
||||
unregister_during_reset(void)
|
||||
{
|
||||
struct spdk_io_channel *io_ch[2];
|
||||
bool done_reset = false, done_unregister = false;
|
||||
int rc;
|
||||
|
||||
setup_test();
|
||||
set_thread(0);
|
||||
|
||||
io_ch[0] = spdk_bdev_get_io_channel(g_desc);
|
||||
SPDK_CU_ASSERT_FATAL(io_ch[0] != NULL);
|
||||
|
||||
set_thread(1);
|
||||
|
||||
io_ch[1] = spdk_bdev_get_io_channel(g_desc);
|
||||
SPDK_CU_ASSERT_FATAL(io_ch[1] != NULL);
|
||||
|
||||
set_thread(0);
|
||||
|
||||
CU_ASSERT(g_bdev.bdev.internal.reset_in_progress == NULL);
|
||||
|
||||
rc = spdk_bdev_reset(g_desc, io_ch[0], reset_done, &done_reset);
|
||||
CU_ASSERT(rc == 0);
|
||||
|
||||
set_thread(0);
|
||||
|
||||
poll_thread_times(0, 1);
|
||||
|
||||
spdk_bdev_close(g_desc);
|
||||
spdk_bdev_unregister(&g_bdev.bdev, _bdev_unregistered, &done_unregister);
|
||||
|
||||
CU_ASSERT(done_reset == false);
|
||||
CU_ASSERT(done_unregister == false);
|
||||
|
||||
poll_threads();
|
||||
|
||||
stub_complete_io(g_bdev.io_target, 0);
|
||||
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT(done_reset == true);
|
||||
CU_ASSERT(done_unregister == false);
|
||||
|
||||
spdk_put_io_channel(io_ch[0]);
|
||||
|
||||
set_thread(1);
|
||||
|
||||
spdk_put_io_channel(io_ch[1]);
|
||||
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT(done_unregister == true);
|
||||
|
||||
/* Restore the original g_bdev so that we can use teardown_test(). */
|
||||
set_thread(0);
|
||||
register_bdev(&g_bdev, "ut_bdev", &g_io_device);
|
||||
spdk_bdev_open_ext("ut_bdev", true, _bdev_event_cb, NULL, &g_desc);
|
||||
teardown_test();
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
@ -2009,6 +2075,7 @@ main(int argc, char **argv)
|
||||
CU_ADD_TEST(suite, bdev_histograms_mt);
|
||||
CU_ADD_TEST(suite, bdev_set_io_timeout_mt);
|
||||
CU_ADD_TEST(suite, lock_lba_range_then_submit_io);
|
||||
CU_ADD_TEST(suite, unregister_during_reset);
|
||||
|
||||
CU_basic_set_mode(CU_BRM_VERBOSE);
|
||||
CU_basic_run_tests();
|
||||
|
Loading…
Reference in New Issue
Block a user