diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h index 84da4cdcd..1060796f4 100644 --- a/include/spdk/bdev.h +++ b/include/spdk/bdev.h @@ -114,6 +114,7 @@ struct spdk_uuid; enum spdk_bdev_status { SPDK_BDEV_STATUS_INVALID, SPDK_BDEV_STATUS_READY, + SPDK_BDEV_STATUS_UNREGISTERING, SPDK_BDEV_STATUS_REMOVING, }; diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index d2b1d5f5e..b3e4d200f 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -3601,12 +3601,21 @@ bdev_io_stat_add(struct spdk_bdev_io_stat *total, struct spdk_bdev_io_stat *add) total->unmap_latency_ticks += add->unmap_latency_ticks; } +static void +bdev_channel_abort_queued_ios(struct spdk_bdev_channel *ch) +{ + struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource; + struct spdk_bdev_mgmt_channel *mgmt_ch = shared_resource->mgmt_ch; + + bdev_abort_all_queued_io(&shared_resource->nomem_io, ch); + bdev_abort_all_buf_io(&mgmt_ch->need_buf_small, ch); + bdev_abort_all_buf_io(&mgmt_ch->need_buf_large, ch); +} + static void bdev_channel_destroy(void *io_device, void *ctx_buf) { - struct spdk_bdev_channel *ch = ctx_buf; - struct spdk_bdev_mgmt_channel *mgmt_ch; - struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource; + struct spdk_bdev_channel *ch = ctx_buf; SPDK_DEBUGLOG(bdev, "Destroying channel %p for bdev %s on thread %p\n", ch, ch->bdev->name, spdk_get_thread()); @@ -3619,12 +3628,9 @@ bdev_channel_destroy(void *io_device, void *ctx_buf) bdev_io_stat_add(&ch->bdev->internal.stat, &ch->stat); pthread_mutex_unlock(&ch->bdev->internal.mutex); - mgmt_ch = shared_resource->mgmt_ch; - bdev_abort_all_queued_io(&ch->queued_resets, ch); - bdev_abort_all_queued_io(&shared_resource->nomem_io, ch); - bdev_abort_all_buf_io(&mgmt_ch->need_buf_small, ch); - bdev_abort_all_buf_io(&mgmt_ch->need_buf_large, ch); + + bdev_channel_abort_queued_ios(ch); if (ch->histogram) { spdk_histogram_data_free(ch->histogram); @@ -6259,11 +6265,43 @@ bdev_unregister_unsafe(struct spdk_bdev *bdev) 
return rc; } +static void +bdev_unregister_abort_channel(struct spdk_io_channel_iter *i) +{ + struct spdk_io_channel *io_ch = spdk_io_channel_iter_get_channel(i); + struct spdk_bdev_channel *bdev_ch = spdk_io_channel_get_ctx(io_ch); + + bdev_channel_abort_queued_ios(bdev_ch); + spdk_for_each_channel_continue(i, 0); +} + +static void +bdev_unregister(struct spdk_io_channel_iter *i, int status) +{ + struct spdk_bdev *bdev = spdk_io_channel_iter_get_ctx(i); + int rc; + + pthread_mutex_lock(&g_bdev_mgr.mutex); + pthread_mutex_lock(&bdev->internal.mutex); + /* + * Set the status to REMOVING after completing to abort channels. Otherwise, + * the last spdk_bdev_close() may call spdk_io_device_unregister() while + * spdk_for_each_channel() is executed and spdk_io_device_unregister() may fail. + */ + bdev->internal.status = SPDK_BDEV_STATUS_REMOVING; + rc = bdev_unregister_unsafe(bdev); + pthread_mutex_unlock(&bdev->internal.mutex); + pthread_mutex_unlock(&g_bdev_mgr.mutex); + + if (rc == 0) { + spdk_io_device_unregister(__bdev_to_io_dev(bdev), bdev_destroy_cb); + } +} + void spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg) { struct spdk_thread *thread; - int rc; SPDK_DEBUGLOG(bdev, "Removing bdev %s from list\n", bdev->name); @@ -6277,7 +6315,8 @@ spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void } pthread_mutex_lock(&g_bdev_mgr.mutex); - if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) { + if (bdev->internal.status == SPDK_BDEV_STATUS_UNREGISTERING || + bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) { pthread_mutex_unlock(&g_bdev_mgr.mutex); if (cb_fn) { cb_fn(cb_arg, -EBUSY); @@ -6286,18 +6325,16 @@ spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void } pthread_mutex_lock(&bdev->internal.mutex); - bdev->internal.status = SPDK_BDEV_STATUS_REMOVING; + bdev->internal.status = SPDK_BDEV_STATUS_UNREGISTERING; bdev->internal.unregister_cb = cb_fn; 
bdev->internal.unregister_ctx = cb_arg; - - /* Call under lock. */ - rc = bdev_unregister_unsafe(bdev); pthread_mutex_unlock(&bdev->internal.mutex); pthread_mutex_unlock(&g_bdev_mgr.mutex); - if (rc == 0) { - spdk_io_device_unregister(__bdev_to_io_dev(bdev), bdev_destroy_cb); - } + spdk_for_each_channel(__bdev_to_io_dev(bdev), + bdev_unregister_abort_channel, + bdev, + bdev_unregister); } static void @@ -6377,7 +6414,8 @@ bdev_open(struct spdk_bdev *bdev, bool write, struct spdk_bdev_desc *desc) desc->write = write; pthread_mutex_lock(&bdev->internal.mutex); - if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) { + if (bdev->internal.status == SPDK_BDEV_STATUS_UNREGISTERING || + bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) { pthread_mutex_unlock(&bdev->internal.mutex); return -ENODEV; } diff --git a/test/unit/lib/bdev/bdev.c/bdev_ut.c b/test/unit/lib/bdev/bdev.c/bdev_ut.c index 46b40e647..e2cf897b8 100644 --- a/test/unit/lib/bdev/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/bdev.c/bdev_ut.c @@ -81,6 +81,8 @@ int g_status; int g_count; enum spdk_bdev_event_type g_event_type1; enum spdk_bdev_event_type g_event_type2; +enum spdk_bdev_event_type g_event_type3; +enum spdk_bdev_event_type g_event_type4; struct spdk_histogram_data *g_histogram; void *g_unregister_arg; int g_unregister_rc; @@ -568,6 +570,18 @@ bdev_open_cb2(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *even } } +static void +bdev_open_cb3(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx) +{ + g_event_type3 = type; +} + +static void +bdev_open_cb4(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx) +{ + g_event_type4 = type; +} + static void get_device_stat_test(void) { @@ -3770,6 +3784,8 @@ bdev_open_while_hotremove(void) CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc[0])); spdk_bdev_unregister(bdev, NULL, NULL); + /* Bdev unregister is handled asynchronously. Poll thread to complete. 
*/ + poll_threads(); rc = spdk_bdev_open_ext("bdev", false, bdev_ut_event_cb, NULL, &desc[1]); CU_ASSERT(rc == -ENODEV); @@ -3850,6 +3866,87 @@ bdev_open_ext(void) poll_threads(); } +static void +bdev_open_ext_unregister(void) +{ + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc1 = NULL; + struct spdk_bdev_desc *desc2 = NULL; + struct spdk_bdev_desc *desc3 = NULL; + struct spdk_bdev_desc *desc4 = NULL; + int rc = 0; + + bdev = allocate_bdev("bdev"); + + rc = spdk_bdev_open_ext("bdev", true, NULL, NULL, &desc1); + CU_ASSERT_EQUAL(rc, -EINVAL); + + rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb1, &desc1, &desc1); + CU_ASSERT_EQUAL(rc, 0); + + rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb2, &desc2, &desc2); + CU_ASSERT_EQUAL(rc, 0); + + rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb3, &desc3, &desc3); + CU_ASSERT_EQUAL(rc, 0); + + rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb4, &desc4, &desc4); + CU_ASSERT_EQUAL(rc, 0); + + g_event_type1 = 0xFF; + g_event_type2 = 0xFF; + g_event_type3 = 0xFF; + g_event_type4 = 0xFF; + + g_unregister_arg = NULL; + g_unregister_rc = -1; + + /* Simulate hot-unplug by unregistering bdev */ + spdk_bdev_unregister(bdev, bdev_unregister_cb, (void *)0x12345678); + + /* + * Unregister is handled asynchronously and event callback + * (i.e., above bdev_open_cbN) will be called. + * For bdev_open_cb3 and bdev_open_cb4, it is intended to not + * close the desc3 and desc4 so that the bdev is not closed. + */ + poll_threads(); + + /* Check if correct events have been triggered in event callback fn */ + CU_ASSERT_EQUAL(g_event_type1, SPDK_BDEV_EVENT_REMOVE); + CU_ASSERT_EQUAL(g_event_type2, SPDK_BDEV_EVENT_REMOVE); + CU_ASSERT_EQUAL(g_event_type3, SPDK_BDEV_EVENT_REMOVE); + CU_ASSERT_EQUAL(g_event_type4, SPDK_BDEV_EVENT_REMOVE); + + /* Check that unregister callback is delayed */ + CU_ASSERT(g_unregister_arg == NULL); + CU_ASSERT(g_unregister_rc == -1); + + /* + * Explicitly close desc3. 
As desc4 is still opened there, the + * unregister callback is still delayed to execute. + */ + spdk_bdev_close(desc3); + CU_ASSERT(g_unregister_arg == NULL); + CU_ASSERT(g_unregister_rc == -1); + + /* + * Explicitly close desc4 to trigger the ongoing bdev unregister + * operation after last desc is closed. + */ + spdk_bdev_close(desc4); + + /* Poll the thread for the async unregister operation */ + poll_threads(); + + /* Check that unregister callback is executed */ + CU_ASSERT(g_unregister_arg == (void *)0x12345678); + CU_ASSERT(g_unregister_rc == 0); + + free_bdev(bdev); + poll_threads(); +} + struct timeout_io_cb_arg { struct iovec iov; uint8_t type; @@ -5350,6 +5447,7 @@ main(int argc, char **argv) CU_ADD_TEST(suite, bdev_open_while_hotremove); CU_ADD_TEST(suite, bdev_close_while_hotremove); CU_ADD_TEST(suite, bdev_open_ext); + CU_ADD_TEST(suite, bdev_open_ext_unregister); CU_ADD_TEST(suite, bdev_set_io_timeout); CU_ADD_TEST(suite, lba_range_overlap); CU_ADD_TEST(suite, lock_lba_range_check_ranges); diff --git a/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c b/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c index 591d3c7c9..fdb3fe471 100644 --- a/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c @@ -280,6 +280,8 @@ unregister_bdev(struct ut_bdev *ut_bdev) /* Handle any deferred messages. */ poll_threads(); spdk_bdev_unregister(&ut_bdev->bdev, NULL, NULL); + /* Handle the async bdev unregister. 
*/ + poll_threads(); } static void @@ -1218,6 +1220,59 @@ enomem_multi_bdev(void) teardown_test(); } +static void +enomem_multi_bdev_unregister(void) +{ + struct spdk_io_channel *io_ch; + struct spdk_bdev_channel *bdev_ch; + struct spdk_bdev_shared_resource *shared_resource; + struct ut_bdev_channel *ut_ch; + const uint32_t IO_ARRAY_SIZE = 64; + const uint32_t AVAIL = 20; + enum spdk_bdev_io_status status[IO_ARRAY_SIZE]; + uint32_t i; + int rc; + + setup_test(); + + set_thread(0); + io_ch = spdk_bdev_get_io_channel(g_desc); + bdev_ch = spdk_io_channel_get_ctx(io_ch); + shared_resource = bdev_ch->shared_resource; + ut_ch = spdk_io_channel_get_ctx(bdev_ch->channel); + ut_ch->avail_cnt = AVAIL; + + /* Saturate io_target through the bdev. */ + for (i = 0; i < AVAIL; i++) { + status[i] = SPDK_BDEV_IO_STATUS_PENDING; + rc = spdk_bdev_read_blocks(g_desc, io_ch, NULL, 0, 1, enomem_done, &status[i]); + CU_ASSERT(rc == 0); + } + CU_ASSERT(TAILQ_EMPTY(&shared_resource->nomem_io)); + + /* + * Now submit I/O through the bdev. This should fail with ENOMEM + * and then go onto the nomem_io list. + */ + status[AVAIL] = SPDK_BDEV_IO_STATUS_PENDING; + rc = spdk_bdev_read_blocks(g_desc, io_ch, NULL, 0, 1, enomem_done, &status[AVAIL]); + CU_ASSERT(rc == 0); + SPDK_CU_ASSERT_FATAL(!TAILQ_EMPTY(&shared_resource->nomem_io)); + + /* Unregister the bdev to abort the IOs from nomem_io queue. */ + unregister_bdev(&g_bdev); + CU_ASSERT(status[AVAIL] == SPDK_BDEV_IO_STATUS_FAILED); + SPDK_CU_ASSERT_FATAL(TAILQ_EMPTY(&shared_resource->nomem_io)); + SPDK_CU_ASSERT_FATAL(shared_resource->io_outstanding == AVAIL); + + /* Complete the bdev's I/O. 
*/ + stub_complete_io(g_bdev.io_target, AVAIL); + SPDK_CU_ASSERT_FATAL(shared_resource->io_outstanding == 0); + + spdk_put_io_channel(io_ch); + poll_threads(); + teardown_test(); +} static void enomem_multi_io_target(void) @@ -2070,6 +2125,7 @@ main(int argc, char **argv) CU_ADD_TEST(suite, io_during_qos_reset); CU_ADD_TEST(suite, enomem); CU_ADD_TEST(suite, enomem_multi_bdev); + CU_ADD_TEST(suite, enomem_multi_bdev_unregister); CU_ADD_TEST(suite, enomem_multi_io_target); CU_ADD_TEST(suite, qos_dynamic_enable); CU_ADD_TEST(suite, bdev_histograms_mt);