From 7bcd316de1f71f5ca5303d08ac26df20dcd05669 Mon Sep 17 00:00:00 2001 From: GangCao Date: Fri, 6 May 2022 10:21:29 -0400 Subject: [PATCH] bdev: abort all IOs when unregistering the bdev To fix issue: #2484 When unregistering the bdev, will send out the message to each thread to abort all the IOs including IOs from nomem_io queue, need_buf_small queue and need_buf_large queue. The new SPDK_BDEV_STATUS_UNREGISTERING state is newly added to indicate this unregister operation. In this case, the bdev unregister operation becomes the async operation as each thread will be sent the message to abort the IOs and as the last step, it will unregister the required bdev and associted io device. On the other hand, the queued_resets will be handled separately and not aborted in the bdev unregister. New unit test cases are also added: enomem_multi_bdev_unregister: to abort the IO from nomem_io queue during the unregister operation bdev_open_ext_unregister: to handle the events and async operations from the unregister operation Change-Id: Ib1663c0f71ffe87144869cb3a684e18eb956046b Signed-off-by: GangCao Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12573 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Shuhei Matsumoto Reviewed-by: Aleksey Marchuk Reviewed-by: Dong Yi --- include/spdk/bdev.h | 1 + lib/bdev/bdev.c | 74 ++++++++++++++----- test/unit/lib/bdev/bdev.c/bdev_ut.c | 98 ++++++++++++++++++++++++++ test/unit/lib/bdev/mt/bdev.c/bdev_ut.c | 56 +++++++++++++++ 4 files changed, 211 insertions(+), 18 deletions(-) diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h index 84da4cdcd..1060796f4 100644 --- a/include/spdk/bdev.h +++ b/include/spdk/bdev.h @@ -114,6 +114,7 @@ struct spdk_uuid; enum spdk_bdev_status { SPDK_BDEV_STATUS_INVALID, SPDK_BDEV_STATUS_READY, + SPDK_BDEV_STATUS_UNREGISTERING, SPDK_BDEV_STATUS_REMOVING, }; diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index d2b1d5f5e..b3e4d200f 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -3601,12 +3601,21 @@ bdev_io_stat_add(struct spdk_bdev_io_stat *total, struct spdk_bdev_io_stat *add) total->unmap_latency_ticks += add->unmap_latency_ticks; } +static void +bdev_channel_abort_queued_ios(struct spdk_bdev_channel *ch) +{ + struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource; + struct spdk_bdev_mgmt_channel *mgmt_ch = shared_resource->mgmt_ch; + + bdev_abort_all_queued_io(&shared_resource->nomem_io, ch); + bdev_abort_all_buf_io(&mgmt_ch->need_buf_small, ch); + bdev_abort_all_buf_io(&mgmt_ch->need_buf_large, ch); +} + static void bdev_channel_destroy(void *io_device, void *ctx_buf) { - struct spdk_bdev_channel *ch = ctx_buf; - struct spdk_bdev_mgmt_channel *mgmt_ch; - struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource; + struct spdk_bdev_channel *ch = ctx_buf; SPDK_DEBUGLOG(bdev, "Destroying channel %p for bdev %s on thread %p\n", ch, ch->bdev->name, spdk_get_thread()); @@ -3619,12 +3628,9 @@ bdev_channel_destroy(void *io_device, void *ctx_buf) bdev_io_stat_add(&ch->bdev->internal.stat, &ch->stat); pthread_mutex_unlock(&ch->bdev->internal.mutex); - mgmt_ch = shared_resource->mgmt_ch; - bdev_abort_all_queued_io(&ch->queued_resets, ch); - bdev_abort_all_queued_io(&shared_resource->nomem_io, ch); - bdev_abort_all_buf_io(&mgmt_ch->need_buf_small, ch); - bdev_abort_all_buf_io(&mgmt_ch->need_buf_large, ch); + + bdev_channel_abort_queued_ios(ch); if (ch->histogram) { spdk_histogram_data_free(ch->histogram); @@ -6259,11 +6265,43 @@ bdev_unregister_unsafe(struct spdk_bdev *bdev) return rc; } +static void +bdev_unregister_abort_channel(struct spdk_io_channel_iter *i) +{ + struct spdk_io_channel *io_ch = spdk_io_channel_iter_get_channel(i); + struct spdk_bdev_channel *bdev_ch = spdk_io_channel_get_ctx(io_ch); + + bdev_channel_abort_queued_ios(bdev_ch); + spdk_for_each_channel_continue(i, 0); +} + +static void +bdev_unregister(struct spdk_io_channel_iter *i, int status) +{ + struct spdk_bdev *bdev = spdk_io_channel_iter_get_ctx(i); + int rc; + + pthread_mutex_lock(&g_bdev_mgr.mutex); + pthread_mutex_lock(&bdev->internal.mutex); + /* + * Set the status to REMOVING after completing to abort channels. Otherwise, + * the last spdk_bdev_close() may call spdk_io_device_unregister() while + * spdk_for_each_channel() is executed and spdk_io_device_unregister() may fail. + */ + bdev->internal.status = SPDK_BDEV_STATUS_REMOVING; + rc = bdev_unregister_unsafe(bdev); + pthread_mutex_unlock(&bdev->internal.mutex); + pthread_mutex_unlock(&g_bdev_mgr.mutex); + + if (rc == 0) { + spdk_io_device_unregister(__bdev_to_io_dev(bdev), bdev_destroy_cb); + } +} + void spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg) { struct spdk_thread *thread; - int rc; SPDK_DEBUGLOG(bdev, "Removing bdev %s from list\n", bdev->name); @@ -6277,7 +6315,8 @@ spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void } pthread_mutex_lock(&g_bdev_mgr.mutex); - if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) { + if (bdev->internal.status == SPDK_BDEV_STATUS_UNREGISTERING || + bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) { pthread_mutex_unlock(&g_bdev_mgr.mutex); if (cb_fn) { cb_fn(cb_arg, -EBUSY); @@ -6286,18 +6325,16 @@ spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void } pthread_mutex_lock(&bdev->internal.mutex); - bdev->internal.status = SPDK_BDEV_STATUS_REMOVING; + bdev->internal.status = SPDK_BDEV_STATUS_UNREGISTERING; bdev->internal.unregister_cb = cb_fn; bdev->internal.unregister_ctx = cb_arg; - - /* Call under lock. */ - rc = bdev_unregister_unsafe(bdev); pthread_mutex_unlock(&bdev->internal.mutex); pthread_mutex_unlock(&g_bdev_mgr.mutex); - if (rc == 0) { - spdk_io_device_unregister(__bdev_to_io_dev(bdev), bdev_destroy_cb); - } + spdk_for_each_channel(__bdev_to_io_dev(bdev), + bdev_unregister_abort_channel, + bdev, + bdev_unregister); } static void @@ -6377,7 +6414,8 @@ bdev_open(struct spdk_bdev *bdev, bool write, struct spdk_bdev_desc *desc) desc->write = write; pthread_mutex_lock(&bdev->internal.mutex); - if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) { + if (bdev->internal.status == SPDK_BDEV_STATUS_UNREGISTERING || + bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) { pthread_mutex_unlock(&bdev->internal.mutex); return -ENODEV; } diff --git a/test/unit/lib/bdev/bdev.c/bdev_ut.c b/test/unit/lib/bdev/bdev.c/bdev_ut.c index 46b40e647..e2cf897b8 100644 --- a/test/unit/lib/bdev/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/bdev.c/bdev_ut.c @@ -81,6 +81,8 @@ int g_status; int g_count; enum spdk_bdev_event_type g_event_type1; enum spdk_bdev_event_type g_event_type2; +enum spdk_bdev_event_type g_event_type3; +enum spdk_bdev_event_type g_event_type4; struct spdk_histogram_data *g_histogram; void *g_unregister_arg; int g_unregister_rc; @@ -568,6 +570,18 @@ bdev_open_cb2(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *even } } +static void +bdev_open_cb3(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx) +{ + g_event_type3 = type; +} + +static void +bdev_open_cb4(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx) +{ + g_event_type4 = type; +} + static void get_device_stat_test(void) { @@ -3770,6 +3784,8 @@ bdev_open_while_hotremove(void) CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc[0])); spdk_bdev_unregister(bdev, NULL, NULL); + /* Bdev unregister is handled asynchronously. Poll thread to complete. */ + poll_threads(); rc = spdk_bdev_open_ext("bdev", false, bdev_ut_event_cb, NULL, &desc[1]); CU_ASSERT(rc == -ENODEV); @@ -3850,6 +3866,87 @@ bdev_open_ext(void) poll_threads(); } +static void +bdev_open_ext_unregister(void) +{ + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc1 = NULL; + struct spdk_bdev_desc *desc2 = NULL; + struct spdk_bdev_desc *desc3 = NULL; + struct spdk_bdev_desc *desc4 = NULL; + int rc = 0; + + bdev = allocate_bdev("bdev"); + + rc = spdk_bdev_open_ext("bdev", true, NULL, NULL, &desc1); + CU_ASSERT_EQUAL(rc, -EINVAL); + + rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb1, &desc1, &desc1); + CU_ASSERT_EQUAL(rc, 0); + + rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb2, &desc2, &desc2); + CU_ASSERT_EQUAL(rc, 0); + + rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb3, &desc3, &desc3); + CU_ASSERT_EQUAL(rc, 0); + + rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb4, &desc4, &desc4); + CU_ASSERT_EQUAL(rc, 0); + + g_event_type1 = 0xFF; + g_event_type2 = 0xFF; + g_event_type3 = 0xFF; + g_event_type4 = 0xFF; + + g_unregister_arg = NULL; + g_unregister_rc = -1; + + /* Simulate hot-unplug by unregistering bdev */ + spdk_bdev_unregister(bdev, bdev_unregister_cb, (void *)0x12345678); + + /* + * Unregister is handled asynchronously and event callback + * (i.e., above bdev_open_cbN) will be called. + * For bdev_open_cb3 and bdev_open_cb4, it is intended to not + * close the desc3 and desc4 so that the bdev is not closed. + */ + poll_threads(); + + /* Check if correct events have been triggered in event callback fn */ + CU_ASSERT_EQUAL(g_event_type1, SPDK_BDEV_EVENT_REMOVE); + CU_ASSERT_EQUAL(g_event_type2, SPDK_BDEV_EVENT_REMOVE); + CU_ASSERT_EQUAL(g_event_type3, SPDK_BDEV_EVENT_REMOVE); + CU_ASSERT_EQUAL(g_event_type4, SPDK_BDEV_EVENT_REMOVE); + + /* Check that unregister callback is delayed */ + CU_ASSERT(g_unregister_arg == NULL); + CU_ASSERT(g_unregister_rc == -1); + + /* + * Explicitly close desc3. As desc4 is still opened there, the + * unergister callback is still delayed to execute. + */ + spdk_bdev_close(desc3); + CU_ASSERT(g_unregister_arg == NULL); + CU_ASSERT(g_unregister_rc == -1); + + /* + * Explicitly close desc4 to trigger the ongoing bdev unregister + * operation after last desc is closed. + */ + spdk_bdev_close(desc4); + + /* Poll the thread for the async unregister operation */ + poll_threads(); + + /* Check that unregister callback is executed */ + CU_ASSERT(g_unregister_arg == (void *)0x12345678); + CU_ASSERT(g_unregister_rc == 0); + + free_bdev(bdev); + poll_threads(); +} + struct timeout_io_cb_arg { struct iovec iov; uint8_t type; @@ -5350,6 +5447,7 @@ main(int argc, char **argv) CU_ADD_TEST(suite, bdev_open_while_hotremove); CU_ADD_TEST(suite, bdev_close_while_hotremove); CU_ADD_TEST(suite, bdev_open_ext); + CU_ADD_TEST(suite, bdev_open_ext_unregister); CU_ADD_TEST(suite, bdev_set_io_timeout); CU_ADD_TEST(suite, lba_range_overlap); CU_ADD_TEST(suite, lock_lba_range_check_ranges); diff --git a/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c b/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c index 591d3c7c9..fdb3fe471 100644 --- a/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c @@ -280,6 +280,8 @@ unregister_bdev(struct ut_bdev *ut_bdev) /* Handle any deferred messages. */ poll_threads(); spdk_bdev_unregister(&ut_bdev->bdev, NULL, NULL); + /* Handle the async bdev unregister. */ + poll_threads(); } static void @@ -1218,6 +1220,59 @@ enomem_multi_bdev(void) teardown_test(); } +static void +enomem_multi_bdev_unregister(void) +{ + struct spdk_io_channel *io_ch; + struct spdk_bdev_channel *bdev_ch; + struct spdk_bdev_shared_resource *shared_resource; + struct ut_bdev_channel *ut_ch; + const uint32_t IO_ARRAY_SIZE = 64; + const uint32_t AVAIL = 20; + enum spdk_bdev_io_status status[IO_ARRAY_SIZE]; + uint32_t i; + int rc; + + setup_test(); + + set_thread(0); + io_ch = spdk_bdev_get_io_channel(g_desc); + bdev_ch = spdk_io_channel_get_ctx(io_ch); + shared_resource = bdev_ch->shared_resource; + ut_ch = spdk_io_channel_get_ctx(bdev_ch->channel); + ut_ch->avail_cnt = AVAIL; + + /* Saturate io_target through the bdev. */ + for (i = 0; i < AVAIL; i++) { + status[i] = SPDK_BDEV_IO_STATUS_PENDING; + rc = spdk_bdev_read_blocks(g_desc, io_ch, NULL, 0, 1, enomem_done, &status[i]); + CU_ASSERT(rc == 0); + } + CU_ASSERT(TAILQ_EMPTY(&shared_resource->nomem_io)); + + /* + * Now submit I/O through the bdev. This should fail with ENOMEM + * and then go onto the nomem_io list. + */ + status[AVAIL] = SPDK_BDEV_IO_STATUS_PENDING; + rc = spdk_bdev_read_blocks(g_desc, io_ch, NULL, 0, 1, enomem_done, &status[AVAIL]); + CU_ASSERT(rc == 0); + SPDK_CU_ASSERT_FATAL(!TAILQ_EMPTY(&shared_resource->nomem_io)); + + /* Unregister the bdev to abort the IOs from nomem_io queue. */ + unregister_bdev(&g_bdev); + CU_ASSERT(status[AVAIL] == SPDK_BDEV_IO_STATUS_FAILED); + SPDK_CU_ASSERT_FATAL(TAILQ_EMPTY(&shared_resource->nomem_io)); + SPDK_CU_ASSERT_FATAL(shared_resource->io_outstanding == AVAIL); + + /* Complete the bdev's I/O. */ + stub_complete_io(g_bdev.io_target, AVAIL); + SPDK_CU_ASSERT_FATAL(shared_resource->io_outstanding == 0); + + spdk_put_io_channel(io_ch); + poll_threads(); + teardown_test(); +} static void enomem_multi_io_target(void) @@ -2070,6 +2125,7 @@ main(int argc, char **argv) CU_ADD_TEST(suite, io_during_qos_reset); CU_ADD_TEST(suite, enomem); CU_ADD_TEST(suite, enomem_multi_bdev); + CU_ADD_TEST(suite, enomem_multi_bdev_unregister); CU_ADD_TEST(suite, enomem_multi_io_target); CU_ADD_TEST(suite, qos_dynamic_enable); CU_ADD_TEST(suite, bdev_histograms_mt);