bdev: abort all IOs when unregistering the bdev

To fix issue: #2484

When unregistering a bdev, a message is sent to each thread to abort
all of its queued I/Os, including I/Os on the nomem_io queue, the
need_buf_small queue and the need_buf_large queue.

A new state, SPDK_BDEV_STATUS_UNREGISTERING, is added to indicate
that this unregister operation is in progress.

As a result, the bdev unregister operation becomes asynchronous:
each thread is sent a message to abort its I/Os, and as the last
step the required bdev and its associated io device are
unregistered.

On the other hand, queued_resets are handled separately and are not
aborted during the bdev unregister.

New unit test cases are also added:
  enomem_multi_bdev_unregister: to abort the IO from
nomem_io queue during the unregister operation
  bdev_open_ext_unregister: to handle the events and
async operations from the unregister operation

Change-Id: Ib1663c0f71ffe87144869cb3a684e18eb956046b
Signed-off-by: GangCao <gang.cao@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12573
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Reviewed-by: Dong Yi <dongx.yi@intel.com>
This commit is contained in:
GangCao 2022-05-06 10:21:29 -04:00 committed by Tomasz Zawadzki
parent 88a064de88
commit 7bcd316de1
4 changed files with 211 additions and 18 deletions

View File

@ -114,6 +114,7 @@ struct spdk_uuid;
enum spdk_bdev_status {
SPDK_BDEV_STATUS_INVALID,
SPDK_BDEV_STATUS_READY,
SPDK_BDEV_STATUS_UNREGISTERING,
SPDK_BDEV_STATUS_REMOVING,
};

View File

@ -3601,12 +3601,21 @@ bdev_io_stat_add(struct spdk_bdev_io_stat *total, struct spdk_bdev_io_stat *add)
total->unmap_latency_ticks += add->unmap_latency_ticks;
}
/*
 * Fail every I/O still parked on this channel: requests waiting on the
 * shared resource's nomem_io list, plus requests waiting for a small or
 * large data buffer on the management channel. Queued resets are not
 * touched here.
 */
static void
bdev_channel_abort_queued_ios(struct spdk_bdev_channel *ch)
{
	struct spdk_bdev_shared_resource *resource = ch->shared_resource;
	struct spdk_bdev_mgmt_channel *mgmt_channel = resource->mgmt_ch;

	bdev_abort_all_queued_io(&resource->nomem_io, ch);
	bdev_abort_all_buf_io(&mgmt_channel->need_buf_small, ch);
	bdev_abort_all_buf_io(&mgmt_channel->need_buf_large, ch);
}
static void
bdev_channel_destroy(void *io_device, void *ctx_buf)
{
struct spdk_bdev_channel *ch = ctx_buf;
struct spdk_bdev_mgmt_channel *mgmt_ch;
struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource;
struct spdk_bdev_channel *ch = ctx_buf;
SPDK_DEBUGLOG(bdev, "Destroying channel %p for bdev %s on thread %p\n", ch, ch->bdev->name,
spdk_get_thread());
@ -3619,12 +3628,9 @@ bdev_channel_destroy(void *io_device, void *ctx_buf)
bdev_io_stat_add(&ch->bdev->internal.stat, &ch->stat);
pthread_mutex_unlock(&ch->bdev->internal.mutex);
mgmt_ch = shared_resource->mgmt_ch;
bdev_abort_all_queued_io(&ch->queued_resets, ch);
bdev_abort_all_queued_io(&shared_resource->nomem_io, ch);
bdev_abort_all_buf_io(&mgmt_ch->need_buf_small, ch);
bdev_abort_all_buf_io(&mgmt_ch->need_buf_large, ch);
bdev_channel_abort_queued_ios(ch);
if (ch->histogram) {
spdk_histogram_data_free(ch->histogram);
@ -6259,11 +6265,43 @@ bdev_unregister_unsafe(struct spdk_bdev *bdev)
return rc;
}
/*
 * spdk_for_each_channel() callback: abort the queued I/Os of one bdev
 * channel, then advance the iterator to the next channel.
 */
static void
bdev_unregister_abort_channel(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);

	bdev_channel_abort_queued_ios(spdk_io_channel_get_ctx(ch));
	spdk_for_each_channel_continue(i, 0);
}
/*
 * Completion callback of the spdk_for_each_channel() started by
 * spdk_bdev_unregister(): at this point every channel has aborted its
 * queued I/Os, so transition the bdev from UNREGISTERING to REMOVING
 * and attempt to finish the unregister.
 */
static void
bdev_unregister(struct spdk_io_channel_iter *i, int status)
{
	struct spdk_bdev *bdev = spdk_io_channel_iter_get_ctx(i);
	int rc;

	/* Lock order: manager mutex first, then the bdev's own mutex. */
	pthread_mutex_lock(&g_bdev_mgr.mutex);
	pthread_mutex_lock(&bdev->internal.mutex);
	/*
	 * Set the status to REMOVING after completing to abort channels. Otherwise,
	 * the last spdk_bdev_close() may call spdk_io_device_unregister() while
	 * spdk_for_each_channel() is executed and spdk_io_device_unregister() may fail.
	 */
	bdev->internal.status = SPDK_BDEV_STATUS_REMOVING;
	rc = bdev_unregister_unsafe(bdev);
	pthread_mutex_unlock(&bdev->internal.mutex);
	pthread_mutex_unlock(&g_bdev_mgr.mutex);

	/*
	 * rc == 0 appears to mean the unregister can complete now; otherwise
	 * completion is deferred until the last descriptor closes (see the
	 * bdev_open_ext_unregister unit test) — confirm against
	 * bdev_unregister_unsafe()'s contract.
	 */
	if (rc == 0) {
		spdk_io_device_unregister(__bdev_to_io_dev(bdev), bdev_destroy_cb);
	}
}
void
spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
{
struct spdk_thread *thread;
int rc;
SPDK_DEBUGLOG(bdev, "Removing bdev %s from list\n", bdev->name);
@ -6277,7 +6315,8 @@ spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void
}
pthread_mutex_lock(&g_bdev_mgr.mutex);
if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) {
if (bdev->internal.status == SPDK_BDEV_STATUS_UNREGISTERING ||
bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) {
pthread_mutex_unlock(&g_bdev_mgr.mutex);
if (cb_fn) {
cb_fn(cb_arg, -EBUSY);
@ -6286,18 +6325,16 @@ spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void
}
pthread_mutex_lock(&bdev->internal.mutex);
bdev->internal.status = SPDK_BDEV_STATUS_REMOVING;
bdev->internal.status = SPDK_BDEV_STATUS_UNREGISTERING;
bdev->internal.unregister_cb = cb_fn;
bdev->internal.unregister_ctx = cb_arg;
/* Call under lock. */
rc = bdev_unregister_unsafe(bdev);
pthread_mutex_unlock(&bdev->internal.mutex);
pthread_mutex_unlock(&g_bdev_mgr.mutex);
if (rc == 0) {
spdk_io_device_unregister(__bdev_to_io_dev(bdev), bdev_destroy_cb);
}
spdk_for_each_channel(__bdev_to_io_dev(bdev),
bdev_unregister_abort_channel,
bdev,
bdev_unregister);
}
static void
@ -6377,7 +6414,8 @@ bdev_open(struct spdk_bdev *bdev, bool write, struct spdk_bdev_desc *desc)
desc->write = write;
pthread_mutex_lock(&bdev->internal.mutex);
if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) {
if (bdev->internal.status == SPDK_BDEV_STATUS_UNREGISTERING ||
bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) {
pthread_mutex_unlock(&bdev->internal.mutex);
return -ENODEV;
}

View File

@ -81,6 +81,8 @@ int g_status;
int g_count;
enum spdk_bdev_event_type g_event_type1;
enum spdk_bdev_event_type g_event_type2;
enum spdk_bdev_event_type g_event_type3;
enum spdk_bdev_event_type g_event_type4;
struct spdk_histogram_data *g_histogram;
void *g_unregister_arg;
int g_unregister_rc;
@ -568,6 +570,18 @@ bdev_open_cb2(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *even
}
}
/*
 * Event callback for the third descriptor opened by
 * bdev_open_ext_unregister(); records the delivered event type in
 * g_event_type3 so the test can assert on it later.
 */
static void
bdev_open_cb3(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	(void)bdev;
	(void)event_ctx;

	g_event_type3 = type;
}
/*
 * Event callback for the fourth descriptor opened by
 * bdev_open_ext_unregister(); records the delivered event type in
 * g_event_type4 so the test can assert on it later.
 */
static void
bdev_open_cb4(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	(void)bdev;
	(void)event_ctx;

	g_event_type4 = type;
}
static void
get_device_stat_test(void)
{
@ -3770,6 +3784,8 @@ bdev_open_while_hotremove(void)
CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc[0]));
spdk_bdev_unregister(bdev, NULL, NULL);
/* Bdev unregister is handled asynchronously. Poll thread to complete. */
poll_threads();
rc = spdk_bdev_open_ext("bdev", false, bdev_ut_event_cb, NULL, &desc[1]);
CU_ASSERT(rc == -ENODEV);
@ -3850,6 +3866,87 @@ bdev_open_ext(void)
poll_threads();
}
/*
 * Verify the asynchronous bdev unregister path against spdk_bdev_open_ext():
 * every open descriptor must receive SPDK_BDEV_EVENT_REMOVE, and the
 * unregister completion callback must not fire until the last descriptor
 * has been closed.
 */
static void
bdev_open_ext_unregister(void)
{
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc1 = NULL;
	struct spdk_bdev_desc *desc2 = NULL;
	struct spdk_bdev_desc *desc3 = NULL;
	struct spdk_bdev_desc *desc4 = NULL;
	int rc = 0;

	bdev = allocate_bdev("bdev");

	/* Opening without an event callback must be rejected. */
	rc = spdk_bdev_open_ext("bdev", true, NULL, NULL, &desc1);
	CU_ASSERT_EQUAL(rc, -EINVAL);

	/* Open four descriptors, each with its own event callback. */
	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb1, &desc1, &desc1);
	CU_ASSERT_EQUAL(rc, 0);

	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb2, &desc2, &desc2);
	CU_ASSERT_EQUAL(rc, 0);

	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb3, &desc3, &desc3);
	CU_ASSERT_EQUAL(rc, 0);

	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb4, &desc4, &desc4);
	CU_ASSERT_EQUAL(rc, 0);

	/* Sentinel values so we can tell which callbacks actually ran. */
	g_event_type1 = 0xFF;
	g_event_type2 = 0xFF;
	g_event_type3 = 0xFF;
	g_event_type4 = 0xFF;
	g_unregister_arg = NULL;
	g_unregister_rc = -1;

	/* Simulate hot-unplug by unregistering bdev */
	spdk_bdev_unregister(bdev, bdev_unregister_cb, (void *)0x12345678);

	/*
	 * Unregister is handled asynchronously and event callback
	 * (i.e., above bdev_open_cbN) will be called.
	 * For bdev_open_cb3 and bdev_open_cb4, it is intended to not
	 * close the desc3 and desc4 so that the bdev is not closed.
	 */
	poll_threads();

	/* Check if correct events have been triggered in event callback fn */
	CU_ASSERT_EQUAL(g_event_type1, SPDK_BDEV_EVENT_REMOVE);
	CU_ASSERT_EQUAL(g_event_type2, SPDK_BDEV_EVENT_REMOVE);
	CU_ASSERT_EQUAL(g_event_type3, SPDK_BDEV_EVENT_REMOVE);
	CU_ASSERT_EQUAL(g_event_type4, SPDK_BDEV_EVENT_REMOVE);

	/* Check that unregister callback is delayed */
	CU_ASSERT(g_unregister_arg == NULL);
	CU_ASSERT(g_unregister_rc == -1);

	/*
	 * Explicitly close desc3. As desc4 is still opened there, the
	 * unregister callback is still delayed to execute.
	 */
	spdk_bdev_close(desc3);
	CU_ASSERT(g_unregister_arg == NULL);
	CU_ASSERT(g_unregister_rc == -1);

	/*
	 * Explicitly close desc4 to trigger the ongoing bdev unregister
	 * operation after last desc is closed.
	 */
	spdk_bdev_close(desc4);

	/* Poll the thread for the async unregister operation */
	poll_threads();

	/* Check that unregister callback is executed */
	CU_ASSERT(g_unregister_arg == (void *)0x12345678);
	CU_ASSERT(g_unregister_rc == 0);

	free_bdev(bdev);
	poll_threads();
}
struct timeout_io_cb_arg {
struct iovec iov;
uint8_t type;
@ -5350,6 +5447,7 @@ main(int argc, char **argv)
CU_ADD_TEST(suite, bdev_open_while_hotremove);
CU_ADD_TEST(suite, bdev_close_while_hotremove);
CU_ADD_TEST(suite, bdev_open_ext);
CU_ADD_TEST(suite, bdev_open_ext_unregister);
CU_ADD_TEST(suite, bdev_set_io_timeout);
CU_ADD_TEST(suite, lba_range_overlap);
CU_ADD_TEST(suite, lock_lba_range_check_ranges);

View File

@ -280,6 +280,8 @@ unregister_bdev(struct ut_bdev *ut_bdev)
/* Handle any deferred messages. */
poll_threads();
spdk_bdev_unregister(&ut_bdev->bdev, NULL, NULL);
/* Handle the async bdev unregister. */
poll_threads();
}
static void
@ -1218,6 +1220,59 @@ enomem_multi_bdev(void)
teardown_test();
}
/*
 * Verify that unregistering a bdev aborts I/Os parked on the shared
 * resource's nomem_io queue, while I/Os already submitted to the
 * backing target are left to complete normally.
 */
static void
enomem_multi_bdev_unregister(void)
{
	struct spdk_io_channel *io_ch;
	struct spdk_bdev_channel *bdev_ch;
	struct spdk_bdev_shared_resource *shared_resource;
	struct ut_bdev_channel *ut_ch;
	const uint32_t IO_ARRAY_SIZE = 64;
	const uint32_t AVAIL = 20;
	enum spdk_bdev_io_status status[IO_ARRAY_SIZE];
	uint32_t i;
	int rc;

	setup_test();

	set_thread(0);
	io_ch = spdk_bdev_get_io_channel(g_desc);
	bdev_ch = spdk_io_channel_get_ctx(io_ch);
	shared_resource = bdev_ch->shared_resource;
	ut_ch = spdk_io_channel_get_ctx(bdev_ch->channel);
	/* Cap the stub target so exactly AVAIL I/Os can be outstanding. */
	ut_ch->avail_cnt = AVAIL;

	/* Saturate io_target through the bdev. */
	for (i = 0; i < AVAIL; i++) {
		status[i] = SPDK_BDEV_IO_STATUS_PENDING;
		rc = spdk_bdev_read_blocks(g_desc, io_ch, NULL, 0, 1, enomem_done, &status[i]);
		CU_ASSERT(rc == 0);
	}
	CU_ASSERT(TAILQ_EMPTY(&shared_resource->nomem_io));

	/*
	 * Now submit I/O through the bdev. This should fail with ENOMEM
	 * and then go onto the nomem_io list.
	 */
	status[AVAIL] = SPDK_BDEV_IO_STATUS_PENDING;
	rc = spdk_bdev_read_blocks(g_desc, io_ch, NULL, 0, 1, enomem_done, &status[AVAIL]);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(!TAILQ_EMPTY(&shared_resource->nomem_io));

	/* Unregister the bdev to abort the IOs from nomem_io queue. */
	unregister_bdev(&g_bdev);

	/* The queued I/O was aborted; the AVAIL in-flight I/Os were not. */
	CU_ASSERT(status[AVAIL] == SPDK_BDEV_IO_STATUS_FAILED);
	SPDK_CU_ASSERT_FATAL(TAILQ_EMPTY(&shared_resource->nomem_io));
	SPDK_CU_ASSERT_FATAL(shared_resource->io_outstanding == AVAIL);

	/* Complete the bdev's I/O. */
	stub_complete_io(g_bdev.io_target, AVAIL);
	SPDK_CU_ASSERT_FATAL(shared_resource->io_outstanding == 0);

	spdk_put_io_channel(io_ch);
	poll_threads();
	teardown_test();
}
static void
enomem_multi_io_target(void)
@ -2070,6 +2125,7 @@ main(int argc, char **argv)
CU_ADD_TEST(suite, io_during_qos_reset);
CU_ADD_TEST(suite, enomem);
CU_ADD_TEST(suite, enomem_multi_bdev);
CU_ADD_TEST(suite, enomem_multi_bdev_unregister);
CU_ADD_TEST(suite, enomem_multi_io_target);
CU_ADD_TEST(suite, qos_dynamic_enable);
CU_ADD_TEST(suite, bdev_histograms_mt);