From ab29d2ce5de614bd44cfbcbc1a794a0adcdda93c Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Tue, 12 Sep 2017 09:34:55 -0700 Subject: [PATCH] bdev: improve handling of channel deletion with queued resets Upper layers are not supposed to put an I/O channel if there are still I/O outstanding. This should apply to resets as well. To better detect this case, do not remove the reset from the channel's queued_reset list until it is ready to be submitted to the bdev module. This ensures: 1) We can detect if a channel is put with a reset outstanding. 2) We do not access freed memory, when the channel is destroyed before the reset message can submit the reset I/O. 3) Abort the queued reset if a channel is destroyed. Signed-off-by: Jim Harris Change-Id: I0c03eee8b3642155c19c2996e25955baac22d406 Reviewed-on: https://review.gerrithub.io/378198 Tested-by: SPDK Automated Test System Reviewed-by: Changpeng Liu Reviewed-by: Dariusz Stojaczyk Reviewed-by: Daniel Verkamp --- include/spdk_internal/bdev.h | 4 ++ lib/bdev/bdev.c | 28 +++++++--- test/unit/lib/bdev/mt/bdev.c/bdev_ut.c | 75 ++++++++++++++++++++++++-- 3 files changed, 96 insertions(+), 11 deletions(-) diff --git a/include/spdk_internal/bdev.h b/include/spdk_internal/bdev.h index 64ec21284..bbad1caf6 100644 --- a/include/spdk_internal/bdev.h +++ b/include/spdk_internal/bdev.h @@ -288,6 +288,10 @@ struct spdk_bdev_io { /** Starting offset (in blocks) of the bdev for this I/O. */ uint64_t offset_blocks; } bdev; + struct { + /** Channel reference held while messages for this reset are in progress. */ + struct spdk_io_channel *ch_ref; + } reset; struct { /* The NVMe command to execute */ struct spdk_nvme_cmd cmd; diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index d516aa5df..d3ec9589a 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -1147,8 +1147,11 @@ spdk_bdev_flush_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, static void _spdk_bdev_reset_dev(void *io_device, void *ctx) { - struct spdk_bdev_io *bdev_io = ctx; + struct spdk_bdev_channel *ch = ctx; + struct spdk_bdev_io *bdev_io; + bdev_io = TAILQ_FIRST(&ch->queued_resets); + TAILQ_REMOVE(&ch->queued_resets, bdev_io, link); spdk_bdev_io_submit_reset(bdev_io); } @@ -1169,26 +1172,31 @@ _spdk_bdev_reset_abort_channel(void *io_device, struct spdk_io_channel *ch, static void _spdk_bdev_start_reset(void *ctx) { - struct spdk_bdev_io *bdev_io = ctx; + struct spdk_bdev_channel *ch = ctx; - spdk_for_each_channel(bdev_io->bdev, _spdk_bdev_reset_abort_channel, - bdev_io, _spdk_bdev_reset_dev); + spdk_for_each_channel(ch->bdev, _spdk_bdev_reset_abort_channel, + ch, _spdk_bdev_reset_dev); } static void _spdk_bdev_channel_start_reset(struct spdk_bdev_channel *ch) { struct spdk_bdev *bdev = ch->bdev; - struct spdk_bdev_io *reset_io; assert(!TAILQ_EMPTY(&ch->queued_resets)); pthread_mutex_lock(&bdev->mutex); if (!bdev->reset_in_progress) { bdev->reset_in_progress = true; - reset_io = TAILQ_FIRST(&ch->queued_resets); - TAILQ_REMOVE(&ch->queued_resets, reset_io, link); - _spdk_bdev_start_reset(reset_io); + /* + * Take a channel reference for the target bdev for the life of this + * reset. This guards against the channel getting destroyed while + * spdk_for_each_channel() calls related to this reset IO are in + * progress. We will release the reference when this reset is + * completed. + */ + TAILQ_FIRST(&ch->queued_resets)->u.reset.ch_ref = spdk_get_io_channel(bdev); + _spdk_bdev_start_reset(ch); } pthread_mutex_unlock(&bdev->mutex); } @@ -1219,6 +1227,7 @@ spdk_bdev_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, bdev_io->ch = channel; bdev_io->type = SPDK_BDEV_IO_TYPE_RESET; + bdev_io->u.reset.ch_ref = NULL; spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); pthread_mutex_lock(&bdev->mutex); @@ -1350,6 +1359,9 @@ spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status sta pthread_mutex_lock(&bdev_io->bdev->mutex); bdev_io->bdev->reset_in_progress = false; pthread_mutex_unlock(&bdev_io->bdev->mutex); + if (bdev_io->u.reset.ch_ref != NULL) { + spdk_put_io_channel(bdev_io->u.reset.ch_ref); + } spdk_for_each_channel(bdev_io->bdev, _spdk_bdev_complete_reset_channel, NULL, NULL); } else { assert(bdev_io->ch->io_outstanding > 0); diff --git a/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c b/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c index 97e536eb5..6bcd2ca8d 100644 --- a/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/mt/bdev.c/bdev_ut.c @@ -51,12 +51,19 @@ struct ut_bdev { int io_target; }; +struct ut_bdev_channel { + TAILQ_HEAD(, spdk_bdev_io) outstanding_io; +}; + struct ut_bdev g_bdev; struct spdk_bdev_desc *g_desc; static int stub_create_ch(void *io_device, void *ctx_buf) { + struct ut_bdev_channel *ch = ctx_buf; + + TAILQ_INIT(&ch->outstanding_io); return 0; } @@ -77,9 +84,34 @@ stub_destruct(void *ctx) return 0; } +static void +stub_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io) +{ + struct ut_bdev_channel *ch = spdk_io_channel_get_ctx(_ch); + + TAILQ_INSERT_TAIL(&ch->outstanding_io, bdev_io, module_link); +} + +static void +stub_complete_io(void) +{ + struct spdk_io_channel *_ch = spdk_get_io_channel(&g_bdev.io_target); + struct ut_bdev_channel *ch = spdk_io_channel_get_ctx(_ch); + struct spdk_bdev_io *io; + + while (!TAILQ_EMPTY(&ch->outstanding_io)) { + io = TAILQ_FIRST(&ch->outstanding_io); + TAILQ_REMOVE(&ch->outstanding_io, io, module_link); + spdk_bdev_io_complete(io, SPDK_BDEV_IO_STATUS_SUCCESS); + } + + spdk_put_io_channel(_ch); +} + static struct spdk_bdev_fn_table fn_table = { .get_io_channel = stub_get_io_channel, .destruct = stub_destruct, + .submit_request = stub_submit_request, }; static int @@ -102,7 +134,8 @@ register_bdev(void) g_bdev.bdev.fn_table = &fn_table; g_bdev.bdev.module = SPDK_GET_BDEV_MODULE(bdev_ut); - spdk_io_device_register(&g_bdev.io_target, stub_create_ch, stub_destroy_ch, 0); + spdk_io_device_register(&g_bdev.io_target, stub_create_ch, stub_destroy_ch, + sizeof(struct ut_bdev_channel)); spdk_bdev_register(&g_bdev.bdev); } @@ -145,7 +178,7 @@ teardown_test(void) } static void -test1(void) +basic(void) { setup_test(); @@ -157,6 +190,41 @@ test1(void) teardown_test(); } +static void +reset_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + bool *done = cb_arg; + + CU_ASSERT(success == true); + *done = true; + spdk_bdev_free_io(bdev_io); +} + +static void +put_channel_during_reset(void) +{ + struct spdk_io_channel *io_ch; + bool done = false; + + setup_test(); + + set_thread(0); + io_ch = spdk_bdev_get_io_channel(g_desc); + CU_ASSERT(io_ch != NULL); + + /* + * Start a reset, but then put the I/O channel before + * the deferred messages for the reset get a chance to + * execute. + */ + spdk_bdev_reset(g_desc, io_ch, reset_done, &done); + spdk_put_io_channel(io_ch); + poll_threads(); + stub_complete_io(); + + teardown_test(); +} + int main(int argc, char **argv) { @@ -174,7 +242,8 @@ main(int argc, char **argv) } if ( - CU_add_test(suite, "test1", test1) == NULL + CU_add_test(suite, "basic", basic) == NULL || + CU_add_test(suite, "put_channel_during_reset", put_channel_during_reset) == NULL ) { CU_cleanup_registry(); return CU_get_error();