From 7cd20dd3f526292dc01e7256865013474904391f Mon Sep 17 00:00:00 2001 From: Shuhei Matsumoto Date: Wed, 3 Jun 2020 10:11:28 +0900 Subject: [PATCH] lib/bdev: Add spdk_bdev_abort API Add spdk_bdev_abort function as a new public API. This goes all the way down to the bdev driver module and attempts to abort all I/Os which has bio_cb_arg as its callback argument. We can separate when only a single I/O has bio_cb_arg and when multiple I/Os have bio_cb_arg, but unify both by using parent - children I/O relationship. To avoid confusion, return matched_ios by _bdev_abort() and store it into split_outstanding by the caller. Exclude any I/O submitted after this abort command because the same cb_arg may be used by all I/Os and abort may never complete. bdev_io needs to have both bio_cb_arg and bio_to_abort because bio_cb_arg is used to continue abort processing when it is stopped due to the capacity of bdev_io pool, and bio_to_abort is used to pass it to the underlying bdev module at submission. Parent I/O is not submitted directly, and is only used in the generic bdev layer, and parent I/O's bdev_io uses bio_cb_arg. Hence add bio_cb_arg to bdev structure and add bio_to_abort to abort structure. In the meantime of abort operation, target I/Os may be completed. Hence check if the target I/O still exists at completion, and set the completion status to false only if it still exists. Upon completion of this, i.e., this returned zero, the status SPDK_BDEV_IO_STATUS_SUCCESS indicates all I/Os were successfully aborted, or the status SPDK_BDEV_IO_STATUS_FAILED indicates any I/O was failed to abort by any reason. spdk_bdev_abort() does not support aborting abort or reset request due to the complexity for now. Following patches will support I/O split case. Add unit tests together to cover the basic paths. Besides, ABI compatibility check required us to bump up SO version of a few libraries or modules. Bump up SO version of blob bdev module simply because it does not have any out-of-tree consumer, and suppress bumping up SO version of lvol library because the affected struct spdk_lvol is not part of public APIs. Signed-off-by: Shuhei Matsumoto Change-Id: I515da688503557615d491bf0bfb36322ce37df08 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/2014 Tested-by: SPDK CI Jenkins Community-CI: Mellanox Build Bot Community-CI: Broadcom CI Reviewed-by: Jim Harris Reviewed-by: Aleksey Marchuk Reviewed-by: Ben Walker --- include/spdk/bdev.h | 32 +++++ include/spdk/bdev_module.h | 11 ++ lib/bdev/bdev.c | 211 ++++++++++++++++++++++++++++ lib/bdev/spdk_bdev.map | 1 + module/blob/bdev/Makefile | 2 +- test/make/check_so_deps.sh | 2 + test/unit/lib/bdev/bdev.c/bdev_ut.c | 118 ++++++++++++++++ 7 files changed, 376 insertions(+), 1 deletion(-) diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h index 6003fa448..00a4a9883 100644 --- a/include/spdk/bdev.h +++ b/include/spdk/bdev.h @@ -139,6 +139,7 @@ enum spdk_bdev_io_type { SPDK_BDEV_IO_TYPE_ZONE_APPEND, SPDK_BDEV_IO_TYPE_COMPARE, SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE, + SPDK_BDEV_IO_TYPE_ABORT, SPDK_BDEV_NUM_IO_TYPES /* Keep last */ }; @@ -1385,6 +1386,37 @@ int spdk_bdev_flush_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel * int spdk_bdev_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, spdk_bdev_io_completion_cb cb, void *cb_arg); +/** + * Submit abort requests to abort all I/Os which has bio_cb_arg as its callback + * context to the bdev on the given channel. + * + * This goes all the way down to the bdev driver module and attempts to abort all + * I/Os which have bio_cb_arg as their callback context if they exist. This is a best + * effort command. Upon completion of this, the status SPDK_BDEV_IO_STATUS_SUCCESS + * indicates all the I/Os were successfully aborted, or the status + * SPDK_BDEV_IO_STATUS_FAILED indicates any I/O was failed to abort for any reason + * or no I/O which has bio_cb_arg as its callback context was found. + * + * \ingroup bdev_io_submit functions + * + * \param desc Block device descriptor. + * \param ch The I/O channel which the I/Os to be aborted are associated with. + * \param bio_cb_arg Callback argument for the outstanding requests which this + * function attempts to abort. + * \param cb Called when the abort request is completed. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always be called (even if the + * request ultimately failed). Return negated errno on failure, in which case the + * callback will not be called. + * * -EINVAL - bio_cb_arg was not specified. + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated. + * * -ENOTSUP - the bdev does not support abort. + */ +int spdk_bdev_abort(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *bio_cb_arg, + spdk_bdev_io_completion_cb cb, void *cb_arg); + /** * Submit an NVMe Admin command to the bdev. This passes directly through * the block layer to the device. Support for NVMe passthru is optional, diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h index 9b74ad9d3..2ca298aef 100644 --- a/include/spdk/bdev_module.h +++ b/include/spdk/bdev_module.h @@ -533,11 +533,22 @@ struct spdk_bdev_io { /** True if this request is in the 'start' phase of zcopy. False if in 'end'. */ uint8_t start : 1; } zcopy; + + struct { + /** The callback argument for the outstanding request which this abort + * attempts to cancel. + */ + void *bio_cb_arg; + } abort; } bdev; struct { /** Channel reference held while messages for this reset are in progress. */ struct spdk_io_channel *ch_ref; } reset; + struct { + /** The outstanding request matching bio_cb_arg which this abort attempts to cancel. */ + struct spdk_bdev_io *bio_to_abort; + } abort; struct { /* The NVMe command to execute */ struct spdk_nvme_cmd cmd; diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 022fe4222..e68236811 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -370,6 +370,8 @@ bdev_unlock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch, uint64_t offset, uint64_t length, lock_range_cb cb_fn, void *cb_arg); +static inline void bdev_io_complete(void *ctx); + void spdk_bdev_get_opts(struct spdk_bdev_opts *opts) { @@ -4410,6 +4412,215 @@ spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *desc, struct spdk_io_channe return 0; } +static void bdev_abort_retry(void *ctx); + +static void +bdev_abort_io_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_channel *channel = bdev_io->internal.ch; + struct spdk_bdev_io *parent_io = cb_arg; + struct spdk_bdev_io *bio_to_abort, *tmp_io; + + bio_to_abort = bdev_io->u.abort.bio_to_abort; + + spdk_bdev_free_io(bdev_io); + + if (!success) { + /* Check if the target I/O completed in the meantime. */ + TAILQ_FOREACH(tmp_io, &channel->io_submitted, internal.ch_link) { + if (tmp_io == bio_to_abort) { + break; + } + } + + /* If the target I/O still exists, set the parent to failed. */ + if (tmp_io != NULL) { + parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED; + } + } + + parent_io->u.bdev.split_outstanding--; + if (parent_io->u.bdev.split_outstanding == 0) { + if (parent_io->internal.status == SPDK_BDEV_IO_STATUS_NOMEM) { + bdev_abort_retry(parent_io); + } else { + bdev_io_complete(parent_io); + } + } +} + +static int +bdev_abort_io(struct spdk_bdev_desc *desc, struct spdk_bdev_channel *channel, + struct spdk_bdev_io *bio_to_abort, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc); + struct spdk_bdev_io *bdev_io; + + if (bio_to_abort->type == SPDK_BDEV_IO_TYPE_ABORT || + bio_to_abort->type == SPDK_BDEV_IO_TYPE_RESET) { + /* TODO: Abort reset or abort request. */ + return -ENOTSUP; + } + + if (bdev->split_on_optimal_io_boundary && bdev_io_should_split(bio_to_abort)) { + return -ENOTSUP; + } + + bdev_io = bdev_channel_get_io(channel); + if (bdev_io == NULL) { + return -ENOMEM; + } + + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; + bdev_io->type = SPDK_BDEV_IO_TYPE_ABORT; + bdev_io_init(bdev_io, bdev, cb_arg, cb); + + bdev_io->u.abort.bio_to_abort = bio_to_abort; + + /* Submit the abort request to the underlying bdev module. */ + bdev_io_submit(bdev_io); + + return 0; +} + +static uint32_t +_bdev_abort(struct spdk_bdev_io *parent_io) +{ + struct spdk_bdev_desc *desc = parent_io->internal.desc; + struct spdk_bdev_channel *channel = parent_io->internal.ch; + void *bio_cb_arg; + struct spdk_bdev_io *bio_to_abort; + uint32_t matched_ios; + int rc; + + bio_cb_arg = parent_io->u.bdev.abort.bio_cb_arg; + + /* matched_ios is returned and will be kept by the caller. + * + * This funcion will be used for two cases, 1) the same cb_arg is used for + * multiple I/Os, 2) a single large I/O is split into smaller ones. + * Incrementing split_outstanding directly here may confuse readers especially + * for the 1st case. + * + * Completion of I/O abort is processed after stack unwinding. Hence this trick + * works as expected. + */ + matched_ios = 0; + parent_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS; + + TAILQ_FOREACH(bio_to_abort, &channel->io_submitted, internal.ch_link) { + if (bio_to_abort->internal.caller_ctx != bio_cb_arg) { + continue; + } + + if (bio_to_abort->internal.submit_tsc > parent_io->internal.submit_tsc) { + /* Any I/O which was submitted after this abort command should be excluded. */ + continue; + } + + rc = bdev_abort_io(desc, channel, bio_to_abort, bdev_abort_io_done, parent_io); + if (rc != 0) { + if (rc == -ENOMEM) { + parent_io->internal.status = SPDK_BDEV_IO_STATUS_NOMEM; + } else { + parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED; + } + break; + } + matched_ios++; + } + + return matched_ios; +} + +static void +bdev_abort_retry(void *ctx) +{ + struct spdk_bdev_io *parent_io = ctx; + uint32_t matched_ios; + + matched_ios = _bdev_abort(parent_io); + + if (matched_ios == 0) { + if (parent_io->internal.status == SPDK_BDEV_IO_STATUS_NOMEM) { + bdev_queue_io_wait_with_cb(parent_io, bdev_abort_retry); + } else { + /* For retry, the case that no target I/O was found is success + * because it means target I/Os completed in the meantime. + */ + bdev_io_complete(parent_io); + } + return; + } + + /* Use split_outstanding to manage the progress of aborting I/Os. */ + parent_io->u.bdev.split_outstanding = matched_ios; +} + +static void +bdev_abort(struct spdk_bdev_io *parent_io) +{ + uint32_t matched_ios; + + matched_ios = _bdev_abort(parent_io); + + if (matched_ios == 0) { + if (parent_io->internal.status == SPDK_BDEV_IO_STATUS_NOMEM) { + bdev_queue_io_wait_with_cb(parent_io, bdev_abort_retry); + } else { + /* The case the no target I/O was found is failure. */ + parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED; + bdev_io_complete(parent_io); + } + return; + } + + /* Use split_outstanding to manage the progress of aborting I/Os. */ + parent_io->u.bdev.split_outstanding = matched_ios; +} + +int +spdk_bdev_abort(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *bio_cb_arg, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc); + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + struct spdk_bdev_io *bdev_io; + + if (bio_cb_arg == NULL) { + return -EINVAL; + } + + if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ABORT)) { + return -ENOTSUP; + } + + bdev_io = bdev_channel_get_io(channel); + if (bdev_io == NULL) { + return -ENOMEM; + } + + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; + bdev_io->internal.submit_tsc = spdk_get_ticks(); + bdev_io->type = SPDK_BDEV_IO_TYPE_ABORT; + bdev_io_init(bdev_io, bdev, cb_arg, cb); + + bdev_io->u.bdev.abort.bio_cb_arg = bio_cb_arg; + + /* Parent abort request is not submitted directly, but to manage its execution, + * add it to the submitted list here. + */ + TAILQ_INSERT_TAIL(&channel->io_submitted, bdev_io, internal.ch_link); + + bdev_abort(bdev_io); + + return 0; +} + int spdk_bdev_queue_io_wait(struct spdk_bdev *bdev, struct spdk_io_channel *ch, struct spdk_bdev_io_wait_entry *entry) diff --git a/lib/bdev/spdk_bdev.map b/lib/bdev/spdk_bdev.map index 7cbbccc26..17231be62 100644 --- a/lib/bdev/spdk_bdev.map +++ b/lib/bdev/spdk_bdev.map @@ -73,6 +73,7 @@ spdk_bdev_flush; spdk_bdev_flush_blocks; spdk_bdev_reset; + spdk_bdev_abort; spdk_bdev_nvme_admin_passthru; spdk_bdev_nvme_io_passthru; spdk_bdev_nvme_io_passthru_md; diff --git a/module/blob/bdev/Makefile b/module/blob/bdev/Makefile index c0ca6c6f6..d40f6afb8 100644 --- a/module/blob/bdev/Makefile +++ b/module/blob/bdev/Makefile @@ -34,7 +34,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -SO_VER := 2 +SO_VER := 3 SO_MINOR := 0 C_SRCS = blob_bdev.c diff --git a/test/make/check_so_deps.sh b/test/make/check_so_deps.sh index 1f328525c..04255d241 100755 --- a/test/make/check_so_deps.sh +++ b/test/make/check_so_deps.sh @@ -276,6 +276,8 @@ function confirm_abi_deps() { name = spdk_vhost_scsi_device_backend [suppress_type] name = spdk_net_impl +[suppress_type] + name = spdk_lvol EOF for object in "$libdir"/libspdk_*.so; do diff --git a/test/unit/lib/bdev/bdev.c/bdev_ut.c b/test/unit/lib/bdev/bdev.c/bdev_ut.c index aec7789b8..8f850157f 100644 --- a/test/unit/lib/bdev/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/bdev.c/bdev_ut.c @@ -120,6 +120,8 @@ static void *g_compare_read_buf; static uint32_t g_compare_read_buf_len; static void *g_compare_write_buf; static uint32_t g_compare_write_buf_len; +static bool g_abort_done; +static enum spdk_bdev_io_status g_abort_status; static struct ut_expected_io * ut_alloc_expected_io(uint8_t type, uint64_t offset, uint64_t length, int iovcnt) @@ -150,6 +152,7 @@ stub_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io) struct bdev_ut_channel *ch = spdk_io_channel_get_ctx(_ch); struct ut_expected_io *expected_io; struct iovec *iov, *expected_iov; + struct spdk_bdev_io *bio_to_abort; int i; g_bdev_io = bdev_io; @@ -180,6 +183,19 @@ stub_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io) } } + if (bdev_io->type == SPDK_BDEV_IO_TYPE_ABORT) { + if (g_io_exp_status == SPDK_BDEV_IO_STATUS_SUCCESS) { + TAILQ_FOREACH(bio_to_abort, &ch->outstanding_io, module_link) { + if (bio_to_abort == bdev_io->u.abort.bio_to_abort) { + TAILQ_REMOVE(&ch->outstanding_io, bio_to_abort, module_link); + ch->outstanding_io_count--; + spdk_bdev_io_complete(bio_to_abort, SPDK_BDEV_IO_STATUS_FAILED); + break; + } + } + } + } + TAILQ_INSERT_TAIL(&ch->outstanding_io, bdev_io, module_link); ch->outstanding_io_count++; @@ -280,6 +296,7 @@ static bool g_io_types_supported[SPDK_BDEV_NUM_IO_TYPES] = { [SPDK_BDEV_IO_TYPE_NVME_IO_MD] = true, [SPDK_BDEV_IO_TYPE_WRITE_ZEROES] = true, [SPDK_BDEV_IO_TYPE_ZCOPY] = true, + [SPDK_BDEV_IO_TYPE_ABORT] = true, }; static void @@ -3117,6 +3134,106 @@ lock_lba_range_overlapped(void) poll_threads(); } +static void +abort_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + g_abort_done = true; + g_abort_status = bdev_io->internal.status; + spdk_bdev_free_io(bdev_io); +} + +static void +bdev_io_abort(void) +{ + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc = NULL; + struct spdk_io_channel *io_ch; + struct spdk_bdev_opts bdev_opts = { + .bdev_io_pool_size = 4, + .bdev_io_cache_size = 2, + }; + uint64_t io_ctx1 = 0, io_ctx2 = 0; + int rc; + + rc = spdk_bdev_set_opts(&bdev_opts); + CU_ASSERT(rc == 0); + spdk_bdev_initialize(bdev_init_cb, NULL); + + bdev = allocate_bdev("bdev0"); + + rc = spdk_bdev_open(bdev, true, NULL, NULL, &desc); + CU_ASSERT(rc == 0); + CU_ASSERT(desc != NULL); + io_ch = spdk_bdev_get_io_channel(desc); + CU_ASSERT(io_ch != NULL); + + g_abort_done = false; + + ut_enable_io_type(SPDK_BDEV_IO_TYPE_ABORT, false); + + rc = spdk_bdev_abort(desc, io_ch, &io_ctx1, abort_done, NULL); + CU_ASSERT(rc == -ENOTSUP); + + ut_enable_io_type(SPDK_BDEV_IO_TYPE_ABORT, true); + + rc = spdk_bdev_abort(desc, io_ch, &io_ctx2, abort_done, NULL); + CU_ASSERT(rc == 0); + CU_ASSERT(g_abort_done == true); + CU_ASSERT(g_abort_status == SPDK_BDEV_IO_STATUS_FAILED); + + /* Test the case that the target I/O was successfully aborted. */ + g_io_done = false; + + rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, &io_ctx1); + CU_ASSERT(rc == 0); + CU_ASSERT(g_io_done == false); + + g_abort_done = false; + g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS; + + rc = spdk_bdev_abort(desc, io_ch, &io_ctx1, abort_done, NULL); + CU_ASSERT(rc == 0); + CU_ASSERT(g_io_done == true); + CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED); + stub_complete_io(1); + CU_ASSERT(g_abort_done == true); + CU_ASSERT(g_abort_status == SPDK_BDEV_IO_STATUS_SUCCESS); + + /* Test the case that the target I/O was not aborted because it completed + * in the middle of execution of the abort. + */ + g_io_done = false; + + rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, &io_ctx1); + CU_ASSERT(rc == 0); + CU_ASSERT(g_io_done == false); + + g_abort_done = false; + g_io_exp_status = SPDK_BDEV_IO_STATUS_FAILED; + + rc = spdk_bdev_abort(desc, io_ch, &io_ctx1, abort_done, NULL); + CU_ASSERT(rc == 0); + CU_ASSERT(g_io_done == false); + + g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS; + stub_complete_io(1); + CU_ASSERT(g_io_done == true); + CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS); + + g_io_exp_status = SPDK_BDEV_IO_STATUS_FAILED; + stub_complete_io(1); + CU_ASSERT(g_abort_done == true); + CU_ASSERT(g_abort_status == SPDK_BDEV_IO_STATUS_SUCCESS); + + g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS; + + spdk_put_io_channel(io_ch); + spdk_bdev_close(desc); + free_bdev(bdev); + spdk_bdev_finish(bdev_fini_cb, NULL); + poll_threads(); +} + int main(int argc, char **argv) { @@ -3153,6 +3270,7 @@ main(int argc, char **argv) CU_ADD_TEST(suite, lock_lba_range_check_ranges); CU_ADD_TEST(suite, lock_lba_range_with_io_outstanding); CU_ADD_TEST(suite, lock_lba_range_overlapped); + CU_ADD_TEST(suite, bdev_io_abort); allocate_threads(1); set_thread(0);