From d14afd5000f77b8bcf093951b99bc47979f244e7 Mon Sep 17 00:00:00 2001 From: Evgeniy Kochetov Date: Mon, 22 Aug 2022 12:14:48 +0300 Subject: [PATCH] bdev: Add copy IO type Copy operation is defined by source and destination LBAs and LBA count to copy. For destination LBA and LBA count we reuse existing fields `offset_blocks` and `num_blocks` in `struct spdk_bdev_io`. For source LBA, a new field `src_offset_blocks` was added. `spdk_bdev_get_max_copy()` function can be used to retrieve maximum possible unsplit copy size. A zero value means unlimited. It is allowed to submit a larger copy size, but it will be split into several bdev IOs. Signed-off-by: Evgeniy Kochetov Change-Id: I2ad56294b6c062595c026ffcf9b435f0100d3d7e Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14344 Tested-by: SPDK CI Jenkins Reviewed-by: Ben Walker Reviewed-by: Aleksey Marchuk Reviewed-by: Shuhei Matsumoto Community-CI: Mellanox Build Bot --- CHANGELOG.md | 2 + include/spdk/bdev.h | 34 ++++++++++ include/spdk/bdev_module.h | 8 +++ lib/bdev/Makefile | 4 +- lib/bdev/bdev.c | 124 ++++++++++++++++++++++++++++++++++++- lib/bdev/spdk_bdev.map | 2 + lib/ftl/Makefile | 2 +- 7 files changed, 172 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e06c5b84..d0a9d58b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,8 @@ associated function pointers were added to iterate each channel of the required The RPC `bdev_get_iostat` now allows a user to query the per channel IO statistics for required bdev. +New `spdk_bdev_copy_blocks` and `spdk_bdev_get_max_copy` APIs to support copy commands. 
+ ## v22.09 ### accel diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h index f8087626c..ec247255c 100644 --- a/include/spdk/bdev.h +++ b/include/spdk/bdev.h @@ -119,6 +119,7 @@ enum spdk_bdev_io_type { SPDK_BDEV_IO_TYPE_ABORT, SPDK_BDEV_IO_TYPE_SEEK_HOLE, SPDK_BDEV_IO_TYPE_SEEK_DATA, + SPDK_BDEV_IO_TYPE_COPY, SPDK_BDEV_NUM_IO_TYPES /* Keep last */ }; @@ -668,6 +669,14 @@ bool spdk_bdev_is_dif_head_of_md(const struct spdk_bdev *bdev); bool spdk_bdev_is_dif_check_enabled(const struct spdk_bdev *bdev, enum spdk_dif_check_type check_type); +/** + * Get block device max copy size. + * + * \param bdev Block device to query. + * \return Max copy size for this bdev in blocks. 0 means unlimited. + */ +uint32_t spdk_bdev_get_max_copy(const struct spdk_bdev *bdev); + /** * Get the most recently measured queue depth from a bdev. * @@ -1709,6 +1718,31 @@ int spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *bdev_desc, void *buf, size_t nbytes, void *md_buf, size_t md_len, spdk_bdev_io_completion_cb cb, void *cb_arg); +/** + * Submit a copy request to the block device. + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param dst_offset_blocks The destination offset, in blocks, from the start of the block device. + * \param src_offset_blocks The source offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to copy. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. 
+ * * -EINVAL - dst_offset_blocks, src_offset_blocks and/or num_blocks are out of range + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + * * -ENOTSUP - copy operation is not supported + */ +int spdk_bdev_copy_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t dst_offset_blocks, uint64_t src_offset_blocks, + uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg); + /** * Free an I/O request. This should only be called after the completion callback * for the I/O has been called and notifies the bdev layer that memory may now diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h index 1cacc7822..8e13e0b39 100644 --- a/include/spdk/bdev_module.h +++ b/include/spdk/bdev_module.h @@ -374,6 +374,9 @@ struct spdk_bdev { /* Maximum write zeroes in unit of logical block */ uint32_t max_write_zeroes; + /* Maximum copy size in unit of logical block */ + uint32_t max_copy; + /** * UUID for this bdev. * @@ -663,6 +666,11 @@ struct spdk_bdev_io { /** The offset of next data/hole. */ uint64_t offset; } seek; + + struct { + /** Starting source offset (in blocks) of the bdev for copy I/O. */ + uint64_t src_offset_blocks; + } copy; } bdev; struct { /** Channel reference held while messages for this reset are in progress. */ diff --git a/lib/bdev/Makefile b/lib/bdev/Makefile index c7c12e3b2..9e7779964 100644 --- a/lib/bdev/Makefile +++ b/lib/bdev/Makefile @@ -6,8 +6,8 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -SO_VER := 10 -SO_MINOR := 1 +SO_VER := 11 +SO_MINOR := 0 ifeq ($(CONFIG_VTUNE),y) CFLAGS += -I$(CONFIG_VTUNE_DIR)/include -I$(CONFIG_VTUNE_DIR)/sdk/src/ittnotify diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index fc8a8d542..510f44459 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -56,6 +56,11 @@ int __itt_init_ittlib(const char *, __itt_group_id); #define SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS (8) #define BDEV_RESET_CHECK_OUTSTANDING_IO_PERIOD 1000000 +/* The maximum number of children requests for a COPY command + * when splitting into children requests at a time. + */ +#define SPDK_BDEV_MAX_CHILDREN_COPY_REQS (8) + static const char *qos_rpc_type[] = {"rw_ios_per_sec", "rw_mbytes_per_sec", "r_mbytes_per_sec", "w_mbytes_per_sec" }; @@ -2345,6 +2350,17 @@ bdev_write_zeroes_should_split(struct spdk_bdev_io *bdev_io) return false; } +static bool +bdev_copy_should_split(struct spdk_bdev_io *bdev_io) +{ + if (bdev_io->bdev->max_copy != 0 && + bdev_io->u.bdev.num_blocks > bdev_io->bdev->max_copy) { + return true; + } + + return false; +} + static bool bdev_io_should_split(struct spdk_bdev_io *bdev_io) { @@ -2356,6 +2372,8 @@ bdev_io_should_split(struct spdk_bdev_io *bdev_io) return bdev_unmap_should_split(bdev_io); case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: return bdev_write_zeroes_should_split(bdev_io); + case SPDK_BDEV_IO_TYPE_COPY: + return bdev_copy_should_split(bdev_io); default: return false; } @@ -2387,12 +2405,20 @@ _bdev_write_zeroes_split(void *_bdev_io) return bdev_write_zeroes_split((struct spdk_bdev_io *)_bdev_io); } +static void bdev_copy_split(struct spdk_bdev_io *bdev_io); + +static void +_bdev_copy_split(void *_bdev_io) +{ + return bdev_copy_split((struct spdk_bdev_io *)_bdev_io); +} + static int bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt, void *md_buf, uint64_t num_blocks, uint64_t *offset, uint64_t *remaining) { int rc; - uint64_t current_offset, 
current_remaining; + uint64_t current_offset, current_remaining, current_src_offset; spdk_bdev_io_wait_cb io_wait_fn; current_offset = *offset; @@ -2432,6 +2458,15 @@ bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt current_offset, num_blocks, bdev_io_split_done, bdev_io); break; + case SPDK_BDEV_IO_TYPE_COPY: + io_wait_fn = _bdev_copy_split; + current_src_offset = bdev_io->u.bdev.copy.src_offset_blocks + + (current_offset - bdev_io->u.bdev.offset_blocks); + rc = spdk_bdev_copy_blocks(bdev_io->internal.desc, + spdk_io_channel_from_ctx(bdev_io->internal.ch), + current_offset, current_src_offset, num_blocks, + bdev_io_split_done, bdev_io); + break; default: assert(false); rc = -EINVAL; @@ -2655,6 +2690,30 @@ bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io) } } +static void +bdev_copy_split(struct spdk_bdev_io *bdev_io) +{ + uint64_t offset, copy_blocks, remaining; + uint32_t num_children_reqs = 0; + int rc; + + offset = bdev_io->u.bdev.split_current_offset_blocks; + remaining = bdev_io->u.bdev.split_remaining_num_blocks; + + assert(bdev_io->bdev->max_copy != 0); + while (remaining && (num_children_reqs < SPDK_BDEV_MAX_CHILDREN_COPY_REQS)) { + copy_blocks = spdk_min(remaining, bdev_io->bdev->max_copy); + + rc = bdev_io_split_submit(bdev_io, NULL, 0, NULL, copy_blocks, + &offset, &remaining); + if (spdk_likely(rc == 0)) { + num_children_reqs++; + } else { + return; + } + } +} + static void parent_bdev_io_complete(void *ctx, int rc) { @@ -2718,6 +2777,9 @@ bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: bdev_write_zeroes_split(parent_io); break; + case SPDK_BDEV_IO_TYPE_COPY: + bdev_copy_split(parent_io); + break; default: assert(false); break; @@ -2752,6 +2814,9 @@ bdev_io_split(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: bdev_write_zeroes_split(bdev_io); break; + case SPDK_BDEV_IO_TYPE_COPY: + 
bdev_copy_split(bdev_io); + break; default: assert(false); break; @@ -2845,6 +2910,7 @@ bdev_io_range_is_locked(struct spdk_bdev_io *bdev_io, struct lba_range *range) case SPDK_BDEV_IO_TYPE_UNMAP: case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: case SPDK_BDEV_IO_TYPE_ZCOPY: + case SPDK_BDEV_IO_TYPE_COPY: r.offset = bdev_io->u.bdev.offset_blocks; r.length = bdev_io->u.bdev.num_blocks; if (!bdev_lba_range_overlapped(range, &r)) { @@ -3960,6 +4026,12 @@ spdk_bdev_is_dif_check_enabled(const struct spdk_bdev *bdev, } } +uint32_t +spdk_bdev_get_max_copy(const struct spdk_bdev *bdev) +{ + return bdev->max_copy; +} + uint64_t spdk_bdev_get_qd(const struct spdk_bdev *bdev) { @@ -8100,6 +8172,56 @@ spdk_bdev_for_each_channel(struct spdk_bdev *bdev, spdk_bdev_for_each_channel_ms iter, bdev_each_channel_cpl); } +int +spdk_bdev_copy_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t dst_offset_blocks, uint64_t src_offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc); + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + + if (!desc->write) { + return -EBADF; + } + + if (spdk_unlikely(!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COPY))) { + SPDK_DEBUGLOG(bdev, "Copy IO type is not supported\n"); + return -ENOTSUP; + } + + if (num_blocks == 0) { + SPDK_ERRLOG("Can't copy 0 blocks\n"); + return -EINVAL; + } + + if (!bdev_io_valid_blocks(bdev, dst_offset_blocks, num_blocks) || + !bdev_io_valid_blocks(bdev, src_offset_blocks, num_blocks)) { + SPDK_DEBUGLOG(bdev, + "Invalid offset or number of blocks: dst %lu, src %lu, count %lu\n", + dst_offset_blocks, src_offset_blocks, num_blocks); + return -EINVAL; + } + + bdev_io = bdev_channel_get_io(channel); + if (!bdev_io) { + return -ENOMEM; + } + + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; + bdev_io->type = SPDK_BDEV_IO_TYPE_COPY; + + bdev_io->u.bdev.offset_blocks = 
dst_offset_blocks; + bdev_io->u.bdev.copy.src_offset_blocks = src_offset_blocks; + bdev_io->u.bdev.num_blocks = num_blocks; + bdev_io->u.bdev.ext_opts = NULL; + bdev_io_init(bdev_io, bdev, cb_arg, cb); + + bdev_io_submit(bdev_io); + return 0; +} + SPDK_LOG_REGISTER_COMPONENT(bdev) SPDK_TRACE_REGISTER_FN(bdev_trace, "bdev", TRACE_GROUP_BDEV) diff --git a/lib/bdev/spdk_bdev.map b/lib/bdev/spdk_bdev.map index 8078528b4..8e708e59f 100644 --- a/lib/bdev/spdk_bdev.map +++ b/lib/bdev/spdk_bdev.map @@ -105,6 +105,8 @@ spdk_bdev_writev_blocks_ext; spdk_bdev_for_each_channel; spdk_bdev_for_each_channel_continue; + spdk_bdev_get_max_copy; + spdk_bdev_copy_blocks; # Public functions in bdev_module.h spdk_bdev_register; diff --git a/lib/ftl/Makefile b/lib/ftl/Makefile index e1873f9d4..3ad31c435 100644 --- a/lib/ftl/Makefile +++ b/lib/ftl/Makefile @@ -6,7 +6,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -SO_VER := 5 +SO_VER := 6 SO_MINOR := 0 ifdef SPDK_FTL_VSS_EMU