bdev: Add copy IO type

A copy operation is defined by source and destination LBAs and the number of
LBAs to copy. For the destination LBA and the LBA count we reuse the existing
fields `offset_blocks` and `num_blocks` in `struct spdk_bdev_io`. For the source
LBA, a new field `src_offset_blocks` was added.

The `spdk_bdev_get_max_copy()` function can be used to retrieve the maximum
possible unsplit copy size. A value of zero means unlimited. It is allowed
to submit a larger copy, but it will be split into several bdev IOs.

Signed-off-by: Evgeniy Kochetov <evgeniik@nvidia.com>
Change-Id: I2ad56294b6c062595c026ffcf9b435f0100d3d7e
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14344
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Community-CI: Mellanox Build Bot
This commit is contained in:
Evgeniy Kochetov 2022-08-22 12:14:48 +03:00 committed by Tomasz Zawadzki
parent e28e247954
commit d14afd5000
7 changed files with 172 additions and 4 deletions

View File

@ -21,6 +21,8 @@ associated function pointers were added to iterate each channel of the required
The RPC `bdev_get_iostat` now allows a user to query the per channel IO statistics for
required bdev.
Added new `spdk_bdev_copy_blocks` and `spdk_bdev_get_max_copy` APIs to support copy commands.
## v22.09
### accel

View File

@ -119,6 +119,7 @@ enum spdk_bdev_io_type {
SPDK_BDEV_IO_TYPE_ABORT,
SPDK_BDEV_IO_TYPE_SEEK_HOLE,
SPDK_BDEV_IO_TYPE_SEEK_DATA,
SPDK_BDEV_IO_TYPE_COPY,
SPDK_BDEV_NUM_IO_TYPES /* Keep last */
};
@ -668,6 +669,14 @@ bool spdk_bdev_is_dif_head_of_md(const struct spdk_bdev *bdev);
bool spdk_bdev_is_dif_check_enabled(const struct spdk_bdev *bdev,
enum spdk_dif_check_type check_type);
/**
* Get block device max copy size.
*
* This is the largest copy, in logical blocks, that is submitted to the bdev
* as a single I/O. A copy request larger than a non-zero limit is still
* accepted, but it is split into several bdev I/Os.
*
* \param bdev Block device to query.
* \return Max copy size for this bdev in blocks. 0 means unlimited.
*/
uint32_t spdk_bdev_get_max_copy(const struct spdk_bdev *bdev);
/**
* Get the most recently measured queue depth from a bdev.
*
@ -1709,6 +1718,31 @@ int spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *bdev_desc,
void *buf, size_t nbytes, void *md_buf, size_t md_len,
spdk_bdev_io_completion_cb cb, void *cb_arg);
/**
* Submit a copy request to the block device.
*
* Both the source and the destination range are located on the block device
* referenced by desc. If num_blocks exceeds a non-zero max copy size of the
* bdev (see spdk_bdev_get_max_copy()), the request is split into several
* child copy requests.
*
* \ingroup bdev_io_submit_functions
*
* \param desc Block device descriptor.
* \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
* \param dst_offset_blocks The destination offset, in blocks, from the start of the block device.
* \param src_offset_blocks The source offset, in blocks, from the start of the block device.
* \param num_blocks The number of blocks to copy. Must not be 0.
* \param cb Called when the request is complete.
* \param cb_arg Argument passed to cb.
*
* \return 0 on success. On success, the callback will always
* be called (even if the request ultimately failed). Return
* negated errno on failure, in which case the callback will not be called.
* * -EINVAL - num_blocks is 0, or dst_offset_blocks, src_offset_blocks and/or
*             num_blocks are out of range
* * -ENOMEM - spdk_bdev_io buffer cannot be allocated
* * -EBADF - desc not open for writing
* * -ENOTSUP - copy operation is not supported
*/
int spdk_bdev_copy_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
uint64_t dst_offset_blocks, uint64_t src_offset_blocks,
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg);
/**
* Free an I/O request. This should only be called after the completion callback
* for the I/O has been called and notifies the bdev layer that memory may now

View File

@ -374,6 +374,9 @@ struct spdk_bdev {
/* Maximum write zeroes in unit of logical block */
uint32_t max_write_zeroes;
/* Maximum copy size in unit of logical block */
uint32_t max_copy;
/**
* UUID for this bdev.
*
@ -663,6 +666,11 @@ struct spdk_bdev_io {
/** The offset of next data/hole. */
uint64_t offset;
} seek;
struct {
/** Starting source offset (in blocks) of the bdev for copy I/O. */
uint64_t src_offset_blocks;
} copy;
} bdev;
struct {
/** Channel reference held while messages for this reset are in progress. */

View File

@ -6,8 +6,8 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
SO_VER := 10
SO_MINOR := 1
SO_VER := 11
SO_MINOR := 0
ifeq ($(CONFIG_VTUNE),y)
CFLAGS += -I$(CONFIG_VTUNE_DIR)/include -I$(CONFIG_VTUNE_DIR)/sdk/src/ittnotify

View File

@ -56,6 +56,11 @@ int __itt_init_ittlib(const char *, __itt_group_id);
#define SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS (8)
#define BDEV_RESET_CHECK_OUTSTANDING_IO_PERIOD 1000000
/* The maximum number of child requests that are submitted at a time
* (i.e. per pass of the split loop) when a COPY command is split.
*/
#define SPDK_BDEV_MAX_CHILDREN_COPY_REQS (8)
static const char *qos_rpc_type[] = {"rw_ios_per_sec",
"rw_mbytes_per_sec", "r_mbytes_per_sec", "w_mbytes_per_sec"
};
@ -2345,6 +2350,17 @@ bdev_write_zeroes_should_split(struct spdk_bdev_io *bdev_io)
return false;
}
/*
 * Tell whether a COPY request is larger than the bdev can take in one I/O.
 *
 * \param bdev_io COPY request to examine.
 * \return true if the bdev has a non-zero max_copy limit and the request's
 * num_blocks exceeds it, false otherwise.
 */
static bool
bdev_copy_should_split(struct spdk_bdev_io *bdev_io)
{
	const uint32_t copy_limit = bdev_io->bdev->max_copy;

	/* A limit of zero means "unlimited", so such a bdev never needs a split. */
	if (copy_limit == 0) {
		return false;
	}

	return bdev_io->u.bdev.num_blocks > copy_limit;
}
static bool
bdev_io_should_split(struct spdk_bdev_io *bdev_io)
{
@ -2356,6 +2372,8 @@ bdev_io_should_split(struct spdk_bdev_io *bdev_io)
return bdev_unmap_should_split(bdev_io);
case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
return bdev_write_zeroes_should_split(bdev_io);
case SPDK_BDEV_IO_TYPE_COPY:
return bdev_copy_should_split(bdev_io);
default:
return false;
}
@ -2387,12 +2405,20 @@ _bdev_write_zeroes_split(void *_bdev_io)
return bdev_write_zeroes_split((struct spdk_bdev_io *)_bdev_io);
}
static void bdev_copy_split(struct spdk_bdev_io *bdev_io);

/*
 * void-pointer adapter around bdev_copy_split() so that it can be used where
 * a callback taking a single `void *` argument is expected (it is installed
 * as the I/O wait callback for split COPY requests).
 *
 * \param _bdev_io The parent COPY request, passed as an opaque pointer.
 */
static void
_bdev_copy_split(void *_bdev_io)
{
	/* No `return <expr>;` here: ISO C does not allow a return statement
	 * with an expression in a function whose return type is void.
	 */
	bdev_copy_split((struct spdk_bdev_io *)_bdev_io);
}
static int
bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt, void *md_buf,
uint64_t num_blocks, uint64_t *offset, uint64_t *remaining)
{
int rc;
uint64_t current_offset, current_remaining;
uint64_t current_offset, current_remaining, current_src_offset;
spdk_bdev_io_wait_cb io_wait_fn;
current_offset = *offset;
@ -2432,6 +2458,15 @@ bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt
current_offset, num_blocks,
bdev_io_split_done, bdev_io);
break;
case SPDK_BDEV_IO_TYPE_COPY:
io_wait_fn = _bdev_copy_split;
current_src_offset = bdev_io->u.bdev.copy.src_offset_blocks +
(current_offset - bdev_io->u.bdev.offset_blocks);
rc = spdk_bdev_copy_blocks(bdev_io->internal.desc,
spdk_io_channel_from_ctx(bdev_io->internal.ch),
current_offset, current_src_offset, num_blocks,
bdev_io_split_done, bdev_io);
break;
default:
assert(false);
rc = -EINVAL;
@ -2655,6 +2690,30 @@ bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io)
}
}
/*
 * Submit the next batch of child requests for a COPY request that is being split.
 *
 * Splitting resumes from the position recorded in the parent request
 * (split_current_offset_blocks / split_remaining_num_blocks). Each child
 * covers at most max_copy blocks, and at most
 * SPDK_BDEV_MAX_CHILDREN_COPY_REQS children are submitted per call.
 *
 * \param bdev_io Parent COPY request. Its bdev must have a non-zero max_copy.
 */
static void
bdev_copy_split(struct spdk_bdev_io *bdev_io)
{
	uint64_t cur_offset = bdev_io->u.bdev.split_current_offset_blocks;
	uint64_t blocks_left = bdev_io->u.bdev.split_remaining_num_blocks;
	uint64_t chunk_blocks;
	uint32_t submitted;
	int rc;

	/* With max_copy == 0 the chunk size below would be zero. */
	assert(bdev_io->bdev->max_copy != 0);

	for (submitted = 0;
	     blocks_left && (submitted < SPDK_BDEV_MAX_CHILDREN_COPY_REQS);
	     submitted++) {
		chunk_blocks = spdk_min(blocks_left, bdev_io->bdev->max_copy);

		/* COPY carries no data buffers, hence no iovecs and no metadata buffer. */
		rc = bdev_io_split_submit(bdev_io, NULL, 0, NULL, chunk_blocks,
					  &cur_offset, &blocks_left);
		if (spdk_likely(rc == 0)) {
			continue;
		}

		/* Stop submitting on the first failure.
		 * NOTE(review): presumably bdev_io_split_submit() has already
		 * arranged the retry or the parent failure - confirm.
		 */
		return;
	}
}
static void
parent_bdev_io_complete(void *ctx, int rc)
{
@ -2718,6 +2777,9 @@ bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
bdev_write_zeroes_split(parent_io);
break;
case SPDK_BDEV_IO_TYPE_COPY:
bdev_copy_split(parent_io);
break;
default:
assert(false);
break;
@ -2752,6 +2814,9 @@ bdev_io_split(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
bdev_write_zeroes_split(bdev_io);
break;
case SPDK_BDEV_IO_TYPE_COPY:
bdev_copy_split(bdev_io);
break;
default:
assert(false);
break;
@ -2845,6 +2910,7 @@ bdev_io_range_is_locked(struct spdk_bdev_io *bdev_io, struct lba_range *range)
case SPDK_BDEV_IO_TYPE_UNMAP:
case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
case SPDK_BDEV_IO_TYPE_ZCOPY:
case SPDK_BDEV_IO_TYPE_COPY:
r.offset = bdev_io->u.bdev.offset_blocks;
r.length = bdev_io->u.bdev.num_blocks;
if (!bdev_lba_range_overlapped(range, &r)) {
@ -3960,6 +4026,12 @@ spdk_bdev_is_dif_check_enabled(const struct spdk_bdev *bdev,
}
}
/*
 * Report the bdev's maximum copy size.
 *
 * \param bdev Block device to query.
 * \return The bdev's max_copy value, in logical blocks. 0 means unlimited.
 */
uint32_t
spdk_bdev_get_max_copy(const struct spdk_bdev *bdev)
{
	const uint32_t max_copy_blocks = bdev->max_copy;

	return max_copy_blocks;
}
uint64_t
spdk_bdev_get_qd(const struct spdk_bdev *bdev)
{
@ -8100,6 +8172,56 @@ spdk_bdev_for_each_channel(struct spdk_bdev *bdev, spdk_bdev_for_each_channel_ms
iter, bdev_each_channel_cpl);
}
/*
 * Submit a COPY request that copies num_blocks blocks from src_offset_blocks
 * to dst_offset_blocks on the bdev referenced by desc.
 *
 * \param desc Block device descriptor; must be open for writing.
 * \param ch I/O channel of the bdev.
 * \param dst_offset_blocks Destination offset, in blocks.
 * \param src_offset_blocks Source offset, in blocks.
 * \param num_blocks Number of blocks to copy; must not be 0.
 * \param cb Completion callback.
 * \param cb_arg Argument passed to cb.
 *
 * \return 0 if the request was submitted, otherwise a negated errno:
 * -EBADF (desc not writable), -ENOTSUP (COPY not supported by the bdev),
 * -EINVAL (zero length or a range that is not valid for the bdev),
 * -ENOMEM (no spdk_bdev_io available).
 */
int
spdk_bdev_copy_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		      uint64_t dst_offset_blocks, uint64_t src_offset_blocks, uint64_t num_blocks,
		      spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	/* A copy modifies the destination range, so a writable descriptor is required. */
	if (!desc->write) {
		return -EBADF;
	}

	if (spdk_unlikely(!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COPY))) {
		SPDK_DEBUGLOG(bdev, "Copy IO type is not supported\n");
		return -ENOTSUP;
	}

	if (num_blocks == 0) {
		SPDK_ERRLOG("Can't copy 0 blocks\n");
		return -EINVAL;
	}

	/* Both the destination and the source range have to be valid for this bdev. */
	if (!bdev_io_valid_blocks(bdev, dst_offset_blocks, num_blocks) ||
	    !bdev_io_valid_blocks(bdev, src_offset_blocks, num_blocks)) {
		/* uint64_t is not `unsigned long` on every platform, so cast
		 * explicitly to a type with a fixed conversion specifier.
		 */
		SPDK_DEBUGLOG(bdev,
			      "Invalid offset or number of blocks: dst %llu, src %llu, count %llu\n",
			      (unsigned long long)dst_offset_blocks,
			      (unsigned long long)src_offset_blocks,
			      (unsigned long long)num_blocks);
		return -EINVAL;
	}

	bdev_io = bdev_channel_get_io(channel);
	if (!bdev_io) {
		return -ENOMEM;
	}

	bdev_io->internal.ch = channel;
	bdev_io->internal.desc = desc;
	bdev_io->type = SPDK_BDEV_IO_TYPE_COPY;

	/* The destination reuses the generic offset/length fields; only the
	 * source offset needs the COPY-specific field.
	 */
	bdev_io->u.bdev.offset_blocks = dst_offset_blocks;
	bdev_io->u.bdev.copy.src_offset_blocks = src_offset_blocks;
	bdev_io->u.bdev.num_blocks = num_blocks;
	bdev_io->u.bdev.ext_opts = NULL;
	bdev_io_init(bdev_io, bdev, cb_arg, cb);

	bdev_io_submit(bdev_io);
	return 0;
}
SPDK_LOG_REGISTER_COMPONENT(bdev)
SPDK_TRACE_REGISTER_FN(bdev_trace, "bdev", TRACE_GROUP_BDEV)

View File

@ -105,6 +105,8 @@
spdk_bdev_writev_blocks_ext;
spdk_bdev_for_each_channel;
spdk_bdev_for_each_channel_continue;
spdk_bdev_get_max_copy;
spdk_bdev_copy_blocks;
# Public functions in bdev_module.h
spdk_bdev_register;

View File

@ -6,7 +6,7 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
SO_VER := 5
SO_VER := 6
SO_MINOR := 0
ifdef SPDK_FTL_VSS_EMU