bdev: Copy ext_opts when request is split

That is done to correctly handle metadata pointer which is part
of ext_opts structure. It will also be used by the next patch to
remove memory_domain pointer if request which uses local buffers
is split

Force the user to set correct ext_opts size, update API functions
description.

Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com>
Change-Id: I77517d70df34a998d718cc6474fb4c538a42918f
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11349
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
This commit is contained in:
Alexey Marchuk 2022-01-27 17:01:16 +03:00 committed by Tomasz Zawadzki
parent c20dd8afee
commit 8e7688c5a7
4 changed files with 184 additions and 15 deletions

View File

@ -934,7 +934,8 @@ int spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_c
* \param cb Called when the request is complete.
* \param cb_arg Argument passed to cb.
* \param opts Optional structure with extended IO request options. If set, this structure must be
* valid until the IO is completed.
* valid until the IO is completed. `size` member of this structure is used for ABI compatibility and
* must be set to sizeof(struct spdk_bdev_ext_io_opts).
*
* \return 0 on success. On success, the callback will always
* be called (even if the request ultimately failed). Return
@ -1136,7 +1137,8 @@ int spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_
* \param cb Called when the request is complete.
* \param cb_arg Argument passed to cb.
* \param opts Optional structure with extended IO request options. If set, this structure must be
* valid until the IO is completed.
* valid until the IO is completed. `size` member of this structure is used for ABI compatibility and
* must be set to sizeof(struct spdk_bdev_ext_io_opts).
*
* \return 0 on success. On success, the callback will always
* be called (even if the request ultimately failed). Return

View File

@ -775,6 +775,9 @@ struct spdk_bdev_io {
/** Pointer to a structure passed by the user in ext API */
struct spdk_bdev_ext_io_opts *ext_opts;
/** Copy of user's opts, used when I/O is split */
struct spdk_bdev_ext_io_opts ext_opts_copy;
/** Data transfer completion callback */
void (*data_transfer_cpl)(void *ctx, int rc);
} internal;

View File

@ -362,13 +362,13 @@ static int
bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
struct spdk_bdev_ext_io_opts *opts);
struct spdk_bdev_ext_io_opts *opts, bool copy_opts);
static int
bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md_buf,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg,
struct spdk_bdev_ext_io_opts *opts);
struct spdk_bdev_ext_io_opts *opts, bool copy_opts);
static int
bdev_lock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch,
@ -1876,6 +1876,10 @@ spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
bdev_io_put_buf(bdev_io);
}
if (spdk_unlikely(bdev_io->internal.ext_opts == &bdev_io->internal.ext_opts_copy)) {
memset(&bdev_io->internal.ext_opts_copy, 0, sizeof(bdev_io->internal.ext_opts_copy));
}
if (ch->per_thread_cache_count < ch->bdev_io_cache_size) {
ch->per_thread_cache_count++;
STAILQ_INSERT_HEAD(&ch->per_thread_cache, bdev_io, internal.buf_link);
@ -2321,7 +2325,7 @@ bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt
iov, iovcnt, md_buf, current_offset,
num_blocks,
bdev_io_split_done, bdev_io,
bdev_io->internal.ext_opts);
bdev_io->internal.ext_opts, true);
break;
case SPDK_BDEV_IO_TYPE_WRITE:
rc = bdev_writev_blocks_with_md(bdev_io->internal.desc,
@ -2329,7 +2333,7 @@ bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt
iov, iovcnt, md_buf, current_offset,
num_blocks,
bdev_io_split_done, bdev_io,
bdev_io->internal.ext_opts);
bdev_io->internal.ext_opts, true);
break;
case SPDK_BDEV_IO_TYPE_UNMAP:
io_wait_fn = _bdev_unmap_split;
@ -4084,11 +4088,12 @@ static int
bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
struct spdk_bdev_ext_io_opts *opts)
struct spdk_bdev_ext_io_opts *opts, bool copy_opts)
{
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
struct spdk_bdev_io *bdev_io;
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
struct spdk_bdev_ext_io_opts *opts_copy;
if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
return -EINVAL;
@ -4111,7 +4116,17 @@ bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c
bdev_io->internal.ext_opts = opts;
bdev_io->u.bdev.ext_opts = opts;
if (opts && copy_opts) {
assert(opts->size <= sizeof(*opts));
opts_copy = &bdev_io->internal.ext_opts_copy;
memcpy(opts_copy, opts, opts->size);
bdev_io->internal.ext_opts_copy.metadata = md_buf;
bdev_io->internal.ext_opts = opts_copy;
bdev_io->u.bdev.ext_opts = opts_copy;
}
bdev_io_submit(bdev_io);
return 0;
}
@ -4121,7 +4136,7 @@ int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
num_blocks, cb, cb_arg, NULL);
num_blocks, cb, cb_arg, NULL, false);
}
int
@ -4139,7 +4154,7 @@ spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann
}
return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
num_blocks, cb, cb_arg, NULL);
num_blocks, cb, cb_arg, NULL, false);
}
int
@ -4152,6 +4167,9 @@ spdk_bdev_readv_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *
void *md = NULL;
if (opts) {
if (spdk_unlikely(!opts->size || opts->size > sizeof(struct spdk_bdev_ext_io_opts))) {
return -EINVAL;
}
md = opts->metadata;
}
@ -4164,7 +4182,7 @@ spdk_bdev_readv_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *
}
return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md, offset_blocks,
num_blocks, cb, cb_arg, opts);
num_blocks, cb, cb_arg, opts, false);
}
static int
@ -4256,11 +4274,12 @@ bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *
struct iovec *iov, int iovcnt, void *md_buf,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg,
struct spdk_bdev_ext_io_opts *opts)
struct spdk_bdev_ext_io_opts *opts, bool copy_opts)
{
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
struct spdk_bdev_io *bdev_io;
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
struct spdk_bdev_ext_io_opts *opts_copy;
if (!desc->write) {
return -EBADF;
@ -4287,7 +4306,17 @@ bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *
bdev_io->internal.ext_opts = opts;
bdev_io->u.bdev.ext_opts = opts;
if (opts && copy_opts) {
assert(opts->size <= sizeof(*opts));
opts_copy = &bdev_io->internal.ext_opts_copy;
memcpy(opts_copy, opts, opts->size);
bdev_io->internal.ext_opts_copy.metadata = md_buf;
bdev_io->internal.ext_opts = opts_copy;
bdev_io->u.bdev.ext_opts = opts_copy;
}
bdev_io_submit(bdev_io);
return 0;
}
@ -4314,7 +4343,7 @@ spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
num_blocks, cb, cb_arg, NULL);
num_blocks, cb, cb_arg, NULL, false);
}
int
@ -4332,7 +4361,7 @@ spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chan
}
return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
num_blocks, cb, cb_arg, NULL);
num_blocks, cb, cb_arg, NULL, false);
}
int
@ -4345,6 +4374,9 @@ spdk_bdev_writev_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel
void *md = NULL;
if (opts) {
if (spdk_unlikely(!opts->size || opts->size > sizeof(struct spdk_bdev_ext_io_opts))) {
return -EINVAL;
}
md = opts->metadata;
}
@ -4357,7 +4389,7 @@ spdk_bdev_writev_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel
}
return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md, offset_blocks,
num_blocks, cb, cb_arg, opts);
num_blocks, cb, cb_arg, opts, false);
}
static void

View File

@ -238,6 +238,9 @@ stub_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
if (expected_io->md_buf != NULL) {
CU_ASSERT(expected_io->md_buf == bdev_io->u.bdev.md_buf);
if (bdev_io->internal.ext_opts) {
CU_ASSERT(expected_io->md_buf == bdev_io->internal.ext_opts->metadata);
}
}
if (expected_io->length == 0) {
@ -4900,7 +4903,8 @@ bdev_writev_readv_ext(void)
struct iovec iov = { .iov_base = (void *)0xbaaddead, .iov_len = 0x1000 };
struct ut_expected_io *expected_io;
struct spdk_bdev_ext_io_opts ext_io_opts = {
.metadata = (void *)0xFF000000
.metadata = (void *)0xFF000000,
.size = sizeof(ext_io_opts)
};
int rc;
@ -4947,6 +4951,134 @@ bdev_writev_readv_ext(void)
stub_complete_io(1);
CU_ASSERT(g_io_done == true);
/* Test invalid ext_opts size */
ext_io_opts.size = 0;
rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
CU_ASSERT(rc != 0);
rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
CU_ASSERT(rc != 0);
ext_io_opts.size = sizeof(ext_io_opts) * 2;
rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
CU_ASSERT(rc != 0);
rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
CU_ASSERT(rc != 0);
/* Check that IO request with ext_opts and metadata is split correctly
* Offset 14, length 8, payload 0xF000
* Child - Offset 14, length 2, payload 0xF000
* Child - Offset 16, length 6, payload 0xF000 + 2 * 512
*/
bdev->optimal_io_boundary = 16;
bdev->split_on_optimal_io_boundary = true;
bdev->md_interleave = false;
bdev->md_len = 8;
iov.iov_base = (void *)0xF000;
iov.iov_len = 4096;
memset(&ext_io_opts, 0, sizeof(ext_io_opts));
ext_io_opts.metadata = (void *)0xFF000000;
ext_io_opts.size = sizeof(ext_io_opts);
g_io_done = false;
/* read */
expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 2, 1);
expected_io->md_buf = ext_io_opts.metadata;
ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 2 * 512);
TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 16, 6, 1);
expected_io->md_buf = ext_io_opts.metadata + 2 * 8;
ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 6 * 512);
TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 14, 8, io_done, NULL, &ext_io_opts);
CU_ASSERT(rc == 0);
CU_ASSERT(g_io_done == false);
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
stub_complete_io(2);
CU_ASSERT(g_io_done == true);
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
/* write */
g_io_done = false;
expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 14, 2, 1);
expected_io->md_buf = ext_io_opts.metadata;
ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 2 * 512);
TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 16, 6, 1);
expected_io->md_buf = ext_io_opts.metadata + 2 * 8;
ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 6 * 512);
TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 14, 8, io_done, NULL, &ext_io_opts);
CU_ASSERT(rc == 0);
CU_ASSERT(g_io_done == false);
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
stub_complete_io(2);
CU_ASSERT(g_io_done == true);
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
/* ext_opts contains metadata but ext_opts::size doesn't cover metadata pointer.
* Check that IO request with ext_opts and metadata is split correctly - metadata pointer is not copied, so split
* requests do not have metadata
* Offset 14, length 8, payload 0xF000
* Child - Offset 14, length 2, payload 0xF000
* Child - Offset 16, length 6, payload 0xF000 + 2 * 512
*/
iov.iov_base = (void *)0xF000;
iov.iov_len = 4096;
memset(&ext_io_opts, 0, sizeof(ext_io_opts));
ext_io_opts.metadata = (void *)0xFF000000;
ext_io_opts.size = offsetof(struct spdk_bdev_ext_io_opts, metadata);;
g_io_done = false;
/* read */
expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 2, 1);
/* Split request must not contain metadata pointer */
expected_io->md_buf = NULL;
ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 2 * 512);
TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 16, 6, 1);
expected_io->md_buf = NULL;
ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 6 * 512);
TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 14, 8, io_done, NULL, &ext_io_opts);
CU_ASSERT(rc == 0);
CU_ASSERT(g_io_done == false);
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
stub_complete_io(2);
CU_ASSERT(g_io_done == true);
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
/* write */
g_io_done = false;
expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 14, 2, 1);
expected_io->md_buf = NULL;
ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 2 * 512);
TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 16, 6, 1);
/* Split request must not contain metadata pointer */
expected_io->md_buf = NULL;
ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 6 * 512);
TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 14, 8, io_done, NULL, &ext_io_opts);
CU_ASSERT(rc == 0);
CU_ASSERT(g_io_done == false);
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
stub_complete_io(2);
CU_ASSERT(g_io_done == true);
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
spdk_put_io_channel(io_ch);
spdk_bdev_close(desc);
free_bdev(bdev);