diff --git a/CHANGELOG.md b/CHANGELOG.md index d48eb780a..bca4f5032 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ New API `spdk_bdev_get_memory_domains` has been added, it allows to get SPDK memory domains used by bdev. +New API functions `spdk_bdev_readv_blocks_ext` and `spdk_bdev_writev_blocks_ext` have been added. +These functions accept `spdk_bdev_ext_io_opts` structure with extended IO request +options, e.g. DMA memory domain which describes data that may belong to another memory domain and +can't be accessed directly. + ### dma A new library, lib/dma, has been added. This library provides the necessary infrastructure for diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h index abd77d9cd..7e6f14cc6 100644 --- a/include/spdk/bdev.h +++ b/include/spdk/bdev.h @@ -81,9 +81,6 @@ struct spdk_bdev_media_event { */ struct spdk_bdev; -/** Forward declaration of spdk memory domain */ -struct spdk_memory_domain; - /** * Block device remove callback. * @@ -203,6 +200,24 @@ struct spdk_bdev_opts { uint32_t large_buf_pool_size; }; +/** + * Structure with optional IO request parameters + * The content of this structure must be valid until the IO request is completed + */ +struct spdk_bdev_ext_io_opts { + /** Size of this structure in bytes */ + size_t size; + /** Memory domain which describes payload in this IO request. bdev must support DMA device type that + * can access this memory domain, refer to \ref spdk_bdev_get_memory_domains and \ref spdk_memory_domain_get_dma_device_type + * If set, that means that data buffers can't be accessed directly and the memory domain must + * be used to fetch data to local buffers or to translate data to another memory domain */ + struct spdk_memory_domain *memory_domain; + /** Context to be passed to memory domain operations */ + void *memory_domain_ctx; + /** Metadata buffer, optional */ + void *metadata; +}; + /** * Get the options for the bdev module. 
* @@ -900,6 +915,40 @@ int spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_c uint64_t offset_blocks, uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg); +/** + * Submit a read request to the bdev on the given channel. This differs from + * spdk_bdev_read by allowing the data buffer to be described in a scatter + * gather list. Some physical devices place memory alignment requirements on + * data or metadata and may not be able to directly transfer into the buffers + * provided. In this case, the request may fail. This function uses separate + * buffer for metadata transfer (valid only if bdev supports this mode). + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list of buffers to be read into. + * \param iovcnt The number of elements in iov. + * \param md Metadata buffer, optional. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to read. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * \param opts Optional structure with extended IO request options. If set, this structure must be + * valid until the IO is completed. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. 
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate + * metadata is not supported or opts_size is incorrect + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_readv_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, uint64_t offset_blocks, + uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg, + struct spdk_bdev_ext_io_opts *opts); + /** * Submit a write request to the bdev on the given channel. * @@ -1069,6 +1118,41 @@ int spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_ uint64_t offset_blocks, uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg); +/** + * Submit a write request to the bdev on the given channel. This differs from + * spdk_bdev_write by allowing the data buffer to be described in a scatter + * gather list. Some physical devices place memory alignment requirements on + * data or metadata and may not be able to directly transfer out of the buffers + * provided. In this case, the request may fail. This function uses separate + * buffer for metadata transfer (valid only if bdev supports this mode). + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list of buffers to be written from. + * \param iovcnt The number of elements in iov. + * \param md Metadata buffer, optional. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to write. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * \param opts Optional structure with extended IO request options. If set, this structure must be + * valid until the IO is completed. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). 
Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate + * metadata is not supported or opts_size is incorrect + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_writev_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, uint64_t offset_blocks, + uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg, + struct spdk_bdev_ext_io_opts *opts); + /** * Submit a compare request to the bdev on the given channel. * diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h index 0c38447a1..b55b062f1 100644 --- a/include/spdk/bdev_module.h +++ b/include/spdk/bdev_module.h @@ -750,6 +750,9 @@ struct spdk_bdev_io { /** Enables queuing parent I/O when no bdev_ios available for split children. */ struct spdk_bdev_io_wait_entry waitq_entry; + + /** Pointer to a structure passed by the user in ext API */ + struct spdk_bdev_ext_io_opts *ext_opts; } internal; /** diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 76744e973..2d804ca39 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -364,12 +364,14 @@ static void bdev_enable_qos_done(struct spdk_io_channel_iter *i, int status); static int bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks, - uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg); + uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg, + struct spdk_bdev_ext_io_opts *opts); static int bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks, - spdk_bdev_io_completion_cb cb, void *cb_arg); + spdk_bdev_io_completion_cb cb, void *cb_arg, + struct spdk_bdev_ext_io_opts *opts); static int 
bdev_lock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch, @@ -2119,14 +2121,16 @@ bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt spdk_io_channel_from_ctx(bdev_io->internal.ch), iov, iovcnt, md_buf, current_offset, num_blocks, - bdev_io_split_done, bdev_io); + bdev_io_split_done, bdev_io, + bdev_io->internal.ext_opts); break; case SPDK_BDEV_IO_TYPE_WRITE: rc = bdev_writev_blocks_with_md(bdev_io->internal.desc, spdk_io_channel_from_ctx(bdev_io->internal.ch), iov, iovcnt, md_buf, current_offset, num_blocks, - bdev_io_split_done, bdev_io); + bdev_io_split_done, bdev_io, + bdev_io->internal.ext_opts); break; case SPDK_BDEV_IO_TYPE_UNMAP: io_wait_fn = _bdev_unmap_split; @@ -2624,6 +2628,7 @@ bdev_io_init(struct spdk_bdev_io *bdev_io, bdev_io->num_retries = 0; bdev_io->internal.get_buf_cb = NULL; bdev_io->internal.get_aux_buf_cb = NULL; + bdev_io->internal.ext_opts = NULL; } static bool @@ -3834,7 +3839,8 @@ spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, static int bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks, - uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg) + uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg, + struct spdk_bdev_ext_io_opts *opts) { struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc); struct spdk_bdev_io *bdev_io; @@ -3858,6 +3864,7 @@ bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c bdev_io->u.bdev.num_blocks = num_blocks; bdev_io->u.bdev.offset_blocks = offset_blocks; bdev_io_init(bdev_io, bdev, cb_arg, cb); + bdev_io->internal.ext_opts = opts; bdev_io_submit(bdev_io); return 0; @@ -3869,7 +3876,7 @@ int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel * spdk_bdev_io_completion_cb cb, void *cb_arg) { return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks, - 
num_blocks, cb, cb_arg); + num_blocks, cb, cb_arg, NULL); } int @@ -3887,7 +3894,32 @@ spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann } return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks, - num_blocks, cb, cb_arg); + num_blocks, cb, cb_arg, NULL); +} + +int +spdk_bdev_readv_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg, + struct spdk_bdev_ext_io_opts *opts) +{ + void *md = NULL; + + if (opts) { + md = opts->metadata; + } + + if (md && !spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) { + return -EINVAL; + } + + if (md && !_bdev_io_check_md_buf(iov, md)) { + return -EINVAL; + } + + return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md, offset_blocks, + num_blocks, cb, cb_arg, opts); } static int @@ -3977,7 +4009,8 @@ static int bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks, - spdk_bdev_io_completion_cb cb, void *cb_arg) + spdk_bdev_io_completion_cb cb, void *cb_arg, + struct spdk_bdev_ext_io_opts *opts) { struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc); struct spdk_bdev_io *bdev_io; @@ -4005,6 +4038,7 @@ bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel * bdev_io->u.bdev.num_blocks = num_blocks; bdev_io->u.bdev.offset_blocks = offset_blocks; bdev_io_init(bdev_io, bdev, cb_arg, cb); + bdev_io->internal.ext_opts = opts; bdev_io_submit(bdev_io); return 0; @@ -4033,7 +4067,7 @@ spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, spdk_bdev_io_completion_cb cb, void *cb_arg) { return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks, - num_blocks, cb, cb_arg); + num_blocks, cb, cb_arg, NULL); } int @@ -4051,7 +4085,32 @@ 
spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chan } return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks, - num_blocks, cb, cb_arg); + num_blocks, cb, cb_arg, NULL); +} + +int +spdk_bdev_writev_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg, + struct spdk_bdev_ext_io_opts *opts) +{ + void *md = NULL; + + if (opts) { + md = opts->metadata; + } + + if (md && !spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) { + return -EINVAL; + } + + if (md && !_bdev_io_check_md_buf(iov, md)) { + return -EINVAL; + } + + return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md, offset_blocks, + num_blocks, cb, cb_arg, opts); } static void diff --git a/lib/bdev/spdk_bdev.map b/lib/bdev/spdk_bdev.map index f7159e884..8862da661 100644 --- a/lib/bdev/spdk_bdev.map +++ b/lib/bdev/spdk_bdev.map @@ -95,6 +95,8 @@ spdk_bdev_histogram_get; spdk_bdev_get_media_events; spdk_bdev_get_memory_domains; + spdk_bdev_readv_blocks_ext; + spdk_bdev_writev_blocks_ext; # Public functions in bdev_module.h spdk_bdev_register; diff --git a/test/unit/lib/bdev/bdev.c/bdev_ut.c b/test/unit/lib/bdev/bdev.c/bdev_ut.c index 26bd4b890..c7325596c 100644 --- a/test/unit/lib/bdev/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/bdev.c/bdev_ut.c @@ -85,6 +85,7 @@ struct ut_expected_io { int iovcnt; struct iovec iov[BDEV_IO_NUM_CHILD_IOV]; void *md_buf; + struct spdk_bdev_ext_io_opts *ext_io_opts; TAILQ_ENTRY(ut_expected_io) link; }; @@ -261,6 +262,10 @@ stub_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io) CU_ASSERT(iov->iov_base == expected_iov->iov_base); } + if (expected_io->ext_io_opts) { + CU_ASSERT(expected_io->ext_io_opts == bdev_io->internal.ext_opts); + } + free(expected_io); } @@ -4818,6 +4823,69 @@ bdev_get_memory_domains(void) CU_ASSERT(rc == 0); } +static void 
+bdev_writev_readv_ext(void) +{ + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc = NULL; + struct spdk_io_channel *io_ch; + struct iovec iov = { .iov_base = (void *)0xbaaddead, .iov_len = 0x1000 }; + struct ut_expected_io *expected_io; + struct spdk_bdev_ext_io_opts ext_io_opts = { + .metadata = (void *)0xFF000000 + }; + int rc; + + spdk_bdev_initialize(bdev_init_cb, NULL); + + bdev = allocate_bdev("bdev0"); + bdev->md_interleave = false; + bdev->md_len = 8; + + rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc); + CU_ASSERT(rc == 0); + SPDK_CU_ASSERT_FATAL(desc != NULL); + CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc)); + io_ch = spdk_bdev_get_io_channel(desc); + CU_ASSERT(io_ch != NULL); + + g_io_done = false; + expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 32, 14, 1); + expected_io->md_buf = ext_io_opts.metadata; + expected_io->ext_io_opts = &ext_io_opts; + ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len); + TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link); + + rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts); + + CU_ASSERT(rc == 0); + CU_ASSERT(g_io_done == false); + CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1); + stub_complete_io(1); + CU_ASSERT(g_io_done == true); + + g_io_done = false; + expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 1); + expected_io->md_buf = ext_io_opts.metadata; + expected_io->ext_io_opts = &ext_io_opts; + ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len); + TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link); + + rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts); + + CU_ASSERT(rc == 0); + CU_ASSERT(g_io_done == false); + CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1); + stub_complete_io(1); + CU_ASSERT(g_io_done == true); + + spdk_put_io_channel(io_ch); + spdk_bdev_close(desc); + free_bdev(bdev); + 
spdk_bdev_finish(bdev_fini_cb, NULL); + poll_threads(); +} + int main(int argc, char **argv) { @@ -4864,6 +4932,7 @@ main(int argc, char **argv) CU_ADD_TEST(suite, bdev_set_options_test); CU_ADD_TEST(suite, bdev_multi_allocation); CU_ADD_TEST(suite, bdev_get_memory_domains); + CU_ADD_TEST(suite, bdev_writev_readv_ext); allocate_cores(1); allocate_threads(1);