diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h index 0e4126890..21aa2f610 100644 --- a/include/spdk/bdev.h +++ b/include/spdk/bdev.h @@ -436,6 +436,18 @@ uint32_t spdk_bdev_get_md_size(const struct spdk_bdev *bdev); */ bool spdk_bdev_is_md_interleaved(const struct spdk_bdev *bdev); +/** + * Query whether metadata is interleaved with block data or separated + * from block data. + * + * \param bdev Block device to query. + * \return true if metadata is separated from block data, false + * otherwise. + * + * Note this function is valid only if there is metadata. + */ +bool spdk_bdev_is_md_separate(const struct spdk_bdev *bdev); + /** * Get block device data block size. * @@ -620,6 +632,33 @@ int spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *c void *buf, uint64_t offset_blocks, uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg); +/** + * Submit a read request to the bdev on the given channel. This function uses + * separate buffer for metadata transfer (valid only if bdev supports this + * mode). + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param buf Data buffer to read into. + * \param md Metadata buffer. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to read. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. 
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate + * metadata is not supported + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, void *md, int64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + /** * Submit a read request to the bdev on the given channel. This differs from * spdk_bdev_read by allowing the data buffer to be described in a scatter @@ -678,6 +717,38 @@ int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel * uint64_t offset_blocks, uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg); +/** + * Submit a read request to the bdev on the given channel. This differs from + * spdk_bdev_read by allowing the data buffer to be described in a scatter + * gather list. Some physical devices place memory alignment requirements on + * data or metadata and may not be able to directly transfer into the buffers + * provided. In this case, the request may fail. This function uses separate + * buffer for metadata transfer (valid only if bdev supports this mode). + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list of buffers to be read into. + * \param iovcnt The number of elements in iov. + * \param md Metadata buffer. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to read. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. 
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate + * metadata is not supported + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + /** * Submit a write request to the bdev on the given channel. * @@ -726,6 +797,34 @@ int spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel * void *buf, uint64_t offset_blocks, uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg); +/** + * Submit a write request to the bdev on the given channel. This function uses + * a separate buffer for metadata transfer (valid only if bdev supports this + * mode). + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param buf Data buffer to be written from. + * \param md Metadata buffer. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to write. buf must hold at least this many blocks of data. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. 
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate + * metadata is not supported + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, void *md, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + /** * Submit a write request to the bdev on the given channel. This differs from * spdk_bdev_write by allowing the data buffer to be described in a scatter @@ -786,6 +885,39 @@ int spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel uint64_t offset_blocks, uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg); +/** + * Submit a write request to the bdev on the given channel. This differs from + * spdk_bdev_write by allowing the data buffer to be described in a scatter + * gather list. Some physical devices place memory alignment requirements on + * data or metadata and may not be able to directly transfer out of the buffers + * provided. In this case, the request may fail. This function uses separate + * buffer for metadata transfer (valid only if bdev supports this mode). + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list of buffers to be written from. + * \param iovcnt The number of elements in iov. + * \param md Metadata buffer. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to write. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. 
+ * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate + * metadata is not supported + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + /** * Submit a request to acquire a data buffer that represents the given * range of blocks. The data buffer is placed in the spdk_bdev_io structure @@ -1197,6 +1329,16 @@ void spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io, */ void spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp); +/** + * Get metadata buffer. Only makes sense if the IO uses a separate buffer for + * metadata transfer. + * + * \param bdev_io I/O to retrieve the buffer from. + * \return Pointer to metadata buffer, or NULL if bdev_io is NULL, the bdev has no + * separate metadata, or the IO is neither a read nor a write. + */ +void *spdk_bdev_io_get_md_buf(struct spdk_bdev_io *bdev_io); + typedef void (*spdk_bdev_histogram_status_cb)(void *cb_arg, int status); typedef void (*spdk_bdev_histogram_data_cb)(void *cb_arg, int status, struct spdk_histogram_data *histogram); diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h index b8cfa5ef6..ac6549f9f 100644 --- a/include/spdk/bdev_module.h +++ b/include/spdk/bdev_module.h @@ -436,6 +436,9 @@ struct spdk_bdev_io { /** For SG buffer cases, number of iovecs in iovec array. */ int iovcnt; + /** Metadata buffer, used when metadata is kept separate from block data. */ + void *md_buf; + /** Total size of data to be transferred. 
*/ uint64_t num_blocks; @@ -556,6 +559,7 @@ struct spdk_bdev_io { struct iovec bounce_iov; struct iovec *orig_iovs; int orig_iovcnt; + void *orig_md_buf; /** Callback for when buf is allocated */ spdk_bdev_io_get_buf_cb get_buf_cb; @@ -756,6 +760,15 @@ void spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb */ void spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len); +/** + * Set the given buffer as metadata buffer described by this bdev_io. + * + * \param bdev_io I/O to set the buffer on. + * \param md_buf The buffer to set as the active metadata buffer. + * \param len The length of the metadata buffer in bytes; must cover the metadata of every block of the I/O. + */ +void spdk_bdev_io_set_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len); + /** * Complete a bdev_io * diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 8472ee389..233337a5c 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -299,6 +299,16 @@ static void _spdk_bdev_write_zero_buffer_next(void *_bdev_io); static void _spdk_bdev_enable_qos_msg(struct spdk_io_channel_iter *i); static void _spdk_bdev_enable_qos_done(struct spdk_io_channel_iter *i, int status); +static int +_spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks, + uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg); +static int +_spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md_buf, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg); + void spdk_bdev_get_opts(struct spdk_bdev_opts *opts) { @@ -438,8 +448,15 @@ spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len) iovs[0].iov_len = len; } +void +spdk_bdev_io_set_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len) +{ + assert(len >= bdev_io->u.bdev.num_blocks * spdk_bdev_get_md_size(bdev_io->bdev)); /* multiply rather than divide: md size may be 0 */ + 
bdev_io->u.bdev.md_buf = md_buf; +} + static bool -_is_buf_allocated(struct iovec *iovs) +_is_buf_allocated(const struct iovec *iovs) { if (iovs == NULL) { return false; @@ -514,12 +531,25 @@ _bdev_io_set_bounce_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len) } } +static void +_bdev_io_set_bounce_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len) +{ + /* save original md_buf */ + bdev_io->internal.orig_md_buf = bdev_io->u.bdev.md_buf; + /* set bounce md_buf */ + bdev_io->u.bdev.md_buf = md_buf; + + if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { + memcpy(md_buf, bdev_io->internal.orig_md_buf, len); + } +} + static void _bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, uint64_t len) { struct spdk_bdev *bdev = bdev_io->bdev; bool buf_allocated; - uint64_t alignment; + uint64_t md_len, alignment; void *aligned_buf; alignment = spdk_bdev_get_buf_align(bdev); @@ -532,6 +562,19 @@ _bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, uint64_t len) spdk_bdev_io_set_buf(bdev_io, aligned_buf, len); } + if (spdk_bdev_is_md_separate(bdev)) { + aligned_buf = (char *)aligned_buf + len; + md_len = bdev_io->u.bdev.num_blocks * bdev->md_len; + + assert(((uintptr_t)aligned_buf & (alignment - 1)) == 0); + + if (bdev_io->u.bdev.md_buf != NULL) { + _bdev_io_set_bounce_md_buf(bdev_io, aligned_buf, md_len); + } else { + spdk_bdev_io_set_md_buf(bdev_io, aligned_buf, md_len); + } + } + bdev_io->internal.buf = buf; bdev_io->internal.get_buf_cb(bdev_io->internal.ch->channel, bdev_io, true); } @@ -539,21 +582,23 @@ _bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, uint64_t len) static void spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io) { + struct spdk_bdev *bdev = bdev_io->bdev; struct spdk_mempool *pool; struct spdk_bdev_io *tmp; bdev_io_stailq_t *stailq; struct spdk_bdev_mgmt_channel *ch; - uint64_t buf_len, alignment; + uint64_t buf_len, md_len, alignment; void *buf; buf = bdev_io->internal.buf; buf_len = bdev_io->internal.buf_len; - alignment 
= spdk_bdev_get_buf_align(bdev_io->bdev); + md_len = spdk_bdev_is_md_separate(bdev) ? bdev_io->u.bdev.num_blocks * bdev->md_len : 0; + alignment = spdk_bdev_get_buf_align(bdev); ch = bdev_io->internal.ch->shared_resource->mgmt_ch; bdev_io->internal.buf = NULL; - if (buf_len + alignment <= SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_SMALL_BUF_MAX_SIZE) + + if (buf_len + alignment + md_len <= SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_SMALL_BUF_MAX_SIZE) + SPDK_BDEV_POOL_ALIGNMENT) { pool = g_bdev_mgr.buf_small_pool; stailq = &ch->need_buf_small; @@ -574,11 +619,18 @@ spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io) static void _bdev_io_unset_bounce_buf(struct spdk_bdev_io *bdev_io) { + if (spdk_likely(bdev_io->internal.orig_iovcnt == 0)) { + assert(bdev_io->internal.orig_md_buf == NULL); + return; + } + /* if this is read path, copy data from bounce buffer to original buffer */ if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ && bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) { - _copy_buf_to_iovs(bdev_io->internal.orig_iovs, bdev_io->internal.orig_iovcnt, - bdev_io->internal.bounce_iov.iov_base, bdev_io->internal.bounce_iov.iov_len); + _copy_buf_to_iovs(bdev_io->internal.orig_iovs, + bdev_io->internal.orig_iovcnt, + bdev_io->internal.bounce_iov.iov_base, + bdev_io->internal.bounce_iov.iov_len); } /* set orignal buffer for this io */ bdev_io->u.bdev.iovcnt = bdev_io->internal.orig_iovcnt; @@ -586,21 +638,38 @@ _bdev_io_unset_bounce_buf(struct spdk_bdev_io *bdev_io) /* disable bouncing buffer for this io */ bdev_io->internal.orig_iovcnt = 0; bdev_io->internal.orig_iovs = NULL; - /* return bounce buffer to the pool */ + + /* do the same for metadata buffer */ + if (spdk_unlikely(bdev_io->internal.orig_md_buf != NULL)) { + assert(spdk_bdev_is_md_separate(bdev_io->bdev)); + + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ && + bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) { + memcpy(bdev_io->internal.orig_md_buf, bdev_io->u.bdev.md_buf, + bdev_io->u.bdev.num_blocks * 
spdk_bdev_get_md_size(bdev_io->bdev)); + } + + bdev_io->u.bdev.md_buf = bdev_io->internal.orig_md_buf; + bdev_io->internal.orig_md_buf = NULL; + } + spdk_bdev_io_put_buf(bdev_io); } void spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, uint64_t len) { + struct spdk_bdev *bdev = bdev_io->bdev; struct spdk_mempool *pool; bdev_io_stailq_t *stailq; struct spdk_bdev_mgmt_channel *mgmt_ch; - uint64_t alignment; + uint64_t alignment, md_len; void *buf; assert(cb != NULL); - alignment = spdk_bdev_get_buf_align(bdev_io->bdev); + + alignment = spdk_bdev_get_buf_align(bdev); + md_len = spdk_bdev_is_md_separate(bdev) ? bdev_io->u.bdev.num_blocks * bdev->md_len : 0; if (_is_buf_allocated(bdev_io->u.bdev.iovs) && _are_iovs_aligned(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, alignment)) { @@ -609,7 +678,7 @@ spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, u return; } - if (len + alignment > SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_LARGE_BUF_MAX_SIZE) + + if (len + alignment + md_len > SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_LARGE_BUF_MAX_SIZE) + SPDK_BDEV_POOL_ALIGNMENT) { SPDK_ERRLOG("Length + alignment %" PRIu64 " is larger than allowed\n", len + alignment); @@ -622,7 +691,7 @@ spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, u bdev_io->internal.buf_len = len; bdev_io->internal.get_buf_cb = cb; - if (len + alignment <= SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_SMALL_BUF_MAX_SIZE) + + if (len + alignment + md_len <= SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_SMALL_BUF_MAX_SIZE) + SPDK_BDEV_POOL_ALIGNMENT) { pool = g_bdev_mgr.buf_small_pool; stailq = &mgmt_ch->need_buf_small; @@ -1531,6 +1600,7 @@ _spdk_bdev_io_split_with_payload(void *_bdev_io) struct iovec *parent_iov, *iov; uint64_t parent_iov_offset, iov_len; uint32_t parent_iovpos, parent_iovcnt, child_iovcnt, iovcnt; + void *md_buf = NULL; int rc; remaining = bdev_io->u.bdev.split_remaining_num_blocks; @@ -1554,6 +1624,13 @@ 
_spdk_bdev_io_split_with_payload(void *_bdev_io) to_next_boundary_bytes = to_next_boundary * blocklen; iov = &bdev_io->child_iov[child_iovcnt]; iovcnt = 0; + + if (bdev_io->u.bdev.md_buf) { + assert((parent_iov_offset % blocklen) == 0); /* md offset is counted in whole blocks */ + md_buf = (char *)bdev_io->u.bdev.md_buf + (parent_iov_offset / blocklen) * + spdk_bdev_get_md_size(bdev_io->bdev); + } + while (to_next_boundary_bytes > 0 && parent_iovpos < parent_iovcnt && child_iovcnt < BDEV_IO_NUM_CHILD_IOV) { parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos]; @@ -1593,15 +1670,17 @@ _spdk_bdev_io_split_with_payload(void *_bdev_io) bdev_io->u.bdev.split_outstanding++; if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { - rc = spdk_bdev_readv_blocks(bdev_io->internal.desc, - spdk_io_channel_from_ctx(bdev_io->internal.ch), - iov, iovcnt, current_offset, to_next_boundary, - _spdk_bdev_io_split_done, bdev_io); + rc = _spdk_bdev_readv_blocks_with_md(bdev_io->internal.desc, + spdk_io_channel_from_ctx(bdev_io->internal.ch), + iov, iovcnt, md_buf, current_offset, + to_next_boundary, + _spdk_bdev_io_split_done, bdev_io); } else { - rc = spdk_bdev_writev_blocks(bdev_io->internal.desc, - spdk_io_channel_from_ctx(bdev_io->internal.ch), - iov, iovcnt, current_offset, to_next_boundary, - _spdk_bdev_io_split_done, bdev_io); + rc = _spdk_bdev_writev_blocks_with_md(bdev_io->internal.desc, + spdk_io_channel_from_ctx(bdev_io->internal.ch), + iov, iovcnt, md_buf, current_offset, + to_next_boundary, + _spdk_bdev_io_split_done, bdev_io); } if (rc == 0) { @@ -1783,6 +1862,7 @@ spdk_bdev_io_init(struct spdk_bdev_io *bdev_io, bdev_io->internal.io_submit_ch = NULL; bdev_io->internal.orig_iovs = NULL; bdev_io->internal.orig_iovcnt = 0; + bdev_io->internal.orig_md_buf = NULL; } static bool @@ -2380,6 +2460,12 @@ spdk_bdev_is_md_interleaved(const struct spdk_bdev *bdev) return (bdev->md_len != 0) && bdev->md_interleave; } +bool +spdk_bdev_is_md_separate(const struct spdk_bdev *bdev) +{ + return (bdev->md_len != 0) && !bdev->md_interleave; +} + 
uint32_t spdk_bdev_get_data_block_size(const struct spdk_bdev *bdev) { @@ -2567,24 +2653,16 @@ spdk_bdev_io_valid_blocks(struct spdk_bdev *bdev, uint64_t offset_blocks, uint64 return true; } -int -spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, - void *buf, uint64_t offset, uint64_t nbytes, - spdk_bdev_io_completion_cb cb, void *cb_arg) +static bool +_bdev_io_check_md_buf(const struct iovec *iovs, const void *md_buf) { - uint64_t offset_blocks, num_blocks; - - if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) { - return -EINVAL; - } - - return spdk_bdev_read_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg); + return _is_buf_allocated(iovs) == (md_buf != NULL); /* data and metadata buffers must be both provided or both absent */ } -int -spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, - void *buf, uint64_t offset_blocks, uint64_t num_blocks, - spdk_bdev_io_completion_cb cb, void *cb_arg) +static int +_spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, void *buf, + void *md_buf, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) { struct spdk_bdev *bdev = desc->bdev; struct spdk_bdev_io *bdev_io; @@ -2606,6 +2684,7 @@ spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, bdev_io->u.bdev.iovs[0].iov_base = buf; bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev->blocklen; bdev_io->u.bdev.iovcnt = 1; + bdev_io->u.bdev.md_buf = md_buf; bdev_io->u.bdev.num_blocks = num_blocks; bdev_io->u.bdev.offset_blocks = offset_blocks; spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); @@ -2614,6 +2693,50 @@ spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, return 0; } +int +spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, uint64_t offset, uint64_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + uint64_t offset_blocks, num_blocks; + + if 
(spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) { + return -EINVAL; + } + + return spdk_bdev_read_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg); +} + +int +spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + return _spdk_bdev_read_blocks_with_md(desc, ch, buf, NULL, offset_blocks, num_blocks, + cb, cb_arg); +} + +int +spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, void *md_buf, int64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct iovec iov = { + .iov_base = buf, + }; + + if (!spdk_bdev_is_md_separate(desc->bdev)) { + return -EINVAL; + } + + if (!_bdev_io_check_md_buf(&iov, md_buf)) { + return -EINVAL; + } + + return _spdk_bdev_read_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks, + cb, cb_arg); +} + int spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, struct iovec *iov, int iovcnt, @@ -2629,10 +2752,10 @@ spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, return spdk_bdev_readv_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg); } -int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, - struct iovec *iov, int iovcnt, - uint64_t offset_blocks, uint64_t num_blocks, - spdk_bdev_io_completion_cb cb, void *cb_arg) +static int +_spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks, + uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg) { struct spdk_bdev *bdev = desc->bdev; struct spdk_bdev_io *bdev_io; @@ -2652,6 +2775,72 @@ int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel * bdev_io->type = SPDK_BDEV_IO_TYPE_READ; 
bdev_io->u.bdev.iovs = iov; bdev_io->u.bdev.iovcnt = iovcnt; + bdev_io->u.bdev.md_buf = md_buf; + bdev_io->u.bdev.num_blocks = num_blocks; + bdev_io->u.bdev.offset_blocks = offset_blocks; + spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); + + spdk_bdev_io_submit(bdev_io); + return 0; +} + +int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + return _spdk_bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks, + num_blocks, cb, cb_arg); +} + +int +spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md_buf, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + if (!spdk_bdev_is_md_separate(desc->bdev)) { + return -EINVAL; + } + + if (!_bdev_io_check_md_buf(iov, md_buf)) { + return -EINVAL; + } + + return _spdk_bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks, + num_blocks, cb, cb_arg); +} + +static int +_spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct spdk_bdev *bdev = desc->bdev; + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + + if (!desc->write) { + return -EBADF; + } + + if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { + return -EINVAL; + } + + bdev_io = spdk_bdev_get_io(channel); + if (!bdev_io) { + return -ENOMEM; + } + + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; + bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE; + bdev_io->u.bdev.iovs = &bdev_io->iov; + bdev_io->u.bdev.iovs[0].iov_base = buf; + bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev->blocklen; + bdev_io->u.bdev.iovcnt = 1; + 
bdev_io->u.bdev.md_buf = md_buf; bdev_io->u.bdev.num_blocks = num_blocks; bdev_io->u.bdev.offset_blocks = offset_blocks; spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); @@ -2678,6 +2867,37 @@ int spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, void *buf, uint64_t offset_blocks, uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + return _spdk_bdev_write_blocks_with_md(desc, ch, buf, NULL, offset_blocks, num_blocks, + cb, cb_arg); +} + +int +spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct iovec iov = { + .iov_base = buf, + }; + + if (!spdk_bdev_is_md_separate(desc->bdev)) { + return -EINVAL; + } + + if (!_bdev_io_check_md_buf(&iov, md_buf)) { + return -EINVAL; + } + + return _spdk_bdev_write_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks, + cb, cb_arg); +} + +static int +_spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md_buf, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) { struct spdk_bdev *bdev = desc->bdev; struct spdk_bdev_io *bdev_io; @@ -2699,10 +2919,9 @@ spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, bdev_io->internal.ch = channel; bdev_io->internal.desc = desc; bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE; - bdev_io->u.bdev.iovs = &bdev_io->iov; - bdev_io->u.bdev.iovs[0].iov_base = buf; - bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev->blocklen; - bdev_io->u.bdev.iovcnt = 1; + bdev_io->u.bdev.iovs = iov; + bdev_io->u.bdev.iovcnt = iovcnt; + bdev_io->u.bdev.md_buf = md_buf; bdev_io->u.bdev.num_blocks = num_blocks; bdev_io->u.bdev.offset_blocks = offset_blocks; spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); @@ -2732,34 +2951,26 @@ spdk_bdev_writev_blocks(struct 
spdk_bdev_desc *desc, struct spdk_io_channel *ch, uint64_t offset_blocks, uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg) { - struct spdk_bdev *bdev = desc->bdev; - struct spdk_bdev_io *bdev_io; - struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + return _spdk_bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks, + num_blocks, cb, cb_arg); +} - if (!desc->write) { - return -EBADF; - } - - if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { +int +spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md_buf, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + if (!spdk_bdev_is_md_separate(desc->bdev)) { return -EINVAL; } - bdev_io = spdk_bdev_get_io(channel); - if (!bdev_io) { - return -ENOMEM; + if (!_bdev_io_check_md_buf(iov, md_buf)) { + return -EINVAL; } - bdev_io->internal.ch = channel; - bdev_io->internal.desc = desc; - bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE; - bdev_io->u.bdev.iovs = iov; - bdev_io->u.bdev.iovcnt = iovcnt; - bdev_io->u.bdev.num_blocks = num_blocks; - bdev_io->u.bdev.offset_blocks = offset_blocks; - spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); - - spdk_bdev_io_submit(bdev_io); - return 0; + return _spdk_bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks, + num_blocks, cb, cb_arg); } static void @@ -3518,9 +3729,7 @@ spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status sta return; } } else { - if (spdk_unlikely(bdev_io->internal.orig_iovcnt > 0)) { - _bdev_io_unset_bounce_buf(bdev_io); - } + _bdev_io_unset_bounce_buf(bdev_io); assert(bdev_ch->io_outstanding > 0); assert(shared_resource->io_outstanding > 0); @@ -4173,6 +4382,25 @@ spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *i } } +void * +spdk_bdev_io_get_md_buf(struct spdk_bdev_io *bdev_io) +{ + if (bdev_io == NULL) { + return NULL; 
+ } + + if (!spdk_bdev_is_md_separate(bdev_io->bdev)) { + return NULL; + } + + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ || + bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { + return bdev_io->u.bdev.md_buf; + } + + return NULL; +} + void spdk_bdev_module_list_add(struct spdk_bdev_module *bdev_module) {