bdev: Add a zero copy I/O path
Add a ZCOPY operation to obtain buffers that represent data regions on the backing block device.

Change-Id: Ie941c16ee051d0009e3888b52b8f41773bba47b3
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/386166
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
b92c3d412d
commit
84850dacd7
@ -49,6 +49,9 @@ block except for metadata.
|
||||
spdk_vbdev_register() has been deprecated. spdk_bdev_register() should be used
|
||||
instead.
|
||||
|
||||
A mechanism for acquiring and releasing data buffers from bdev modules, used
|
||||
to perform zero copy operations, was added.
|
||||
|
||||
### NVMe-oF Target
|
||||
|
||||
Support for per-device shared receive queues in the RDMA transport has been added.
|
||||
|
@ -110,6 +110,7 @@ enum spdk_bdev_io_type {
|
||||
SPDK_BDEV_IO_TYPE_NVME_IO,
|
||||
SPDK_BDEV_IO_TYPE_NVME_IO_MD,
|
||||
SPDK_BDEV_IO_TYPE_WRITE_ZEROES,
|
||||
SPDK_BDEV_IO_TYPE_ZCOPY,
|
||||
SPDK_BDEV_NUM_IO_TYPES /* Keep last */
|
||||
};
|
||||
|
||||
@ -785,6 +786,47 @@ int spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel
|
||||
uint64_t offset_blocks, uint64_t num_blocks,
|
||||
spdk_bdev_io_completion_cb cb, void *cb_arg);
|
||||
|
||||
/**
|
||||
* Submit a request to acquire a data buffer that represents the given
|
||||
* range of blocks. The data buffer is placed in the spdk_bdev_io structure
|
||||
* and can be obtained by calling spdk_bdev_io_get_iovec().
|
||||
*
|
||||
* \param desc Block device descriptor
|
||||
* \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
|
||||
* \param offset_blocks The offset, in blocks, from the start of the block device.
|
||||
* \param num_blocks The number of blocks.
|
||||
* \param populate Whether the data buffer should be populated with the
|
||||
* data at the given blocks. Populating the data buffer can
|
||||
* be skipped if the user writes new data to the entire buffer.
|
||||
* \param cb Called when the request is complete.
|
||||
* \param cb_arg Argument passed to cb.
|
||||
*
|
||||
* \return 0 on success. On success, the callback will always
|
||||
* be called (even if the request ultimately failed). Return
|
||||
* negated errno on failure, in which case the callback will not be called.
|
||||
*/
|
||||
int spdk_bdev_zcopy_start(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
||||
uint64_t offset_blocks, uint64_t num_blocks,
|
||||
bool populate,
|
||||
spdk_bdev_io_completion_cb cb, void *cb_arg);
|
||||
|
||||
|
||||
/**
|
||||
* Submit a request to release a data buffer representing a range of blocks.
|
||||
*
|
||||
* \param bdev_io I/O request returned in the completion callback of spdk_bdev_zcopy_start().
|
||||
* \param commit Whether to commit the data in the buffers to the blocks before releasing.
|
||||
* The data does not need to be committed if it was not modified.
|
||||
* \param cb Called when the request is complete.
|
||||
* \param cb_arg Argument passed to cb.
|
||||
*
|
||||
* \return 0 on success. On success, the callback will always
|
||||
* be called (even if the request ultimately failed). Return
|
||||
* negated errno on failure, in which case the callback will not be called.
|
||||
*/
|
||||
int spdk_bdev_zcopy_end(struct spdk_bdev_io *bdev_io, bool commit,
|
||||
spdk_bdev_io_completion_cb cb, void *cb_arg);
|
||||
|
||||
/**
|
||||
* Submit a write zeroes request to the bdev on the given channel. This command
|
||||
* ensures that all bytes in the specified range are set to 00h
|
||||
|
@ -453,6 +453,17 @@ struct spdk_bdev_io {
|
||||
|
||||
/** count of outstanding batched split I/Os */
|
||||
uint32_t split_outstanding;
|
||||
|
||||
struct {
|
||||
/** Whether the buffer should be populated with the real data */
|
||||
uint8_t populate : 1;
|
||||
|
||||
/** Whether the buffer should be committed back to disk */
|
||||
uint8_t commit : 1;
|
||||
|
||||
/** True if this request is in the 'start' phase of zcopy. False if in 'end'. */
|
||||
uint8_t start : 1;
|
||||
} zcopy;
|
||||
} bdev;
|
||||
struct {
|
||||
/** Channel reference held while messages for this reset are in progress. */
|
||||
|
@ -424,6 +424,11 @@ spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len)
|
||||
{
|
||||
struct iovec *iovs;
|
||||
|
||||
if (bdev_io->u.bdev.iovs == NULL) {
|
||||
bdev_io->u.bdev.iovs = &bdev_io->iov;
|
||||
bdev_io->u.bdev.iovcnt = 1;
|
||||
}
|
||||
|
||||
iovs = bdev_io->u.bdev.iovs;
|
||||
|
||||
assert(iovs != NULL);
|
||||
@ -436,6 +441,10 @@ spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len)
|
||||
/*
 * Report whether a data buffer is already attached to an iovec array.
 *
 * Only the first element is inspected. A NULL array counts as "no buffer".
 */
static bool
_is_buf_allocated(struct iovec *iovs)
{
	/* Short-circuit keeps the dereference away from a NULL array. */
	return iovs != NULL && iovs->iov_base != NULL;
}
|
||||
|
||||
@ -585,7 +594,6 @@ spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, u
|
||||
bool buf_allocated;
|
||||
|
||||
assert(cb != NULL);
|
||||
assert(bdev_io->u.bdev.iovs != NULL);
|
||||
|
||||
alignment = spdk_bdev_get_buf_align(bdev_io->bdev);
|
||||
buf_allocated = _is_buf_allocated(bdev_io->u.bdev.iovs);
|
||||
@ -2744,6 +2752,65 @@ spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
spdk_bdev_zcopy_start(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
||||
uint64_t offset_blocks, uint64_t num_blocks,
|
||||
bool populate,
|
||||
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
||||
{
|
||||
struct spdk_bdev *bdev = desc->bdev;
|
||||
struct spdk_bdev_io *bdev_io;
|
||||
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
|
||||
|
||||
if (!desc->write) {
|
||||
return -EBADF;
|
||||
}
|
||||
|
||||
if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY)) {
|
||||
return -ENOTSUP;
|
||||
}
|
||||
|
||||
bdev_io = spdk_bdev_get_io(channel);
|
||||
if (!bdev_io) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
bdev_io->internal.ch = channel;
|
||||
bdev_io->internal.desc = desc;
|
||||
bdev_io->type = SPDK_BDEV_IO_TYPE_ZCOPY;
|
||||
bdev_io->u.bdev.num_blocks = num_blocks;
|
||||
bdev_io->u.bdev.offset_blocks = offset_blocks;
|
||||
bdev_io->u.bdev.zcopy.populate = populate ? 1 : 0;
|
||||
bdev_io->u.bdev.zcopy.commit = 0;
|
||||
bdev_io->u.bdev.zcopy.start = 1;
|
||||
spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
|
||||
|
||||
spdk_bdev_io_submit(bdev_io);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
spdk_bdev_zcopy_end(struct spdk_bdev_io *bdev_io, bool commit,
|
||||
spdk_bdev_io_completion_cb cb, void *cb_arg)
|
||||
{
|
||||
if (bdev_io->type != SPDK_BDEV_IO_TYPE_ZCOPY) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
bdev_io->u.bdev.zcopy.commit = commit ? 1 : 0;
|
||||
bdev_io->u.bdev.zcopy.start = 0;
|
||||
bdev_io->internal.caller_ctx = cb_arg;
|
||||
bdev_io->internal.cb = cb;
|
||||
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_PENDING;
|
||||
|
||||
spdk_bdev_io_submit(bdev_io);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
spdk_bdev_write_zeroes(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
|
||||
uint64_t offset, uint64_t len,
|
||||
@ -4018,10 +4085,8 @@ spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *i
|
||||
|
||||
switch (bdev_io->type) {
|
||||
case SPDK_BDEV_IO_TYPE_READ:
|
||||
iovs = bdev_io->u.bdev.iovs;
|
||||
iovcnt = bdev_io->u.bdev.iovcnt;
|
||||
break;
|
||||
case SPDK_BDEV_IO_TYPE_WRITE:
|
||||
case SPDK_BDEV_IO_TYPE_ZCOPY:
|
||||
iovs = bdev_io->u.bdev.iovs;
|
||||
iovcnt = bdev_io->u.bdev.iovcnt;
|
||||
break;
|
||||
|
Loading…
Reference in New Issue
Block a user