diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h
index 9021dc74a..fe85513cb 100644
--- a/include/spdk/bdev.h
+++ b/include/spdk/bdev.h
@@ -89,6 +89,7 @@ enum spdk_bdev_io_type {
 	SPDK_BDEV_IO_TYPE_RESET,
 	SPDK_BDEV_IO_TYPE_NVME_ADMIN,
 	SPDK_BDEV_IO_TYPE_NVME_IO,
+	SPDK_BDEV_IO_TYPE_NVME_IO_MD,
 	SPDK_BDEV_IO_TYPE_WRITE_ZEROES,
 };
 
@@ -682,6 +683,35 @@ int spdk_bdev_nvme_io_passthru(struct spdk_bdev_desc *bdev_desc,
 			       void *buf, size_t nbytes,
 			       spdk_bdev_io_completion_cb cb, void *cb_arg);
 
+/**
+ * Submit an NVMe I/O command with a separate metadata buffer to the bdev. This passes
+ * directly through the block layer to the device. Support for NVMe passthru is optional,
+ * indicated by calling spdk_bdev_io_type_supported().
+ *
+ * The SGL/PRP will be automatically generated based on the given buffer,
+ * so that portion of the command may be left empty. Also, the namespace
+ * id (nsid) will be populated automatically.
+ *
+ * \param bdev_desc Block device descriptor
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param cmd The raw NVMe command. Must be in the NVM command set.
+ * \param buf Data buffer to be written from.
+ * \param nbytes The number of bytes to transfer. buf must be greater than or equal to this size.
+ * \param md_buf Meta data buffer to be written from.
+ * \param md_len md_buf size to transfer. md_buf must be greater than or equal to this size.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ */
+int spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *bdev_desc,
+				  struct spdk_io_channel *ch,
+				  const struct spdk_nvme_cmd *cmd,
+				  void *buf, size_t nbytes, void *md_buf, size_t md_len,
+				  spdk_bdev_io_completion_cb cb, void *cb_arg);
+
 /**
  * Free an I/O request. This should be called after the callback for the I/O has
  * been called and notifies the bdev layer that memory may now be released.
diff --git a/include/spdk_internal/bdev.h b/include/spdk_internal/bdev.h
index 6e56361f1..782320c73 100644
--- a/include/spdk_internal/bdev.h
+++ b/include/spdk_internal/bdev.h
@@ -328,6 +328,12 @@ struct spdk_bdev_io {
 
 			/* The number of bytes to transfer */
 			size_t nbytes;
+
+			/* The meta data buffer to transfer (NULL when the request carries no metadata) */
+			void *md_buf;
+
+			/* Size in bytes of the meta data buffer to transfer (0 when md_buf is NULL) */
+			size_t md_len;
 		} nvme_passthru;
 	} u;
 
diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c
index 4247102c8..90cf4bc76 100644
--- a/lib/bdev/bdev.c
+++ b/lib/bdev/bdev.c
@@ -1478,6 +1478,8 @@ spdk_bdev_nvme_admin_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channe
 	bdev_io->u.nvme_passthru.cmd = *cmd;
 	bdev_io->u.nvme_passthru.buf = buf;
 	bdev_io->u.nvme_passthru.nbytes = nbytes;
+	bdev_io->u.nvme_passthru.md_buf = NULL;
+	bdev_io->u.nvme_passthru.md_len = 0;
 
 	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
 
@@ -1514,6 +1516,46 @@ spdk_bdev_nvme_io_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *
 	bdev_io->u.nvme_passthru.cmd = *cmd;
 	bdev_io->u.nvme_passthru.buf = buf;
 	bdev_io->u.nvme_passthru.nbytes = nbytes;
+	bdev_io->u.nvme_passthru.md_buf = NULL;
+	bdev_io->u.nvme_passthru.md_len = 0;
+
+	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
+
+	spdk_bdev_io_submit(bdev_io);
+	return 0;
+}
+
+int
+spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+			      const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len,
+			      spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+	struct spdk_bdev *bdev = desc->bdev;
+	struct spdk_bdev_io *bdev_io;
+	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+
+	if (!desc->write) {
+		/*
+		 * Do not try to parse the NVMe command - we could maybe use bits in the opcode
+		 * to easily determine if the command is a read or write, but for now just
+		 * do not allow io_passthru_md with a read-only descriptor.
+		 */
+		return -EBADF;
+	}
+
+	bdev_io = spdk_bdev_get_io();
+	if (!bdev_io) {
+		SPDK_ERRLOG("bdev_io memory allocation failed during nvme_io_passthru_md\n");
+		return -ENOMEM;
+	}
+
+	bdev_io->ch = channel;
+	bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO_MD;
+	bdev_io->u.nvme_passthru.cmd = *cmd;
+	bdev_io->u.nvme_passthru.buf = buf;
+	bdev_io->u.nvme_passthru.nbytes = nbytes;
+	bdev_io->u.nvme_passthru.md_buf = md_buf;
+	bdev_io->u.nvme_passthru.md_len = md_len;
 
 	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
 
diff --git a/lib/bdev/nvme/bdev_nvme.c b/lib/bdev/nvme/bdev_nvme.c
index 4b42adf16..ce2ed71c6 100644
--- a/lib/bdev/nvme/bdev_nvme.c
+++ b/lib/bdev/nvme/bdev_nvme.c
@@ -148,6 +148,9 @@ static int bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_chan
 static int bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
 				 struct nvme_bdev_io *bio,
 				 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
+static int bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+				    struct nvme_bdev_io *bio,
+				    struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len);
 
 static int
 bdev_nvme_get_ctx_size(void)
@@ -437,6 +440,16 @@ _bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_
 					     bdev_io->u.nvme_passthru.buf,
 					     bdev_io->u.nvme_passthru.nbytes);
 
+	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
+		return bdev_nvme_io_passthru_md((struct nvme_bdev *)bdev_io->bdev->ctxt,
+						ch,
+						(struct nvme_bdev_io *)bdev_io->driver_ctx,
+						&bdev_io->u.nvme_passthru.cmd,
+						bdev_io->u.nvme_passthru.buf,
+						bdev_io->u.nvme_passthru.nbytes,
+						bdev_io->u.nvme_passthru.md_buf,
+						bdev_io->u.nvme_passthru.md_len);
+
 	default:
 		return -EINVAL;
 	}
@@ -472,6 +485,9 @@ bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
 	case SPDK_BDEV_IO_TYPE_NVME_IO:
 		return true;
 
+	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
+		return spdk_nvme_ns_get_md_size(nbdev->ns) != 0;
+
 	case SPDK_BDEV_IO_TYPE_UNMAP:
 		cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_ctrlr->ctrlr);
 		return cdata->oncs.dsm;
@@ -1358,6 +1374,34 @@ bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
 					  (uint32_t)nbytes, bdev_nvme_queued_done, bio);
 }
 
+static int
+bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
+			 struct nvme_bdev_io *bio,
+			 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len)
+{
+	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
+	size_t nr_sectors = nbytes / spdk_nvme_ns_get_sector_size(nbdev->ns);
+
+	if (nbytes > UINT32_MAX) {
+		SPDK_ERRLOG("nbytes is greater than UINT32_MAX.\n");
+		return -EINVAL;
+	}
+
+	if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(nbdev->ns)) {
+		SPDK_ERRLOG("invalid meta data buffer size\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
+	 * so fill it out automatically.
+	 */
+	cmd->nsid = spdk_nvme_ns_get_id(nbdev->ns);
+
+	return spdk_nvme_ctrlr_cmd_io_raw_with_md(nbdev->nvme_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf,
+						  (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
+}
+
 static void
 bdev_nvme_get_spdk_running_config(FILE *fp)
 {