From 014baeb8ef96ce9e3885a9af81b35cad0c1e7c1a Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 26 Jan 2021 09:42:58 +0000 Subject: [PATCH] nvme: add support for ZNS zone append vector variant We already have support for spdk_nvme_zns_zone_append(), add support for spdk_nvme_zns_zone_appendv() (zone append with NVME_PAYLOAD_TYPE_SGL). _nvme_ns_cmd_rw() currently performs verification of the SGL, if the parameter check_sgl is set. This parameter is set for all calls with payload of type NVME_PAYLOAD_TYPE_SGL. In order to be able to perform the same check_sgl verification on zone append vectors, we need to refactor _nvme_ns_cmd_rw() a bit. Setting check_sgl ensures that _nvme_ns_cmd_split_request_sgl() or _nvme_ns_cmd_split_request_prp() gets called. These functions will split an oversized I/O into several different requests. However, they also iterate the SGE entries, verify that the total payload size and the total number of SGE entries are not too large, and that buffers are properly aligned. A proper request will not get split. For zone append, splitting a request into several is not allowed, however, we still want the verification part to be done, such that (e.g.) a non first/last SGE which is not page aligned, will cause the whole request to be rejected. (In the case of spdk_nvme_ns_cmd_write(), a non first/last SGE which is not page aligned will instead cause the request to be split.) An alternative would be to try to rip out the verification part from _nvme_ns_cmd_split_request_sgl() and _nvme_ns_cmd_split_request_prp(). However, that is non-trivial, and would most likely end up with a lot of duplicated code, which would easily get out of sync. 
Signed-off-by: Niklas Cassel Change-Id: I2728acdcadeb70b1f0ed628704df19e75d14dcca Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6248 Tested-by: SPDK CI Jenkins Community-CI: Mellanox Build Bot Reviewed-by: Ben Walker Reviewed-by: Shuhei Matsumoto Reviewed-by: Jim Harris --- include/spdk/nvme_zns.h | 63 +++++++++++++++ lib/nvme/nvme_internal.h | 6 ++ lib/nvme/nvme_ns_cmd.c | 67 +++++++++++++++- lib/nvme/nvme_zns.c | 25 ++++++ lib/nvme/spdk_nvme.map | 2 + .../lib/nvme/nvme_ns_cmd.c/nvme_ns_cmd_ut.c | 78 +++++++++++++++++++ 6 files changed, 239 insertions(+), 2 deletions(-) diff --git a/include/spdk/nvme_zns.h b/include/spdk/nvme_zns.h index d2d3530e3..098674db7 100644 --- a/include/spdk/nvme_zns.h +++ b/include/spdk/nvme_zns.h @@ -177,6 +177,69 @@ int spdk_nvme_zns_zone_append_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_ uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag); +/** + * Submit a zone append I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the zone append I/O. + * \param qpair I/O queue pair to submit the request. + * \param zslba Zone Start LBA of the zone that we are appending to. + * \param lba_count Length (in sectors) for the zone append operation. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * \param io_flags Set flags, defined in nvme_spec.h, for this I/O. + * \param reset_sgl_fn Callback function to reset scattered payload. + * \param next_sge_fn Callback function to iterate each scattered payload memory + * segment. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. 
+ * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + */ +int spdk_nvme_zns_zone_appendv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t zslba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn); + +/** + * Submit a zone append I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the zone append I/O. + * \param qpair I/O queue pair to submit the request. + * \param zslba Zone Start LBA of the zone that we are appending to. + * \param lba_count Length (in sectors) for the zone append operation. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * \param io_flags Set flags, defined in nvme_spec.h, for this I/O. + * \param reset_sgl_fn Callback function to reset scattered payload. + * \param next_sge_fn Callback function to iterate each scattered payload memory + * segment. + * \param metadata Virtual address pointer to the metadata payload, the length + * of metadata is specified by spdk_nvme_ns_get_md_size(). + * \param apptag_mask Application tag mask. + * \param apptag Application tag to use end-to-end protection information. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. 
+ */ +int spdk_nvme_zns_zone_appendv_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t zslba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata, + uint16_t apptag_mask, uint16_t apptag); + /** * Submit a Close Zone operation to the specified NVMe namespace. * diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h index b3e23945c..de30143a7 100644 --- a/lib/nvme/nvme_internal.h +++ b/lib/nvme/nvme_internal.h @@ -1050,6 +1050,12 @@ int nvme_ns_cmd_zone_append_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qp void *buffer, void *metadata, uint64_t zslba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag); +int nvme_ns_cmd_zone_appendv_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t zslba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata, + uint16_t apptag_mask, uint16_t apptag); int nvme_fabric_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value); int nvme_fabric_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value); diff --git a/lib/nvme/nvme_ns_cmd.c b/lib/nvme/nvme_ns_cmd.c index 78df6f51e..d3e27c7db 100644 --- a/lib/nvme/nvme_ns_cmd.c +++ b/lib/nvme/nvme_ns_cmd.c @@ -416,8 +416,12 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, /* Zone append commands cannot be split. */ if (opc == SPDK_NVME_OPC_ZONE_APPEND) { assert(ns->csi == SPDK_NVME_CSI_ZNS); - _nvme_ns_cmd_setup_request(ns, req, opc, lba, lba_count, io_flags, apptag_mask, apptag); - return req; + /* + * As long as we disable driver-assisted striping for Zone append commands, + * _nvme_ns_cmd_rw() should never cause a proper request to be split. 
+ * If a request is split, after all, error handling is done in caller functions. + */ + sectors_per_stripe = 0; } /* @@ -815,6 +819,65 @@ nvme_ns_cmd_zone_append_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair } } +int +nvme_ns_cmd_zone_appendv_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t zslba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata, + uint16_t apptag_mask, uint16_t apptag) +{ + struct nvme_request *req; + struct nvme_payload payload; + int ret; + + if (!_is_io_flags_valid(io_flags)) { + return -EINVAL; + } + + if (reset_sgl_fn == NULL || next_sge_fn == NULL) { + return -EINVAL; + } + + ret = nvme_ns_cmd_check_zone_append(ns, lba_count, io_flags); + if (ret) { + return ret; + } + + payload = NVME_PAYLOAD_SGL(reset_sgl_fn, next_sge_fn, cb_arg, metadata); + + req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, zslba, lba_count, cb_fn, cb_arg, + SPDK_NVME_OPC_ZONE_APPEND, + io_flags, apptag_mask, apptag, true); + if (req != NULL) { + /* + * Zone append commands cannot be split (num_children has to be 0). + * For NVME_PAYLOAD_TYPE_SGL, _nvme_ns_cmd_rw() can cause a split. + * However, _nvme_ns_cmd_split_request_sgl() and _nvme_ns_cmd_split_request_prp() + * do not always cause a request to be split. These functions verify payload size, + * verify num sge < max_sge, and verify SGE alignment rules (in case of PRPs). + * If any of the verifications fail, they will split the request. + * In our case, a split is very unlikely, since we already verified the size using + * nvme_ns_cmd_check_zone_append(), however, we still need to call these functions + * in order to perform the verification part. If they do cause a split, we return + * an error here. For proper requests, these functions will never cause a split. 
+ */ + if (req->num_children) { + nvme_request_free_children(req); + nvme_free_request(req); + return -EINVAL; + } + return nvme_qpair_submit_request(qpair, req); + } else if (nvme_ns_check_request_length(lba_count, + ns->sectors_per_max_io, + ns->sectors_per_stripe, + qpair->ctrlr->opts.io_queue_requests)) { + return -EINVAL; + } else { + return -ENOMEM; + } +} + int spdk_nvme_ns_cmd_write_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *buffer, void *metadata, uint64_t lba, diff --git a/lib/nvme/nvme_zns.c b/lib/nvme/nvme_zns.c index a92f21ca1..ad680cc36 100644 --- a/lib/nvme/nvme_zns.c +++ b/lib/nvme/nvme_zns.c @@ -92,6 +92,31 @@ spdk_nvme_zns_zone_append_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpai cb_fn, cb_arg, io_flags, apptag_mask, apptag); } +int +spdk_nvme_zns_zone_appendv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t zslba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn) +{ + return nvme_ns_cmd_zone_appendv_with_md(ns, qpair, zslba, lba_count, cb_fn, cb_arg, + io_flags, reset_sgl_fn, next_sge_fn, + NULL, 0, 0); +} + +int +spdk_nvme_zns_zone_appendv_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t zslba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata, + uint16_t apptag_mask, uint16_t apptag) +{ + return nvme_ns_cmd_zone_appendv_with_md(ns, qpair, zslba, lba_count, cb_fn, cb_arg, + io_flags, reset_sgl_fn, next_sge_fn, + metadata, apptag_mask, apptag); +} + static int nvme_zns_zone_mgmt_recv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *payload, uint32_t payload_size, uint64_t slba, diff --git a/lib/nvme/spdk_nvme.map b/lib/nvme/spdk_nvme.map index 2b9121b55..5b5091931 100644 --- a/lib/nvme/spdk_nvme.map +++ 
b/lib/nvme/spdk_nvme.map @@ -175,6 +175,8 @@ spdk_nvme_zns_ctrlr_get_max_zone_append_size; spdk_nvme_zns_zone_append; spdk_nvme_zns_zone_append_with_md; + spdk_nvme_zns_zone_appendv; + spdk_nvme_zns_zone_appendv_with_md; spdk_nvme_zns_close_zone; spdk_nvme_zns_finish_zone; spdk_nvme_zns_open_zone; diff --git a/test/unit/lib/nvme/nvme_ns_cmd.c/nvme_ns_cmd_ut.c b/test/unit/lib/nvme/nvme_ns_cmd.c/nvme_ns_cmd_ut.c index fb6eb6104..0e165aa97 100644 --- a/test/unit/lib/nvme/nvme_ns_cmd.c/nvme_ns_cmd_ut.c +++ b/test/unit/lib/nvme/nvme_ns_cmd.c/nvme_ns_cmd_ut.c @@ -1569,6 +1569,83 @@ test_nvme_ns_cmd_zone_append_with_md(void) free(metadata); } +static void +test_nvme_ns_cmd_zone_appendv_with_md(void) +{ + struct spdk_nvme_ns ns; + struct spdk_nvme_ctrlr ctrlr; + struct spdk_nvme_qpair qpair; + int rc = 0; + uint32_t lba_count; + uint32_t sector_size = 512; + uint32_t md_size = 128; + char *metadata = NULL; + uint64_t sge_length; + + metadata = malloc(md_size * 384); + SPDK_CU_ASSERT_FATAL(metadata != NULL); + + /* + * 512 byte data + 128 byte metadata + * Separate metadata buffer + * Max data transfer size 256 KB + * Max zone append size 128 KB + * + * 256 blocks * 512 bytes per block = 128 KB I/O + * 128 KB I/O <= max zone append size. Test should pass. 
+ */ + lba_count = 256; + sge_length = lba_count * sector_size; + prepare_for_test(&ns, &ctrlr, &qpair, sector_size, md_size, 256 * 1024, 0, false); + ctrlr.max_zone_append_size = 128 * 1024; + ctrlr.flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED; + ns.csi = SPDK_NVME_CSI_ZNS; + rc = nvme_ns_cmd_zone_appendv_with_md(&ns, &qpair, 0x0, lba_count, NULL, &sge_length, 0, + nvme_request_reset_sgl, nvme_request_next_sge, metadata, 0, 0); + SPDK_CU_ASSERT_FATAL(rc == 0); + SPDK_CU_ASSERT_FATAL(g_request != NULL); + SPDK_CU_ASSERT_FATAL(g_request->num_children == 0); + + CU_ASSERT(g_request->payload.md == metadata); + CU_ASSERT(g_request->md_size == lba_count * md_size); + CU_ASSERT(g_request->payload_size == lba_count * sector_size); + + CU_ASSERT(g_request->cmd.opc == SPDK_NVME_OPC_ZONE_APPEND); + CU_ASSERT(nvme_payload_type(&g_request->payload) == NVME_PAYLOAD_TYPE_SGL); + CU_ASSERT(g_request->payload.reset_sgl_fn == nvme_request_reset_sgl); + CU_ASSERT(g_request->payload.next_sge_fn == nvme_request_next_sge); + CU_ASSERT(g_request->payload.contig_or_cb_arg == &sge_length); + CU_ASSERT(g_request->cmd.nsid == ns.id); + + nvme_free_request(g_request); + cleanup_after_test(&qpair); + + /* + * 512 byte data + 128 byte metadata + * Separate metadata buffer + * Max data transfer size 256 KB + * Max zone append size 128 KB + * + * 512 blocks * 512 bytes per block = 256 KB I/O + * 256 KB I/O > max zone append size. Test should fail. 
+ */ + lba_count = 512; + sge_length = lba_count * sector_size; + prepare_for_test(&ns, &ctrlr, &qpair, sector_size, md_size, 256 * 1024, 0, false); + ctrlr.max_zone_append_size = 128 * 1024; + ctrlr.flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED; + ns.csi = SPDK_NVME_CSI_ZNS; + + rc = nvme_ns_cmd_zone_appendv_with_md(&ns, &qpair, 0x0, lba_count, NULL, &sge_length, 0, + nvme_request_reset_sgl, nvme_request_next_sge, metadata, 0, 0); + SPDK_CU_ASSERT_FATAL(rc == -EINVAL); + SPDK_CU_ASSERT_FATAL(g_request == NULL); + + cleanup_after_test(&qpair); + + free(metadata); +} + static void test_nvme_ns_cmd_read_with_md(void) { @@ -1850,6 +1927,7 @@ int main(int argc, char **argv) CU_ADD_TEST(suite, test_nvme_ns_cmd_writev); CU_ADD_TEST(suite, test_nvme_ns_cmd_write_with_md); CU_ADD_TEST(suite, test_nvme_ns_cmd_zone_append_with_md); + CU_ADD_TEST(suite, test_nvme_ns_cmd_zone_appendv_with_md); CU_ADD_TEST(suite, test_nvme_ns_cmd_comparev); CU_ADD_TEST(suite, test_nvme_ns_cmd_compare_and_write); CU_ADD_TEST(suite, test_nvme_ns_cmd_compare_with_md);