From aa6767fb1452365bbfa8dd5c0b9105fb3c3bcdf0 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 19 Jan 2021 11:10:56 +0000 Subject: [PATCH] nvme: add support for ZNS zone append command The Zone Append command is an optional command in the Zoned Namespace Command Set. Zone Append differs from a regular write in that the command is not given an exact LBA of where to write the data. Instead, the user has to set the zslba field to the start of a zone, and the data will be appended to that zone. The actual LBA where the data was stored is returned in the spdk_nvme_cpl, where Dword0 contains bits 31:00 of the ALBA field, and Dword1 contains bits 63:32 of the ALBA field. Signed-off-by: Niklas Cassel Change-Id: Iabae1b3456bfbb62c07b63d79afe9a14e460fe83 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6013 Community-CI: Broadcom CI Reviewed-by: Jim Harris Reviewed-by: Changpeng Liu Tested-by: SPDK CI Jenkins --- include/spdk/nvme_zns.h | 58 +++++++++ lib/nvme/nvme_internal.h | 4 + lib/nvme/nvme_ns_cmd.c | 63 +++++++++ lib/nvme/nvme_zns.c | 20 +++ lib/nvme/spdk_nvme.map | 2 + .../lib/nvme/nvme_ns_cmd.c/nvme_ns_cmd_ut.c | 120 ++++++++++++++++++ 6 files changed, 267 insertions(+) diff --git a/include/spdk/nvme_zns.h b/include/spdk/nvme_zns.h index 8624f1f7d..1fc190bdd 100644 --- a/include/spdk/nvme_zns.h +++ b/include/spdk/nvme_zns.h @@ -107,6 +107,64 @@ const struct spdk_nvme_zns_ctrlr_data *spdk_nvme_zns_ctrlr_get_data(struct spdk_ */ uint32_t spdk_nvme_zns_ctrlr_get_max_zone_append_size(const struct spdk_nvme_ctrlr *ctrlr); +/** + * Submit a zone append I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the write I/O. + * \param qpair I/O queue pair to submit the request. + * \param buffer Virtual address pointer to the data payload buffer. 
+ * \param zslba Zone Start LBA of the zone that we are appending to. + * \param lba_count Length (in sectors) for the write operation. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * \param io_flags Set flags, defined by the SPDK_NVME_IO_FLAGS_* entries in + * spdk/nvme_spec.h, for this I/O. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + */ +int spdk_nvme_zns_zone_append(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + void *buffer, uint64_t zslba, + uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags); + +/** + * Submit a zone append I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. + * + * \param ns NVMe namespace to submit the write I/O. + * \param qpair I/O queue pair to submit the request. + * \param buffer Virtual address pointer to the data payload buffer. + * \param metadata Virtual address pointer to the metadata payload, the length + * of metadata is specified by spdk_nvme_ns_get_md_size(). + * \param zslba Zone Start LBA of the zone that we are appending to. + * \param lba_count Length (in sectors) for the write operation. + * \param cb_fn Callback function to invoke when the I/O is completed. + * \param cb_arg Argument to pass to the callback function. + * \param io_flags Set flags, defined by the SPDK_NVME_IO_FLAGS_* entries in + * spdk/nvme_spec.h, for this I/O. + * \param apptag_mask Application tag mask. + * \param apptag Application tag to use for end-to-end protection information. 
+ * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + */ +int spdk_nvme_zns_zone_append_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + void *buffer, void *metadata, uint64_t zslba, + uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag); + /** * Submit a Close Zone operation to the specified NVMe namespace. * diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h index c57bf85e8..d745d084a 100644 --- a/lib/nvme/nvme_internal.h +++ b/lib/nvme/nvme_internal.h @@ -1038,6 +1038,10 @@ int nvme_ns_construct(struct spdk_nvme_ns *ns, uint32_t id, struct spdk_nvme_ctrlr *ctrlr); void nvme_ns_destruct(struct spdk_nvme_ns *ns); int nvme_ns_update(struct spdk_nvme_ns *ns); +int nvme_ns_cmd_zone_append_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + void *buffer, void *metadata, uint64_t zslba, + uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag); int nvme_fabric_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value); int nvme_fabric_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value); diff --git a/lib/nvme/nvme_ns_cmd.c b/lib/nvme/nvme_ns_cmd.c index 2cd8ba49d..73246f80c 100644 --- a/lib/nvme/nvme_ns_cmd.c +++ b/lib/nvme/nvme_ns_cmd.c @@ -405,6 +405,13 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, req->payload_offset = payload_offset; req->md_offset = md_offset; + /* Zone append commands cannot be split. 
*/ + if (opc == SPDK_NVME_OPC_ZONE_APPEND) { + assert(ns->csi == SPDK_NVME_CSI_ZNS); + _nvme_ns_cmd_setup_request(ns, req, opc, lba, lba_count, io_flags, apptag_mask, apptag); + return req; + } + /* * Intel DC P3*00 NVMe controllers benefit from driver-assisted striping. * If this controller defines a stripe boundary and this I/O spans a stripe @@ -732,6 +739,62 @@ spdk_nvme_ns_cmd_write(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, } } +static int +nvme_ns_cmd_check_zone_append(struct spdk_nvme_ns *ns, uint32_t lba_count, uint32_t io_flags) +{ + uint32_t sector_size; + + /* Not all NVMe Zoned Namespaces support the zone append command. */ + if (!(ns->ctrlr->flags & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED)) { + return -EINVAL; + } + + sector_size = _nvme_get_host_buffer_sector_size(ns, io_flags); + + /* Fail a too large zone append command early; compute the byte count in 64 bits so it cannot wrap. */ + if ((uint64_t)lba_count * sector_size > ns->ctrlr->max_zone_append_size) { + return -EINVAL; + } + + return 0; +} + +int +nvme_ns_cmd_zone_append_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + void *buffer, void *metadata, uint64_t zslba, + uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag) +{ + struct nvme_request *req; + struct nvme_payload payload; + int ret; + + if (!_is_io_flags_valid(io_flags)) { + return -EINVAL; + } + + ret = nvme_ns_cmd_check_zone_append(ns, lba_count, io_flags); + if (ret) { + return ret; + } + + payload = NVME_PAYLOAD_CONTIG(buffer, metadata); + + req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, zslba, lba_count, cb_fn, cb_arg, + SPDK_NVME_OPC_ZONE_APPEND, + io_flags, apptag_mask, apptag, false); + if (req != NULL) { + return nvme_qpair_submit_request(qpair, req); + } else if (nvme_ns_check_request_length(lba_count, + ns->sectors_per_max_io, + ns->sectors_per_stripe, + qpair->ctrlr->opts.io_queue_requests)) { + return -EINVAL; + } else { + return -ENOMEM; + } +} + int spdk_nvme_ns_cmd_write_with_md(struct 
spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *buffer, void *metadata, uint64_t lba, diff --git a/lib/nvme/nvme_zns.c b/lib/nvme/nvme_zns.c index bdfffa344..dbcecad24 100644 --- a/lib/nvme/nvme_zns.c +++ b/lib/nvme/nvme_zns.c @@ -66,6 +66,26 @@ spdk_nvme_zns_ctrlr_get_max_zone_append_size(const struct spdk_nvme_ctrlr *ctrlr return ctrlr->max_zone_append_size; } +int +spdk_nvme_zns_zone_append(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + void *buffer, uint64_t zslba, + uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags) +{ + return nvme_ns_cmd_zone_append_with_md(ns, qpair, buffer, NULL, zslba, lba_count, + cb_fn, cb_arg, io_flags, 0, 0); +} + +int +spdk_nvme_zns_zone_append_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + void *buffer, void *metadata, uint64_t zslba, + uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, + uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag) +{ + return nvme_ns_cmd_zone_append_with_md(ns, qpair, buffer, metadata, zslba, lba_count, + cb_fn, cb_arg, io_flags, apptag_mask, apptag); +} + static int nvme_zns_zone_mgmt_recv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *payload, uint32_t payload_size, uint64_t slba, diff --git a/lib/nvme/spdk_nvme.map b/lib/nvme/spdk_nvme.map index 2ddf47eca..e2e4da623 100644 --- a/lib/nvme/spdk_nvme.map +++ b/lib/nvme/spdk_nvme.map @@ -168,6 +168,8 @@ spdk_nvme_zns_ns_get_num_zones; spdk_nvme_zns_ctrlr_get_data; spdk_nvme_zns_ctrlr_get_max_zone_append_size; + spdk_nvme_zns_zone_append; + spdk_nvme_zns_zone_append_with_md; spdk_nvme_zns_close_zone; spdk_nvme_zns_finish_zone; spdk_nvme_zns_open_zone; diff --git a/test/unit/lib/nvme/nvme_ns_cmd.c/nvme_ns_cmd_ut.c b/test/unit/lib/nvme/nvme_ns_cmd.c/nvme_ns_cmd_ut.c index 8a88c6503..e352da0f9 100644 --- a/test/unit/lib/nvme/nvme_ns_cmd.c/nvme_ns_cmd_ut.c +++ b/test/unit/lib/nvme/nvme_ns_cmd.c/nvme_ns_cmd_ut.c @@ -1471,6 +1471,125 @@ test_nvme_ns_cmd_write_with_md(void) 
free(metadata); } +static void +test_nvme_ns_cmd_zone_append_with_md(void) +{ + struct spdk_nvme_ns ns; + struct spdk_nvme_ctrlr ctrlr; + struct spdk_nvme_qpair qpair; + int rc = 0; + char *buffer = NULL; + char *metadata = NULL; + uint32_t block_size, md_size; + + block_size = 512; + md_size = 128; + + buffer = malloc((block_size + md_size) * 384); + SPDK_CU_ASSERT_FATAL(buffer != NULL); + metadata = malloc(md_size * 384); + SPDK_CU_ASSERT_FATAL(metadata != NULL); + + /* + * 512 byte data + 128 byte metadata + * Separate metadata buffer + * Max data transfer size 256 KB + * Max zone append size 128 KB + * + * 256 blocks * 512 bytes per block = 128 KB I/O + * 128 KB I/O <= max zone append size. Test should pass. + */ + prepare_for_test(&ns, &ctrlr, &qpair, 512, 128, 256 * 1024, 0, false); + ctrlr.max_zone_append_size = 128 * 1024; + ctrlr.flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED; + ns.csi = SPDK_NVME_CSI_ZNS; + + rc = nvme_ns_cmd_zone_append_with_md(&ns, &qpair, buffer, metadata, 0x0, 256, + NULL, NULL, 0, 0, 0); + SPDK_CU_ASSERT_FATAL(rc == 0); + SPDK_CU_ASSERT_FATAL(g_request != NULL); + SPDK_CU_ASSERT_FATAL(g_request->num_children == 0); + + CU_ASSERT(g_request->payload.md == metadata); + CU_ASSERT(g_request->md_size == 256 * 128); + CU_ASSERT(g_request->payload_size == 256 * 512); + + nvme_free_request(g_request); + cleanup_after_test(&qpair); + + /* + * 512 byte data + 128 byte metadata + * Separate metadata buffer + * Max data transfer size 256 KB + * Max zone append size 128 KB + * + * 512 blocks * 512 bytes per block = 256 KB I/O + * 256 KB I/O > max zone append size. Test should fail. 
+ */ + prepare_for_test(&ns, &ctrlr, &qpair, 512, 128, 256 * 1024, 0, false); + ctrlr.max_zone_append_size = 128 * 1024; + ctrlr.flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED; + ns.csi = SPDK_NVME_CSI_ZNS; + + rc = nvme_ns_cmd_zone_append_with_md(&ns, &qpair, buffer, metadata, 0x0, 512, + NULL, NULL, 0, 0, 0); + SPDK_CU_ASSERT_FATAL(rc == -EINVAL); + SPDK_CU_ASSERT_FATAL(g_request == NULL); + + cleanup_after_test(&qpair); + + /* + * 512 byte data + 128 byte metadata + * Extended LBA + * Max data transfer size 256 KB + * Max zone append size 128 KB + * + * 128 blocks * (512 + 128) bytes per block = 80 KB I/O + * 80 KB I/O <= max zone append size. Test should pass. + */ + prepare_for_test(&ns, &ctrlr, &qpair, 512, 128, 256 * 1024, 0, true); + ctrlr.max_zone_append_size = 128 * 1024; + ctrlr.flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED; + ns.csi = SPDK_NVME_CSI_ZNS; + + rc = nvme_ns_cmd_zone_append_with_md(&ns, &qpair, buffer, NULL, 0x0, 128, + NULL, NULL, 0, 0, 0); + SPDK_CU_ASSERT_FATAL(rc == 0); + SPDK_CU_ASSERT_FATAL(g_request != NULL); + SPDK_CU_ASSERT_FATAL(g_request->num_children == 0); + + CU_ASSERT(g_request->payload.md == NULL); + CU_ASSERT(g_request->payload_offset == 0); + CU_ASSERT(g_request->payload_size == 128 * (512 + 128)); + + nvme_free_request(g_request); + cleanup_after_test(&qpair); + + /* + * 512 byte data + 128 byte metadata + * Extended LBA + * Max data transfer size 256 KB + * Max zone append size 128 KB + * + * 256 blocks * (512 + 128) bytes per block = 160 KB I/O + * 160 KB I/O > max zone append size. Test should fail. 
+ */ + prepare_for_test(&ns, &ctrlr, &qpair, 512, 128, 256 * 1024, 0, true); + ctrlr.max_zone_append_size = 128 * 1024; + ctrlr.flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED; + ns.csi = SPDK_NVME_CSI_ZNS; + + rc = nvme_ns_cmd_zone_append_with_md(&ns, &qpair, buffer, NULL, 0x0, 256, + NULL, NULL, 0, 0, 0); + SPDK_CU_ASSERT_FATAL(rc == -EINVAL); + SPDK_CU_ASSERT_FATAL(g_request == NULL); + + cleanup_after_test(&qpair); + + free(buffer); + free(metadata); +} + static void test_nvme_ns_cmd_read_with_md(void) { @@ -1762,6 +1881,7 @@ int main(int argc, char **argv) CU_ADD_TEST(suite, test_nvme_ns_cmd_read_with_md); CU_ADD_TEST(suite, test_nvme_ns_cmd_writev); CU_ADD_TEST(suite, test_nvme_ns_cmd_write_with_md); + CU_ADD_TEST(suite, test_nvme_ns_cmd_zone_append_with_md); CU_ADD_TEST(suite, test_nvme_ns_cmd_comparev); CU_ADD_TEST(suite, test_nvme_ns_cmd_compare_and_write); CU_ADD_TEST(suite, test_nvme_ns_cmd_compare_with_md);