From f4140ad0232e47fcaf1b06d3cfba90b17f6d8d2c Mon Sep 17 00:00:00 2001 From: Ben Walker Date: Mon, 3 Oct 2016 10:37:48 -0700 Subject: [PATCH] nvme: Change the deallocate interface to generic dsm Provide a convenience wrapper for general purpose dataset management commands. The previous wrapper for deallocate was difficult to use correctly and only for deallocate. Note that the name is "dataset_management" as opposed to "data_set_management" to match the NVMe specification. It's questionable whether "dataset" is valid English, but it is best to match the specification. Change-Id: Ifc03d66dbabeabe8146968cf8a09f7ac3446ad68 Signed-off-by: Ben Walker --- doc/nvme/index.txt | 2 +- include/spdk/nvme.h | 32 +++++++++------ include/spdk/nvme_spec.h | 18 ++++++++- lib/bdev/nvme/blockdev_nvme.c | 19 ++++++--- lib/nvme/nvme_ns_cmd.c | 19 +++++---- .../nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c | 39 +++++++++++-------- 6 files changed, 87 insertions(+), 42 deletions(-) diff --git a/doc/nvme/index.txt b/doc/nvme/index.txt index 1eade9c86..3b2e9944a 100644 --- a/doc/nvme/index.txt +++ b/doc/nvme/index.txt @@ -46,7 +46,7 @@ Function | Description spdk_nvme_probe() | \copybrief spdk_nvme_probe() spdk_nvme_ns_cmd_read() | \copybrief spdk_nvme_ns_cmd_read() spdk_nvme_ns_cmd_write() | \copybrief spdk_nvme_ns_cmd_write() -spdk_nvme_ns_cmd_deallocate() | \copybrief spdk_nvme_ns_cmd_deallocate() +spdk_nvme_ns_cmd_dataset_management() | \copybrief spdk_nvme_ns_cmd_dataset_management() spdk_nvme_ns_cmd_flush() | \copybrief spdk_nvme_ns_cmd_flush() spdk_nvme_qpair_process_completions() | \copybrief spdk_nvme_qpair_process_completions() diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h index 9d7f4da1e..a98109b41 100644 --- a/include/spdk/nvme.h +++ b/include/spdk/nvme.h @@ -821,26 +821,36 @@ int spdk_nvme_ns_cmd_read_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpai uint16_t apptag_mask, uint16_t apptag); /** - * \brief Submits a deallocation request to the specified NVMe namespace. + * \brief Submits a data set management request to the specified NVMe namespace. Data set + * management operations are designed to optimize interaction with the block + * translation layer inside the device. The most common type of operation is + * deallocate, which is often referred to as TRIM or UNMAP. * - * \param ns NVMe namespace to submit the deallocation request + * \param ns NVMe namespace to submit the DSM request + * \param type A bit field constructed from \ref enum spdk_nvme_dsm_attribute. * \param qpair I/O queue pair to submit the request - * \param payload virtual address pointer to the list of LBA ranges to - * deallocate - * \param num_ranges number of ranges in the list pointed to by payload; must be - * between 1 and \ref SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES, inclusive. + * \param ranges An array of \ref spdk_nvme_dsm_range elements describing + the LBAs to operate on. + * \param num_ranges The number of elements in the ranges array. * \param cb_fn callback function to invoke when the I/O is completed * \param cb_arg argument to pass to the callback function * - * \return 0 if successfully submitted, ENOMEM if an nvme_request - * structure cannot be allocated for the I/O request + * \return 0 if successfully submitted, negated POSIX errno values otherwise. * * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). * The user must ensure that only one thread submits I/O on a given qpair at any given time. + * + * This is a convenience wrapper that will automatically allocate and construct the correct + * data buffers. Therefore, ranges does not need to be allocated from pinned memory and + * can be placed on the stack. If a higher performance, zero-copy version of DSM is + * required, simply build and submit a raw command using spdk_nvme_ctrlr_cmd_io_raw(). */ -int spdk_nvme_ns_cmd_deallocate(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, - void *payload, uint16_t num_ranges, - spdk_nvme_cmd_cb cb_fn, void *cb_arg); +int spdk_nvme_ns_cmd_dataset_management(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint32_t type, + const struct spdk_nvme_dsm_range *ranges, + uint16_t num_ranges, + spdk_nvme_cmd_cb cb_fn, + void *cb_arg); /** * \brief Submits a flush request to the specified NVMe namespace. diff --git a/include/spdk/nvme_spec.h b/include/spdk/nvme_spec.h index a754e7f20..f5631d243 100644 --- a/include/spdk/nvme_spec.h +++ b/include/spdk/nvme_spec.h @@ -446,7 +446,23 @@ SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_cpl) == 16, "Incorrect size"); * Dataset Management range */ struct spdk_nvme_dsm_range { - uint32_t attributes; + union { + struct { + uint32_t af : 4; /**< access frequencey */ + uint32_t al : 2; /**< access latency */ + uint32_t reserved0 : 2; + + uint32_t sr : 1; /**< sequential read range */ + uint32_t sw : 1; /**< sequential write range */ + uint32_t wp : 1; /**< write prepare */ + uint32_t reserved1 : 13; + + uint32_t access_size : 8; /**< command access size */ + } bits; + + uint32_t raw; + } attributes; + uint32_t length; uint64_t starting_lba; }; diff --git a/lib/bdev/nvme/blockdev_nvme.c b/lib/bdev/nvme/blockdev_nvme.c index 16633ecbc..2bda00470 100644 --- a/lib/bdev/nvme/blockdev_nvme.c +++ b/lib/bdev/nvme/blockdev_nvme.c @@ -90,7 +90,7 @@ struct nvme_io_channel { #define NVME_DEFAULT_MAX_UNMAP_BDESC_COUNT 1 struct nvme_blockio { - struct spdk_nvme_dsm_range dsm_range[NVME_DEFAULT_MAX_UNMAP_BDESC_COUNT]; + int reserved; }; enum data_direction { @@ -679,16 +679,23 @@ blockdev_nvme_unmap(struct nvme_blockdev *nbdev, struct spdk_io_channel *ch, { struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); int rc = 0, i; + struct spdk_nvme_dsm_range dsm_range[NVME_DEFAULT_MAX_UNMAP_BDESC_COUNT]; + + if (bdesc_count > NVME_DEFAULT_MAX_UNMAP_BDESC_COUNT) { + return -1; + } for (i = 0; i < bdesc_count; i++) { - bio->dsm_range[i].starting_lba = - nbdev->lba_start + from_be64(&unmap_d->lba); - bio->dsm_range[i].length = from_be32(&unmap_d->block_count); + dsm_range[i].starting_lba = nbdev->lba_start + from_be64(&unmap_d->lba); + dsm_range[i].length = from_be32(&unmap_d->block_count); + dsm_range[i].attributes.raw = 0; unmap_d++; } - rc = spdk_nvme_ns_cmd_deallocate(nbdev->ns, nvme_ch->qpair, bio->dsm_range, bdesc_count, - queued_done, bio); + rc = spdk_nvme_ns_cmd_dataset_management(nbdev->ns, nvme_ch->qpair, + SPDK_NVME_DSM_ATTR_DEALLOCATE, + dsm_range, bdesc_count, + queued_done, bio); if (rc != 0) return -1; diff --git a/lib/nvme/nvme_ns_cmd.c b/lib/nvme/nvme_ns_cmd.c index 757614998..659681916 100644 --- a/lib/nvme/nvme_ns_cmd.c +++ b/lib/nvme/nvme_ns_cmd.c @@ -396,8 +396,10 @@ spdk_nvme_ns_cmd_write_zeroes(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *q } int -spdk_nvme_ns_cmd_deallocate(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *payload, - uint16_t num_ranges, spdk_nvme_cmd_cb cb_fn, void *cb_arg) +spdk_nvme_ns_cmd_dataset_management(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint32_t type, + const struct spdk_nvme_dsm_range *ranges, uint16_t num_ranges, + spdk_nvme_cmd_cb cb_fn, void *cb_arg) { struct nvme_request *req; struct spdk_nvme_cmd *cmd; @@ -406,9 +408,13 @@ spdk_nvme_ns_cmd_deallocate(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpa return -EINVAL; } - req = nvme_allocate_request_contig(payload, - num_ranges * sizeof(struct spdk_nvme_dsm_range), - cb_fn, cb_arg); + if (ranges == NULL) { + return -EINVAL; + } + + req = nvme_allocate_request_user_copy((void *)ranges, + num_ranges * sizeof(struct spdk_nvme_dsm_range), + cb_fn, cb_arg, true); if (req == NULL) { return -ENOMEM; } @@ -417,9 +423,8 @@ spdk_nvme_ns_cmd_deallocate(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpa cmd->opc = SPDK_NVME_OPC_DATASET_MANAGEMENT; cmd->nsid = ns->id; - /* TODO: create a delete command data structure */ cmd->cdw10 = num_ranges - 1; - cmd->cdw11 = SPDK_NVME_DSM_ATTR_DEALLOCATE; + cmd->cdw11 = type; return nvme_qpair_submit_request(qpair, req); } diff --git a/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c b/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c index 520573987..9d0a91754 100644 --- a/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c +++ b/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c @@ -448,45 +448,51 @@ test_nvme_ns_cmd_write_zeroes(void) } static void -test_nvme_ns_cmd_deallocate(void) +test_nvme_ns_cmd_dataset_management(void) { struct spdk_nvme_ns ns; struct spdk_nvme_ctrlr ctrlr; struct spdk_nvme_qpair qpair; spdk_nvme_cmd_cb cb_fn = NULL; void *cb_arg = NULL; - uint16_t num_ranges = 1; - void *payload = NULL; + struct spdk_nvme_dsm_range ranges[256]; + uint16_t i; int rc = 0; prepare_for_test(&ns, &ctrlr, &qpair, 512, 128 * 1024, 0); - payload = malloc(num_ranges * sizeof(struct spdk_nvme_dsm_range)); - rc = spdk_nvme_ns_cmd_deallocate(&ns, &qpair, payload, num_ranges, cb_fn, cb_arg); + for (i = 0; i < 256; i++) { + ranges[i].starting_lba = i; + ranges[i].length = 1; + ranges[i].attributes.raw = 0; + } + + /* TRIM one LBA */ + rc = spdk_nvme_ns_cmd_dataset_management(&ns, &qpair, SPDK_NVME_DSM_ATTR_DEALLOCATE, + ranges, 1, cb_fn, cb_arg); SPDK_CU_ASSERT_FATAL(rc == 0); SPDK_CU_ASSERT_FATAL(g_request != NULL); CU_ASSERT(g_request->cmd.opc == SPDK_NVME_OPC_DATASET_MANAGEMENT); CU_ASSERT(g_request->cmd.nsid == ns.id); - CU_ASSERT(g_request->cmd.cdw10 == num_ranges - 1u); + CU_ASSERT(g_request->cmd.cdw10 == 0); CU_ASSERT(g_request->cmd.cdw11 == SPDK_NVME_DSM_ATTR_DEALLOCATE); - free(payload); + nvme_free(g_request->payload.u.contig); nvme_free_request(g_request); - num_ranges = 256; - payload = malloc(num_ranges * sizeof(struct spdk_nvme_dsm_range)); - rc = spdk_nvme_ns_cmd_deallocate(&ns, &qpair, payload, num_ranges, cb_fn, cb_arg); + /* TRIM 256 LBAs */ + rc = spdk_nvme_ns_cmd_dataset_management(&ns, &qpair, SPDK_NVME_DSM_ATTR_DEALLOCATE, + ranges, 256, cb_fn, cb_arg); SPDK_CU_ASSERT_FATAL(rc == 0); SPDK_CU_ASSERT_FATAL(g_request != NULL); CU_ASSERT(g_request->cmd.opc == SPDK_NVME_OPC_DATASET_MANAGEMENT); CU_ASSERT(g_request->cmd.nsid == ns.id); - CU_ASSERT(g_request->cmd.cdw10 == num_ranges - 1u); + CU_ASSERT(g_request->cmd.cdw10 == 255u); CU_ASSERT(g_request->cmd.cdw11 == SPDK_NVME_DSM_ATTR_DEALLOCATE); - free(payload); + nvme_free(g_request->payload.u.contig); nvme_free_request(g_request); - payload = NULL; - num_ranges = 0; - rc = spdk_nvme_ns_cmd_deallocate(&ns, &qpair, payload, num_ranges, cb_fn, cb_arg); + rc = spdk_nvme_ns_cmd_dataset_management(&ns, &qpair, SPDK_NVME_DSM_ATTR_DEALLOCATE, + NULL, 0, cb_fn, cb_arg); CU_ASSERT(rc != 0); } @@ -782,7 +788,8 @@ int main(int argc, char **argv) || CU_add_test(suite, "split_test3", split_test3) == NULL || CU_add_test(suite, "split_test4", split_test4) == NULL || CU_add_test(suite, "nvme_ns_cmd_flush", test_nvme_ns_cmd_flush) == NULL - || CU_add_test(suite, "nvme_ns_cmd_deallocate", test_nvme_ns_cmd_deallocate) == NULL + || CU_add_test(suite, "nvme_ns_cmd_dataset_management", + test_nvme_ns_cmd_dataset_management) == NULL || CU_add_test(suite, "io_flags", test_io_flags) == NULL || CU_add_test(suite, "nvme_ns_cmd_write_zeroes", test_nvme_ns_cmd_write_zeroes) == NULL || CU_add_test(suite, "nvme_ns_cmd_reservation_register",