nvme: add support for ZNS zone append command

The Zone Append command is an optional command in the Zoned Namespace
Command Set.

Zone Append differs from a regular write, in that the command is not
given an exact LBA of where to write the data.

Instead the user has to set the zslba field to the start of a zone,
and the data will be appended to that zone.

The actual LBA where the data was stored is returned in the
spdk_nvme_cpl, where Dword0 contains bits 31:00 of the ALBA field,
and Dword1 contains bits 63:32 of the ALBA field.

Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
Change-Id: Iabae1b3456bfbb62c07b63d79afe9a14e460fe83
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6013
Community-CI: Broadcom CI
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
Niklas Cassel 2021-01-19 11:10:56 +00:00 committed by Tomasz Zawadzki
parent c078941ca1
commit aa6767fb14
6 changed files with 267 additions and 0 deletions

View File

@ -107,6 +107,64 @@ const struct spdk_nvme_zns_ctrlr_data *spdk_nvme_zns_ctrlr_get_data(struct spdk_
*/
uint32_t spdk_nvme_zns_ctrlr_get_max_zone_append_size(const struct spdk_nvme_ctrlr *ctrlr);
/**
 * Submit a zone append I/O to the specified NVMe namespace.
 *
 * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
 * The user must ensure that only one thread submits I/O on a given qpair at any
 * given time.
 *
 * The LBA at which the data was actually written (the ALBA field) is returned
 * in the completion: spdk_nvme_cpl cdw0 carries bits 31:00 and cdw1 carries
 * bits 63:32.
 *
 * \param ns NVMe namespace to submit the zone append I/O.
 * \param qpair I/O queue pair to submit the request.
 * \param buffer Virtual address pointer to the data payload buffer.
 * \param zslba Zone Start LBA of the zone that we are appending to.
 * \param lba_count Length (in sectors) for the zone append operation.
 * \param cb_fn Callback function to invoke when the I/O is completed.
 * \param cb_arg Argument to pass to the callback function.
 * \param io_flags Set flags, defined by the SPDK_NVME_IO_FLAGS_* entries in
 * spdk/nvme_spec.h, for this I/O.
 *
 * \return 0 if successfully submitted, negated errnos on the following error conditions:
 * -EINVAL: The request is malformed.
 * -ENOMEM: The request cannot be allocated.
 * -ENXIO: The qpair is failed at the transport level.
 */
int spdk_nvme_zns_zone_append(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
			      void *buffer, uint64_t zslba,
			      uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg,
			      uint32_t io_flags);
/**
 * Submit a zone append I/O to the specified NVMe namespace, with a separate
 * metadata payload.
 *
 * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
 * The user must ensure that only one thread submits I/O on a given qpair at any
 * given time.
 *
 * The LBA at which the data was actually written (the ALBA field) is returned
 * in the completion: spdk_nvme_cpl cdw0 carries bits 31:00 and cdw1 carries
 * bits 63:32.
 *
 * \param ns NVMe namespace to submit the zone append I/O.
 * \param qpair I/O queue pair to submit the request.
 * \param buffer Virtual address pointer to the data payload buffer.
 * \param metadata Virtual address pointer to the metadata payload, the length
 * of metadata is specified by spdk_nvme_ns_get_md_size().
 * \param zslba Zone Start LBA of the zone that we are appending to.
 * \param lba_count Length (in sectors) for the zone append operation.
 * \param cb_fn Callback function to invoke when the I/O is completed.
 * \param cb_arg Argument to pass to the callback function.
 * \param io_flags Set flags, defined by the SPDK_NVME_IO_FLAGS_* entries in
 * spdk/nvme_spec.h, for this I/O.
 * \param apptag_mask Application tag mask.
 * \param apptag Application tag to use end-to-end protection information.
 *
 * \return 0 if successfully submitted, negated errnos on the following error conditions:
 * -EINVAL: The request is malformed.
 * -ENOMEM: The request cannot be allocated.
 * -ENXIO: The qpair is failed at the transport level.
 */
int spdk_nvme_zns_zone_append_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
				      void *buffer, void *metadata, uint64_t zslba,
				      uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg,
				      uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag);
/**
* Submit a Close Zone operation to the specified NVMe namespace.
*

View File

@ -1038,6 +1038,10 @@ int nvme_ns_construct(struct spdk_nvme_ns *ns, uint32_t id,
struct spdk_nvme_ctrlr *ctrlr);
void nvme_ns_destruct(struct spdk_nvme_ns *ns);
int nvme_ns_update(struct spdk_nvme_ns *ns);
/* Internal implementation behind the public spdk_nvme_zns_zone_append*()
 * wrappers: validates flags and size, builds a Zone Append request and
 * submits it on the given qpair. Returns 0 or a negated errno. */
int nvme_ns_cmd_zone_append_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
				    void *buffer, void *metadata, uint64_t zslba,
				    uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg,
				    uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag);
int nvme_fabric_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value);
int nvme_fabric_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value);

View File

@ -405,6 +405,13 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
req->payload_offset = payload_offset;
req->md_offset = md_offset;
/* Zone append commands cannot be split. */
if (opc == SPDK_NVME_OPC_ZONE_APPEND) {
assert(ns->csi == SPDK_NVME_CSI_ZNS);
_nvme_ns_cmd_setup_request(ns, req, opc, lba, lba_count, io_flags, apptag_mask, apptag);
return req;
}
/*
* Intel DC P3*00 NVMe controllers benefit from driver-assisted striping.
* If this controller defines a stripe boundary and this I/O spans a stripe
@ -732,6 +739,62 @@ spdk_nvme_ns_cmd_write(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
}
}
/* Validate a zone append request against controller capabilities.
 *
 * Returns 0 if the request may proceed, -EINVAL if the controller does not
 * support zone append or the request exceeds the maximum zone append size.
 */
static int
nvme_ns_cmd_check_zone_append(struct spdk_nvme_ns *ns, uint32_t lba_count, uint32_t io_flags)
{
	uint32_t sector_size;

	/* Not all NVMe Zoned Namespaces support the zone append command. */
	if (!(ns->ctrlr->flags & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED)) {
		return -EINVAL;
	}

	sector_size = _nvme_get_host_buffer_sector_size(ns, io_flags);

	/* Fail a too large zone append command early. Widen the multiply to
	 * 64 bits: lba_count * sector_size can exceed UINT32_MAX (e.g.
	 * lba_count = 0x800000 with 512-byte sectors), and a wrapped 32-bit
	 * product could incorrectly pass this check. */
	if ((uint64_t)lba_count * sector_size > ns->ctrlr->max_zone_append_size) {
		return -EINVAL;
	}

	return 0;
}
/* Build and submit a Zone Append command with optional separate metadata.
 * Returns 0 on successful submission, or a negated errno on failure. */
int
nvme_ns_cmd_zone_append_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
				void *buffer, void *metadata, uint64_t zslba,
				uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg,
				uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag)
{
	struct nvme_payload payload;
	struct nvme_request *req;
	int rc;

	if (!_is_io_flags_valid(io_flags)) {
		return -EINVAL;
	}

	rc = nvme_ns_cmd_check_zone_append(ns, lba_count, io_flags);
	if (rc != 0) {
		return rc;
	}

	payload = NVME_PAYLOAD_CONTIG(buffer, metadata);

	req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, zslba, lba_count, cb_fn, cb_arg,
			      SPDK_NVME_OPC_ZONE_APPEND,
			      io_flags, apptag_mask, apptag, false);
	if (req == NULL) {
		/* Distinguish an over-length request from request exhaustion. */
		if (nvme_ns_check_request_length(lba_count,
						 ns->sectors_per_max_io,
						 ns->sectors_per_stripe,
						 qpair->ctrlr->opts.io_queue_requests)) {
			return -EINVAL;
		}
		return -ENOMEM;
	}

	return nvme_qpair_submit_request(qpair, req);
}
int
spdk_nvme_ns_cmd_write_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
void *buffer, void *metadata, uint64_t lba,

View File

@ -66,6 +66,26 @@ spdk_nvme_zns_ctrlr_get_max_zone_append_size(const struct spdk_nvme_ctrlr *ctrlr
return ctrlr->max_zone_append_size;
}
/* Public zone append without metadata: delegate to the internal
 * implementation with a NULL metadata buffer and zeroed app tags. */
int
spdk_nvme_zns_zone_append(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
			  void *buffer, uint64_t zslba,
			  uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg,
			  uint32_t io_flags)
{
	return nvme_ns_cmd_zone_append_with_md(ns, qpair, buffer, NULL, zslba, lba_count,
					       cb_fn, cb_arg, io_flags, 0, 0);
}
/* Public zone append with separate metadata: thin pass-through to the
 * internal implementation. */
int
spdk_nvme_zns_zone_append_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
				  void *buffer, void *metadata, uint64_t zslba,
				  uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg,
				  uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag)
{
	return nvme_ns_cmd_zone_append_with_md(ns, qpair, buffer, metadata, zslba, lba_count,
					       cb_fn, cb_arg, io_flags, apptag_mask, apptag);
}
static int
nvme_zns_zone_mgmt_recv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
void *payload, uint32_t payload_size, uint64_t slba,

View File

@ -168,6 +168,8 @@
spdk_nvme_zns_ns_get_num_zones;
spdk_nvme_zns_ctrlr_get_data;
spdk_nvme_zns_ctrlr_get_max_zone_append_size;
spdk_nvme_zns_zone_append;
spdk_nvme_zns_zone_append_with_md;
spdk_nvme_zns_close_zone;
spdk_nvme_zns_finish_zone;
spdk_nvme_zns_open_zone;

View File

@ -1471,6 +1471,125 @@ test_nvme_ns_cmd_write_with_md(void)
free(metadata);
}
/*
 * Unit test for nvme_ns_cmd_zone_append_with_md(): verifies that appends
 * within the controller's max zone append size build a single (never split)
 * request with correct payload/metadata sizes, and that oversized appends
 * are rejected early with -EINVAL, for both separate-metadata and
 * extended-LBA formats.
 */
static void
test_nvme_ns_cmd_zone_append_with_md(void)
{
	struct spdk_nvme_ns ns;
	struct spdk_nvme_ctrlr ctrlr;
	struct spdk_nvme_qpair qpair;
	int rc = 0;
	char *buffer = NULL;
	char *metadata = NULL;
	uint32_t block_size, md_size;

	block_size = 512;
	md_size = 128;

	/* 384 blocks covers the largest passing I/O in any case below. */
	buffer = malloc((block_size + md_size) * 384);
	SPDK_CU_ASSERT_FATAL(buffer != NULL);
	metadata = malloc(md_size * 384);
	SPDK_CU_ASSERT_FATAL(metadata != NULL);

	/*
	 * 512 byte data + 128 byte metadata
	 * Separate metadata buffer
	 * Max data transfer size 256 KB
	 * Max zone append size 128 KB
	 *
	 * 256 blocks * 512 bytes per block = 128 KB I/O
	 * 128 KB I/O <= max zone append size. Test should pass.
	 */
	prepare_for_test(&ns, &ctrlr, &qpair, 512, 128, 256 * 1024, 0, false);
	ctrlr.max_zone_append_size = 128 * 1024;
	ctrlr.flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
	ns.csi = SPDK_NVME_CSI_ZNS;

	rc = nvme_ns_cmd_zone_append_with_md(&ns, &qpair, buffer, metadata, 0x0, 256,
					     NULL, NULL, 0, 0, 0);
	SPDK_CU_ASSERT_FATAL(rc == 0);
	SPDK_CU_ASSERT_FATAL(g_request != NULL);
	/* Zone append commands must never be split into child requests. */
	SPDK_CU_ASSERT_FATAL(g_request->num_children == 0);
	CU_ASSERT(g_request->payload.md == metadata);
	CU_ASSERT(g_request->md_size == 256 * 128);
	CU_ASSERT(g_request->payload_size == 256 * 512);
	nvme_free_request(g_request);
	cleanup_after_test(&qpair);

	/*
	 * 512 byte data + 128 byte metadata
	 * Separate metadata buffer
	 * Max data transfer size 256 KB
	 * Max zone append size 128 KB
	 *
	 * 512 blocks * 512 bytes per block = 256 KB I/O
	 * 256 KB I/O > max zone append size. Test should fail.
	 */
	prepare_for_test(&ns, &ctrlr, &qpair, 512, 128, 256 * 1024, 0, false);
	ctrlr.max_zone_append_size = 128 * 1024;
	ctrlr.flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
	ns.csi = SPDK_NVME_CSI_ZNS;

	rc = nvme_ns_cmd_zone_append_with_md(&ns, &qpair, buffer, metadata, 0x0, 512,
					     NULL, NULL, 0, 0, 0);
	/* Rejected before any request is allocated. */
	SPDK_CU_ASSERT_FATAL(rc == -EINVAL);
	SPDK_CU_ASSERT_FATAL(g_request == NULL);
	cleanup_after_test(&qpair);

	/*
	 * 512 byte data + 128 byte metadata
	 * Extended LBA
	 * Max data transfer size 256 KB
	 * Max zone append size 128 KB
	 *
	 * 128 blocks * (512 + 128) bytes per block = 80 KB I/O
	 * 80 KB I/O <= max zone append size. Test should pass.
	 */
	prepare_for_test(&ns, &ctrlr, &qpair, 512, 128, 256 * 1024, 0, true);
	ctrlr.max_zone_append_size = 128 * 1024;
	ctrlr.flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
	ns.csi = SPDK_NVME_CSI_ZNS;

	rc = nvme_ns_cmd_zone_append_with_md(&ns, &qpair, buffer, NULL, 0x0, 128,
					     NULL, NULL, 0, 0, 0);
	SPDK_CU_ASSERT_FATAL(rc == 0);
	SPDK_CU_ASSERT_FATAL(g_request != NULL);
	SPDK_CU_ASSERT_FATAL(g_request->num_children == 0);
	/* Extended LBA: metadata is interleaved, so no separate md buffer. */
	CU_ASSERT(g_request->payload.md == NULL);
	CU_ASSERT(g_request->payload_offset == 0);
	CU_ASSERT(g_request->payload_size == 128 * (512 + 128));
	nvme_free_request(g_request);
	cleanup_after_test(&qpair);

	/*
	 * 512 byte data + 128 byte metadata
	 * Extended LBA
	 * Max data transfer size 256 KB
	 * Max zone append size 128 KB
	 *
	 * 256 blocks * (512 + 128) bytes per block = 160 KB I/O
	 * 160 KB I/O > max zone append size. Test should fail.
	 */
	prepare_for_test(&ns, &ctrlr, &qpair, 512, 128, 256 * 1024, 0, true);
	ctrlr.max_zone_append_size = 128 * 1024;
	ctrlr.flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
	ns.csi = SPDK_NVME_CSI_ZNS;

	rc = nvme_ns_cmd_zone_append_with_md(&ns, &qpair, buffer, NULL, 0x0, 256,
					     NULL, NULL, 0, 0, 0);
	SPDK_CU_ASSERT_FATAL(rc == -EINVAL);
	SPDK_CU_ASSERT_FATAL(g_request == NULL);
	cleanup_after_test(&qpair);

	free(buffer);
	free(metadata);
}
static void
test_nvme_ns_cmd_read_with_md(void)
{
@ -1762,6 +1881,7 @@ int main(int argc, char **argv)
CU_ADD_TEST(suite, test_nvme_ns_cmd_read_with_md);
CU_ADD_TEST(suite, test_nvme_ns_cmd_writev);
CU_ADD_TEST(suite, test_nvme_ns_cmd_write_with_md);
CU_ADD_TEST(suite, test_nvme_ns_cmd_zone_append_with_md);
CU_ADD_TEST(suite, test_nvme_ns_cmd_comparev);
CU_ADD_TEST(suite, test_nvme_ns_cmd_compare_and_write);
CU_ADD_TEST(suite, test_nvme_ns_cmd_compare_with_md);