nvme: add support for ZNS zone append vector variant

We already have support for spdk_nvme_zns_zone_append(),
add support for spdk_nvme_zns_zone_appendv() (zone append with
NVME_PAYLOAD_TYPE_SGL).

_nvme_ns_cmd_rw() currently performs verification of the SGL,
if the parameter check_sgl is set. This parameter is set for all
calls with payload of type NVME_PAYLOAD_TYPE_SGL.

In order to be able to perform the same check_sgl verification on
zone append vectors, we need to refactor _nvme_ns_cmd_rw() a bit.

Setting check_sgl ensures that _nvme_ns_cmd_split_request_sgl() or
_nvme_ns_cmd_split_request_prp() gets called.

These functions will split an oversized I/O into several smaller
requests. However, they also iterate the SGE entries, verifying that
the total payload size is not too large, that there are not too many
SGE entries, and that buffers are properly aligned. A properly formed
request will not get split.

For zone append, splitting a request into several is not allowed,
however, we still want the verification part to be done, such that
(e.g.) a non first/last SGE which is not page aligned, will cause
the whole request to be rejected.
(In the case of spdk_nvme_ns_cmd_write(), a non first/last SGE which
is not page aligned will instead cause the request to be split.)

An alternative would be to try to rip out the verification part from
_nvme_ns_cmd_split_request_sgl() and _nvme_ns_cmd_split_request_prp().
However, that is non-trivial, and would most likely end up with a lot
of duplicated code, which would easily get out of sync.

Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
Change-Id: I2728acdcadeb70b1f0ed628704df19e75d14dcca
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6248
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Niklas Cassel 2021-01-26 09:42:58 +00:00 committed by Tomasz Zawadzki
parent b1b4b8676f
commit 014baeb8ef
6 changed files with 239 additions and 2 deletions

View File

@ -177,6 +177,69 @@ int spdk_nvme_zns_zone_append_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_
uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg,
uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag); uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag);
/**
 * Submit a zone append I/O to the specified NVMe namespace, using a
 * scattered (SGL) payload.
 *
 * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
 * The user must ensure that only one thread submits I/O on a given qpair at any
 * given time.
 *
 * Both reset_sgl_fn and next_sge_fn must be provided; the request is
 * rejected with -EINVAL otherwise.
 *
 * \param ns NVMe namespace to submit the zone append I/O.
 * \param qpair I/O queue pair to submit the request.
 * \param zslba Zone Start LBA of the zone that we are appending to.
 * \param lba_count Length (in sectors) for the zone append operation.
 * \param cb_fn Callback function to invoke when the I/O is completed.
 * \param cb_arg Argument to pass to the callback function.
 * \param io_flags Set flags, defined in nvme_spec.h, for this I/O.
 * \param reset_sgl_fn Callback function to reset scattered payload.
 * \param next_sge_fn Callback function to iterate each scattered payload memory
 * segment.
 *
 * \return 0 if successfully submitted, negated errnos on the following error conditions:
 * -EINVAL: The request is malformed.
 * -ENOMEM: The request cannot be allocated.
 * -ENXIO: The qpair is failed at the transport level.
 */
int spdk_nvme_zns_zone_appendv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
uint64_t zslba, uint32_t lba_count,
spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags,
spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
spdk_nvme_req_next_sge_cb next_sge_fn);
/**
 * Submit a zone append I/O to the specified NVMe namespace, using a
 * scattered (SGL) payload with a separate metadata buffer.
 *
 * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
 * The user must ensure that only one thread submits I/O on a given qpair at any
 * given time.
 *
 * Both reset_sgl_fn and next_sge_fn must be provided; the request is
 * rejected with -EINVAL otherwise.
 *
 * \param ns NVMe namespace to submit the zone append I/O.
 * \param qpair I/O queue pair to submit the request.
 * \param zslba Zone Start LBA of the zone that we are appending to.
 * \param lba_count Length (in sectors) for the zone append operation.
 * \param cb_fn Callback function to invoke when the I/O is completed.
 * \param cb_arg Argument to pass to the callback function.
 * \param io_flags Set flags, defined in nvme_spec.h, for this I/O.
 * \param reset_sgl_fn Callback function to reset scattered payload.
 * \param next_sge_fn Callback function to iterate each scattered payload memory
 * segment.
 * \param metadata Virtual address pointer to the metadata payload, the length
 * of metadata is specified by spdk_nvme_ns_get_md_size().
 * \param apptag_mask Application tag mask.
 * \param apptag Application tag to use end-to-end protection information.
 *
 * \return 0 if successfully submitted, negated errnos on the following error conditions:
 * -EINVAL: The request is malformed.
 * -ENOMEM: The request cannot be allocated.
 * -ENXIO: The qpair is failed at the transport level.
 */
int spdk_nvme_zns_zone_appendv_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
uint64_t zslba, uint32_t lba_count,
spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags,
spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata,
uint16_t apptag_mask, uint16_t apptag);
/** /**
* Submit a Close Zone operation to the specified NVMe namespace. * Submit a Close Zone operation to the specified NVMe namespace.
* *

View File

@ -1050,6 +1050,12 @@ int nvme_ns_cmd_zone_append_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qp
void *buffer, void *metadata, uint64_t zslba, void *buffer, void *metadata, uint64_t zslba,
uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg,
uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag); uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag);
int nvme_ns_cmd_zone_appendv_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
uint64_t zslba, uint32_t lba_count,
spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags,
spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata,
uint16_t apptag_mask, uint16_t apptag);
int nvme_fabric_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value); int nvme_fabric_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value);
int nvme_fabric_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value); int nvme_fabric_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value);

View File

@ -416,8 +416,12 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
/* Zone append commands cannot be split. */ /* Zone append commands cannot be split. */
if (opc == SPDK_NVME_OPC_ZONE_APPEND) { if (opc == SPDK_NVME_OPC_ZONE_APPEND) {
assert(ns->csi == SPDK_NVME_CSI_ZNS); assert(ns->csi == SPDK_NVME_CSI_ZNS);
_nvme_ns_cmd_setup_request(ns, req, opc, lba, lba_count, io_flags, apptag_mask, apptag); /*
return req; * As long as we disable driver-assisted striping for Zone append commands,
* _nvme_ns_cmd_rw() should never cause a proper request to be split.
* If a request is split, after all, error handling is done in caller functions.
*/
sectors_per_stripe = 0;
} }
/* /*
@ -815,6 +819,65 @@ nvme_ns_cmd_zone_append_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair
} }
} }
/*
 * Build and submit a Zone Append command with a scattered (SGL) payload
 * and an optional separate metadata buffer.
 *
 * Returns 0 on successful submission; -EINVAL for invalid flags, missing
 * SGL callbacks, an oversized I/O, or a request that would require
 * splitting; -ENOMEM if no request could be allocated.
 */
int
nvme_ns_cmd_zone_appendv_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
				 uint64_t zslba, uint32_t lba_count,
				 spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags,
				 spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
				 spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata,
				 uint16_t apptag_mask, uint16_t apptag)
{
	struct nvme_payload payload;
	struct nvme_request *req;
	int rc;

	/* Reject unknown I/O flag bits up front. */
	if (!_is_io_flags_valid(io_flags)) {
		return -EINVAL;
	}

	/* Both SGL iteration callbacks are mandatory for a vectored payload. */
	if (reset_sgl_fn == NULL || next_sge_fn == NULL) {
		return -EINVAL;
	}

	rc = nvme_ns_cmd_check_zone_append(ns, lba_count, io_flags);
	if (rc != 0) {
		return rc;
	}

	payload = NVME_PAYLOAD_SGL(reset_sgl_fn, next_sge_fn, cb_arg, metadata);

	req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, zslba, lba_count, cb_fn, cb_arg,
			      SPDK_NVME_OPC_ZONE_APPEND,
			      io_flags, apptag_mask, apptag, true);
	if (req == NULL) {
		/* Distinguish an over-length I/O from a plain allocation failure. */
		if (nvme_ns_check_request_length(lba_count,
						 ns->sectors_per_max_io,
						 ns->sectors_per_stripe,
						 qpair->ctrlr->opts.io_queue_requests)) {
			return -EINVAL;
		}
		return -ENOMEM;
	}

	/*
	 * Zone append commands cannot be split (num_children has to be 0).
	 * For NVME_PAYLOAD_TYPE_SGL, _nvme_ns_cmd_rw() can cause a split.
	 * However, _nvme_ns_cmd_split_request_sgl() and _nvme_ns_cmd_split_request_prp()
	 * do not always cause a request to be split. These functions verify payload size,
	 * verify num sge < max_sge, and verify SGE alignment rules (in case of PRPs).
	 * If any of the verifications fail, they will split the request.
	 * In our case, a split is very unlikely, since we already verified the size using
	 * nvme_ns_cmd_check_zone_append(), however, we still need to call these functions
	 * in order to perform the verification part. If they do cause a split, we return
	 * an error here. For proper requests, these functions will never cause a split.
	 */
	if (req->num_children) {
		nvme_request_free_children(req);
		nvme_free_request(req);
		return -EINVAL;
	}

	return nvme_qpair_submit_request(qpair, req);
}
int int
spdk_nvme_ns_cmd_write_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, spdk_nvme_ns_cmd_write_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
void *buffer, void *metadata, uint64_t lba, void *buffer, void *metadata, uint64_t lba,

View File

@ -92,6 +92,31 @@ spdk_nvme_zns_zone_append_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpai
cb_fn, cb_arg, io_flags, apptag_mask, apptag); cb_fn, cb_arg, io_flags, apptag_mask, apptag);
} }
/*
 * Public wrapper: zone append with an SGL payload and no separate
 * metadata buffer. Delegates to the _with_md variant with metadata
 * disabled (NULL buffer, zeroed app tag/mask).
 */
int
spdk_nvme_zns_zone_appendv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
			   uint64_t zslba, uint32_t lba_count,
			   spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags,
			   spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
			   spdk_nvme_req_next_sge_cb next_sge_fn)
{
	return nvme_ns_cmd_zone_appendv_with_md(ns, qpair, zslba, lba_count, cb_fn,
						cb_arg, io_flags, reset_sgl_fn,
						next_sge_fn, NULL, 0, 0);
}
/*
 * Public wrapper: zone append with an SGL payload and a separate
 * metadata buffer. Forwards all arguments to the internal
 * nvme_ns_cmd_zone_appendv_with_md() implementation.
 */
int
spdk_nvme_zns_zone_appendv_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
				   uint64_t zslba, uint32_t lba_count,
				   spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags,
				   spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
				   spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata,
				   uint16_t apptag_mask, uint16_t apptag)
{
	return nvme_ns_cmd_zone_appendv_with_md(ns, qpair, zslba, lba_count, cb_fn,
						cb_arg, io_flags, reset_sgl_fn,
						next_sge_fn, metadata, apptag_mask,
						apptag);
}
static int static int
nvme_zns_zone_mgmt_recv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, nvme_zns_zone_mgmt_recv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
void *payload, uint32_t payload_size, uint64_t slba, void *payload, uint32_t payload_size, uint64_t slba,

View File

@ -175,6 +175,8 @@
spdk_nvme_zns_ctrlr_get_max_zone_append_size; spdk_nvme_zns_ctrlr_get_max_zone_append_size;
spdk_nvme_zns_zone_append; spdk_nvme_zns_zone_append;
spdk_nvme_zns_zone_append_with_md; spdk_nvme_zns_zone_append_with_md;
spdk_nvme_zns_zone_appendv;
spdk_nvme_zns_zone_appendv_with_md;
spdk_nvme_zns_close_zone; spdk_nvme_zns_close_zone;
spdk_nvme_zns_finish_zone; spdk_nvme_zns_finish_zone;
spdk_nvme_zns_open_zone; spdk_nvme_zns_open_zone;

View File

@ -1569,6 +1569,83 @@ test_nvme_ns_cmd_zone_append_with_md(void)
free(metadata); free(metadata);
} }
/*
 * Unit test for nvme_ns_cmd_zone_appendv_with_md().
 *
 * Case 1: an I/O exactly at the max zone append size must be accepted and
 * produce a single, unsplit SGL request with the expected fields.
 * Case 2: an I/O exceeding the max zone append size must be rejected with
 * -EINVAL before any request is created.
 */
static void
test_nvme_ns_cmd_zone_appendv_with_md(void)
{
	struct spdk_nvme_ns ns;
	struct spdk_nvme_ctrlr ctrlr;
	struct spdk_nvme_qpair qpair;
	uint32_t sector_size = 512;
	uint32_t md_size = 128;
	uint32_t lba_count;
	uint64_t sge_length;
	char *md_buf = NULL;
	int rc;

	md_buf = malloc(md_size * 384);
	SPDK_CU_ASSERT_FATAL(md_buf != NULL);

	/*
	 * 512 byte data + 128 byte metadata, separate metadata buffer.
	 * Max data transfer size 256 KB, max zone append size 128 KB.
	 * 256 blocks * 512 bytes per block = 128 KB I/O, equal to the
	 * zone append limit: test should pass.
	 */
	lba_count = 256;
	sge_length = lba_count * sector_size;
	prepare_for_test(&ns, &ctrlr, &qpair, sector_size, md_size, 256 * 1024, 0, false);
	ctrlr.max_zone_append_size = 128 * 1024;
	ctrlr.flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
	ns.csi = SPDK_NVME_CSI_ZNS;

	rc = nvme_ns_cmd_zone_appendv_with_md(&ns, &qpair, 0x0, lba_count, NULL, &sge_length, 0,
					      nvme_request_reset_sgl, nvme_request_next_sge,
					      md_buf, 0, 0);
	SPDK_CU_ASSERT_FATAL(rc == 0);
	SPDK_CU_ASSERT_FATAL(g_request != NULL);
	SPDK_CU_ASSERT_FATAL(g_request->num_children == 0);
	CU_ASSERT(g_request->payload.md == md_buf);
	CU_ASSERT(g_request->md_size == lba_count * md_size);
	CU_ASSERT(g_request->payload_size == lba_count * sector_size);
	CU_ASSERT(g_request->cmd.opc == SPDK_NVME_OPC_ZONE_APPEND);
	CU_ASSERT(nvme_payload_type(&g_request->payload) == NVME_PAYLOAD_TYPE_SGL);
	CU_ASSERT(g_request->payload.reset_sgl_fn == nvme_request_reset_sgl);
	CU_ASSERT(g_request->payload.next_sge_fn == nvme_request_next_sge);
	CU_ASSERT(g_request->payload.contig_or_cb_arg == &sge_length);
	CU_ASSERT(g_request->cmd.nsid == ns.id);

	nvme_free_request(g_request);
	cleanup_after_test(&qpair);

	/*
	 * Same layout and limits, but 512 blocks * 512 bytes per block
	 * = 256 KB I/O, which exceeds the 128 KB max zone append size:
	 * test should fail.
	 */
	lba_count = 512;
	sge_length = lba_count * sector_size;
	prepare_for_test(&ns, &ctrlr, &qpair, sector_size, md_size, 256 * 1024, 0, false);
	ctrlr.max_zone_append_size = 128 * 1024;
	ctrlr.flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
	ns.csi = SPDK_NVME_CSI_ZNS;

	rc = nvme_ns_cmd_zone_appendv_with_md(&ns, &qpair, 0x0, lba_count, NULL, &sge_length, 0,
					      nvme_request_reset_sgl, nvme_request_next_sge,
					      md_buf, 0, 0);
	SPDK_CU_ASSERT_FATAL(rc == -EINVAL);
	SPDK_CU_ASSERT_FATAL(g_request == NULL);
	cleanup_after_test(&qpair);

	free(md_buf);
}
static void static void
test_nvme_ns_cmd_read_with_md(void) test_nvme_ns_cmd_read_with_md(void)
{ {
@ -1850,6 +1927,7 @@ int main(int argc, char **argv)
CU_ADD_TEST(suite, test_nvme_ns_cmd_writev); CU_ADD_TEST(suite, test_nvme_ns_cmd_writev);
CU_ADD_TEST(suite, test_nvme_ns_cmd_write_with_md); CU_ADD_TEST(suite, test_nvme_ns_cmd_write_with_md);
CU_ADD_TEST(suite, test_nvme_ns_cmd_zone_append_with_md); CU_ADD_TEST(suite, test_nvme_ns_cmd_zone_append_with_md);
CU_ADD_TEST(suite, test_nvme_ns_cmd_zone_appendv_with_md);
CU_ADD_TEST(suite, test_nvme_ns_cmd_comparev); CU_ADD_TEST(suite, test_nvme_ns_cmd_comparev);
CU_ADD_TEST(suite, test_nvme_ns_cmd_compare_and_write); CU_ADD_TEST(suite, test_nvme_ns_cmd_compare_and_write);
CU_ADD_TEST(suite, test_nvme_ns_cmd_compare_with_md); CU_ADD_TEST(suite, test_nvme_ns_cmd_compare_with_md);