From 6127461c934a16e6935990be76433926b6321c8b Mon Sep 17 00:00:00 2001 From: matthewb Date: Mon, 12 Apr 2021 08:03:51 -0400 Subject: [PATCH] lib/bdev: Added iov to spdk_bdev_zcopy_start Adding iov to the spdk_bdev_zcopy_start function enable spdk_bdev_zcopy_start to be used by transport layers as the iov is owned by the transport command Signed-off-by: matthewb Change-Id: I6d2be7f49566048bf25b7711ada8d2fb49fea6ee Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6816 Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Shuhei Matsumoto Reviewed-by: Aleksey Marchuk Reviewed-by: Michael Haeuptle Reviewed-by: Ben Walker --- CHANGELOG.md | 2 + include/spdk/bdev.h | 4 +- lib/bdev/bdev.c | 5 +- lib/bdev/part.c | 2 +- module/bdev/passthru/vbdev_passthru.c | 3 +- test/bdev/bdevperf/bdevperf.c | 4 +- test/external_code/passthru/vbdev_passthru.c | 3 +- test/unit/lib/bdev/bdev.c/bdev_ut.c | 220 ++++++++++++++++++- 8 files changed, 234 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad8c161bb..cc4929eed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,8 @@ Rados Cluster names. Removed ZCOPY emulation: The bdev module can be checked to see if it supports ZCOPY and if not supported then use existing READ/WRITE commands. +Added iov to spdk_bdev_zcopy_start + ## v21.04: ### accel diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h index 36cdb02a3..a7fabc790 100644 --- a/include/spdk/bdev.h +++ b/include/spdk/bdev.h @@ -1226,6 +1226,8 @@ int spdk_bdev_comparev_and_writev_blocks(struct spdk_bdev_desc *desc, struct spd * * \param desc Block device descriptor * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list to be populated with the buffers + * \param iovcnt The maximum number of elements in iov. * \param offset_blocks The offset, in blocks, from the start of the block device. * \param num_blocks The number of blocks. * \param populate Whether the data buffer should be populated with the @@ -1239,11 +1241,11 @@ int spdk_bdev_comparev_and_writev_blocks(struct spdk_bdev_desc *desc, struct spd * negated errno on failure, in which case the callback will not be called. */ int spdk_bdev_zcopy_start(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, uint64_t offset_blocks, uint64_t num_blocks, bool populate, spdk_bdev_io_completion_cb cb, void *cb_arg); - /** * Submit a request to release a data buffer representing a range of blocks. * diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index ac6bc014a..4a4a4573d 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -4315,6 +4315,7 @@ spdk_bdev_comparev_and_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io int spdk_bdev_zcopy_start(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, uint64_t offset_blocks, uint64_t num_blocks, bool populate, spdk_bdev_io_completion_cb cb, void *cb_arg) @@ -4345,8 +4346,8 @@ spdk_bdev_zcopy_start(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, bdev_io->type = SPDK_BDEV_IO_TYPE_ZCOPY; bdev_io->u.bdev.num_blocks = num_blocks; bdev_io->u.bdev.offset_blocks = offset_blocks; - bdev_io->u.bdev.iovs = NULL; - bdev_io->u.bdev.iovcnt = 0; + bdev_io->u.bdev.iovs = iov; + bdev_io->u.bdev.iovcnt = iovcnt; bdev_io->u.bdev.md_buf = NULL; bdev_io->u.bdev.zcopy.populate = populate ? 1 : 0; bdev_io->u.bdev.zcopy.commit = 0; diff --git a/lib/bdev/part.c b/lib/bdev/part.c index 917874e12..1922e89e0 100644 --- a/lib/bdev/part.c +++ b/lib/bdev/part.c @@ -374,7 +374,7 @@ spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bde bdev_part_complete_io, bdev_io); break; case SPDK_BDEV_IO_TYPE_ZCOPY: - rc = spdk_bdev_zcopy_start(base_desc, base_ch, remapped_offset, + rc = spdk_bdev_zcopy_start(base_desc, base_ch, NULL, 0, remapped_offset, bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.zcopy.populate, bdev_part_complete_zcopy_io, bdev_io); break; diff --git a/module/bdev/passthru/vbdev_passthru.c b/module/bdev/passthru/vbdev_passthru.c index 5d72bce46..06d245b63 100644 --- a/module/bdev/passthru/vbdev_passthru.c +++ b/module/bdev/passthru/vbdev_passthru.c @@ -347,7 +347,8 @@ vbdev_passthru_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *b _pt_complete_io, bdev_io); break; case SPDK_BDEV_IO_TYPE_ZCOPY: - rc = spdk_bdev_zcopy_start(pt_node->base_desc, pt_ch->base_ch, bdev_io->u.bdev.offset_blocks, + rc = spdk_bdev_zcopy_start(pt_node->base_desc, pt_ch->base_ch, NULL, 0, + bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.zcopy.populate, _pt_complete_zcopy_io, bdev_io); break; diff --git a/test/bdev/bdevperf/bdevperf.c b/test/bdev/bdevperf/bdevperf.c index df50a11b6..c07d7b684 100644 --- a/test/bdev/bdevperf/bdevperf.c +++ b/test/bdev/bdevperf/bdevperf.c @@ -716,7 +716,7 @@ bdevperf_submit_task(void *arg) break; case SPDK_BDEV_IO_TYPE_READ: if (g_zcopy) { - rc = spdk_bdev_zcopy_start(desc, ch, task->offset_blocks, job->io_size_blocks, + rc = spdk_bdev_zcopy_start(desc, ch, NULL, 0, task->offset_blocks, job->io_size_blocks, true, bdevperf_zcopy_populate_complete, task); } else { if (spdk_bdev_is_md_separate(job->bdev)) { @@ -803,7 +803,7 @@ bdevperf_prep_zcopy_write_task(void *arg) struct bdevperf_job *job = task->job; int rc; - rc = spdk_bdev_zcopy_start(job->bdev_desc, job->ch, + rc = spdk_bdev_zcopy_start(job->bdev_desc, job->ch, NULL, 0, task->offset_blocks, job->io_size_blocks, false, bdevperf_zcopy_get_buf_complete, task); if (rc != 0) { diff --git a/test/external_code/passthru/vbdev_passthru.c b/test/external_code/passthru/vbdev_passthru.c index 80e41162c..54e010142 100644 --- a/test/external_code/passthru/vbdev_passthru.c +++ b/test/external_code/passthru/vbdev_passthru.c @@ -347,7 +347,8 @@ vbdev_passthru_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *b _pt_complete_io, bdev_io); break; case SPDK_BDEV_IO_TYPE_ZCOPY: - rc = spdk_bdev_zcopy_start(pt_node->base_desc, pt_ch->base_ch, bdev_io->u.bdev.offset_blocks, + rc = spdk_bdev_zcopy_start(pt_node->base_desc, pt_ch->base_ch, bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.zcopy.populate, _pt_complete_zcopy_io, bdev_io); break; diff --git a/test/unit/lib/bdev/bdev.c/bdev_ut.c b/test/unit/lib/bdev/bdev.c/bdev_ut.c index 54c555f7b..a0350bafa 100644 --- a/test/unit/lib/bdev/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/bdev.c/bdev_ut.c @@ -116,6 +116,11 @@ static void *g_compare_write_buf; static uint32_t g_compare_write_buf_len; static bool g_abort_done; static enum spdk_bdev_io_status g_abort_status; +static void *g_zcopy_read_buf; +static uint32_t g_zcopy_read_buf_len; +static void *g_zcopy_write_buf; +static uint32_t g_zcopy_write_buf_len; +static struct spdk_bdev_io *g_zcopy_bdev_io; static struct ut_expected_io * ut_alloc_expected_io(uint8_t type, uint64_t offset, uint64_t length, int iovcnt) @@ -190,6 +195,43 @@ stub_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io) } } + if (bdev_io->type == SPDK_BDEV_IO_TYPE_ZCOPY) { + if (bdev_io->u.bdev.zcopy.start) { + g_zcopy_bdev_io = bdev_io; + if (bdev_io->u.bdev.zcopy.populate) { + /* Start of a read */ + CU_ASSERT(g_zcopy_read_buf != NULL); + CU_ASSERT(g_zcopy_read_buf_len > 0); + bdev_io->u.bdev.iovs[0].iov_base = g_zcopy_read_buf; + bdev_io->u.bdev.iovs[0].iov_len = g_zcopy_read_buf_len; + bdev_io->u.bdev.iovcnt = 1; + } else { + /* Start of a write */ + CU_ASSERT(g_zcopy_write_buf != NULL); + CU_ASSERT(g_zcopy_write_buf_len > 0); + bdev_io->u.bdev.iovs[0].iov_base = g_zcopy_write_buf; + bdev_io->u.bdev.iovs[0].iov_len = g_zcopy_write_buf_len; + bdev_io->u.bdev.iovcnt = 1; + } + } else { + if (bdev_io->u.bdev.zcopy.commit) { + /* End of write */ + CU_ASSERT(bdev_io->u.bdev.iovs[0].iov_base == g_zcopy_write_buf); + CU_ASSERT(bdev_io->u.bdev.iovs[0].iov_len == g_zcopy_write_buf_len); + CU_ASSERT(bdev_io->u.bdev.iovcnt == 1); + g_zcopy_write_buf = NULL; + g_zcopy_write_buf_len = 0; + } else { + /* End of read */ + CU_ASSERT(bdev_io->u.bdev.iovs[0].iov_base == g_zcopy_read_buf); + CU_ASSERT(bdev_io->u.bdev.iovs[0].iov_len == g_zcopy_read_buf_len); + CU_ASSERT(bdev_io->u.bdev.iovcnt == 1); + g_zcopy_read_buf = NULL; + g_zcopy_read_buf_len = 0; + } + } + } + TAILQ_INSERT_TAIL(&ch->outstanding_io, bdev_io, module_link); ch->outstanding_io_count++; @@ -856,7 +898,13 @@ io_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { g_io_done = true; g_io_status = bdev_io->internal.status; - spdk_bdev_free_io(bdev_io); + if ((bdev_io->type == SPDK_BDEV_IO_TYPE_ZCOPY) && + (bdev_io->u.bdev.zcopy.start)) { + g_zcopy_bdev_io = bdev_io; + } else { + spdk_bdev_free_io(bdev_io); + g_zcopy_bdev_io = NULL; + } } static void @@ -3442,6 +3490,174 @@ bdev_write_zeroes(void) poll_threads(); } +static void +bdev_zcopy_write(void) +{ + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc = NULL; + struct spdk_io_channel *ioch; + struct ut_expected_io *expected_io; + uint64_t offset, num_blocks; + uint32_t num_completed; + char aa_buf[512]; + struct iovec iov; + int rc; + const bool populate = false; + const bool commit = true; + + memset(aa_buf, 0xaa, sizeof(aa_buf)); + + spdk_bdev_initialize(bdev_init_cb, NULL); + bdev = allocate_bdev("bdev"); + + rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc); + CU_ASSERT_EQUAL(rc, 0); + SPDK_CU_ASSERT_FATAL(desc != NULL); + CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc)); + ioch = spdk_bdev_get_io_channel(desc); + SPDK_CU_ASSERT_FATAL(ioch != NULL); + + g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS; + + offset = 50; + num_blocks = 1; + iov.iov_base = NULL; + iov.iov_len = 0; + + g_zcopy_read_buf = (void *) 0x1122334455667788UL; + g_zcopy_read_buf_len = (uint32_t) -1; + /* Do a zcopy start for a write (populate=false) */ + expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_ZCOPY, offset, num_blocks, 0); + TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link); + g_io_done = false; + g_zcopy_write_buf = aa_buf; + g_zcopy_write_buf_len = sizeof(aa_buf); + g_zcopy_bdev_io = NULL; + rc = spdk_bdev_zcopy_start(desc, ioch, &iov, 1, offset, num_blocks, populate, io_done, NULL); + CU_ASSERT_EQUAL(rc, 0); + num_completed = stub_complete_io(1); + CU_ASSERT_EQUAL(num_completed, 1); + CU_ASSERT(g_io_done == true); + CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS); + /* Check that the iov has been set up */ + CU_ASSERT(iov.iov_base == g_zcopy_write_buf); + CU_ASSERT(iov.iov_len == g_zcopy_write_buf_len); + /* Check that the bdev_io has been saved */ + CU_ASSERT(g_zcopy_bdev_io != NULL); + /* Now do the zcopy end for a write (commit=true) */ + g_io_done = false; + expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_ZCOPY, offset, num_blocks, 0); + TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link); + rc = spdk_bdev_zcopy_end(g_zcopy_bdev_io, commit, io_done, NULL); + CU_ASSERT_EQUAL(rc, 0); + num_completed = stub_complete_io(1); + CU_ASSERT_EQUAL(num_completed, 1); + CU_ASSERT(g_io_done == true); + CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS); + /* Check the g_zcopy are reset by io_done */ + CU_ASSERT(g_zcopy_write_buf == NULL); + CU_ASSERT(g_zcopy_write_buf_len == 0); + /* Check that io_done has freed the g_zcopy_bdev_io */ + CU_ASSERT(g_zcopy_bdev_io == NULL); + + /* Check the zcopy read buffer has not been touched which + * ensures that the correct buffers were used. + */ + CU_ASSERT(g_zcopy_read_buf == (void *) 0x1122334455667788UL); + CU_ASSERT(g_zcopy_read_buf_len == (uint32_t) -1); + + spdk_put_io_channel(ioch); + spdk_bdev_close(desc); + free_bdev(bdev); + spdk_bdev_finish(bdev_fini_cb, NULL); + poll_threads(); +} + +static void +bdev_zcopy_read(void) +{ + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc = NULL; + struct spdk_io_channel *ioch; + struct ut_expected_io *expected_io; + uint64_t offset, num_blocks; + uint32_t num_completed; + char aa_buf[512]; + struct iovec iov; + int rc; + const bool populate = true; + const bool commit = false; + + memset(aa_buf, 0xaa, sizeof(aa_buf)); + + spdk_bdev_initialize(bdev_init_cb, NULL); + bdev = allocate_bdev("bdev"); + + rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc); + CU_ASSERT_EQUAL(rc, 0); + SPDK_CU_ASSERT_FATAL(desc != NULL); + CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc)); + ioch = spdk_bdev_get_io_channel(desc); + SPDK_CU_ASSERT_FATAL(ioch != NULL); + + g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS; + + offset = 50; + num_blocks = 1; + iov.iov_base = NULL; + iov.iov_len = 0; + + g_zcopy_write_buf = (void *) 0x1122334455667788UL; + g_zcopy_write_buf_len = (uint32_t) -1; + + /* Do a zcopy start for a read (populate=true) */ + expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_ZCOPY, offset, num_blocks, 0); + TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link); + g_io_done = false; + g_zcopy_read_buf = aa_buf; + g_zcopy_read_buf_len = sizeof(aa_buf); + g_zcopy_bdev_io = NULL; + rc = spdk_bdev_zcopy_start(desc, ioch, &iov, 1, offset, num_blocks, populate, io_done, NULL); + CU_ASSERT_EQUAL(rc, 0); + num_completed = stub_complete_io(1); + CU_ASSERT_EQUAL(num_completed, 1); + CU_ASSERT(g_io_done == true); + CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS); + /* Check that the iov has been set up */ + CU_ASSERT(iov.iov_base == g_zcopy_read_buf); + CU_ASSERT(iov.iov_len == g_zcopy_read_buf_len); + /* Check that the bdev_io has been saved */ + CU_ASSERT(g_zcopy_bdev_io != NULL); + + /* Now do the zcopy end for a read (commit=false) */ + g_io_done = false; + expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_ZCOPY, offset, num_blocks, 0); + TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link); + rc = spdk_bdev_zcopy_end(g_zcopy_bdev_io, commit, io_done, NULL); + CU_ASSERT_EQUAL(rc, 0); + num_completed = stub_complete_io(1); + CU_ASSERT_EQUAL(num_completed, 1); + CU_ASSERT(g_io_done == true); + CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS); + /* Check the g_zcopy are reset by io_done */ + CU_ASSERT(g_zcopy_read_buf == NULL); + CU_ASSERT(g_zcopy_read_buf_len == 0); + /* Check that io_done has freed the g_zcopy_bdev_io */ + CU_ASSERT(g_zcopy_bdev_io == NULL); + + /* Check the zcopy write buffer has not been touched which + * ensures that the correct buffers were used. + */ + CU_ASSERT(g_zcopy_write_buf == (void *) 0x1122334455667788UL); + CU_ASSERT(g_zcopy_write_buf_len == (uint32_t) -1); + + spdk_put_io_channel(ioch); + spdk_bdev_close(desc); + free_bdev(bdev); + spdk_bdev_finish(bdev_fini_cb, NULL); + poll_threads(); +} + static void bdev_open_while_hotremove(void) { @@ -4495,6 +4711,8 @@ main(int argc, char **argv) CU_ADD_TEST(suite, bdev_write_zeroes); CU_ADD_TEST(suite, bdev_compare_and_write); CU_ADD_TEST(suite, bdev_compare); + CU_ADD_TEST(suite, bdev_zcopy_write); + CU_ADD_TEST(suite, bdev_zcopy_read); CU_ADD_TEST(suite, bdev_open_while_hotremove); CU_ADD_TEST(suite, bdev_close_while_hotremove); CU_ADD_TEST(suite, bdev_open_ext);