From e7fbdf15fd210aa95385f22749a2b89bbf56fc61 Mon Sep 17 00:00:00 2001 From: Changpeng Liu Date: Fri, 14 May 2021 18:23:16 +0800 Subject: [PATCH] bdev: add write zeroes split support The common bdev layer will split large WRITE ZEROES ranges into multiple child requests based on the backend device's max_write_zeroes setting. It submits at most 8 child requests at a time to avoid flooding the backend with requests. Also add unit tests to cover the different cases. Change-Id: Id9505fbe1c297412ef97b1f73587b22bc43f770e Signed-off-by: Changpeng Liu Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/7875 Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Aleksey Marchuk Reviewed-by: Shuhei Matsumoto --- lib/bdev/bdev.c | 61 +++++++++++++++++ test/unit/lib/bdev/bdev.c/bdev_ut.c | 102 ++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+) diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 636d42bfb..b41738465 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -2026,6 +2026,20 @@ bdev_unmap_should_split(struct spdk_bdev_io *bdev_io) return false; } +static bool +bdev_write_zeroes_should_split(struct spdk_bdev_io *bdev_io) +{ + if (!bdev_io->bdev->max_write_zeroes) { + return false; + } + + if (bdev_io->u.bdev.num_blocks > bdev_io->bdev->max_write_zeroes) { + return true; + } + + return false; +} + static bool bdev_io_should_split(struct spdk_bdev_io *bdev_io) { @@ -2035,6 +2049,8 @@ bdev_io_should_split(struct spdk_bdev_io *bdev_io) return bdev_rw_should_split(bdev_io); case SPDK_BDEV_IO_TYPE_UNMAP: return bdev_unmap_should_split(bdev_io); + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + return bdev_write_zeroes_should_split(bdev_io); default: return false; } @@ -2061,6 +2077,15 @@ _bdev_unmap_split(void *_bdev_io) return bdev_unmap_split((struct spdk_bdev_io *)_bdev_io); } +static void +bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io); + +static void +_bdev_write_zeroes_split(void *_bdev_io) +{ + return bdev_write_zeroes_split((struct spdk_bdev_io *)_bdev_io); +} + 
static int bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt, void *md_buf, uint64_t num_blocks, uint64_t *offset, uint64_t *remaining) @@ -2097,6 +2122,13 @@ bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt current_offset, num_blocks, bdev_io_split_done, bdev_io); break; + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + io_wait_fn = _bdev_write_zeroes_split; + rc = spdk_bdev_write_zeroes_blocks(bdev_io->internal.desc, + spdk_io_channel_from_ctx(bdev_io->internal.ch), + current_offset, num_blocks, + bdev_io_split_done, bdev_io); + break; default: assert(false); rc = -EINVAL; @@ -2292,6 +2324,29 @@ bdev_unmap_split(struct spdk_bdev_io *bdev_io) } } +static void +bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io) +{ + uint64_t offset, write_zeroes_blocks, remaining; + uint32_t num_children_reqs = 0; + int rc; + + offset = bdev_io->u.bdev.split_current_offset_blocks; + remaining = bdev_io->u.bdev.split_remaining_num_blocks; + + while (remaining && (num_children_reqs < SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS)) { + write_zeroes_blocks = spdk_min(remaining, bdev_io->bdev->max_write_zeroes); + + rc = bdev_io_split_submit(bdev_io, NULL, 0, NULL, write_zeroes_blocks, + &offset, &remaining); + if (spdk_likely(rc == 0)) { + num_children_reqs++; + } else { + return; + } + } +} + static void bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { @@ -2335,6 +2390,9 @@ bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) case SPDK_BDEV_IO_TYPE_UNMAP: bdev_unmap_split(parent_io); break; + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + bdev_write_zeroes_split(parent_io); + break; default: assert(false); break; @@ -2366,6 +2424,9 @@ bdev_io_split(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) case SPDK_BDEV_IO_TYPE_UNMAP: bdev_unmap_split(bdev_io); break; + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + bdev_write_zeroes_split(bdev_io); + break; default: assert(false); break; 
diff --git a/test/unit/lib/bdev/bdev.c/bdev_ut.c b/test/unit/lib/bdev/bdev.c/bdev_ut.c index 3028a7222..3a278d2bd 100644 --- a/test/unit/lib/bdev/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/bdev.c/bdev_ut.c @@ -4567,6 +4567,107 @@ bdev_unmap(void) poll_threads(); } +static void +bdev_write_zeroes_split_test(void) +{ + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc = NULL; + struct spdk_io_channel *ioch; + struct spdk_bdev_channel *bdev_ch; + struct ut_expected_io *expected_io; + struct spdk_bdev_opts bdev_opts = {}; + uint32_t i, num_outstanding; + uint64_t offset, num_blocks, max_write_zeroes_blocks, num_children; + int rc; + + spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts)); + bdev_opts.bdev_io_pool_size = 512; + bdev_opts.bdev_io_cache_size = 64; + rc = spdk_bdev_set_opts(&bdev_opts); + CU_ASSERT(rc == 0); + + spdk_bdev_initialize(bdev_init_cb, NULL); + bdev = allocate_bdev("bdev"); + + rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc); + CU_ASSERT_EQUAL(rc, 0); + SPDK_CU_ASSERT_FATAL(desc != NULL); + CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc)); + ioch = spdk_bdev_get_io_channel(desc); + SPDK_CU_ASSERT_FATAL(ioch != NULL); + bdev_ch = spdk_io_channel_get_ctx(ioch); + CU_ASSERT(TAILQ_EMPTY(&bdev_ch->io_submitted)); + + fn_table.submit_request = stub_submit_request; + g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS; + + /* Case 1: First test the request won't be split */ + num_blocks = 32; + + g_io_done = false; + expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, 0, num_blocks, 0); + TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link); + rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL); + CU_ASSERT_EQUAL(rc, 0); + CU_ASSERT(g_io_done == false); + CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1); + stub_complete_io(1); + CU_ASSERT(g_io_done == true); + CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0); + + /* Case 2: Test the split with 2 children requests */ + 
max_write_zeroes_blocks = 8; + bdev->max_write_zeroes = max_write_zeroes_blocks; + num_blocks = max_write_zeroes_blocks * 2; + offset = 0; + + g_io_done = false; + for (i = 0; i < 2; i++) { + expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, offset, max_write_zeroes_blocks, + 0); + TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link); + offset += max_write_zeroes_blocks; + } + + rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL); + CU_ASSERT_EQUAL(rc, 0); + CU_ASSERT(g_io_done == false); + CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2); + stub_complete_io(2); + CU_ASSERT(g_io_done == true); + CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0); + + /* Case 3: Test the split with 15 children requests, will finish 8 requests first */ + num_children = 15; + num_blocks = max_write_zeroes_blocks * num_children; + g_io_done = false; + offset = 0; + for (i = 0; i < num_children; i++) { + expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, offset, max_write_zeroes_blocks, + 0); + TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link); + offset += max_write_zeroes_blocks; + } + + rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL); + CU_ASSERT_EQUAL(rc, 0); + CU_ASSERT(g_io_done == false); + + while (num_children > 0) { + num_outstanding = spdk_min(num_children, SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS); + CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == num_outstanding); + stub_complete_io(num_outstanding); + num_children -= num_outstanding; + } + CU_ASSERT(g_io_done == true); + + spdk_put_io_channel(ioch); + spdk_bdev_close(desc); + free_bdev(bdev); + spdk_bdev_finish(bdev_fini_cb, NULL); + poll_threads(); +} + static void bdev_set_options_test(void) { @@ -4712,6 +4813,7 @@ main(int argc, char **argv) CU_ADD_TEST(suite, lock_lba_range_overlapped); CU_ADD_TEST(suite, bdev_io_abort); CU_ADD_TEST(suite, bdev_unmap); + CU_ADD_TEST(suite, 
bdev_write_zeroes_split_test); CU_ADD_TEST(suite, bdev_set_options_test); CU_ADD_TEST(suite, bdev_multi_allocation);