From 3b616c0f0cb2fcf3bf51e268b63fabe54a2ac84e Mon Sep 17 00:00:00 2001 From: Jin Yu Date: Mon, 12 Oct 2020 23:53:06 +0800 Subject: [PATCH] bdev: split bdev io base on IO size and segments When the backend device supports max segments and max size, we may need to split the IO if the IO segment size is bigger than max_size or iovcnt is bigger than max_segments. Add unit test for span split Change-Id: If8e9c4f903b7def0ad7ddec7dc5aab8410498db5 Signed-off-by: Jin Yu Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4602 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Changpeng Liu Reviewed-by: Shuhei Matsumoto --- lib/bdev/bdev.c | 118 +++++++++++++++++++++------- test/unit/lib/bdev/bdev.c/bdev_ut.c | 35 ++++++++- 2 files changed, 123 insertions(+), 30 deletions(-) diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index a0595200e..125852850 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -1861,10 +1861,13 @@ bdev_io_type_can_split(uint8_t type) static bool bdev_io_should_split(struct spdk_bdev_io *bdev_io) { - uint64_t start_stripe, end_stripe; uint32_t io_boundary = bdev_io->bdev->optimal_io_boundary; + uint32_t max_size = bdev_io->bdev->max_segment_size; + int max_segs = bdev_io->bdev->max_num_segments; - if (io_boundary == 0) { + io_boundary = bdev_io->bdev->split_on_optimal_io_boundary ? io_boundary : 0; + + if (spdk_likely(!io_boundary && !max_segs && !max_size)) { return false; } @@ -1872,17 +1875,40 @@ bdev_io_should_split(struct spdk_bdev_io *bdev_io) return false; } - start_stripe = bdev_io->u.bdev.offset_blocks; - end_stripe = start_stripe + bdev_io->u.bdev.num_blocks - 1; - /* Avoid expensive div operations if possible. These spdk_u32 functions are very cheap. */ - if (spdk_likely(spdk_u32_is_pow2(io_boundary))) { - start_stripe >>= spdk_u32log2(io_boundary); - end_stripe >>= spdk_u32log2(io_boundary); - } else { - start_stripe /= io_boundary; - end_stripe /= io_boundary; + if (io_boundary) { + uint64_t start_stripe, end_stripe; + + start_stripe = bdev_io->u.bdev.offset_blocks; + end_stripe = start_stripe + bdev_io->u.bdev.num_blocks - 1; + /* Avoid expensive div operations if possible. These spdk_u32 functions are very cheap. */ + if (spdk_likely(spdk_u32_is_pow2(io_boundary))) { + start_stripe >>= spdk_u32log2(io_boundary); + end_stripe >>= spdk_u32log2(io_boundary); + } else { + start_stripe /= io_boundary; + end_stripe /= io_boundary; + } + + if (start_stripe != end_stripe) { + return true; + } } - return (start_stripe != end_stripe); + + if (max_segs) { + if (bdev_io->u.bdev.iovcnt > max_segs) { + return true; + } + } + + if (max_size) { + for (int i = 0; i < bdev_io->u.bdev.iovcnt; i++) { + if (bdev_io->u.bdev.iovs[i].iov_len > max_size) { + return true; + } + } + } + + return false; } static uint32_t @@ -1897,19 +1923,28 @@ bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg); static void _bdev_io_split(void *_bdev_io) { - struct spdk_bdev_io *bdev_io = _bdev_io; - uint64_t parent_offset, current_offset, remaining; - uint32_t blocklen, to_next_boundary, to_next_boundary_bytes, to_last_block_bytes; struct iovec *parent_iov, *iov; - uint64_t parent_iov_offset, iov_len; - uint32_t parent_iovpos, parent_iovcnt, child_iovcnt, iovcnt; + struct spdk_bdev_io *bdev_io = _bdev_io; + struct spdk_bdev *bdev = bdev_io->bdev; + uint64_t parent_offset, current_offset, remaining; + uint32_t parent_iov_offset, parent_iovcnt, parent_iovpos, child_iovcnt; + uint32_t to_next_boundary, to_next_boundary_bytes, to_last_block_bytes; + uint32_t iovcnt, iov_len, child_iovsize; + uint32_t blocklen = bdev->blocklen; + uint32_t io_boundary = bdev->optimal_io_boundary; + uint32_t max_segment_size = bdev->max_segment_size; + uint32_t max_child_iovcnt = bdev->max_num_segments; void *md_buf = NULL; int rc; + max_segment_size = max_segment_size ? max_segment_size : UINT32_MAX; + max_child_iovcnt = max_child_iovcnt ? spdk_min(max_child_iovcnt, BDEV_IO_NUM_CHILD_IOV) : + BDEV_IO_NUM_CHILD_IOV; + io_boundary = bdev->split_on_optimal_io_boundary ? io_boundary : UINT32_MAX; + remaining = bdev_io->u.bdev.split_remaining_num_blocks; current_offset = bdev_io->u.bdev.split_current_offset_blocks; parent_offset = bdev_io->u.bdev.offset_blocks; - blocklen = bdev_io->bdev->blocklen; parent_iov_offset = (current_offset - parent_offset) * blocklen; parent_iovcnt = bdev_io->u.bdev.iovcnt; @@ -1923,21 +1958,26 @@ _bdev_io_split(void *_bdev_io) child_iovcnt = 0; while (remaining > 0 && parent_iovpos < parent_iovcnt && child_iovcnt < BDEV_IO_NUM_CHILD_IOV) { - to_next_boundary = _to_next_boundary(current_offset, bdev_io->bdev->optimal_io_boundary); + to_next_boundary = _to_next_boundary(current_offset, io_boundary); to_next_boundary = spdk_min(remaining, to_next_boundary); to_next_boundary_bytes = to_next_boundary * blocklen; + iov = &bdev_io->child_iov[child_iovcnt]; iovcnt = 0; if (bdev_io->u.bdev.md_buf) { md_buf = (char *)bdev_io->u.bdev.md_buf + - (current_offset - parent_offset) * spdk_bdev_get_md_size(bdev_io->bdev); + (current_offset - parent_offset) * spdk_bdev_get_md_size(bdev); } + child_iovsize = spdk_min(BDEV_IO_NUM_CHILD_IOV - child_iovcnt, max_child_iovcnt); while (to_next_boundary_bytes > 0 && parent_iovpos < parent_iovcnt && - child_iovcnt < BDEV_IO_NUM_CHILD_IOV) { + iovcnt < child_iovsize) { parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos]; - iov_len = spdk_min(to_next_boundary_bytes, parent_iov->iov_len - parent_iov_offset); + iov_len = parent_iov->iov_len - parent_iov_offset; + + iov_len = spdk_min(iov_len, max_segment_size); + iov_len = spdk_min(iov_len, to_next_boundary_bytes); to_next_boundary_bytes -= iov_len; bdev_io->child_iov[child_iovcnt].iov_base = parent_iov->iov_base + parent_iov_offset; @@ -1955,15 +1995,19 @@ _bdev_io_split(void *_bdev_io) if (to_next_boundary_bytes > 0) { /* We had to stop this child I/O early because we ran out of - * child_iov space. Ensure the iovs to be aligned with block - * size and then adjust to_next_boundary before starting the + * child_iov space or were limited by max_num_segments. + * Ensure the iovs to be aligned with block size and + * then adjust to_next_boundary before starting the * child I/O. */ - assert(child_iovcnt == BDEV_IO_NUM_CHILD_IOV); + assert(child_iovcnt == BDEV_IO_NUM_CHILD_IOV || + iovcnt == child_iovsize); to_last_block_bytes = to_next_boundary_bytes % blocklen; if (to_last_block_bytes != 0) { uint32_t child_iovpos = child_iovcnt - 1; - /* don't decrease child_iovcnt so the loop will naturally end */ + /* don't decrease child_iovcnt when it equals to BDEV_IO_NUM_CHILD_IOV + * so the loop will naturally end + */ to_last_block_bytes = blocklen - to_last_block_bytes; to_next_boundary_bytes += to_last_block_bytes; @@ -1974,10 +2018,30 @@ _bdev_io_split(void *_bdev_io) if (bdev_io->child_iov[child_iovpos].iov_len == 0) { child_iovpos--; if (--iovcnt == 0) { + /* If the child IO is less than a block size just return. + * If the first child IO of any split round is less than + * a block size, an error exit. + */ + if (bdev_io->u.bdev.split_outstanding == 0) { + SPDK_ERRLOG("The first child io was less than a block size\n"); + bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED; + spdk_trace_record_tsc(spdk_get_ticks(), TRACE_BDEV_IO_DONE, 0, 0, + (uintptr_t)bdev_io, 0); + TAILQ_REMOVE(&bdev_io->internal.ch->io_submitted, bdev_io, internal.ch_link); + bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx); + } + return; } } + to_last_block_bytes -= iov_len; + + if (parent_iov_offset == 0) { + parent_iovpos--; + parent_iov_offset = bdev_io->u.bdev.iovs[parent_iovpos].iov_len; + } + parent_iov_offset -= iov_len; } assert(to_last_block_bytes == 0); @@ -2215,7 +2279,7 @@ bdev_io_submit(struct spdk_bdev_io *bdev_io) TAILQ_INSERT_TAIL(&ch->io_submitted, bdev_io, internal.ch_link); - if (bdev->split_on_optimal_io_boundary && bdev_io_should_split(bdev_io)) { + if (bdev_io_should_split(bdev_io)) { bdev_io->internal.submit_tsc = spdk_get_ticks(); spdk_trace_record_tsc(bdev_io->internal.submit_tsc, TRACE_BDEV_IO_START, 0, 0, (uintptr_t)bdev_io, bdev_io->type); diff --git a/test/unit/lib/bdev/bdev.c/bdev_ut.c b/test/unit/lib/bdev/bdev.c/bdev_ut.c index 226996abb..5d3313446 100644 --- a/test/unit/lib/bdev/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/bdev.c/bdev_ut.c @@ -1014,19 +1014,24 @@ bdev_io_wait_test(void) } static void -bdev_io_spans_boundary_test(void) +bdev_io_spans_split_test(void) { struct spdk_bdev bdev; struct spdk_bdev_io bdev_io; + struct iovec iov[BDEV_IO_NUM_CHILD_IOV]; memset(&bdev, 0, sizeof(bdev)); + bdev_io.u.bdev.iovs = iov; bdev.optimal_io_boundary = 0; + bdev.max_segment_size = 0; + bdev.max_num_segments = 0; bdev_io.bdev = &bdev; - /* bdev has no optimal_io_boundary set - so this should return false. */ + /* bdev has no optimal_io_boundary and max_size set - so this should return false. */ CU_ASSERT(bdev_io_should_split(&bdev_io) == false); + bdev.split_on_optimal_io_boundary = true; bdev.optimal_io_boundary = 32; bdev_io.type = SPDK_BDEV_IO_TYPE_RESET; @@ -1044,6 +1049,30 @@ bdev_io_spans_boundary_test(void) /* This I/O spans a boundary. */ CU_ASSERT(bdev_io_should_split(&bdev_io) == true); + + bdev_io.u.bdev.num_blocks = 32; + bdev.max_segment_size = 512 * 32; + bdev.max_num_segments = 1; + bdev_io.u.bdev.iovcnt = 1; + iov[0].iov_len = 512; + + /* Does not cross and exceed max_size or max_segs */ + CU_ASSERT(bdev_io_should_split(&bdev_io) == false); + + bdev.split_on_optimal_io_boundary = false; + bdev.max_segment_size = 512; + bdev.max_num_segments = 1; + bdev_io.u.bdev.iovcnt = 2; + + /* Exceed max_segs */ + CU_ASSERT(bdev_io_should_split(&bdev_io) == true); + + bdev.max_num_segments = 2; + iov[0].iov_len = 513; + iov[1].iov_len = 512; + + /* Exceed max_sizes */ + CU_ASSERT(bdev_io_should_split(&bdev_io) == true); } static void @@ -3430,7 +3459,7 @@ main(int argc, char **argv) CU_ADD_TEST(suite, get_device_stat_test); CU_ADD_TEST(suite, bdev_io_types_test); CU_ADD_TEST(suite, bdev_io_wait_test); - CU_ADD_TEST(suite, bdev_io_spans_boundary_test); + CU_ADD_TEST(suite, bdev_io_spans_split_test); CU_ADD_TEST(suite, bdev_io_split_test); CU_ADD_TEST(suite, bdev_io_split_with_io_wait); CU_ADD_TEST(suite, bdev_io_alignment_with_boundary);