bdev: split bdev io based on IO size and segments

When the backend device reports a maximum number of segments
(max_num_segments) and a maximum segment size (max_segment_size),
we may need to split the IO if any IO segment is larger than
max_segment_size or if iovcnt is larger than max_num_segments.

Add unit test for span split

Change-Id: If8e9c4f903b7def0ad7ddec7dc5aab8410498db5
Signed-off-by: Jin Yu <jin.yu@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4602
Community-CI: Broadcom CI
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
This commit is contained in:
Jin Yu 2020-10-12 23:53:06 +08:00 committed by Tomasz Zawadzki
parent 8518219a62
commit 3b616c0f0c
2 changed files with 123 additions and 30 deletions

View File

@ -1861,10 +1861,13 @@ bdev_io_type_can_split(uint8_t type)
static bool static bool
bdev_io_should_split(struct spdk_bdev_io *bdev_io) bdev_io_should_split(struct spdk_bdev_io *bdev_io)
{ {
uint64_t start_stripe, end_stripe;
uint32_t io_boundary = bdev_io->bdev->optimal_io_boundary; uint32_t io_boundary = bdev_io->bdev->optimal_io_boundary;
uint32_t max_size = bdev_io->bdev->max_segment_size;
int max_segs = bdev_io->bdev->max_num_segments;
if (io_boundary == 0) { io_boundary = bdev_io->bdev->split_on_optimal_io_boundary ? io_boundary : 0;
if (spdk_likely(!io_boundary && !max_segs && !max_size)) {
return false; return false;
} }
@ -1872,6 +1875,9 @@ bdev_io_should_split(struct spdk_bdev_io *bdev_io)
return false; return false;
} }
if (io_boundary) {
uint64_t start_stripe, end_stripe;
start_stripe = bdev_io->u.bdev.offset_blocks; start_stripe = bdev_io->u.bdev.offset_blocks;
end_stripe = start_stripe + bdev_io->u.bdev.num_blocks - 1; end_stripe = start_stripe + bdev_io->u.bdev.num_blocks - 1;
/* Avoid expensive div operations if possible. These spdk_u32 functions are very cheap. */ /* Avoid expensive div operations if possible. These spdk_u32 functions are very cheap. */
@ -1882,7 +1888,27 @@ bdev_io_should_split(struct spdk_bdev_io *bdev_io)
start_stripe /= io_boundary; start_stripe /= io_boundary;
end_stripe /= io_boundary; end_stripe /= io_boundary;
} }
return (start_stripe != end_stripe);
if (start_stripe != end_stripe) {
return true;
}
}
if (max_segs) {
if (bdev_io->u.bdev.iovcnt > max_segs) {
return true;
}
}
if (max_size) {
for (int i = 0; i < bdev_io->u.bdev.iovcnt; i++) {
if (bdev_io->u.bdev.iovs[i].iov_len > max_size) {
return true;
}
}
}
return false;
} }
static uint32_t static uint32_t
@ -1897,19 +1923,28 @@ bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
static void static void
_bdev_io_split(void *_bdev_io) _bdev_io_split(void *_bdev_io)
{ {
struct spdk_bdev_io *bdev_io = _bdev_io;
uint64_t parent_offset, current_offset, remaining;
uint32_t blocklen, to_next_boundary, to_next_boundary_bytes, to_last_block_bytes;
struct iovec *parent_iov, *iov; struct iovec *parent_iov, *iov;
uint64_t parent_iov_offset, iov_len; struct spdk_bdev_io *bdev_io = _bdev_io;
uint32_t parent_iovpos, parent_iovcnt, child_iovcnt, iovcnt; struct spdk_bdev *bdev = bdev_io->bdev;
uint64_t parent_offset, current_offset, remaining;
uint32_t parent_iov_offset, parent_iovcnt, parent_iovpos, child_iovcnt;
uint32_t to_next_boundary, to_next_boundary_bytes, to_last_block_bytes;
uint32_t iovcnt, iov_len, child_iovsize;
uint32_t blocklen = bdev->blocklen;
uint32_t io_boundary = bdev->optimal_io_boundary;
uint32_t max_segment_size = bdev->max_segment_size;
uint32_t max_child_iovcnt = bdev->max_num_segments;
void *md_buf = NULL; void *md_buf = NULL;
int rc; int rc;
max_segment_size = max_segment_size ? max_segment_size : UINT32_MAX;
max_child_iovcnt = max_child_iovcnt ? spdk_min(max_child_iovcnt, BDEV_IO_NUM_CHILD_IOV) :
BDEV_IO_NUM_CHILD_IOV;
io_boundary = bdev->split_on_optimal_io_boundary ? io_boundary : UINT32_MAX;
remaining = bdev_io->u.bdev.split_remaining_num_blocks; remaining = bdev_io->u.bdev.split_remaining_num_blocks;
current_offset = bdev_io->u.bdev.split_current_offset_blocks; current_offset = bdev_io->u.bdev.split_current_offset_blocks;
parent_offset = bdev_io->u.bdev.offset_blocks; parent_offset = bdev_io->u.bdev.offset_blocks;
blocklen = bdev_io->bdev->blocklen;
parent_iov_offset = (current_offset - parent_offset) * blocklen; parent_iov_offset = (current_offset - parent_offset) * blocklen;
parent_iovcnt = bdev_io->u.bdev.iovcnt; parent_iovcnt = bdev_io->u.bdev.iovcnt;
@ -1923,21 +1958,26 @@ _bdev_io_split(void *_bdev_io)
child_iovcnt = 0; child_iovcnt = 0;
while (remaining > 0 && parent_iovpos < parent_iovcnt && child_iovcnt < BDEV_IO_NUM_CHILD_IOV) { while (remaining > 0 && parent_iovpos < parent_iovcnt && child_iovcnt < BDEV_IO_NUM_CHILD_IOV) {
to_next_boundary = _to_next_boundary(current_offset, bdev_io->bdev->optimal_io_boundary); to_next_boundary = _to_next_boundary(current_offset, io_boundary);
to_next_boundary = spdk_min(remaining, to_next_boundary); to_next_boundary = spdk_min(remaining, to_next_boundary);
to_next_boundary_bytes = to_next_boundary * blocklen; to_next_boundary_bytes = to_next_boundary * blocklen;
iov = &bdev_io->child_iov[child_iovcnt]; iov = &bdev_io->child_iov[child_iovcnt];
iovcnt = 0; iovcnt = 0;
if (bdev_io->u.bdev.md_buf) { if (bdev_io->u.bdev.md_buf) {
md_buf = (char *)bdev_io->u.bdev.md_buf + md_buf = (char *)bdev_io->u.bdev.md_buf +
(current_offset - parent_offset) * spdk_bdev_get_md_size(bdev_io->bdev); (current_offset - parent_offset) * spdk_bdev_get_md_size(bdev);
} }
child_iovsize = spdk_min(BDEV_IO_NUM_CHILD_IOV - child_iovcnt, max_child_iovcnt);
while (to_next_boundary_bytes > 0 && parent_iovpos < parent_iovcnt && while (to_next_boundary_bytes > 0 && parent_iovpos < parent_iovcnt &&
child_iovcnt < BDEV_IO_NUM_CHILD_IOV) { iovcnt < child_iovsize) {
parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos]; parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos];
iov_len = spdk_min(to_next_boundary_bytes, parent_iov->iov_len - parent_iov_offset); iov_len = parent_iov->iov_len - parent_iov_offset;
iov_len = spdk_min(iov_len, max_segment_size);
iov_len = spdk_min(iov_len, to_next_boundary_bytes);
to_next_boundary_bytes -= iov_len; to_next_boundary_bytes -= iov_len;
bdev_io->child_iov[child_iovcnt].iov_base = parent_iov->iov_base + parent_iov_offset; bdev_io->child_iov[child_iovcnt].iov_base = parent_iov->iov_base + parent_iov_offset;
@ -1955,15 +1995,19 @@ _bdev_io_split(void *_bdev_io)
if (to_next_boundary_bytes > 0) { if (to_next_boundary_bytes > 0) {
/* We had to stop this child I/O early because we ran out of /* We had to stop this child I/O early because we ran out of
* child_iov space. Ensure the iovs to be aligned with block * child_iov space or were limited by max_num_segments.
* size and then adjust to_next_boundary before starting the * Ensure the iovs to be aligned with block size and
* then adjust to_next_boundary before starting the
* child I/O. * child I/O.
*/ */
assert(child_iovcnt == BDEV_IO_NUM_CHILD_IOV); assert(child_iovcnt == BDEV_IO_NUM_CHILD_IOV ||
iovcnt == child_iovsize);
to_last_block_bytes = to_next_boundary_bytes % blocklen; to_last_block_bytes = to_next_boundary_bytes % blocklen;
if (to_last_block_bytes != 0) { if (to_last_block_bytes != 0) {
uint32_t child_iovpos = child_iovcnt - 1; uint32_t child_iovpos = child_iovcnt - 1;
/* don't decrease child_iovcnt so the loop will naturally end */ /* don't decrease child_iovcnt when it equals to BDEV_IO_NUM_CHILD_IOV
* so the loop will naturally end
*/
to_last_block_bytes = blocklen - to_last_block_bytes; to_last_block_bytes = blocklen - to_last_block_bytes;
to_next_boundary_bytes += to_last_block_bytes; to_next_boundary_bytes += to_last_block_bytes;
@ -1974,10 +2018,30 @@ _bdev_io_split(void *_bdev_io)
if (bdev_io->child_iov[child_iovpos].iov_len == 0) { if (bdev_io->child_iov[child_iovpos].iov_len == 0) {
child_iovpos--; child_iovpos--;
if (--iovcnt == 0) { if (--iovcnt == 0) {
/* If the child IO is less than a block size just return.
* If the first child IO of any split round is less than
* a block size, an error exit.
*/
if (bdev_io->u.bdev.split_outstanding == 0) {
SPDK_ERRLOG("The first child io was less than a block size\n");
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
spdk_trace_record_tsc(spdk_get_ticks(), TRACE_BDEV_IO_DONE, 0, 0,
(uintptr_t)bdev_io, 0);
TAILQ_REMOVE(&bdev_io->internal.ch->io_submitted, bdev_io, internal.ch_link);
bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
}
return; return;
} }
} }
to_last_block_bytes -= iov_len; to_last_block_bytes -= iov_len;
if (parent_iov_offset == 0) {
parent_iovpos--;
parent_iov_offset = bdev_io->u.bdev.iovs[parent_iovpos].iov_len;
}
parent_iov_offset -= iov_len;
} }
assert(to_last_block_bytes == 0); assert(to_last_block_bytes == 0);
@ -2215,7 +2279,7 @@ bdev_io_submit(struct spdk_bdev_io *bdev_io)
TAILQ_INSERT_TAIL(&ch->io_submitted, bdev_io, internal.ch_link); TAILQ_INSERT_TAIL(&ch->io_submitted, bdev_io, internal.ch_link);
if (bdev->split_on_optimal_io_boundary && bdev_io_should_split(bdev_io)) { if (bdev_io_should_split(bdev_io)) {
bdev_io->internal.submit_tsc = spdk_get_ticks(); bdev_io->internal.submit_tsc = spdk_get_ticks();
spdk_trace_record_tsc(bdev_io->internal.submit_tsc, TRACE_BDEV_IO_START, 0, 0, spdk_trace_record_tsc(bdev_io->internal.submit_tsc, TRACE_BDEV_IO_START, 0, 0,
(uintptr_t)bdev_io, bdev_io->type); (uintptr_t)bdev_io, bdev_io->type);

View File

@ -1014,19 +1014,24 @@ bdev_io_wait_test(void)
} }
static void static void
bdev_io_spans_boundary_test(void) bdev_io_spans_split_test(void)
{ {
struct spdk_bdev bdev; struct spdk_bdev bdev;
struct spdk_bdev_io bdev_io; struct spdk_bdev_io bdev_io;
struct iovec iov[BDEV_IO_NUM_CHILD_IOV];
memset(&bdev, 0, sizeof(bdev)); memset(&bdev, 0, sizeof(bdev));
bdev_io.u.bdev.iovs = iov;
bdev.optimal_io_boundary = 0; bdev.optimal_io_boundary = 0;
bdev.max_segment_size = 0;
bdev.max_num_segments = 0;
bdev_io.bdev = &bdev; bdev_io.bdev = &bdev;
/* bdev has no optimal_io_boundary set - so this should return false. */ /* bdev has no optimal_io_boundary and max_size set - so this should return false. */
CU_ASSERT(bdev_io_should_split(&bdev_io) == false); CU_ASSERT(bdev_io_should_split(&bdev_io) == false);
bdev.split_on_optimal_io_boundary = true;
bdev.optimal_io_boundary = 32; bdev.optimal_io_boundary = 32;
bdev_io.type = SPDK_BDEV_IO_TYPE_RESET; bdev_io.type = SPDK_BDEV_IO_TYPE_RESET;
@ -1044,6 +1049,30 @@ bdev_io_spans_boundary_test(void)
/* This I/O spans a boundary. */ /* This I/O spans a boundary. */
CU_ASSERT(bdev_io_should_split(&bdev_io) == true); CU_ASSERT(bdev_io_should_split(&bdev_io) == true);
bdev_io.u.bdev.num_blocks = 32;
bdev.max_segment_size = 512 * 32;
bdev.max_num_segments = 1;
bdev_io.u.bdev.iovcnt = 1;
iov[0].iov_len = 512;
/* Does not cross and exceed max_size or max_segs */
CU_ASSERT(bdev_io_should_split(&bdev_io) == false);
bdev.split_on_optimal_io_boundary = false;
bdev.max_segment_size = 512;
bdev.max_num_segments = 1;
bdev_io.u.bdev.iovcnt = 2;
/* Exceed max_segs */
CU_ASSERT(bdev_io_should_split(&bdev_io) == true);
bdev.max_num_segments = 2;
iov[0].iov_len = 513;
iov[1].iov_len = 512;
/* Exceed max_sizes */
CU_ASSERT(bdev_io_should_split(&bdev_io) == true);
} }
static void static void
@ -3430,7 +3459,7 @@ main(int argc, char **argv)
CU_ADD_TEST(suite, get_device_stat_test); CU_ADD_TEST(suite, get_device_stat_test);
CU_ADD_TEST(suite, bdev_io_types_test); CU_ADD_TEST(suite, bdev_io_types_test);
CU_ADD_TEST(suite, bdev_io_wait_test); CU_ADD_TEST(suite, bdev_io_wait_test);
CU_ADD_TEST(suite, bdev_io_spans_boundary_test); CU_ADD_TEST(suite, bdev_io_spans_split_test);
CU_ADD_TEST(suite, bdev_io_split_test); CU_ADD_TEST(suite, bdev_io_split_test);
CU_ADD_TEST(suite, bdev_io_split_with_io_wait); CU_ADD_TEST(suite, bdev_io_split_with_io_wait);
CU_ADD_TEST(suite, bdev_io_alignment_with_boundary); CU_ADD_TEST(suite, bdev_io_alignment_with_boundary);