bdev: split writes based on write_unit_size
Add a new bdev property, split_on_write_unit, which, if set to true, causes writes to be split to match write_unit_size and to fail if they are not aligned to, or are not a multiple of, write_unit_size. Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com> Change-Id: Id49f58a3288ddf5cfe4921ce4020ae4bcdd67298 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11390 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Community-CI: Mellanox Build Bot Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Reviewed-by: Krzysztof Karas <krzysztof.karas@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
c89e20084b
commit
d6e9827e9f
@ -309,6 +309,20 @@ struct spdk_bdev {
|
||||
/** Number of blocks */
|
||||
uint64_t blockcnt;
|
||||
|
||||
/**
|
||||
* Specifies whether the write_unit_size is mandatory or
|
||||
* only advisory. If set to true, the bdev layer will split
|
||||
* WRITE I/O that span the write_unit_size before
|
||||
* submitting them to the bdev module.
|
||||
*
|
||||
* This field takes precedence over split_on_optimal_io_boundary
|
||||
* for WRITE I/O if both are set to true.
|
||||
*
|
||||
* Note that this field cannot be used to force splitting of
|
||||
* UNMAP, WRITE_ZEROES or FLUSH I/O.
|
||||
*/
|
||||
bool split_on_write_unit;
|
||||
|
||||
/** Number of blocks required for write */
|
||||
uint32_t write_unit_size;
|
||||
|
||||
|
@ -2166,6 +2166,14 @@ bdev_io_do_submit(struct spdk_bdev_channel *bdev_ch, struct spdk_bdev_io *bdev_i
|
||||
}
|
||||
}
|
||||
|
||||
if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE &&
|
||||
bdev_io->bdev->split_on_write_unit &&
|
||||
bdev_io->u.bdev.num_blocks < bdev_io->bdev->write_unit_size)) {
|
||||
SPDK_ERRLOG("IO does not match the write_unit_size\n");
|
||||
_bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
|
||||
return;
|
||||
}
|
||||
|
||||
if (spdk_likely(TAILQ_EMPTY(&shared_resource->nomem_io))) {
|
||||
bdev_ch->io_outstanding++;
|
||||
shared_resource->io_outstanding++;
|
||||
@ -2242,11 +2250,18 @@ bdev_queue_io_wait_with_cb(struct spdk_bdev_io *bdev_io, spdk_bdev_io_wait_cb cb
|
||||
static bool
|
||||
bdev_rw_should_split(struct spdk_bdev_io *bdev_io)
|
||||
{
|
||||
uint32_t io_boundary = bdev_io->bdev->optimal_io_boundary;
|
||||
uint32_t max_size = bdev_io->bdev->max_segment_size;
|
||||
int max_segs = bdev_io->bdev->max_num_segments;
|
||||
uint32_t io_boundary;
|
||||
struct spdk_bdev *bdev = bdev_io->bdev;
|
||||
uint32_t max_size = bdev->max_segment_size;
|
||||
int max_segs = bdev->max_num_segments;
|
||||
|
||||
io_boundary = bdev_io->bdev->split_on_optimal_io_boundary ? io_boundary : 0;
|
||||
if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE && bdev->split_on_write_unit) {
|
||||
io_boundary = bdev->write_unit_size;
|
||||
} else if (bdev->split_on_optimal_io_boundary) {
|
||||
io_boundary = bdev->optimal_io_boundary;
|
||||
} else {
|
||||
io_boundary = 0;
|
||||
}
|
||||
|
||||
if (spdk_likely(!io_boundary && !max_segs && !max_size)) {
|
||||
return false;
|
||||
@ -2449,7 +2464,7 @@ _bdev_rw_split(void *_bdev_io)
|
||||
uint32_t to_next_boundary, to_next_boundary_bytes, to_last_block_bytes;
|
||||
uint32_t iovcnt, iov_len, child_iovsize;
|
||||
uint32_t blocklen = bdev->blocklen;
|
||||
uint32_t io_boundary = bdev->optimal_io_boundary;
|
||||
uint32_t io_boundary;
|
||||
uint32_t max_segment_size = bdev->max_segment_size;
|
||||
uint32_t max_child_iovcnt = bdev->max_num_segments;
|
||||
void *md_buf = NULL;
|
||||
@ -2458,7 +2473,14 @@ _bdev_rw_split(void *_bdev_io)
|
||||
max_segment_size = max_segment_size ? max_segment_size : UINT32_MAX;
|
||||
max_child_iovcnt = max_child_iovcnt ? spdk_min(max_child_iovcnt, BDEV_IO_NUM_CHILD_IOV) :
|
||||
BDEV_IO_NUM_CHILD_IOV;
|
||||
io_boundary = bdev->split_on_optimal_io_boundary ? io_boundary : UINT32_MAX;
|
||||
|
||||
if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE && bdev->split_on_write_unit) {
|
||||
io_boundary = bdev->write_unit_size;
|
||||
} else if (bdev->split_on_optimal_io_boundary) {
|
||||
io_boundary = bdev->optimal_io_boundary;
|
||||
} else {
|
||||
io_boundary = UINT32_MAX;
|
||||
}
|
||||
|
||||
remaining = bdev_io->u.bdev.split_remaining_num_blocks;
|
||||
current_offset = bdev_io->u.bdev.split_current_offset_blocks;
|
||||
|
@ -1249,6 +1249,25 @@ bdev_io_spans_split_test(void)
|
||||
|
||||
/* Exceed max_sizes */
|
||||
CU_ASSERT(bdev_io_should_split(&bdev_io) == true);
|
||||
|
||||
bdev.max_segment_size = 0;
|
||||
bdev.write_unit_size = 32;
|
||||
bdev.split_on_write_unit = true;
|
||||
bdev_io.type = SPDK_BDEV_IO_TYPE_WRITE;
|
||||
|
||||
/* This I/O is one write unit */
|
||||
CU_ASSERT(bdev_io_should_split(&bdev_io) == false);
|
||||
|
||||
bdev_io.u.bdev.num_blocks = 32 * 2;
|
||||
|
||||
/* This I/O is more than one write unit */
|
||||
CU_ASSERT(bdev_io_should_split(&bdev_io) == true);
|
||||
|
||||
bdev_io.u.bdev.offset_blocks = 1;
|
||||
bdev_io.u.bdev.num_blocks = 32;
|
||||
|
||||
/* This I/O is not aligned to write unit size */
|
||||
CU_ASSERT(bdev_io_should_split(&bdev_io) == true);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -2846,6 +2865,127 @@ bdev_io_split_with_io_wait(void)
|
||||
poll_threads();
|
||||
}
|
||||
|
||||
static void
|
||||
bdev_io_write_unit_split_test(void)
|
||||
{
|
||||
struct spdk_bdev *bdev;
|
||||
struct spdk_bdev_desc *desc = NULL;
|
||||
struct spdk_io_channel *io_ch;
|
||||
struct spdk_bdev_opts bdev_opts = {};
|
||||
struct iovec iov[BDEV_IO_NUM_CHILD_IOV * 4];
|
||||
struct ut_expected_io *expected_io;
|
||||
uint64_t i;
|
||||
int rc;
|
||||
|
||||
spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
|
||||
bdev_opts.bdev_io_pool_size = 512;
|
||||
bdev_opts.bdev_io_cache_size = 64;
|
||||
|
||||
rc = spdk_bdev_set_opts(&bdev_opts);
|
||||
CU_ASSERT(rc == 0);
|
||||
spdk_bdev_initialize(bdev_init_cb, NULL);
|
||||
|
||||
bdev = allocate_bdev("bdev0");
|
||||
|
||||
rc = spdk_bdev_open_ext(bdev->name, true, bdev_ut_event_cb, NULL, &desc);
|
||||
CU_ASSERT(rc == 0);
|
||||
SPDK_CU_ASSERT_FATAL(desc != NULL);
|
||||
io_ch = spdk_bdev_get_io_channel(desc);
|
||||
CU_ASSERT(io_ch != NULL);
|
||||
|
||||
/* Write I/O 2x larger than write_unit_size should get split into 2 I/Os */
|
||||
bdev->write_unit_size = 32;
|
||||
bdev->split_on_write_unit = true;
|
||||
g_io_done = false;
|
||||
|
||||
expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 0, 32, 1);
|
||||
ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 32 * 512);
|
||||
TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
|
||||
|
||||
expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 32, 1);
|
||||
ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 32 * 512), 32 * 512);
|
||||
TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
|
||||
|
||||
rc = spdk_bdev_write_blocks(desc, io_ch, (void *)0xF000, 0, 64, io_done, NULL);
|
||||
CU_ASSERT(rc == 0);
|
||||
CU_ASSERT(g_io_done == false);
|
||||
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
|
||||
stub_complete_io(2);
|
||||
CU_ASSERT(g_io_done == true);
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
|
||||
CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
|
||||
|
||||
/* Same as above but with optimal_io_boundary < write_unit_size - the I/O should be split
|
||||
* based on write_unit_size, not optimal_io_boundary */
|
||||
bdev->split_on_optimal_io_boundary = true;
|
||||
bdev->optimal_io_boundary = 16;
|
||||
g_io_done = false;
|
||||
|
||||
rc = spdk_bdev_write_blocks(desc, io_ch, (void *)0xF000, 0, 64, io_done, NULL);
|
||||
CU_ASSERT(rc == 0);
|
||||
CU_ASSERT(g_io_done == false);
|
||||
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
|
||||
stub_complete_io(2);
|
||||
CU_ASSERT(g_io_done == true);
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
|
||||
CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
|
||||
|
||||
/* Write I/O should fail if it is smaller than write_unit_size */
|
||||
g_io_done = false;
|
||||
|
||||
rc = spdk_bdev_write_blocks(desc, io_ch, (void *)0xF000, 0, 31, io_done, NULL);
|
||||
CU_ASSERT(rc == 0);
|
||||
CU_ASSERT(g_io_done == false);
|
||||
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
|
||||
poll_threads();
|
||||
CU_ASSERT(g_io_done == true);
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
|
||||
CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
|
||||
|
||||
/* Same for I/O not aligned to write_unit_size */
|
||||
g_io_done = false;
|
||||
|
||||
rc = spdk_bdev_write_blocks(desc, io_ch, (void *)0xF000, 1, 32, io_done, NULL);
|
||||
CU_ASSERT(rc == 0);
|
||||
CU_ASSERT(g_io_done == false);
|
||||
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
|
||||
poll_threads();
|
||||
CU_ASSERT(g_io_done == true);
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
|
||||
CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
|
||||
|
||||
/* Write should fail if it needs to be split but there are not enough iovs to submit
|
||||
* an entire write unit */
|
||||
bdev->write_unit_size = SPDK_COUNTOF(iov) / 2;
|
||||
g_io_done = false;
|
||||
|
||||
for (i = 0; i < SPDK_COUNTOF(iov); i++) {
|
||||
iov[i].iov_base = (void *)(0x1000 + 512 * i);
|
||||
iov[i].iov_len = 512;
|
||||
}
|
||||
|
||||
rc = spdk_bdev_writev_blocks(desc, io_ch, iov, SPDK_COUNTOF(iov), 0, SPDK_COUNTOF(iov),
|
||||
io_done, NULL);
|
||||
CU_ASSERT(rc == 0);
|
||||
CU_ASSERT(g_io_done == false);
|
||||
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
|
||||
poll_threads();
|
||||
CU_ASSERT(g_io_done == true);
|
||||
CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
|
||||
CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
|
||||
|
||||
spdk_put_io_channel(io_ch);
|
||||
spdk_bdev_close(desc);
|
||||
free_bdev(bdev);
|
||||
spdk_bdev_finish(bdev_fini_cb, NULL);
|
||||
poll_threads();
|
||||
}
|
||||
|
||||
static void
|
||||
bdev_io_alignment(void)
|
||||
{
|
||||
@ -5874,6 +6014,7 @@ main(int argc, char **argv)
|
||||
CU_ADD_TEST(suite, bdev_io_max_size_and_segment_split_test);
|
||||
CU_ADD_TEST(suite, bdev_io_mix_split_test);
|
||||
CU_ADD_TEST(suite, bdev_io_split_with_io_wait);
|
||||
CU_ADD_TEST(suite, bdev_io_write_unit_split_test);
|
||||
CU_ADD_TEST(suite, bdev_io_alignment_with_boundary);
|
||||
CU_ADD_TEST(suite, bdev_io_alignment);
|
||||
CU_ADD_TEST(suite, bdev_histograms);
|
||||
|
Loading…
Reference in New Issue
Block a user