bdev/raid: use split_on_optimal_io_boundary
Set the bdev->optimal_io_boundary to the strip size, and set
split_on_optimal_io_boundary = true. This ensures that no I/O
submitted to the raid module crosses a strip boundary, meaning it
never needs to be split across multiple member disks. This is a step
towards removing the iovcnt == 1 limitation. Further improvements
and simplifications will be made in future patches before removing
this restriction.

Unit tests are adjusted here so that their I/O does not span strip
boundaries either.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I08943805def673288f552a1b7662a4fbe16f25eb
Reviewed-on: https://review.gerrithub.io/423323
Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
commit 2e6aac525c
parent 4f860d7e40
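For context, here is a minimal standalone sketch (not SPDK source; all names
are hypothetical) of the splitting behavior this commit opts into: with
split_on_optimal_io_boundary set, the generic bdev layer carves a parent I/O
at every multiple of optimal_io_boundary, so each child I/O the raid module
receives fits inside a single strip.

#include <stdint.h>
#include <stdio.h>

/* Blocks from 'offset' up to the next boundary; 'boundary' must be a
 * power of two, which the raid module guarantees for its strip size. */
static uint64_t
blocks_to_next_boundary(uint64_t offset, uint64_t boundary)
{
	return boundary - (offset & (boundary - 1));
}

/* Carve [offset, offset + num_blocks) at each boundary multiple. */
static void
split_io(uint64_t offset, uint64_t num_blocks, uint64_t boundary)
{
	while (num_blocks > 0) {
		uint64_t child = blocks_to_next_boundary(offset, boundary);

		if (child > num_blocks) {
			child = num_blocks;
		}
		printf("child: offset=%ju num_blocks=%ju\n",
		       (uintmax_t)offset, (uintmax_t)child);
		offset += child;
		num_blocks -= child;
	}
}

int
main(void)
{
	/* With a 32-block strip, a 100-block I/O at LBA 20 is split into
	 * children of 12, 32, 32 and 24 blocks; none crosses a strip. */
	split_io(20, 100, 32);
	return 0;
}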
@@ -327,75 +327,58 @@ raid_bdev_submit_children(struct spdk_bdev_io *bdev_io,
 	uint64_t		pd_lba;
 	uint64_t		pd_blocks;
 	uint32_t		pd_idx;
-	int			ret;
+	int			ret = 0;
 
-	for (uint64_t strip = cur_strip; strip <= end_strip; strip++) {
-		/*
-		 * For each strip of parent bdev io, process for each strip and submit
-		 * child io to bdev layer. Calculate base bdev level start lba, length
-		 * and buffer for this child io
-		 */
-		pd_strip = strip / raid_bdev->num_base_bdevs;
-		pd_idx = strip % raid_bdev->num_base_bdevs;
-		if (strip == start_strip) {
-			offset_in_strip = bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1);
-			pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip;
-			if (strip == end_strip) {
-				pd_blocks = bdev_io->u.bdev.num_blocks;
-			} else {
-				pd_blocks = raid_bdev->strip_size - offset_in_strip;
-			}
-		} else if (strip == end_strip) {
-			pd_lba = pd_strip << raid_bdev->strip_size_shift;
-			pd_blocks = ((bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) &
-				     (raid_bdev->strip_size - 1)) + 1;
-		} else {
-			pd_lba = pd_strip << raid_bdev->strip_size_shift;
-			pd_blocks = raid_bdev->strip_size;
-		}
-		raid_io->splits_comp_outstanding++;
-		assert(raid_io->splits_pending);
-		raid_io->splits_pending--;
-		if (raid_bdev->base_bdev_info[pd_idx].desc == NULL) {
-			SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
-			assert(0);
-		}
+	if (start_strip != end_strip) {
+		SPDK_ERRLOG("I/O spans strip boundary\n");
+		assert(false);
+	}
 
-		/*
-		 * Submit child io to bdev layer with using base bdev descriptors, base
-		 * bdev lba, base bdev child io length in blocks, buffer, completion
-		 * function and function callback context
-		 */
-		if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
-			ret = spdk_bdev_read_blocks(raid_bdev->base_bdev_info[pd_idx].desc,
-						    raid_ch->base_channel[pd_idx],
-						    buf, pd_lba, pd_blocks, raid_bdev_io_completion,
-						    bdev_io);
-		} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
-			ret = spdk_bdev_write_blocks(raid_bdev->base_bdev_info[pd_idx].desc,
-						     raid_ch->base_channel[pd_idx],
-						     buf, pd_lba, pd_blocks, raid_bdev_io_completion,
-						     bdev_io);
-		} else {
-			SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type);
-			assert(0);
-		}
-		if (ret != 0) {
-			/*
-			 * If failed to submit child io to bdev layer then queue the parent
-			 * bdev io with current active split information in the wait queue
-			 * for that core. This will get resume from this point only. Assume
-			 * if 4 splits are required and 2 childs are submitted, then parent
-			 * io is queued to io waitq of this core and it will get resumed and
-			 * try to submit the remaining 3 and 4 childs
-			 */
-			raid_io->buf = buf;
-			raid_io->splits_comp_outstanding--;
-			raid_io->splits_pending++;
-			return ret;
-		}
-		buf += (pd_blocks << raid_bdev->blocklen_shift);
+	pd_strip = start_strip / raid_bdev->num_base_bdevs;
+	pd_idx = start_strip % raid_bdev->num_base_bdevs;
+	offset_in_strip = bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1);
+	pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip;
+	pd_blocks = bdev_io->u.bdev.num_blocks;
+	raid_io->splits_comp_outstanding++;
+	assert(raid_io->splits_pending);
+	raid_io->splits_pending--;
+	if (raid_bdev->base_bdev_info[pd_idx].desc == NULL) {
+		SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
+		assert(0);
+	}
 
+	/*
+	 * Submit child io to bdev layer with using base bdev descriptors, base
+	 * bdev lba, base bdev child io length in blocks, buffer, completion
+	 * function and function callback context
+	 */
+	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
+		ret = spdk_bdev_read_blocks(raid_bdev->base_bdev_info[pd_idx].desc,
+					    raid_ch->base_channel[pd_idx],
+					    buf, pd_lba, pd_blocks, raid_bdev_io_completion,
+					    bdev_io);
+	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
+		ret = spdk_bdev_write_blocks(raid_bdev->base_bdev_info[pd_idx].desc,
+					     raid_ch->base_channel[pd_idx],
+					     buf, pd_lba, pd_blocks, raid_bdev_io_completion,
+					     bdev_io);
+	} else {
+		SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type);
+		assert(0);
+	}
+	if (ret != 0) {
+		/*
+		 * If failed to submit child io to bdev layer then queue the parent
+		 * bdev io with current active split information in the wait queue
+		 * for that core. This will get resume from this point only. Assume
+		 * if 4 splits are required and 2 childs are submitted, then parent
+		 * io is queued to io waitq of this core and it will get resumed and
+		 * try to submit the remaining 3 and 4 childs
+		 */
+		raid_io->buf = buf;
+		raid_io->splits_comp_outstanding--;
+		raid_io->splits_pending++;
+		return ret;
 	}
 
 	return 0;
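The single-strip fast path above is plain power-of-two arithmetic. A
standalone sketch of that RAID-0 mapping, with illustrative values (the
strip size, disk count, and LBA below are made up, not taken from the
commit):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t strip_size = 64;        /* blocks per strip */
	uint64_t strip_size_shift = 6;   /* log2(strip_size) */
	uint32_t num_base_bdevs = 4;     /* member disks */
	uint64_t offset_blocks = 300;    /* start LBA of the single-strip I/O */

	uint64_t start_strip = offset_blocks >> strip_size_shift;            /* 4 */
	uint64_t pd_strip = start_strip / num_base_bdevs;                    /* 1 */
	uint32_t pd_idx = start_strip % num_base_bdevs;                      /* 0 */
	uint64_t offset_in_strip = offset_blocks & (strip_size - 1);         /* 44 */
	uint64_t pd_lba = (pd_strip << strip_size_shift) + offset_in_strip;  /* 108 */

	printf("strip %ju -> member disk %u, LBA %ju\n",
	       (uintmax_t)start_strip, pd_idx, (uintmax_t)pd_lba);
	return 0;
}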
@@ -1257,11 +1240,14 @@ raid_bdev_configure(struct raid_bdev *raid_bdev)
 	raid_bdev_gen = &raid_bdev->bdev;
 	raid_bdev_gen->write_cache = 0;
 	raid_bdev_gen->blocklen = blocklen;
-	raid_bdev_gen->optimal_io_boundary = 0;
 	raid_bdev_gen->ctxt = raid_bdev;
 	raid_bdev_gen->fn_table = &g_raid_bdev_fn_table;
 	raid_bdev_gen->module = &g_raid_if;
 	raid_bdev->strip_size = (raid_bdev->strip_size * 1024) / blocklen;
 	raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size);
 	raid_bdev->blocklen_shift = spdk_u32log2(blocklen);
+	raid_bdev_gen->optimal_io_boundary = raid_bdev->strip_size;
+	raid_bdev_gen->split_on_optimal_io_boundary = true;
+
 	/*
 	 * RAID bdev logic is for striping so take the minimum block count based
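The unit conversion above is worth spelling out: strip_size arrives from
configuration in KiB and is converted to blocks, and that block count is
what gets published as optimal_io_boundary (the bdev layer expresses
boundaries in blocks). A small worked example with hypothetical values:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t strip_size_kb = 128;  /* strip size as configured, in KiB */
	uint32_t blocklen = 512;       /* bytes per block of the member disks */

	/* 128 KiB / 512 B = 256 blocks; after this commit the same value
	 * is stored in both raid_bdev->strip_size and optimal_io_boundary. */
	uint32_t strip_size_blocks = (strip_size_kb * 1024) / blocklen;

	printf("optimal_io_boundary = %u blocks\n", strip_size_blocks);
	return 0;
}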
@@ -863,7 +863,8 @@ verify_raid_bdev(struct rpc_construct_raid_bdev *r, bool presence, uint32_t raid
 	CU_ASSERT(strcmp(pbdev->bdev.product_name, "Pooled Device") == 0);
 	CU_ASSERT(pbdev->bdev.write_cache == 0);
 	CU_ASSERT(pbdev->bdev.blocklen == g_block_len);
-	CU_ASSERT(pbdev->bdev.optimal_io_boundary == 0);
+	CU_ASSERT(pbdev->bdev.optimal_io_boundary == pbdev->strip_size);
+	CU_ASSERT(pbdev->bdev.split_on_optimal_io_boundary == true);
 	CU_ASSERT(pbdev->bdev.ctxt == pbdev);
 	CU_ASSERT(pbdev->bdev.fn_table == &g_raid_bdev_fn_table);
 	CU_ASSERT(pbdev->bdev.module == &g_raid_if);
@@ -1375,9 +1376,9 @@ test_write_io(void)
 	for (count = 0; count < g_max_qd; count++) {
 		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
 		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
-		io_len = (rand() % g_max_io_size) + 1;
+		io_len = (rand() % g_strip_size) + 1;
 		bdev_io_initialize(bdev_io, &pbdev->bdev, lba, io_len, SPDK_BDEV_IO_TYPE_WRITE);
-		lba += io_len;
+		lba += g_strip_size;
 		memset(g_io_output, 0, (g_max_io_size / g_strip_size) + 1 * sizeof(struct io_output));
 		g_io_output_index = 0;
 		raid_bdev_submit_request(ch, bdev_io);
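The adjusted tests rely on an invariant: each I/O starts on a strip-aligned
LBA (lba advances by g_strip_size) and its length is at most g_strip_size,
so it can never cross a strip boundary. A tiny standalone check of that
invariant (hypothetical harness, not the actual unit test):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

int
main(void)
{
	const uint64_t strip_size = 64;  /* stand-in for g_strip_size, in blocks */
	uint64_t lba = 0;
	int i;

	for (i = 0; i < 1000; i++) {
		uint64_t io_len = ((uint64_t)rand() % strip_size) + 1;
		uint64_t start_strip = lba / strip_size;
		uint64_t end_strip = (lba + io_len - 1) / strip_size;

		/* First and last block of the I/O land in the same strip. */
		assert(start_strip == end_strip);
		lba += strip_size;  /* next I/O begins on the next boundary */
	}
	return 0;
}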
@@ -1454,9 +1455,9 @@ test_read_io(void)
 	for (count = 0; count < g_max_qd; count++) {
 		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
 		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
-		io_len = (rand() % g_max_io_size) + 1;
+		io_len = (rand() % g_strip_size) + 1;
 		bdev_io_initialize(bdev_io, &pbdev->bdev, lba, io_len, SPDK_BDEV_IO_TYPE_READ);
-		lba += io_len;
+		lba += g_strip_size;
 		memset(g_io_output, 0, (g_max_io_size / g_strip_size) + 1 * sizeof(struct io_output));
 		g_io_output_index = 0;
 		raid_bdev_submit_request(ch, bdev_io);
@@ -1533,9 +1534,9 @@ test_io_failure(void)
 	for (count = 0; count < 1; count++) {
 		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
 		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
-		io_len = (rand() % g_max_io_size) + 1;
+		io_len = (rand() % g_strip_size) + 1;
 		bdev_io_initialize(bdev_io, &pbdev->bdev, lba, io_len, SPDK_BDEV_IO_TYPE_INVALID);
-		lba += io_len;
+		lba += g_strip_size;
 		memset(g_io_output, 0, (g_max_io_size / g_strip_size) + 1 * sizeof(struct io_output));
 		g_io_output_index = 0;
 		raid_bdev_submit_request(ch, bdev_io);
@@ -1551,9 +1552,9 @@ test_io_failure(void)
 	for (count = 0; count < 1; count++) {
 		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
 		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
-		io_len = (rand() % g_max_io_size) + 1;
+		io_len = (rand() % g_strip_size) + 1;
 		bdev_io_initialize(bdev_io, &pbdev->bdev, lba, io_len, SPDK_BDEV_IO_TYPE_WRITE);
-		lba += io_len;
+		lba += g_strip_size;
 		memset(g_io_output, 0, (g_max_io_size / g_strip_size) + 1 * sizeof(struct io_output));
 		g_io_output_index = 0;
 		raid_bdev_submit_request(ch, bdev_io);
@@ -1635,10 +1636,10 @@ test_io_waitq(void)
 		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
 		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
 		TAILQ_INSERT_TAIL(&head_io, bdev_io, module_link);
-		io_len = (rand() % g_max_io_size) + 1;
+		io_len = (rand() % g_strip_size) + 1;
 		bdev_io_initialize(bdev_io, &pbdev->bdev, lba, io_len, SPDK_BDEV_IO_TYPE_WRITE);
 		g_bdev_io_submit_status = -ENOMEM;
-		lba += io_len;
+		lba += g_strip_size;
 		raid_bdev_submit_request(ch, bdev_io);
 	}
 
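test_io_waitq forces child submissions to report -ENOMEM, exercising the
path where the parent I/O is parked until resources free up. A hedged
sketch of the general retry pattern using the bdev layer's wait-queue API
(the raid module's actual bookkeeping differs; see the raid_io->splits_*
handling in the first hunk, and spdk_bdev_desc_get_bdev()/
spdk_bdev_queue_io_wait() are assumed as declared in spdk/bdev.h):

#include "spdk/bdev.h"

/* Per-I/O context; stands in for the raid module's raid_bdev_io. */
struct retry_ctx {
	struct spdk_bdev_io_wait_entry	wait_entry;
	struct spdk_bdev_desc		*desc;
	struct spdk_io_channel		*ch;
	void				*buf;
	uint64_t			lba;
	uint64_t			num_blocks;
};

static void
io_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	spdk_bdev_free_io(bdev_io);
}

static void
submit_with_retry(void *arg)
{
	struct retry_ctx *ctx = arg;
	int rc;

	rc = spdk_bdev_write_blocks(ctx->desc, ctx->ch, ctx->buf,
				    ctx->lba, ctx->num_blocks, io_done, ctx);
	if (rc == -ENOMEM) {
		/* Out of bdev_io objects: park ourselves on the wait queue
		 * and resubmit from exactly this point when room opens up. */
		ctx->wait_entry.bdev = spdk_bdev_desc_get_bdev(ctx->desc);
		ctx->wait_entry.cb_fn = submit_with_retry;
		ctx->wait_entry.cb_arg = ctx;
		spdk_bdev_queue_io_wait(ctx->wait_entry.bdev, ctx->ch,
					&ctx->wait_entry);
	}
}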
@@ -1869,7 +1870,7 @@ test_multi_raid_with_io(void)
 	for (count = 0; count < g_max_qd; count++) {
 		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
 		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
-		io_len = (rand() % g_max_io_size) + 1;
+		io_len = (rand() % g_strip_size) + 1;
 		iotype = (rand() % 2) ? SPDK_BDEV_IO_TYPE_WRITE : SPDK_BDEV_IO_TYPE_READ;
 		memset(g_io_output, 0, (g_max_io_size / g_strip_size) + 1 * sizeof(struct io_output));
 		g_io_output_index = 0;
@@ -1882,7 +1883,7 @@ test_multi_raid_with_io(void)
 			}
 		}
 		bdev_io_initialize(bdev_io, &pbdev->bdev, lba, io_len, iotype);
-		lba += io_len;
+		lba += g_strip_size;
 		CU_ASSERT(pbdev != NULL);
 		raid_bdev_submit_request(ch_random, bdev_io);
 		verify_io(bdev_io, g_max_base_drives, ch_ctx_random, pbdev,