/*
 * Review notes (text ahead of the first "diff --git" header is not part of any hunk):
 *
 * - raid5f.c: adds the read path -- a parity chunk index helper, a read completion
 *   callback, an io_wait retry wrapper and raid5f_submit_read_request() -- and makes
 *   raid5f_start() set optimal_io_boundary to strip_size instead of stripe_blocks.
 * - raid5f_ut.c: adds bdev mocks and test_raid5f_submit_read_request.
 * - Changed in review: chunk_data_idx is widened to uint64_t before the shift in
 *   raid5f_submit_read_request(), and deinit_io_info() releases the spdk_dma_malloc()
 *   buffers with spdk_dma_free(). Comment lines were added, so the "+" counts of the
 *   affected hunks and the start lines of the hunks after them were recomputed.
 * - NOTE(review): line breaks, indentation and blank context lines were rebuilt from
 *   the hunk line counts -- confirm against the target tree before applying.
 * - NOTE(review): the "index" blob ids still describe the pre-review content.
 */
diff --git a/module/bdev/raid/raid5f.c b/module/bdev/raid/raid5f.c
index b94d40d25..3131b08d4 100644
--- a/module/bdev/raid/raid5f.c
+++ b/module/bdev/raid/raid5f.c
@@ -9,7 +9,7 @@
 #include "spdk/thread.h"
 #include "spdk/string.h"
 #include "spdk/util.h"
-
+#include "spdk/likely.h"
 #include "spdk/log.h"
 
 struct raid5f_info {
@@ -29,10 +29,100 @@ raid5f_stripe_data_chunks_num(const struct raid_bdev *raid_bdev)
 	return raid_bdev->num_base_bdevs - raid_bdev->module->base_bdevs_max_degraded;
 }
 
+/*
+ * Return the index of the chunk that holds parity for the given stripe. It is
+ * computed as the data chunk count minus (stripe_index modulo num_base_bdevs),
+ * so it steps down by one per stripe and repeats every num_base_bdevs stripes.
+ */
+static inline uint8_t
+raid5f_stripe_parity_chunk_index(const struct raid_bdev *raid_bdev, uint64_t stripe_index)
+{
+	return raid5f_stripe_data_chunks_num(raid_bdev) - stripe_index % raid_bdev->num_base_bdevs;
+}
+
+/* Base bdev read completion: free the base bdev_io and complete the raid I/O with its status. */
+static void
+raid5f_chunk_read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+	struct raid_bdev_io *raid_io = cb_arg;
+
+	spdk_bdev_free_io(bdev_io);
+
+	raid_bdev_io_complete(raid_io, success ? SPDK_BDEV_IO_STATUS_SUCCESS :
+			      SPDK_BDEV_IO_STATUS_FAILED);
+}
+
+static void raid5f_submit_rw_request(struct raid_bdev_io *raid_io);
+
+/* Callback handed to raid_bdev_queue_io_wait(): submit the request again. */
+static void
+_raid5f_submit_rw_request(void *_raid_io)
+{
+	struct raid_bdev_io *raid_io = _raid_io;
+
+	raid5f_submit_rw_request(raid_io);
+}
+
+/*
+ * Read from the single data chunk that contains stripe_offset, the block offset
+ * within the stripe's data. The parity chunk is skipped when the data chunk
+ * index is mapped to a base bdev. Returns 0 if the read was submitted or queued
+ * for retry after -ENOMEM, otherwise the error from spdk_bdev_readv_blocks().
+ */
+static int
+raid5f_submit_read_request(struct raid_bdev_io *raid_io, uint64_t stripe_index,
+			   uint64_t stripe_offset)
+{
+	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
+	uint8_t chunk_data_idx = stripe_offset >> raid_bdev->strip_size_shift;
+	uint8_t p_idx = raid5f_stripe_parity_chunk_index(raid_bdev, stripe_index);
+	uint8_t chunk_idx = chunk_data_idx < p_idx ? chunk_data_idx : chunk_data_idx + 1;
+	struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk_idx];
+	struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk_idx];
+	/* Widen before shifting; a uint8_t operand would be promoted to (signed) int. */
+	uint64_t chunk_offset = stripe_offset -
+				((uint64_t)chunk_data_idx << raid_bdev->strip_size_shift);
+	uint64_t base_offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + chunk_offset;
+	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
+	int ret;
+
+	ret = spdk_bdev_readv_blocks(base_info->desc, base_ch,
+				     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
+				     base_offset_blocks, bdev_io->u.bdev.num_blocks,
+				     raid5f_chunk_read_complete, raid_io);
+	if (spdk_unlikely(ret == -ENOMEM)) {
+		raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
+					_raid5f_submit_rw_request);
+		return 0;
+	}
+
+	return ret;
+}
+
 static void
 raid5f_submit_rw_request(struct raid_bdev_io *raid_io)
 {
-	raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
+	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
+	struct raid5f_info *r5f_info = raid_io->raid_bdev->module_private;
+	uint64_t offset_blocks = bdev_io->u.bdev.offset_blocks;
+	uint64_t stripe_index = offset_blocks / r5f_info->stripe_blocks;
+	uint64_t stripe_offset = offset_blocks % r5f_info->stripe_blocks;
+	int ret;
+
+	switch (bdev_io->type) {
+	case SPDK_BDEV_IO_TYPE_READ:
+		/* raid5f_start() requests splitting at strip_size boundaries. */
+		assert(bdev_io->u.bdev.num_blocks <= r5f_info->raid_bdev->strip_size);
+		ret = raid5f_submit_read_request(raid_io, stripe_index, stripe_offset);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (spdk_unlikely(ret)) {
+		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
+	}
 }
 
 static int
@@ -57,7 +147,7 @@ raid5f_start(struct raid_bdev *raid_bdev)
 	r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev);
 
 	raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes;
-	raid_bdev->bdev.optimal_io_boundary = r5f_info->stripe_blocks;
+	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
 	raid_bdev->bdev.split_on_optimal_io_boundary = true;
 
 	raid_bdev->module_private = r5f_info;
diff --git a/test/unit/lib/bdev/raid/raid5f.c/raid5f_ut.c b/test/unit/lib/bdev/raid/raid5f.c/raid5f_ut.c
index a0f736ff9..d9f41c51c 100644
--- a/test/unit/lib/bdev/raid/raid5f.c/raid5f_ut.c
+++ b/test/unit/lib/bdev/raid/raid5f.c/raid5f_ut.c
@@ -6,13 +6,23 @@
 #include "spdk/stdinc.h"
 #include "spdk_cunit.h"
 #include "spdk/env.h"
-#include "spdk_internal/mock.h"
+#include "common/lib/test_env.c"
 
 #include "bdev/raid/raid5f.c"
 
 DEFINE_STUB_V(raid_bdev_module_list_add, (struct raid_bdev_module *raid_module));
-DEFINE_STUB_V(raid_bdev_io_complete, (struct raid_bdev_io *raid_io,
-	      enum spdk_bdev_io_status status));
+DEFINE_STUB_V(raid_bdev_queue_io_wait, (struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,
+	      struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn));
+
+void
+raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
+{
+	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
+
+	if (bdev_io->internal.cb) {
+		bdev_io->internal.cb(bdev_io, status == SPDK_BDEV_IO_STATUS_SUCCESS, bdev_io->internal.caller_ctx);
+	}
+}
 
 struct raid5f_params {
 	uint8_t num_base_bdevs;
@@ -160,12 +170,318 @@ test_raid5f_start(void)
 		CU_ASSERT_EQUAL(r5f_info->raid_bdev->bdev.blockcnt,
 				(params->base_bdev_blockcnt - params->base_bdev_blockcnt % params->strip_size) *
 				(params->num_base_bdevs - 1));
-		CU_ASSERT_EQUAL(r5f_info->raid_bdev->bdev.optimal_io_boundary, r5f_info->stripe_blocks);
+		CU_ASSERT_EQUAL(r5f_info->raid_bdev->bdev.optimal_io_boundary, params->strip_size);
+		CU_ASSERT_TRUE(r5f_info->raid_bdev->bdev.split_on_optimal_io_boundary);
 
 		delete_raid5f(r5f_info);
 	}
 }
 
+struct raid_io_info {
+	struct raid5f_info *r5f_info;
+	struct raid_bdev_io_channel *raid_ch;
+	enum spdk_bdev_io_type io_type;
+	uint64_t offset_blocks;
+	uint64_t num_blocks;
+	void *src_buf;
+	void *dest_buf;
+	size_t buf_size;
+	enum spdk_bdev_io_status status;
+	bool failed;
+	int remaining;
+	TAILQ_HEAD(, spdk_bdev_io) bdev_io_queue;
+};
+
+struct test_raid_bdev_io {
+	char bdev_io_buf[sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io)];
+	struct raid_io_info *io_info;
+	void *buf;
+};
+
+static void
+raid_bdev_io_completion_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+	struct raid_io_info *io_info = cb_arg;
+
+	spdk_bdev_free_io(bdev_io);
+
+	if (!success) {
+		io_info->failed = true;
+	}
+
+	if (--io_info->remaining == 0) {
+		if (io_info->failed) {
+			io_info->status = SPDK_BDEV_IO_STATUS_FAILED;
+		} else {
+			io_info->status = SPDK_BDEV_IO_STATUS_SUCCESS;
+		}
+	}
+}
+
+static struct raid_bdev_io *
+get_raid_io(struct raid_io_info *io_info, uint64_t offset_blocks_split, uint64_t num_blocks)
+{
+	struct spdk_bdev_io *bdev_io;
+	struct raid_bdev_io *raid_io;
+	struct raid_bdev *raid_bdev = io_info->r5f_info->raid_bdev;
+	uint32_t blocklen = raid_bdev->bdev.blocklen;
+	struct test_raid_bdev_io *test_raid_bdev_io;
+	void *src_buf = io_info->src_buf + offset_blocks_split * blocklen;
+	void *dest_buf = io_info->dest_buf + offset_blocks_split * blocklen;
+
+	test_raid_bdev_io = calloc(1, sizeof(*test_raid_bdev_io));
+	SPDK_CU_ASSERT_FATAL(test_raid_bdev_io != NULL);
+
+	SPDK_CU_ASSERT_FATAL(test_raid_bdev_io->bdev_io_buf == (char *)test_raid_bdev_io);
+	bdev_io = (struct spdk_bdev_io *)test_raid_bdev_io->bdev_io_buf;
+	bdev_io->bdev = &raid_bdev->bdev;
+	bdev_io->type = io_info->io_type;
+	bdev_io->u.bdev.offset_blocks = io_info->offset_blocks + offset_blocks_split;
+	bdev_io->u.bdev.num_blocks = num_blocks;
+	bdev_io->internal.cb = raid_bdev_io_completion_cb;
+	bdev_io->internal.caller_ctx = io_info;
+
+	raid_io = (void *)bdev_io->driver_ctx;
+	raid_io->raid_bdev = raid_bdev;
+	raid_io->raid_ch = io_info->raid_ch;
+	raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
+
+	test_raid_bdev_io->io_info = io_info;
+
+	if (io_info->io_type == SPDK_BDEV_IO_TYPE_READ) {
+		test_raid_bdev_io->buf = src_buf;
+		bdev_io->iov.iov_base = dest_buf;
+	} else {
+		test_raid_bdev_io->buf = dest_buf;
+		bdev_io->iov.iov_base = src_buf;
+	}
+
+	bdev_io->u.bdev.iovs = &bdev_io->iov;
+	bdev_io->u.bdev.iovcnt = 1;
+	bdev_io->iov.iov_len = num_blocks * blocklen;
+
+	io_info->remaining++;
+
+	return raid_io;
+}
+
+void
+spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
+{
+	free(bdev_io);
+}
+
+static void
+submit_io(struct raid_io_info *io_info, struct spdk_bdev_desc *desc,
+	  spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+	struct spdk_bdev_io *bdev_io;
+
+	bdev_io = calloc(1, sizeof(*bdev_io));
+	SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
+	bdev_io->internal.cb = cb;
+	bdev_io->internal.caller_ctx = cb_arg;
+
+	TAILQ_INSERT_TAIL(&io_info->bdev_io_queue, bdev_io, internal.link);
+}
+
+static void
+process_io_completions(struct raid_io_info *io_info)
+{
+	struct spdk_bdev_io *bdev_io;
+
+	while ((bdev_io = TAILQ_FIRST(&io_info->bdev_io_queue))) {
+		TAILQ_REMOVE(&io_info->bdev_io_queue, bdev_io, internal.link);
+
+		bdev_io->internal.cb(bdev_io, true, bdev_io->internal.caller_ctx);
+	}
+}
+
+/* Mock: copy the expected data into the caller's iov and queue the completion for later. */
+int
+spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+		       struct iovec *iov, int iovcnt,
+		       uint64_t offset_blocks, uint64_t num_blocks,
+		       spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+	struct raid_bdev_io *raid_io = cb_arg;
+	struct test_raid_bdev_io *test_raid_bdev_io;
+
+	SPDK_CU_ASSERT_FATAL(cb == raid5f_chunk_read_complete);
+	SPDK_CU_ASSERT_FATAL(iovcnt == 1);
+
+	test_raid_bdev_io = (struct test_raid_bdev_io *)spdk_bdev_io_from_ctx(raid_io);
+
+	memcpy(iov->iov_base, test_raid_bdev_io->buf, iov->iov_len);
+
+	submit_io(test_raid_bdev_io->io_info, desc, cb, cb_arg);
+
+	return 0;
+}
+
+static void
+test_raid5f_read_request(struct raid_io_info *io_info)
+{
+	uint32_t strip_size = io_info->r5f_info->raid_bdev->strip_size;
+	uint64_t num_blocks = io_info->num_blocks;
+	uint64_t offset_blocks_split = 0;
+
+	while (num_blocks) {
+		uint64_t chunk_offset = offset_blocks_split % strip_size;
+		uint64_t num_blocks_split = spdk_min(num_blocks, strip_size - chunk_offset);
+		struct raid_bdev_io *raid_io;
+
+		raid_io = get_raid_io(io_info, offset_blocks_split, num_blocks_split);
+
+		raid5f_submit_rw_request(raid_io);
+
+		num_blocks -= num_blocks_split;
+		offset_blocks_split += num_blocks_split;
+	}
+
+	process_io_completions(io_info);
+}
+
+/* The buffers come from spdk_dma_malloc() in init_io_info(); release them with spdk_dma_free(). */
+static void
+deinit_io_info(struct raid_io_info *io_info)
+{
+	spdk_dma_free(io_info->src_buf);
+	spdk_dma_free(io_info->dest_buf);
+}
+
+static void
+init_io_info(struct raid_io_info *io_info, struct raid5f_info *r5f_info,
+	     struct raid_bdev_io_channel *raid_ch, enum spdk_bdev_io_type io_type,
+	     uint64_t offset_blocks, uint64_t num_blocks)
+{
+	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
+	uint32_t blocklen = raid_bdev->bdev.blocklen;
+	void *src_buf, *dest_buf;
+	size_t buf_size = num_blocks * blocklen;
+	uint64_t block;
+
+	memset(io_info, 0, sizeof(*io_info));
+
+	src_buf = spdk_dma_malloc(buf_size, 4096, NULL);
+	SPDK_CU_ASSERT_FATAL(src_buf != NULL);
+
+	dest_buf = spdk_dma_malloc(buf_size, 4096, NULL);
+	SPDK_CU_ASSERT_FATAL(dest_buf != NULL);
+
+	memset(src_buf, 0xff, buf_size);
+	for (block = 0; block < num_blocks; block++) {
+		*((uint64_t *)(src_buf + block * blocklen)) = block;
+	}
+
+	io_info->r5f_info = r5f_info;
+	io_info->raid_ch = raid_ch;
+	io_info->io_type = io_type;
+	io_info->offset_blocks = offset_blocks;
+	io_info->num_blocks = num_blocks;
+	io_info->src_buf = src_buf;
+	io_info->dest_buf = dest_buf;
+	io_info->buf_size = buf_size;
+	io_info->status = SPDK_BDEV_IO_STATUS_PENDING;
+
+	TAILQ_INIT(&io_info->bdev_io_queue);
+}
+
+static void
+test_raid5f_submit_rw_request(struct raid5f_info *r5f_info, struct raid_bdev_io_channel *raid_ch,
+			      enum spdk_bdev_io_type io_type, uint64_t stripe_index, uint64_t stripe_offset_blocks,
+			      uint64_t num_blocks)
+{
+	uint64_t offset_blocks = stripe_index * r5f_info->stripe_blocks + stripe_offset_blocks;
+	struct raid_io_info io_info;
+
+	init_io_info(&io_info, r5f_info, raid_ch, io_type, offset_blocks, num_blocks);
+
+	switch (io_type) {
+	case SPDK_BDEV_IO_TYPE_READ:
+		test_raid5f_read_request(&io_info);
+		break;
+	default:
+		CU_FAIL_FATAL("unsupported io_type");
+	}
+
+	CU_ASSERT(io_info.status == SPDK_BDEV_IO_STATUS_SUCCESS);
+	CU_ASSERT(memcmp(io_info.src_buf, io_info.dest_buf, io_info.buf_size) == 0);
+
+	deinit_io_info(&io_info);
+}
+
+static void
+run_for_each_raid5f_config(void (*test_fn)(struct raid_bdev *raid_bdev,
+			   struct raid_bdev_io_channel *raid_ch))
+{
+	struct raid5f_params *params;
+
+	RAID5F_PARAMS_FOR_EACH(params) {
+		struct raid5f_info *r5f_info;
+		struct raid_bdev_io_channel raid_ch = { 0 };
+
+		r5f_info = create_raid5f(params);
+
+		raid_ch.num_channels = params->num_base_bdevs;
+		raid_ch.base_channel = calloc(params->num_base_bdevs, sizeof(struct spdk_io_channel *));
+		SPDK_CU_ASSERT_FATAL(raid_ch.base_channel != NULL);
+
+		test_fn(r5f_info->raid_bdev, &raid_ch);
+
+		free(raid_ch.base_channel);
+
+		delete_raid5f(r5f_info);
+	}
+}
+
+#define RAID5F_TEST_FOR_EACH_STRIPE(raid_bdev, i) \
+	for (i = 0; i < spdk_min(raid_bdev->num_base_bdevs, ((struct raid5f_info *)raid_bdev->module_private)->total_stripes); i++)
+
+struct test_request_conf {
+	uint64_t stripe_offset_blocks;
+	uint64_t num_blocks;
+};
+
+static void
+__test_raid5f_submit_read_request(struct raid_bdev *raid_bdev, struct raid_bdev_io_channel *raid_ch)
+{
+	struct raid5f_info *r5f_info = raid_bdev->module_private;
+	uint32_t strip_size = raid_bdev->strip_size;
+	unsigned int i;
+
+	struct test_request_conf test_requests[] = {
+		{ 0, 1 },
+		{ 0, strip_size },
+		{ 0, strip_size + 1 },
+		{ 0, r5f_info->stripe_blocks },
+		{ 1, 1 },
+		{ 1, strip_size },
+		{ 1, strip_size + 1 },
+		{ strip_size, 1 },
+		{ strip_size, strip_size },
+		{ strip_size, strip_size + 1 },
+		{ strip_size - 1, 1 },
+		{ strip_size - 1, strip_size },
+		{ strip_size - 1, strip_size + 1 },
+		{ strip_size - 1, 2 },
+	};
+	for (i = 0; i < SPDK_COUNTOF(test_requests); i++) {
+		struct test_request_conf *t = &test_requests[i];
+		uint64_t stripe_index;
+
+		RAID5F_TEST_FOR_EACH_STRIPE(raid_bdev, stripe_index) {
+			test_raid5f_submit_rw_request(r5f_info, raid_ch, SPDK_BDEV_IO_TYPE_READ,
+						      stripe_index, t->stripe_offset_blocks, t->num_blocks);
+		}
+	}
+}
+static void
+test_raid5f_submit_read_request(void)
+{
+	run_for_each_raid5f_config(__test_raid5f_submit_read_request);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -177,6 +493,7 @@ main(int argc, char **argv)
 
 	suite = CU_add_suite("raid5f", test_setup, test_cleanup);
 	CU_ADD_TEST(suite, test_raid5f_start);
+	CU_ADD_TEST(suite, test_raid5f_submit_read_request);
 
 	CU_basic_set_mode(CU_BRM_VERBOSE);
 	CU_basic_run_tests();