diff --git a/CHANGELOG.md b/CHANGELOG.md index f8be42cd2..92da743d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -94,6 +94,14 @@ Added `dynamic` scheduler options: load_limit, core_limit, core_busy. Their desc are available in JSON-RPC document, in section [framework_set_scheduler](jsonrpc.html#rpc_framework_set_scheduler). +### raid + +Added concat as a special raid module. The concat module creates a virtual bdev that +combines multiple underlying bdevs, laid out one after another. The concat bdev is +extendable: when the free space of the concat bdev is not enough, the user can +deconstruct the concat bdev and then reconstruct it with an additional underlying +bdev. + ## v22.01 ### accel diff --git a/module/bdev/raid/Makefile b/module/bdev/raid/Makefile index b550e88e8..b122cf543 100644 --- a/module/bdev/raid/Makefile +++ b/module/bdev/raid/Makefile @@ -38,7 +38,7 @@ SO_VER := 4 SO_MINOR := 0 CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/ -C_SRCS = bdev_raid.c bdev_raid_rpc.c raid0.c +C_SRCS = bdev_raid.c bdev_raid_rpc.c raid0.c concat.c ifeq ($(CONFIG_RAID5),y) C_SRCS += raid5.c diff --git a/module/bdev/raid/bdev_raid.c b/module/bdev/raid/bdev_raid.c index 60607595b..1d5389955 100644 --- a/module/bdev/raid/bdev_raid.c +++ b/module/bdev/raid/bdev_raid.c @@ -907,6 +907,7 @@ static struct { { "0", RAID0 }, { "raid5", RAID5 }, { "5", RAID5 }, + { "concat", CONCAT }, { } }; diff --git a/module/bdev/raid/bdev_raid.h b/module/bdev/raid/bdev_raid.h index 609820ed5..4e4bab590 100644 --- a/module/bdev/raid/bdev_raid.h +++ b/module/bdev/raid/bdev_raid.h @@ -40,6 +40,7 @@ enum raid_level { INVALID_RAID_LEVEL = -1, RAID0 = 0, RAID5 = 5, + CONCAT = 99, }; /* diff --git a/module/bdev/raid/concat.c b/module/bdev/raid/concat.c new file mode 100644 index 000000000..8f2e28461 --- /dev/null +++ b/module/bdev/raid/concat.c @@ -0,0 +1,350 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Peng Yu yupeng0921@gmail.com. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "bdev_raid.h" + +#include "spdk/env.h" +#include "spdk/thread.h" +#include "spdk/string.h" +#include "spdk/util.h" + +#include "spdk/log.h" + +struct concat_block_range { + uint64_t start; /* first block of this base bdev within the concat bdev */ + uint64_t length; /* number of blocks this base bdev contributes */ +}; + +/* + * brief: + * concat_bdev_io_completion function is called by lower layers to notify raid + * module that particular bdev_io is completed.
+ * params: + * bdev_io - pointer to bdev io submitted to lower layers, like child io + * success - bdev_io status + * cb_arg - function callback context (parent raid_bdev_io) + * returns: + * none + */ +static void +concat_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct raid_bdev_io *raid_io = cb_arg; + + spdk_bdev_free_io(bdev_io); + + if (success) { + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS); + } else { + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static void +concat_submit_rw_request(struct raid_bdev_io *raid_io); + +static void +_concat_submit_rw_request(void *_raid_io) +{ + struct raid_bdev_io *raid_io = _raid_io; + + concat_submit_rw_request(raid_io); +} + +/* + * brief: + * concat_submit_rw_request function is used to submit I/O to the correct + * member disk for concat bdevs. + * params: + * raid_io - pointer to parent raid_bdev_io + * returns: + * none + */ +static void +concat_submit_rw_request(struct raid_bdev_io *raid_io) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); + struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch; + struct raid_bdev *raid_bdev = raid_io->raid_bdev; + struct concat_block_range *block_range = raid_bdev->module_private; + uint64_t pd_lba; + uint64_t pd_blocks; + int pd_idx; + int ret = 0; + struct raid_base_bdev_info *base_info; + struct spdk_io_channel *base_ch; + int i; + + pd_idx = -1; + for (i = 0; i < raid_bdev->num_base_bdevs; i++) { + if (block_range[i].start > bdev_io->u.bdev.offset_blocks) { + break; + } + pd_idx = i; + } + assert(pd_idx >= 0); + assert(bdev_io->u.bdev.offset_blocks >= block_range[pd_idx].start); + pd_lba = bdev_io->u.bdev.offset_blocks - block_range[pd_idx].start; + pd_blocks = bdev_io->u.bdev.num_blocks; + base_info = &raid_bdev->base_bdev_info[pd_idx]; + if (base_info->desc == NULL) { + SPDK_ERRLOG("base bdev desc null for pd_idx %d\n", pd_idx); + assert(0); + } + + /* + * Submit child io to bdev layer using base bdev
descriptors, base + * bdev lba, base bdev child io length in blocks, buffer, completion + * function and function callback context + */ + assert(raid_ch != NULL); + assert(raid_ch->base_channel); + base_ch = raid_ch->base_channel[pd_idx]; + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { + ret = spdk_bdev_readv_blocks(base_info->desc, base_ch, + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + pd_lba, pd_blocks, concat_bdev_io_completion, + raid_io); + } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { + ret = spdk_bdev_writev_blocks(base_info->desc, base_ch, + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + pd_lba, pd_blocks, concat_bdev_io_completion, + raid_io); + } else { + SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type); + ret = -EINVAL; /* fail the io below instead of leaving it uncompleted */ + } + + if (ret == -ENOMEM) { + raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch, + _concat_submit_rw_request); + } else if (ret != 0) { + SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); + assert(false); + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static void +concat_submit_null_payload_request(struct raid_bdev_io *raid_io); + +static void +_concat_submit_null_payload_request(void *_raid_io) +{ + struct raid_bdev_io *raid_io = _raid_io; + + concat_submit_null_payload_request(raid_io); +} + +static void +concat_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct raid_bdev_io *raid_io = cb_arg; + + raid_bdev_io_complete_part(raid_io, 1, success ? + SPDK_BDEV_IO_STATUS_SUCCESS : + SPDK_BDEV_IO_STATUS_FAILED); + + spdk_bdev_free_io(bdev_io); +} + +/* + * brief: + * concat_submit_null_payload_request function submits the next batch of + * io requests with range but without payload, like FLUSH and UNMAP, to member disks; + * it will submit as many as possible unless one base io request fails with -ENOMEM, + * in which case it will queue itself for later submission.
+ * params: + * raid_io - pointer to parent raid_bdev_io on raid bdev device + * returns: + * none + */ +static void +concat_submit_null_payload_request(struct raid_bdev_io *raid_io) +{ + struct spdk_bdev_io *bdev_io; + struct raid_bdev *raid_bdev; + int ret; + struct raid_base_bdev_info *base_info; + struct spdk_io_channel *base_ch; + uint64_t pd_lba; + uint64_t pd_blocks; + uint64_t offset_blocks; + uint64_t num_blocks; + struct concat_block_range *block_range; + int i, start_idx, stop_idx; + + bdev_io = spdk_bdev_io_from_ctx(raid_io); + raid_bdev = raid_io->raid_bdev; + block_range = raid_bdev->module_private; + + offset_blocks = bdev_io->u.bdev.offset_blocks; + num_blocks = bdev_io->u.bdev.num_blocks; + start_idx = -1; + stop_idx = -1; + /* + * Go through all base bdevs, find the first bdev and the last bdev + */ + for (i = 0; i < raid_bdev->num_base_bdevs; i++) { + /* skip the bdevs before the offset_blocks */ + if (offset_blocks >= block_range[i].start + block_range[i].length) { + continue; + } + if (start_idx == -1) { + start_idx = i; + } else { + /* + * The offset_blocks might be at the middle of the first bdev. + * Besides the first bdev, the offset_blocks should be always + * at the start of the bdev.
+ */ + assert(offset_blocks == block_range[i].start); + } + pd_lba = offset_blocks - block_range[i].start; + pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba); + offset_blocks += pd_blocks; + num_blocks -= pd_blocks; + if (num_blocks == 0) { + stop_idx = i; + break; + } + } + assert(start_idx >= 0); + assert(stop_idx >= 0); + + if (raid_io->base_bdev_io_remaining == 0) { + raid_io->base_bdev_io_remaining = stop_idx - start_idx + 1; + } + offset_blocks = bdev_io->u.bdev.offset_blocks; + num_blocks = bdev_io->u.bdev.num_blocks; + for (i = start_idx; i <= stop_idx; i++) { + assert(offset_blocks >= block_range[i].start); + assert(offset_blocks < block_range[i].start + block_range[i].length); + pd_lba = offset_blocks - block_range[i].start; + pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba); + offset_blocks += pd_blocks; + num_blocks -= pd_blocks; + /* + * Skip the IOs we have submitted + */ + if (i < start_idx + raid_io->base_bdev_io_submitted) { + continue; + } + base_info = &raid_bdev->base_bdev_info[i]; + base_ch = raid_io->raid_ch->base_channel[i]; + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_UNMAP: + ret = spdk_bdev_unmap_blocks(base_info->desc, base_ch, + pd_lba, pd_blocks, + concat_base_io_complete, raid_io); + break; + case SPDK_BDEV_IO_TYPE_FLUSH: + ret = spdk_bdev_flush_blocks(base_info->desc, base_ch, + pd_lba, pd_blocks, + concat_base_io_complete, raid_io); + break; + default: + SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type); + assert(false); + ret = -EIO; + } + if (ret == 0) { + raid_io->base_bdev_io_submitted++; + } else if (ret == -ENOMEM) { + raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch, + _concat_submit_null_payload_request); + return; + } else { + SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); + assert(false); + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + } +} + +static int concat_start(struct
raid_bdev *raid_bdev) +{ + uint64_t total_blockcnt = 0; + struct raid_base_bdev_info *base_info; + struct concat_block_range *block_range; + + block_range = calloc(raid_bdev->num_base_bdevs, sizeof(struct concat_block_range)); + if (!block_range) { + SPDK_ERRLOG("Can not allocate block_range, num_base_bdevs: %u\n", + raid_bdev->num_base_bdevs); + return -ENOMEM; + } + + int idx = 0; + RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { + uint64_t strip_cnt = base_info->bdev->blockcnt >> raid_bdev->strip_size_shift; + uint64_t pd_block_cnt = strip_cnt << raid_bdev->strip_size_shift; /* rounded down to whole strips */ + + block_range[idx].start = total_blockcnt; + block_range[idx].length = pd_block_cnt; + total_blockcnt += pd_block_cnt; + idx++; + } + + raid_bdev->module_private = block_range; + + SPDK_DEBUGLOG(bdev_concat, "total blockcount %" PRIu64 ", numbasedev %u, strip size shift %u\n", + total_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift); + raid_bdev->bdev.blockcnt = total_blockcnt; + + raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size; + raid_bdev->bdev.split_on_optimal_io_boundary = true; + + return 0; +} + +static void +concat_stop(struct raid_bdev *raid_bdev) +{ + struct concat_block_range *block_range = raid_bdev->module_private; + + free(block_range); +} + +static struct raid_bdev_module g_concat_module = { + .level = CONCAT, + .base_bdevs_min = 1, + .start = concat_start, + .stop = concat_stop, + .submit_rw_request = concat_submit_rw_request, + .submit_null_payload_request = concat_submit_null_payload_request, +}; +RAID_MODULE_REGISTER(&g_concat_module) + +SPDK_LOG_REGISTER_COMPONENT(bdev_concat) diff --git a/scripts/rpc.py b/scripts/rpc.py index 05a8268f7..55cee26de 100755 --- a/scripts/rpc.py +++ b/scripts/rpc.py @@ -1944,7 +1944,7 @@ Format: 'user:u1 secret:s1 muser:mu1 msecret:ms1,user:u2 secret:s2 muser:mu2 mse help='Create new raid bdev') p.add_argument('-n', '--name', help='raid bdev name', required=True) p.add_argument('-z', '--strip-size-kb', help='strip
size in KB', type=int) - p.add_argument('-r', '--raid-level', help='raid level, only raid level 0 is supported', required=True) + p.add_argument('-r', '--raid-level', help='raid level, raid0 and a special level concat are supported', required=True) p.add_argument('-b', '--base-bdevs', help='base bdevs name, whitespace separated list in quotes', required=True) p.set_defaults(func=bdev_raid_create) diff --git a/test/bdev/bdev_raid.sh b/test/bdev/bdev_raid.sh index c85d33f6e..a0a8f0f2d 100755 --- a/test/bdev/bdev_raid.sh +++ b/test/bdev/bdev_raid.sh @@ -60,12 +60,13 @@ function on_error_exit() { } function configure_raid_bdev() { + local raid_level=$1 rm -rf $testdir/rpcs.txt cat <<- EOL >> $testdir/rpcs.txt bdev_malloc_create 32 512 -b Base_1 bdev_malloc_create 32 512 -b Base_2 - bdev_raid_create -z 64 -r 0 -b "Base_1 Base_2" -n raid0 + bdev_raid_create -z 64 -r $raid_level -b "Base_1 Base_2" -n raid EOL $rpc_py < $testdir/rpcs.txt @@ -73,6 +74,7 @@ function configure_raid_bdev() { } function raid_function_test() { + local raid_level=$1 if [ $(uname -s) = Linux ] && modprobe -n nbd; then local nbd=/dev/nbd0 local raid_bdev @@ -83,7 +85,7 @@ function raid_function_test() { echo "Process raid pid: $raid_pid" waitforlisten $raid_pid $rpc_server - configure_raid_bdev + configure_raid_bdev $raid_level raid_bdev=$($rpc_py bdev_raid_get_bdevs online | cut -d ' ' -f 1) if [ $raid_bdev = "" ]; then echo "No raid0 device in SPDK app" @@ -114,6 +116,7 @@ function raid_function_test() { trap 'on_error_exit;' ERR -raid_function_test +raid_function_test raid0 +raid_function_test concat rm -f $tmp_file diff --git a/test/bdev/blockdev.sh b/test/bdev/blockdev.sh index 4898c27ed..0f2a9713d 100755 --- a/test/bdev/blockdev.sh +++ b/test/bdev/blockdev.sh @@ -44,8 +44,11 @@ function setup_bdev_conf() { bdev_malloc_create -b Malloc3 32 512 bdev_malloc_create -b Malloc4 32 512 bdev_malloc_create -b Malloc5 32 512 + bdev_malloc_create -b Malloc6 32 512 + bdev_malloc_create -b Malloc7 32 512 
bdev_passthru_create -p TestPT -b Malloc3 bdev_raid_create -n raid0 -z 64 -r 0 -b "Malloc4 Malloc5" + bdev_raid_create -n concat0 -z 64 -r concat -b "Malloc6 Malloc7" bdev_set_qos_limit --rw_mbytes_per_sec 100 Malloc3 bdev_set_qos_limit --rw_ios_per_sec 20000 Malloc0 RPC diff --git a/test/unit/lib/bdev/raid/Makefile b/test/unit/lib/bdev/raid/Makefile index 0090a85ce..2dfb54126 100644 --- a/test/unit/lib/bdev/raid/Makefile +++ b/test/unit/lib/bdev/raid/Makefile @@ -34,7 +34,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -DIRS-y = bdev_raid.c +DIRS-y = bdev_raid.c concat.c DIRS-$(CONFIG_RAID5) += raid5.c diff --git a/test/unit/lib/bdev/raid/concat.c/Makefile b/test/unit/lib/bdev/raid/concat.c/Makefile new file mode 100644 index 000000000..2a07a2be3 --- /dev/null +++ b/test/unit/lib/bdev/raid/concat.c/Makefile @@ -0,0 +1,5 @@ +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../../../..) + +TEST_FILE = concat_ut.c + +include $(SPDK_ROOT_DIR)/mk/spdk.unittest.mk diff --git a/test/unit/lib/bdev/raid/concat.c/concat_ut.c b/test/unit/lib/bdev/raid/concat.c/concat_ut.c new file mode 100644 index 000000000..52c4dade3 --- /dev/null +++ b/test/unit/lib/bdev/raid/concat.c/concat_ut.c @@ -0,0 +1,545 @@ +#include "spdk/stdinc.h" +#include "spdk_cunit.h" +#include "spdk/env.h" +#include "thread/thread_internal.h" +#include "spdk_internal/mock.h" + +#include "bdev/raid/bdev_raid.h" +#include "bdev/raid/concat.c" + +#define BLOCK_LEN (4096) + +enum CONCAT_IO_TYPE { + CONCAT_NONE = 0, + CONCAT_WRITEV, + CONCAT_READV, + CONCAT_FLUSH, + CONCAT_UNMAP, +}; + +struct spdk_bdev_desc { + struct spdk_bdev *bdev; +}; + +#define MAX_RECORDS (10) +/* + * Store the information of io requests sent to the underlying bdevs. + * For a single null payload request to the concat bdev, + * we may send multiple requests to the underlying bdevs, + * so we store the io request information in arrays.
+ */ +struct req_records { + uint64_t offset_blocks[MAX_RECORDS]; + uint64_t num_blocks[MAX_RECORDS]; + enum CONCAT_IO_TYPE io_type[MAX_RECORDS]; + int count; +} g_req_records; + +/* + * g_succeed is true means the spdk_bdev_readv/writev/unmap/flush_blocks + * functions will return 0. + * g_succeed is false means the spdk_bdev_readv/writev/unmap/flush_blocks + * functions will return -ENOMEM. + * We always set it to false before an IO request, then the raid_bdev_queue_io_wait + * function will re-submit the request, and the raid_bdev_queue_io_wait function will + * set g_succeed to true, then the IO will succeed next time. + */ +bool g_succeed; + +DEFINE_STUB_V(raid_bdev_module_list_add, (struct raid_bdev_module *raid_module)); +DEFINE_STUB_V(raid_bdev_io_complete, (struct raid_bdev_io *raid_io, + enum spdk_bdev_io_status status)); +DEFINE_STUB_V(spdk_bdev_free_io, (struct spdk_bdev_io *bdev_io)); +DEFINE_STUB(raid_bdev_io_complete_part, bool, + (struct raid_bdev_io *raid_io, uint64_t completed, + enum spdk_bdev_io_status status), + true); + +int +spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + if (g_succeed) { + int i = g_req_records.count; + + g_req_records.offset_blocks[i] = offset_blocks; + g_req_records.num_blocks[i] = num_blocks; + g_req_records.io_type[i] = CONCAT_READV; + g_req_records.count++; + cb(NULL, true, cb_arg); + return 0; + } else { + return -ENOMEM; + } +} + +int +spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + if (g_succeed) { + int i = g_req_records.count; + + g_req_records.offset_blocks[i] = offset_blocks; + g_req_records.num_blocks[i] = num_blocks; + g_req_records.io_type[i] = CONCAT_WRITEV; + g_req_records.count++; +
cb(NULL, true, cb_arg); + return 0; + } else { + return -ENOMEM; + } +} + +int +spdk_bdev_unmap_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + if (g_succeed) { + int i = g_req_records.count; + + g_req_records.offset_blocks[i] = offset_blocks; + g_req_records.num_blocks[i] = num_blocks; + g_req_records.io_type[i] = CONCAT_UNMAP; + g_req_records.count++; + cb(NULL, true, cb_arg); + return 0; + } else { + return -ENOMEM; + } +} + +int +spdk_bdev_flush_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + if (g_succeed) { + int i = g_req_records.count; + + g_req_records.offset_blocks[i] = offset_blocks; + g_req_records.num_blocks[i] = num_blocks; + g_req_records.io_type[i] = CONCAT_FLUSH; + g_req_records.count++; + cb(NULL, true, cb_arg); + return 0; + } else { + return -ENOMEM; + } +} + +void +raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, + struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn) +{ + g_succeed = true; + cb_fn(raid_io); +} + +static void +init_globals(void) +{ + int i; + + for (i = 0; i < MAX_RECORDS; i++) { + g_req_records.offset_blocks[i] = 0; + g_req_records.num_blocks[i] = 0; + g_req_records.io_type[i] = CONCAT_NONE; + } + g_req_records.count = 0; + g_succeed = false; +} + +struct concat_params { + uint8_t num_base_bdevs; + uint64_t base_bdev_blockcnt; + uint32_t base_bdev_blocklen; + uint32_t strip_size; +}; + +static struct concat_params *g_params; +static size_t g_params_count; + +#define ARRAY_FOR_EACH(a, e) \ + for (e = a; e < a + SPDK_COUNTOF(a); e++) + +#define CONCAT_PARAMS_FOR_EACH(p) \ + for (p = g_params; p < g_params + g_params_count; p++) + +static int +test_setup(void) +{ + uint8_t num_base_bdevs_values[] = { 3, 4, 5 }; + uint64_t base_bdev_blockcnt_values[] = { 1, 1024, 1024 * 1024
}; + uint32_t base_bdev_blocklen_values[] = { 512, 4096 }; + uint32_t strip_size_kb_values[] = { 1, 4, 128 }; + uint8_t *num_base_bdevs; + uint64_t *base_bdev_blockcnt; + uint32_t *base_bdev_blocklen; + uint32_t *strip_size_kb; + struct concat_params *params; + + g_params_count = SPDK_COUNTOF(num_base_bdevs_values) * + SPDK_COUNTOF(base_bdev_blockcnt_values) * + SPDK_COUNTOF(base_bdev_blocklen_values) * + SPDK_COUNTOF(strip_size_kb_values); + g_params = calloc(g_params_count, sizeof(*g_params)); + if (!g_params) { + return -ENOMEM; + } + + params = g_params; + + ARRAY_FOR_EACH(num_base_bdevs_values, num_base_bdevs) { + ARRAY_FOR_EACH(base_bdev_blockcnt_values, base_bdev_blockcnt) { + ARRAY_FOR_EACH(base_bdev_blocklen_values, base_bdev_blocklen) { + ARRAY_FOR_EACH(strip_size_kb_values, strip_size_kb) { + params->num_base_bdevs = *num_base_bdevs; + params->base_bdev_blockcnt = *base_bdev_blockcnt; + params->base_bdev_blocklen = *base_bdev_blocklen; + params->strip_size = *strip_size_kb * 1024 / *base_bdev_blocklen; + if (params->strip_size == 0 || + params->strip_size > *base_bdev_blockcnt) { + g_params_count--; + continue; + } + params++; + } + } + } + } + + return 0; +} + +static int +test_cleanup(void) +{ + free(g_params); + return 0; +} + +static struct raid_bdev * +create_raid_bdev(struct concat_params *params) +{ + struct raid_bdev *raid_bdev; + struct raid_base_bdev_info *base_info; + + raid_bdev = calloc(1, sizeof(*raid_bdev)); + SPDK_CU_ASSERT_FATAL(raid_bdev != NULL); + + raid_bdev->module = &g_concat_module; + raid_bdev->num_base_bdevs = params->num_base_bdevs; + raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, + sizeof(struct raid_base_bdev_info)); + SPDK_CU_ASSERT_FATAL(raid_bdev->base_bdev_info != NULL); + + RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { + base_info->bdev = calloc(1, sizeof(*base_info->bdev)); + SPDK_CU_ASSERT_FATAL(base_info->bdev != NULL); + base_info->desc = calloc(1, sizeof(*base_info->desc)); +
SPDK_CU_ASSERT_FATAL(base_info->desc != NULL); + + base_info->bdev->blockcnt = params->base_bdev_blockcnt; + base_info->bdev->blocklen = params->base_bdev_blocklen; + } + + raid_bdev->strip_size = params->strip_size; + raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); + raid_bdev->bdev.blocklen = params->base_bdev_blocklen; + + return raid_bdev; +} + +static void +delete_raid_bdev(struct raid_bdev *raid_bdev) +{ + struct raid_base_bdev_info *base_info; + + RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { + free(base_info->bdev); + free(base_info->desc); + } + free(raid_bdev->base_bdev_info); + free(raid_bdev); +} + +static struct raid_bdev * +create_concat(struct concat_params *params) +{ + struct raid_bdev *raid_bdev = create_raid_bdev(params); + + CU_ASSERT(concat_start(raid_bdev) == 0); + return raid_bdev; +} + +static void +delete_concat(struct raid_bdev *raid_bdev) +{ + concat_stop(raid_bdev); + delete_raid_bdev(raid_bdev); +} + +static void +test_concat_start(void) +{ + struct raid_bdev *raid_bdev; + struct concat_params *params; + struct concat_block_range *block_range; + uint64_t total_blockcnt; + int i; + + CONCAT_PARAMS_FOR_EACH(params) { + raid_bdev = create_concat(params); + block_range = raid_bdev->module_private; + total_blockcnt = 0; + for (i = 0; i < params->num_base_bdevs; i++) { + CU_ASSERT(block_range[i].start == total_blockcnt); + CU_ASSERT(block_range[i].length == params->base_bdev_blockcnt); + total_blockcnt += params->base_bdev_blockcnt; + } + delete_concat(raid_bdev); + } +} + +static void +bdev_io_cleanup(struct spdk_bdev_io *bdev_io) +{ + if (bdev_io->u.bdev.iovs) { + if (bdev_io->u.bdev.iovs->iov_base) { + free(bdev_io->u.bdev.iovs->iov_base); + } + free(bdev_io->u.bdev.iovs); + } + free(bdev_io); +} + +static void +bdev_io_initialize(struct spdk_bdev_io *bdev_io, struct spdk_io_channel *ch, struct spdk_bdev *bdev, + uint64_t lba, uint64_t blocks, int16_t iotype) +{ + struct spdk_bdev_channel *channel =
spdk_io_channel_get_ctx(ch); + + bdev_io->bdev = bdev; + bdev_io->u.bdev.offset_blocks = lba; + bdev_io->u.bdev.num_blocks = blocks; + bdev_io->type = iotype; + + if (bdev_io->type == SPDK_BDEV_IO_TYPE_UNMAP || bdev_io->type == SPDK_BDEV_IO_TYPE_FLUSH) { + return; + } + + bdev_io->u.bdev.iovcnt = 1; + bdev_io->u.bdev.iovs = calloc(1, sizeof(struct iovec)); + SPDK_CU_ASSERT_FATAL(bdev_io->u.bdev.iovs != NULL); + bdev_io->u.bdev.iovs->iov_base = calloc(1, bdev_io->u.bdev.num_blocks * BLOCK_LEN); + SPDK_CU_ASSERT_FATAL(bdev_io->u.bdev.iovs->iov_base != NULL); + bdev_io->u.bdev.iovs->iov_len = bdev_io->u.bdev.num_blocks * BLOCK_LEN; + bdev_io->internal.ch = channel; +} + +static void +submit_and_verify_rw(enum CONCAT_IO_TYPE io_type, struct concat_params *params) +{ + struct raid_bdev *raid_bdev; + struct spdk_bdev_io *bdev_io; + struct spdk_io_channel *ch; + struct raid_bdev_io *raid_io; + struct raid_bdev_io_channel *raid_ch; + uint64_t lba, blocks; + int i; + + lba = 0; + blocks = 1; + for (i = 0; i < params->num_base_bdevs; i++) { + init_globals(); + raid_bdev = create_concat(params); + bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io)); + SPDK_CU_ASSERT_FATAL(bdev_io != NULL); + raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; + raid_ch = calloc(1, sizeof(struct raid_bdev_io_channel)); + SPDK_CU_ASSERT_FATAL(raid_ch != NULL); + raid_ch->base_channel = calloc(params->num_base_bdevs, + sizeof(*raid_ch->base_channel)); + SPDK_CU_ASSERT_FATAL(raid_ch->base_channel != NULL); + raid_io->raid_ch = raid_ch; + raid_io->raid_bdev = raid_bdev; + ch = calloc(1, sizeof(struct spdk_io_channel)); + SPDK_CU_ASSERT_FATAL(ch != NULL); + + switch (io_type) { + case CONCAT_WRITEV: + bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_WRITE); + concat_submit_rw_request(raid_io); + break; + case CONCAT_READV: + bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_READ); +
concat_submit_rw_request(raid_io); + break; + case CONCAT_UNMAP: + bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_UNMAP); + concat_submit_null_payload_request(raid_io); + break; + case CONCAT_FLUSH: + bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_FLUSH); + concat_submit_null_payload_request(raid_io); + break; + default: + CU_ASSERT(false); + } + + /* + * We submit request to the first lba of each underlying device, + * so the offset of the underlying device should always be 0. + */ + CU_ASSERT(g_req_records.offset_blocks[0] == 0); + CU_ASSERT(g_req_records.num_blocks[0] == blocks); + CU_ASSERT(g_req_records.io_type[0] == io_type); + CU_ASSERT(g_req_records.count == 1); + bdev_io_cleanup(bdev_io); + free(ch); + free(raid_ch->base_channel); + free(raid_ch); + delete_concat(raid_bdev); + lba += params->base_bdev_blockcnt; + } +} + +static void +test_concat_rw(void) +{ + struct concat_params *params; + enum CONCAT_IO_TYPE io_type_list[] = {CONCAT_WRITEV, CONCAT_READV}; + enum CONCAT_IO_TYPE io_type; + int i; + + CONCAT_PARAMS_FOR_EACH(params) { + for (i = 0; i < 2; i ++) { + io_type = io_type_list[i]; + submit_and_verify_rw(io_type, params); + } + } +} + +static void +submit_and_verify_null_payload(enum CONCAT_IO_TYPE io_type, struct concat_params *params) +{ + struct raid_bdev *raid_bdev; + struct spdk_bdev_io *bdev_io; + struct spdk_io_channel *ch; + struct raid_bdev_io *raid_io; + struct raid_bdev_io_channel *raid_ch; + uint64_t lba, blocks; + + /* + * In this unittest, all base bdevs have the same blockcnt. + * If the base_bdev_blockcnt > 1, the request will start from + * the second bdev, and across two bdevs. + * If the base_bdev_blockcnt == 1, the request will start from + * the third bdev.
In this case, if there are only 3 bdevs, + * we can not set blocks to base_bdev_blockcnt + 1 because the request + * will be beyond the end of the last bdev, so we set the blocks to 1 + */ + lba = params->base_bdev_blockcnt + 1; + if (params->base_bdev_blockcnt == 1 && params->num_base_bdevs == 3) { + blocks = 1; + } else { + blocks = params->base_bdev_blockcnt + 1; + } + init_globals(); + raid_bdev = create_concat(params); + bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io)); + SPDK_CU_ASSERT_FATAL(bdev_io != NULL); + raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; + raid_ch = calloc(1, sizeof(struct raid_bdev_io_channel)); + SPDK_CU_ASSERT_FATAL(raid_ch != NULL); + raid_ch->base_channel = calloc(params->num_base_bdevs, + sizeof(*raid_ch->base_channel)); + SPDK_CU_ASSERT_FATAL(raid_ch->base_channel != NULL); + raid_io->raid_ch = raid_ch; + raid_io->raid_bdev = raid_bdev; + ch = calloc(1, sizeof(struct spdk_io_channel)); + SPDK_CU_ASSERT_FATAL(ch != NULL); + + switch (io_type) { + case CONCAT_UNMAP: + bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_UNMAP); + concat_submit_null_payload_request(raid_io); + break; + case CONCAT_FLUSH: + bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_FLUSH); + concat_submit_null_payload_request(raid_io); + break; + default: + CU_ASSERT(false); + } + + if (params->base_bdev_blockcnt == 1) { + if (params->num_base_bdevs == 3) { + CU_ASSERT(g_req_records.count == 1); + CU_ASSERT(g_req_records.offset_blocks[0] == 0); + CU_ASSERT(g_req_records.num_blocks[0] == 1); + } else { + CU_ASSERT(g_req_records.count == 2); + CU_ASSERT(g_req_records.offset_blocks[0] == 0); + CU_ASSERT(g_req_records.num_blocks[0] == 1); + CU_ASSERT(g_req_records.io_type[0] == io_type); + CU_ASSERT(g_req_records.offset_blocks[1] == 0); + CU_ASSERT(g_req_records.num_blocks[1] == 1); + CU_ASSERT(g_req_records.io_type[1] == io_type); + } + } else { +
CU_ASSERT(g_req_records.count == 2); + CU_ASSERT(g_req_records.offset_blocks[0] == 1); + CU_ASSERT(g_req_records.num_blocks[0] == params->base_bdev_blockcnt - 1); + CU_ASSERT(g_req_records.io_type[0] == io_type); + CU_ASSERT(g_req_records.offset_blocks[1] == 0); + CU_ASSERT(g_req_records.num_blocks[1] == 2); + CU_ASSERT(g_req_records.io_type[1] == io_type); + } + bdev_io_cleanup(bdev_io); + free(ch); + free(raid_ch->base_channel); + free(raid_ch); + delete_concat(raid_bdev); +} + +static void +test_concat_null_payload(void) +{ + struct concat_params *params; + enum CONCAT_IO_TYPE io_type_list[] = {CONCAT_FLUSH, CONCAT_UNMAP}; + enum CONCAT_IO_TYPE io_type; + int i; + + CONCAT_PARAMS_FOR_EACH(params) { + for (i = 0; i < 2; i ++) { + io_type = io_type_list[i]; + submit_and_verify_null_payload(io_type, params); + } + } +} + +int +main(int argc, char **argv) +{ + CU_pSuite suite = NULL; + unsigned int num_failures; + + CU_set_error_action(CUEA_ABORT); + CU_initialize_registry(); + + suite = CU_add_suite("concat", test_setup, test_cleanup); + CU_ADD_TEST(suite, test_concat_start); + CU_ADD_TEST(suite, test_concat_rw); + CU_ADD_TEST(suite, test_concat_null_payload); + + CU_basic_set_mode(CU_BRM_VERBOSE); + CU_basic_run_tests(); + num_failures = CU_get_number_of_failures(); + CU_cleanup_registry(); + return num_failures; +} diff --git a/test/unit/unittest.sh b/test/unit/unittest.sh index 344a7d6db..e69a4fca2 100755 --- a/test/unit/unittest.sh +++ b/test/unit/unittest.sh @@ -16,6 +16,7 @@ function unittest_bdev() { $valgrind $testdir/lib/bdev/bdev.c/bdev_ut $valgrind $testdir/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut $valgrind $testdir/lib/bdev/raid/bdev_raid.c/bdev_raid_ut + $valgrind $testdir/lib/bdev/raid/concat.c/concat_ut $valgrind $testdir/lib/bdev/bdev_zone.c/bdev_zone_ut $valgrind $testdir/lib/bdev/gpt/gpt.c/gpt_ut $valgrind $testdir/lib/bdev/part.c/part_ut