bdev/raid: Add concat module

The concat module can combine multiple underlying bdevs to a single
bdev. It is a special raid level. You can add a new bdev to the end of
the concat bdev, then the concat bdev size is increased, and it won't
change the layout of the existing data. This is the major difference
between concat and raid0. If you add a new underlying device to raid0,
the whole data layout will be changed. So the concat bdev is extendable.

Change-Id: Ibbeeaf0606ff79b595320c597a5605ab9e4e13c4
Signed-off-by: Peng Yu <yupeng0921@gmail.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11070
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
yupeng 2022-02-01 06:46:20 +00:00 committed by Tomasz Zawadzki
parent 428b17a0a8
commit 64eebbd132
12 changed files with 923 additions and 6 deletions

View File

@ -94,6 +94,14 @@ Added `dynamic` scheduler options: load_limit, core_limit, core_busy. Their desc
are available in JSON-RPC document, in section are available in JSON-RPC document, in section
[framework_set_scheduler](jsonrpc.html#rpc_framework_set_scheduler). [framework_set_scheduler](jsonrpc.html#rpc_framework_set_scheduler).
### raid
Add concat as a special raid module. The concat module could create a virtual bdev. The
virtual bdev combines multiple underlying bdevs together. The layout of the underlying
bdevs is one after another. The concat bdev is extendable. When the free space of the
concat bdev is not enough, the user can deconstruct the concat bdev, then reconstruct it
with an additional underlying bdev.
## v22.01 ## v22.01
### accel ### accel

View File

@ -38,7 +38,7 @@ SO_VER := 4
SO_MINOR := 0 SO_MINOR := 0
CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/ CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/
C_SRCS = bdev_raid.c bdev_raid_rpc.c raid0.c C_SRCS = bdev_raid.c bdev_raid_rpc.c raid0.c concat.c
ifeq ($(CONFIG_RAID5),y) ifeq ($(CONFIG_RAID5),y)
C_SRCS += raid5.c C_SRCS += raid5.c

View File

@ -907,6 +907,7 @@ static struct {
{ "0", RAID0 }, { "0", RAID0 },
{ "raid5", RAID5 }, { "raid5", RAID5 },
{ "5", RAID5 }, { "5", RAID5 },
{ "concat", CONCAT },
{ } { }
}; };

View File

@ -40,6 +40,7 @@ enum raid_level {
INVALID_RAID_LEVEL = -1, INVALID_RAID_LEVEL = -1,
RAID0 = 0, RAID0 = 0,
RAID5 = 5, RAID5 = 5,
CONCAT = 99,
}; };
/* /*

350
module/bdev/raid/concat.c Normal file
View File

@ -0,0 +1,350 @@
/*-
* BSD LICENSE
*
* Copyright (c) Peng Yu yupeng0921@gmail.com.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "bdev_raid.h"
#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/log.h"
/* Block range occupied by one base bdev inside the concat bdev's flat
 * logical address space. One entry per member, ordered by position. */
struct concat_block_range {
	uint64_t start;		/* first concat-bdev block served by this member */
	uint64_t length;	/* usable (strip-aligned) block count of this member */
};
/*
 * brief:
 * Completion callback for a child I/O submitted to a base bdev; frees the
 * child bdev_io and propagates its status to the parent raid_bdev_io.
 * params:
 * bdev_io - child bdev_io submitted to the base bdev
 * success - true if the child I/O completed successfully
 * cb_arg - parent raid_bdev_io
 * returns:
 * none
 */
static void
concat_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;
	enum spdk_bdev_io_status status;

	spdk_bdev_free_io(bdev_io);

	status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
	raid_bdev_io_complete(raid_io, status);
}
static void
concat_submit_rw_request(struct raid_bdev_io *raid_io);

/* Trampoline with the spdk_bdev_io_wait_cb (void *) signature, used to
 * re-submit a read/write request after an earlier -ENOMEM. */
static void
_concat_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_rw_request(raid_io);
}
/*
* brief:
* concat_submit_rw_request function is used to submit I/O to the correct
* member disk for concat bdevs.
* params:
* raid_io
* returns:
* none
*/
static void
concat_submit_rw_request(struct raid_bdev_io *raid_io)
{
struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
struct raid_bdev *raid_bdev = raid_io->raid_bdev;
struct concat_block_range *block_range = raid_bdev->module_private;
uint64_t pd_lba;
uint64_t pd_blocks;
int pd_idx;
int ret = 0;
struct raid_base_bdev_info *base_info;
struct spdk_io_channel *base_ch;
int i;
pd_idx = -1;
for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
if (block_range[i].start > bdev_io->u.bdev.offset_blocks) {
break;
}
pd_idx = i;
}
assert(pd_idx >= 0);
assert(bdev_io->u.bdev.offset_blocks >= block_range[pd_idx].start);
pd_lba = bdev_io->u.bdev.offset_blocks - block_range[pd_idx].start;
pd_blocks = bdev_io->u.bdev.num_blocks;
base_info = &raid_bdev->base_bdev_info[pd_idx];
if (base_info->desc == NULL) {
SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
assert(0);
}
/*
* Submit child io to bdev layer with using base bdev descriptors, base
* bdev lba, base bdev child io length in blocks, buffer, completion
* function and function callback context
*/
assert(raid_ch != NULL);
assert(raid_ch->base_channel);
base_ch = raid_ch->base_channel[pd_idx];
if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
ret = spdk_bdev_readv_blocks(base_info->desc, base_ch,
bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
pd_lba, pd_blocks, concat_bdev_io_completion,
raid_io);
} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
ret = spdk_bdev_writev_blocks(base_info->desc, base_ch,
bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
pd_lba, pd_blocks, concat_bdev_io_completion,
raid_io);
} else {
SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type);
assert(0);
}
if (ret == -ENOMEM) {
raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
_concat_submit_rw_request);
} else if (ret != 0) {
SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
assert(false);
raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
}
}
static void
concat_submit_null_payload_request(struct raid_bdev_io *raid_io);

/* Trampoline with the spdk_bdev_io_wait_cb (void *) signature, used to
 * re-submit a FLUSH/UNMAP request after an earlier -ENOMEM. */
static void
_concat_submit_null_payload_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_null_payload_request(raid_io);
}
/* Completion callback for one of possibly several child FLUSH/UNMAP I/Os;
 * counts this child toward the parent's completion tally, then frees it. */
static void
concat_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;
	enum spdk_bdev_io_status status;

	if (success) {
		status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else {
		status = SPDK_BDEV_IO_STATUS_FAILED;
	}
	raid_bdev_io_complete_part(raid_io, 1, status);
	spdk_bdev_free_io(bdev_io);
}
/*
 * brief:
 * concat_submit_null_payload_request function submits the next batch of
 * io requests with range but without payload, like FLUSH and UNMAP, to member disks;
 * it will submit as many as possible unless one base io request fails with -ENOMEM,
 * in which case it will queue itself for later submission.
 * params:
 * raid_io - pointer to parent raid_bdev_io on raid bdev device
 * returns:
 * none
 */
static void
concat_submit_null_payload_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io *bdev_io;
	struct raid_bdev *raid_bdev;
	int ret;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	uint64_t pd_lba;
	uint64_t pd_blocks;
	uint64_t offset_blocks;
	uint64_t num_blocks;
	struct concat_block_range *block_range;
	int i, start_idx, stop_idx;

	bdev_io = spdk_bdev_io_from_ctx(raid_io);
	raid_bdev = raid_io->raid_bdev;
	block_range = raid_bdev->module_private;
	offset_blocks = bdev_io->u.bdev.offset_blocks;
	num_blocks = bdev_io->u.bdev.num_blocks;
	start_idx = -1;
	stop_idx = -1;
	/*
	 * First pass: walk the members to find the first (start_idx) and last
	 * (stop_idx) base bdevs touched by [offset_blocks, offset_blocks + num_blocks).
	 */
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		/* skip the bdevs before the offset_blocks */
		if (offset_blocks >= block_range[i].start + block_range[i].length) {
			continue;
		}
		if (start_idx == -1) {
			start_idx = i;
		} else {
			/*
			 * The offset_blocks might be at the middle of the first bdev.
			 * Besides the first bdev, the offset_blocks should be always
			 * at the start of the bdev.
			 */
			assert(offset_blocks == block_range[i].start);
		}
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		if (num_blocks == 0) {
			stop_idx = i;
			break;
		}
	}
	assert(start_idx >= 0);
	assert(stop_idx >= 0);
	/* Only initialize the remaining count on the first invocation; on an
	 * ENOMEM retry the partial tally must be preserved. */
	if (raid_io->base_bdev_io_remaining == 0) {
		raid_io->base_bdev_io_remaining = stop_idx - start_idx + 1;
	}
	offset_blocks = bdev_io->u.bdev.offset_blocks;
	num_blocks = bdev_io->u.bdev.num_blocks;
	/*
	 * Second pass: recompute each member's (pd_lba, pd_blocks) slice and
	 * submit a child I/O per member, resuming after any already-submitted
	 * children from a previous attempt.
	 */
	for (i = start_idx; i <= stop_idx; i++) {
		assert(offset_blocks >= block_range[i].start);
		assert(offset_blocks < block_range[i].start + block_range[i].length);
		pd_lba = offset_blocks -  block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		/*
		 * Skip the IOs we have submitted
		 */
		if (i < start_idx + raid_io->base_bdev_io_submitted) {
			continue;
		}
		base_info = &raid_bdev->base_bdev_info[i];
		base_ch = raid_io->raid_ch->base_channel[i];
		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_UNMAP:
			ret = spdk_bdev_unmap_blocks(base_info->desc, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		case SPDK_BDEV_IO_TYPE_FLUSH:
			ret = spdk_bdev_flush_blocks(base_info->desc, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		default:
			SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type);
			assert(false);
			ret = -EIO;
		}
		if (ret == 0) {
			raid_io->base_bdev_io_submitted++;
		} else if (ret == -ENOMEM) {
			/* Queue the whole request for retry; base_bdev_io_submitted
			 * remembers how far we got. */
			raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
						_concat_submit_null_payload_request);
			return;
		} else {
			SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
			assert(false);
			raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
	}
}
/*
 * brief:
 * concat_start builds the per-member block range table: each base bdev is
 * rounded down to a whole number of strips and placed one after another in
 * the concat bdev's flat address space. Also sets the exported geometry so
 * that the generic raid layer splits I/O on strip boundaries.
 * params:
 * raid_bdev - raid bdev being brought online
 * returns:
 * 0 on success, -ENOMEM if the block range table cannot be allocated
 */
static int concat_start(struct raid_bdev *raid_bdev)
{
	uint64_t total_blockcnt = 0;
	struct raid_base_bdev_info *base_info;
	struct concat_block_range *block_range;
	int idx = 0;

	block_range = calloc(raid_bdev->num_base_bdevs, sizeof(struct concat_block_range));
	if (!block_range) {
		/* Original message lacked the trailing newline expected by SPDK_ERRLOG. */
		SPDK_ERRLOG("Can not allocate block_range, num_base_bdevs: %u\n",
			    raid_bdev->num_base_bdevs);
		return -ENOMEM;
	}

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		/* Round down to a whole strip count so member boundaries are
		 * always strip-aligned. */
		uint64_t strip_cnt = base_info->bdev->blockcnt >> raid_bdev->strip_size_shift;
		uint64_t pd_block_cnt = strip_cnt << raid_bdev->strip_size_shift;

		block_range[idx].start = total_blockcnt;
		block_range[idx].length = pd_block_cnt;
		total_blockcnt += pd_block_cnt;
		idx++;
	}

	raid_bdev->module_private = block_range;

	SPDK_DEBUGLOG(bdev_concat, "total blockcount %" PRIu64 ", numbasedev %u, strip size shift %u\n",
		      total_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
	raid_bdev->bdev.blockcnt = total_blockcnt;

	/* Splitting on strip boundaries guarantees a request never spans two
	 * member disks (see concat_submit_rw_request). */
	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;

	return 0;
}
/* Teardown hook: release the block range table allocated in concat_start(). */
static void
concat_stop(struct raid_bdev *raid_bdev)
{
	free(raid_bdev->module_private);
}
/* Module descriptor hooked into the generic raid bdev framework. */
static struct raid_bdev_module g_concat_module = {
	.level = CONCAT,
	.base_bdevs_min = 1,	/* a concat of a single member is valid */
	.start = concat_start,
	.stop = concat_stop,
	.submit_rw_request = concat_submit_rw_request,
	.submit_null_payload_request = concat_submit_null_payload_request,
};
RAID_MODULE_REGISTER(&g_concat_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_concat)

View File

@ -1944,7 +1944,7 @@ Format: 'user:u1 secret:s1 muser:mu1 msecret:ms1,user:u2 secret:s2 muser:mu2 mse
help='Create new raid bdev') help='Create new raid bdev')
p.add_argument('-n', '--name', help='raid bdev name', required=True) p.add_argument('-n', '--name', help='raid bdev name', required=True)
p.add_argument('-z', '--strip-size-kb', help='strip size in KB', type=int) p.add_argument('-z', '--strip-size-kb', help='strip size in KB', type=int)
p.add_argument('-r', '--raid-level', help='raid level, only raid level 0 is supported', required=True) p.add_argument('-r', '--raid-level', help='raid level, raid0 and a special level concat are supported', required=True)
p.add_argument('-b', '--base-bdevs', help='base bdevs name, whitespace separated list in quotes', required=True) p.add_argument('-b', '--base-bdevs', help='base bdevs name, whitespace separated list in quotes', required=True)
p.set_defaults(func=bdev_raid_create) p.set_defaults(func=bdev_raid_create)

View File

@ -60,12 +60,13 @@ function on_error_exit() {
} }
function configure_raid_bdev() { function configure_raid_bdev() {
local raid_level=$1
rm -rf $testdir/rpcs.txt rm -rf $testdir/rpcs.txt
cat <<- EOL >> $testdir/rpcs.txt cat <<- EOL >> $testdir/rpcs.txt
bdev_malloc_create 32 512 -b Base_1 bdev_malloc_create 32 512 -b Base_1
bdev_malloc_create 32 512 -b Base_2 bdev_malloc_create 32 512 -b Base_2
bdev_raid_create -z 64 -r 0 -b "Base_1 Base_2" -n raid0 bdev_raid_create -z 64 -r $raid_level -b "Base_1 Base_2" -n raid
EOL EOL
$rpc_py < $testdir/rpcs.txt $rpc_py < $testdir/rpcs.txt
@ -73,6 +74,7 @@ function configure_raid_bdev() {
} }
function raid_function_test() { function raid_function_test() {
local raid_level=$1
if [ $(uname -s) = Linux ] && modprobe -n nbd; then if [ $(uname -s) = Linux ] && modprobe -n nbd; then
local nbd=/dev/nbd0 local nbd=/dev/nbd0
local raid_bdev local raid_bdev
@ -83,7 +85,7 @@ function raid_function_test() {
echo "Process raid pid: $raid_pid" echo "Process raid pid: $raid_pid"
waitforlisten $raid_pid $rpc_server waitforlisten $raid_pid $rpc_server
configure_raid_bdev configure_raid_bdev $raid_level
raid_bdev=$($rpc_py bdev_raid_get_bdevs online | cut -d ' ' -f 1) raid_bdev=$($rpc_py bdev_raid_get_bdevs online | cut -d ' ' -f 1)
if [ $raid_bdev = "" ]; then if [ $raid_bdev = "" ]; then
echo "No raid0 device in SPDK app" echo "No raid0 device in SPDK app"
@ -114,6 +116,7 @@ function raid_function_test() {
trap 'on_error_exit;' ERR trap 'on_error_exit;' ERR
raid_function_test raid_function_test raid0
raid_function_test concat
rm -f $tmp_file rm -f $tmp_file

View File

@ -44,8 +44,11 @@ function setup_bdev_conf() {
bdev_malloc_create -b Malloc3 32 512 bdev_malloc_create -b Malloc3 32 512
bdev_malloc_create -b Malloc4 32 512 bdev_malloc_create -b Malloc4 32 512
bdev_malloc_create -b Malloc5 32 512 bdev_malloc_create -b Malloc5 32 512
bdev_malloc_create -b Malloc6 32 512
bdev_malloc_create -b Malloc7 32 512
bdev_passthru_create -p TestPT -b Malloc3 bdev_passthru_create -p TestPT -b Malloc3
bdev_raid_create -n raid0 -z 64 -r 0 -b "Malloc4 Malloc5" bdev_raid_create -n raid0 -z 64 -r 0 -b "Malloc4 Malloc5"
bdev_raid_create -n concat0 -z 64 -r concat -b "Malloc6 Malloc7"
bdev_set_qos_limit --rw_mbytes_per_sec 100 Malloc3 bdev_set_qos_limit --rw_mbytes_per_sec 100 Malloc3
bdev_set_qos_limit --rw_ios_per_sec 20000 Malloc0 bdev_set_qos_limit --rw_ios_per_sec 20000 Malloc0
RPC RPC

View File

@ -34,7 +34,7 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../../..) SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
DIRS-y = bdev_raid.c DIRS-y = bdev_raid.c concat.c
DIRS-$(CONFIG_RAID5) += raid5.c DIRS-$(CONFIG_RAID5) += raid5.c

View File

@ -0,0 +1,5 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../../../..)
TEST_FILE = concat_ut.c
include $(SPDK_ROOT_DIR)/mk/spdk.unittest.mk

View File

@ -0,0 +1,545 @@
#include "spdk/stdinc.h"
#include "spdk_cunit.h"
#include "spdk/env.h"
#include "thread/thread_internal.h"
#include "spdk_internal/mock.h"
#include "bdev/raid/bdev_raid.h"
#include "bdev/raid/concat.c"
#define BLOCK_LEN (4096)
/* I/O kinds recorded by the stubbed spdk_bdev_*_blocks functions.
 * CONCAT_NONE (0) marks an unused record slot. */
enum CONCAT_IO_TYPE {
	CONCAT_NONE = 0,
	CONCAT_WRITEV,
	CONCAT_READV,
	CONCAT_FLUSH,
	CONCAT_UNMAP,
};
/* Minimal local definition of the otherwise-opaque descriptor type so the
 * test can calloc() fake descriptors for the base bdevs. */
struct spdk_bdev_desc {
	struct spdk_bdev *bdev;
};
#define MAX_RECORDS (10)
/*
 * Store the information of io requests sent to the underlying bdevs.
 * For a single null payload request to the concat bdev,
 * we may send multiple requests to the underlying bdevs,
 * so we store the io request information to arrays.
 */
struct req_records {
	uint64_t offset_blocks[MAX_RECORDS];	/* per-member LBA of each child I/O */
	uint64_t num_blocks[MAX_RECORDS];	/* block count of each child I/O */
	enum CONCAT_IO_TYPE io_type[MAX_RECORDS];
	int count;				/* number of child I/Os recorded */
} g_req_records;
/*
 * Controls the stubbed spdk_bdev_readv/writev/unmap/flush_blocks functions:
 * when true they record the request and return 0; when false they return
 * -ENOMEM.  Tests start with g_succeed == false, so the first submission
 * fails; the raid_bdev_queue_io_wait stub then flips it to true and
 * re-submits, so the retried I/O succeeds.
 */
bool g_succeed;
/* Stubs for raid framework symbols that concat.c references but whose
 * behavior is irrelevant to (or checked differently by) this unit test. */
DEFINE_STUB_V(raid_bdev_module_list_add, (struct raid_bdev_module *raid_module));
DEFINE_STUB_V(raid_bdev_io_complete, (struct raid_bdev_io *raid_io,
				      enum spdk_bdev_io_status status));
DEFINE_STUB_V(spdk_bdev_free_io, (struct spdk_bdev_io *bdev_io));
DEFINE_STUB(raid_bdev_io_complete_part, bool,
	    (struct raid_bdev_io *raid_io, uint64_t completed,
	     enum spdk_bdev_io_status status),
	    true);
/* Stubbed read: fails with -ENOMEM until g_succeed is set, then records the
 * request, invokes the completion callback inline and reports success. */
int
spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		       struct iovec *iov, int iovcnt, uint64_t offset_blocks, uint64_t num_blocks,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	int idx;

	if (!g_succeed) {
		return -ENOMEM;
	}

	idx = g_req_records.count++;
	g_req_records.offset_blocks[idx] = offset_blocks;
	g_req_records.num_blocks[idx] = num_blocks;
	g_req_records.io_type[idx] = CONCAT_READV;
	cb(NULL, true, cb_arg);
	return 0;
}
/* Stubbed write: same contract as the stubbed read, recording CONCAT_WRITEV. */
int
spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			struct iovec *iov, int iovcnt, uint64_t offset_blocks, uint64_t num_blocks,
			spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	int idx;

	if (!g_succeed) {
		return -ENOMEM;
	}

	idx = g_req_records.count++;
	g_req_records.offset_blocks[idx] = offset_blocks;
	g_req_records.num_blocks[idx] = num_blocks;
	g_req_records.io_type[idx] = CONCAT_WRITEV;
	cb(NULL, true, cb_arg);
	return 0;
}
/* Stubbed unmap: same contract as the stubbed read, recording CONCAT_UNMAP. */
int
spdk_bdev_unmap_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		       uint64_t offset_blocks, uint64_t num_blocks,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	int idx;

	if (!g_succeed) {
		return -ENOMEM;
	}

	idx = g_req_records.count++;
	g_req_records.offset_blocks[idx] = offset_blocks;
	g_req_records.num_blocks[idx] = num_blocks;
	g_req_records.io_type[idx] = CONCAT_UNMAP;
	cb(NULL, true, cb_arg);
	return 0;
}
/* Stubbed flush: same contract as the stubbed read, recording CONCAT_FLUSH. */
int
spdk_bdev_flush_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		       uint64_t offset_blocks, uint64_t num_blocks,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	int idx;

	if (!g_succeed) {
		return -ENOMEM;
	}

	idx = g_req_records.count++;
	g_req_records.offset_blocks[idx] = offset_blocks;
	g_req_records.num_blocks[idx] = num_blocks;
	g_req_records.io_type[idx] = CONCAT_FLUSH;
	cb(NULL, true, cb_arg);
	return 0;
}
/*
 * Stub of the raid framework's ENOMEM retry queue: instead of waiting for
 * resources it immediately flips g_succeed to true and re-invokes the
 * request callback, so an I/O that first failed with -ENOMEM succeeds on
 * the retry.
 */
void
raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,
			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn)
{
	g_succeed = true;
	cb_fn(raid_io);
}
/* Reset the request log and the submission-failure switch before each case.
 * CONCAT_NONE and the counters are all zero, so one memset clears every
 * record slot as well as the count. */
static void
init_globals(void)
{
	memset(&g_req_records, 0, sizeof(g_req_records));
	g_succeed = false;
}
/* One combination of geometry parameters to run every test against. */
struct concat_params {
	uint8_t num_base_bdevs;		/* number of member disks */
	uint64_t base_bdev_blockcnt;	/* blocks per member disk */
	uint32_t base_bdev_blocklen;	/* block size in bytes */
	uint32_t strip_size;		/* strip size in blocks */
};

/* Parameter matrix built in test_setup(), freed in test_cleanup(). */
static struct concat_params *g_params;
static size_t g_params_count;
/* Iterate a fixed-size array by element pointer. */
#define ARRAY_FOR_EACH(a, e) \
	for (e = a; e < a + SPDK_COUNTOF(a); e++)

/* Iterate every valid parameter combination in the global matrix. */
#define CONCAT_PARAMS_FOR_EACH(p) \
	for (p = g_params; p < g_params + g_params_count; p++)
/*
 * Suite setup: build the global parameter matrix as the Cartesian product of
 * member count, member block count, block length and strip size (in KB).
 * Combinations whose strip size converts to zero blocks or exceeds a member
 * are dropped: g_params_count is decremented and the write cursor does not
 * advance, so the allocation is an upper bound on the logical count.
 */
static int
test_setup(void)
{
	uint8_t num_base_bdevs_values[] = { 3, 4, 5 };
	uint64_t base_bdev_blockcnt_values[] = { 1, 1024, 1024 * 1024 };
	uint32_t base_bdev_blocklen_values[] = { 512, 4096 };
	uint32_t strip_size_kb_values[] = { 1, 4, 128 };
	uint8_t *num_base_bdevs;
	uint64_t *base_bdev_blockcnt;
	uint32_t *base_bdev_blocklen;
	uint32_t *strip_size_kb;
	struct concat_params *params;

	g_params_count = SPDK_COUNTOF(num_base_bdevs_values) *
			 SPDK_COUNTOF(base_bdev_blockcnt_values) *
			 SPDK_COUNTOF(base_bdev_blocklen_values) *
			 SPDK_COUNTOF(strip_size_kb_values);
	g_params = calloc(g_params_count, sizeof(*g_params));
	if (!g_params) {
		return -ENOMEM;
	}

	params = g_params;
	ARRAY_FOR_EACH(num_base_bdevs_values, num_base_bdevs) {
		ARRAY_FOR_EACH(base_bdev_blockcnt_values, base_bdev_blockcnt) {
			ARRAY_FOR_EACH(base_bdev_blocklen_values, base_bdev_blocklen) {
				ARRAY_FOR_EACH(strip_size_kb_values, strip_size_kb) {
					params->num_base_bdevs = *num_base_bdevs;
					params->base_bdev_blockcnt = *base_bdev_blockcnt;
					params->base_bdev_blocklen = *base_bdev_blocklen;
					/* KB -> blocks conversion */
					params->strip_size = *strip_size_kb * 1024 / *base_bdev_blocklen;
					if (params->strip_size == 0 ||
					    params->strip_size > *base_bdev_blockcnt) {
						g_params_count--;
						continue;
					}
					params++;
				}
			}
		}
	}

	return 0;
}
/* Suite teardown: release the parameter matrix allocated in test_setup(). */
static int
test_cleanup(void)
{
	free(g_params);
	return 0;
}
/*
 * Allocate a fake raid_bdev wired to the concat module, with one calloc'd
 * fake bdev and descriptor per member sized from params.  The caller frees
 * everything via delete_raid_bdev().
 */
static struct raid_bdev *
create_raid_bdev(struct concat_params *params)
{
	struct raid_bdev *raid_bdev;
	struct raid_base_bdev_info *base_info;

	raid_bdev = calloc(1, sizeof(*raid_bdev));
	SPDK_CU_ASSERT_FATAL(raid_bdev != NULL);

	raid_bdev->module = &g_concat_module;
	raid_bdev->num_base_bdevs = params->num_base_bdevs;
	raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs,
					   sizeof(struct raid_base_bdev_info));
	SPDK_CU_ASSERT_FATAL(raid_bdev->base_bdev_info != NULL);

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		base_info->bdev = calloc(1, sizeof(*base_info->bdev));
		SPDK_CU_ASSERT_FATAL(base_info->bdev != NULL);
		base_info->desc = calloc(1, sizeof(*base_info->desc));
		SPDK_CU_ASSERT_FATAL(base_info->desc != NULL);

		base_info->bdev->blockcnt = params->base_bdev_blockcnt;
		base_info->bdev->blocklen = params->base_bdev_blocklen;
	}

	raid_bdev->strip_size = params->strip_size;
	raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size);
	raid_bdev->bdev.blocklen = params->base_bdev_blocklen;

	return raid_bdev;
}
/* Free a fake raid_bdev built by create_raid_bdev(): the per-member fake
 * descriptors and bdevs first, then the containers. */
static void
delete_raid_bdev(struct raid_bdev *raid_bdev)
{
	struct raid_base_bdev_info *base_info;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		free(base_info->desc);
		free(base_info->bdev);
	}
	free(raid_bdev->base_bdev_info);
	free(raid_bdev);
}
/* Build a fake raid_bdev and bring the concat module online on it. */
static struct raid_bdev *
create_concat(struct concat_params *params)
{
	struct raid_bdev *raid_bdev = create_raid_bdev(params);

	CU_ASSERT(concat_start(raid_bdev) == 0);
	return raid_bdev;
}
/* Tear down the concat module state, then free the fake raid_bdev. */
static void
delete_concat(struct raid_bdev *raid_bdev)
{
	concat_stop(raid_bdev);
	delete_raid_bdev(raid_bdev);
}
/*
 * Verify that concat_start() lays the members out back to back: every
 * member's start equals the running total and its length equals the member
 * block count (all test geometries are already strip-aligned).
 */
static void
test_concat_start(void)
{
	struct raid_bdev *raid_bdev;
	struct concat_params *params;
	struct concat_block_range *block_range;
	uint64_t total_blockcnt;
	int i;

	CONCAT_PARAMS_FOR_EACH(params) {
		raid_bdev = create_concat(params);
		block_range = raid_bdev->module_private;
		total_blockcnt = 0;
		for (i = 0; i < params->num_base_bdevs; i++) {
			CU_ASSERT(block_range[i].start == total_blockcnt);
			CU_ASSERT(block_range[i].length == params->base_bdev_blockcnt);
			total_blockcnt += params->base_bdev_blockcnt;
		}
		delete_concat(raid_bdev);
	}
}
/*
 * Release the iovec and payload buffer attached by bdev_io_initialize(),
 * then the bdev_io itself.  free(NULL) is a no-op per the C standard, so
 * the inner guard on iov_base in the original was redundant.
 */
static void
bdev_io_cleanup(struct spdk_bdev_io *bdev_io)
{
	if (bdev_io->u.bdev.iovs) {
		free(bdev_io->u.bdev.iovs->iov_base);
		free(bdev_io->u.bdev.iovs);
	}
	free(bdev_io);
}
/*
 * Fill in the minimal fields of a fake bdev_io for the test.  Null payload
 * requests (UNMAP/FLUSH) carry no iovec; read/write requests get a single
 * iovec with a BLOCK_LEN-sized buffer per block.
 */
static void
bdev_io_initialize(struct spdk_bdev_io *bdev_io, struct spdk_io_channel *ch, struct spdk_bdev *bdev,
		   uint64_t lba, uint64_t blocks, int16_t iotype)
{
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	bdev_io->bdev = bdev;
	bdev_io->u.bdev.offset_blocks = lba;
	bdev_io->u.bdev.num_blocks = blocks;
	bdev_io->type = iotype;

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_UNMAP || bdev_io->type == SPDK_BDEV_IO_TYPE_FLUSH) {
		return;
	}

	bdev_io->u.bdev.iovcnt = 1;
	bdev_io->u.bdev.iovs = calloc(1, sizeof(struct iovec));
	SPDK_CU_ASSERT_FATAL(bdev_io->u.bdev.iovs != NULL);
	/* Use BLOCK_LEN for the allocation too; the original used a bare 4096
	 * here while iov_len below used BLOCK_LEN — keep them consistent. */
	bdev_io->u.bdev.iovs->iov_base = calloc(1, bdev_io->u.bdev.num_blocks * BLOCK_LEN);
	SPDK_CU_ASSERT_FATAL(bdev_io->u.bdev.iovs->iov_base != NULL);
	bdev_io->u.bdev.iovs->iov_len = bdev_io->u.bdev.num_blocks * BLOCK_LEN;
	bdev_io->internal.ch = channel;
}
/*
 * Submit a one-block read or write at the first LBA of each member in turn
 * and verify exactly one child I/O was recorded, mapped to per-member LBA 0.
 * The stubs fail the first submission with -ENOMEM, so this also exercises
 * the queue-and-retry path.
 */
static void
submit_and_verify_rw(enum CONCAT_IO_TYPE io_type, struct concat_params *params)
{
	struct raid_bdev *raid_bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_io_channel *ch;
	struct raid_bdev_io *raid_io;
	struct raid_bdev_io_channel *raid_ch;
	uint64_t lba, blocks;
	int i;

	lba = 0;
	blocks = 1;
	for (i = 0; i < params->num_base_bdevs; i++) {
		init_globals();
		raid_bdev = create_concat(params);
		/* raid_bdev_io lives in the bdev_io's driver context area */
		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
		raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
		raid_ch = calloc(1, sizeof(struct raid_bdev_io_channel));
		SPDK_CU_ASSERT_FATAL(raid_ch != NULL);
		raid_ch->base_channel = calloc(params->num_base_bdevs,
					       sizeof(struct spdk_io_channel));
		SPDK_CU_ASSERT_FATAL(raid_ch->base_channel != NULL);
		raid_io->raid_ch = raid_ch;
		raid_io->raid_bdev = raid_bdev;
		ch = calloc(1, sizeof(struct spdk_io_channel));
		SPDK_CU_ASSERT_FATAL(ch != NULL);

		switch (io_type) {
		case CONCAT_WRITEV:
			bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_WRITE);
			concat_submit_rw_request(raid_io);
			break;
		case CONCAT_READV:
			bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_READ);
			concat_submit_rw_request(raid_io);
			break;
		case CONCAT_UNMAP:
			bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_UNMAP);
			concat_submit_null_payload_request(raid_io);
			break;
		case CONCAT_FLUSH:
			bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_FLUSH);
			concat_submit_null_payload_request(raid_io);
			break;
		default:
			CU_ASSERT(false);
		}

		/*
		 * We submit request to the first lba of each underlying device,
		 * so the offset of the underlying device should always be 0.
		 */
		CU_ASSERT(g_req_records.offset_blocks[0] == 0);
		CU_ASSERT(g_req_records.num_blocks[0] == blocks);
		CU_ASSERT(g_req_records.io_type[0] == io_type);
		CU_ASSERT(g_req_records.count == 1);
		bdev_io_cleanup(bdev_io);
		free(ch);
		free(raid_ch->base_channel);
		free(raid_ch);
		delete_concat(raid_bdev);
		/* advance to the first LBA of the next member */
		lba += params->base_bdev_blockcnt;
	}
}
static void
test_concat_rw(void)
{
struct concat_params *params;
enum CONCAT_IO_TYPE io_type_list[] = {CONCAT_WRITEV, CONCAT_READV};
enum CONCAT_IO_TYPE io_type;
int i;
CONCAT_PARAMS_FOR_EACH(params) {
for (i = 0; i < 2; i ++) {
io_type = io_type_list[i];
submit_and_verify_rw(io_type, params);
}
}
}
/*
 * Submit a FLUSH or UNMAP that starts inside the second member and (when
 * geometry allows) crosses into the third, then verify the per-member split
 * recorded by the stubs.  Adds the io_type check that the original omitted
 * in the single-request branch, for consistency with the other branches.
 */
static void
submit_and_verify_null_payload(enum CONCAT_IO_TYPE io_type, struct concat_params *params)
{
	struct raid_bdev *raid_bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_io_channel *ch;
	struct raid_bdev_io *raid_io;
	struct raid_bdev_io_channel *raid_ch;
	uint64_t lba, blocks;

	/*
	 * In this unittest, all base bdevs have the same blockcnt.
	 * If the base_bdev_blockcnt > 1, the request will start from
	 * the second bdev, and across two bdevs.
	 * If the base_bdev_blockcnt == 1, the request will start from
	 * the third bdev. In this case, if there are only 3 bdevs,
	 * we can not set blocks to base_bdev_blockcnt + 1 because the request
	 * will be beyond the end of the last bdev, so we set the blocks to 1
	 */
	lba = params->base_bdev_blockcnt + 1;
	if (params->base_bdev_blockcnt == 1 && params->num_base_bdevs == 3) {
		blocks = 1;
	} else {
		blocks = params->base_bdev_blockcnt + 1;
	}

	init_globals();
	raid_bdev = create_concat(params);
	bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
	SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
	raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
	raid_ch = calloc(1, sizeof(struct raid_bdev_io_channel));
	SPDK_CU_ASSERT_FATAL(raid_ch != NULL);
	raid_ch->base_channel = calloc(params->num_base_bdevs,
				       sizeof(struct spdk_io_channel));
	SPDK_CU_ASSERT_FATAL(raid_ch->base_channel != NULL);
	raid_io->raid_ch = raid_ch;
	raid_io->raid_bdev = raid_bdev;
	ch = calloc(1, sizeof(struct spdk_io_channel));
	SPDK_CU_ASSERT_FATAL(ch != NULL);

	switch (io_type) {
	case CONCAT_UNMAP:
		bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_UNMAP);
		concat_submit_null_payload_request(raid_io);
		break;
	case CONCAT_FLUSH:
		bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_FLUSH);
		concat_submit_null_payload_request(raid_io);
		break;
	default:
		CU_ASSERT(false);
	}

	if (params->base_bdev_blockcnt == 1) {
		if (params->num_base_bdevs == 3) {
			/* one single-block request to the third member */
			CU_ASSERT(g_req_records.count == 1);
			CU_ASSERT(g_req_records.offset_blocks[0] == 0);
			CU_ASSERT(g_req_records.num_blocks[0] == 1);
			/* check the io type here too (missing in the original) */
			CU_ASSERT(g_req_records.io_type[0] == io_type);
		} else {
			/* split across the third and fourth members */
			CU_ASSERT(g_req_records.count == 2);
			CU_ASSERT(g_req_records.offset_blocks[0] == 0);
			CU_ASSERT(g_req_records.num_blocks[0] == 1);
			CU_ASSERT(g_req_records.io_type[0] == io_type);
			CU_ASSERT(g_req_records.offset_blocks[1] == 0);
			CU_ASSERT(g_req_records.num_blocks[1] == 1);
			CU_ASSERT(g_req_records.io_type[1] == io_type);
		}
	} else {
		/* tail of the second member plus two blocks of the third */
		CU_ASSERT(g_req_records.count == 2);
		CU_ASSERT(g_req_records.offset_blocks[0] == 1);
		CU_ASSERT(g_req_records.num_blocks[0] == params->base_bdev_blockcnt - 1);
		CU_ASSERT(g_req_records.io_type[0] == io_type);
		CU_ASSERT(g_req_records.offset_blocks[1] == 0);
		CU_ASSERT(g_req_records.num_blocks[1] == 2);
		CU_ASSERT(g_req_records.io_type[1] == io_type);
	}
	bdev_io_cleanup(bdev_io);
	free(ch);
	free(raid_ch->base_channel);
	free(raid_ch);
	delete_concat(raid_bdev);
}
static void
test_concat_null_payload(void)
{
struct concat_params *params;
enum CONCAT_IO_TYPE io_type_list[] = {CONCAT_FLUSH, CONCAT_UNMAP};
enum CONCAT_IO_TYPE io_type;
int i;
CONCAT_PARAMS_FOR_EACH(params) {
for (i = 0; i < 2; i ++) {
io_type = io_type_list[i];
submit_and_verify_null_payload(io_type, params);
}
}
}
/* Register the concat test suite with CUnit, run it, and report the number
 * of failed assertions as the process exit code. */
int
main(int argc, char **argv)
{
	CU_pSuite suite = NULL;
	unsigned int num_failures;

	CU_set_error_action(CUEA_ABORT);
	CU_initialize_registry();

	/* test_setup/test_cleanup build and free the parameter matrix */
	suite = CU_add_suite("concat", test_setup, test_cleanup);
	CU_ADD_TEST(suite, test_concat_start);
	CU_ADD_TEST(suite, test_concat_rw);
	CU_ADD_TEST(suite, test_concat_null_payload);

	CU_basic_set_mode(CU_BRM_VERBOSE);
	CU_basic_run_tests();
	num_failures = CU_get_number_of_failures();
	CU_cleanup_registry();
	return num_failures;
}

View File

@ -16,6 +16,7 @@ function unittest_bdev() {
$valgrind $testdir/lib/bdev/bdev.c/bdev_ut $valgrind $testdir/lib/bdev/bdev.c/bdev_ut
$valgrind $testdir/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut $valgrind $testdir/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut
$valgrind $testdir/lib/bdev/raid/bdev_raid.c/bdev_raid_ut $valgrind $testdir/lib/bdev/raid/bdev_raid.c/bdev_raid_ut
$valgrind $testdir/lib/bdev/raid/concat.c/concat_ut
$valgrind $testdir/lib/bdev/bdev_zone.c/bdev_zone_ut $valgrind $testdir/lib/bdev/bdev_zone.c/bdev_zone_ut
$valgrind $testdir/lib/bdev/gpt/gpt.c/gpt_ut $valgrind $testdir/lib/bdev/gpt/gpt.c/gpt_ut
$valgrind $testdir/lib/bdev/part.c/part_ut $valgrind $testdir/lib/bdev/part.c/part_ut