bdev/raid: Add concat module

The concat module can combine multiple underlying bdevs to a single
bdev. It is a special raid level. You can add a new bdev to the end of
the concat bdev, then the concat bdev size is increased, and it won't
change the layout of the existing data. This is the major difference
between concat and raid0. If you add a new underlying device to raid0,
the whole data layout will be changed. So the concat bdev is extendable.

Change-Id: Ibbeeaf0606ff79b595320c597a5605ab9e4e13c4
Signed-off-by: Peng Yu <yupeng0921@gmail.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11070
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
yupeng 2022-02-01 06:46:20 +00:00 committed by Tomasz Zawadzki
parent 428b17a0a8
commit 64eebbd132
12 changed files with 923 additions and 6 deletions

View File

@ -94,6 +94,14 @@ Added `dynamic` scheduler options: load_limit, core_limit, core_busy. Their desc
are available in JSON-RPC document, in section are available in JSON-RPC document, in section
[framework_set_scheduler](jsonrpc.html#rpc_framework_set_scheduler). [framework_set_scheduler](jsonrpc.html#rpc_framework_set_scheduler).
### raid
Add concat as a special raid module. The concat module could create a virtual bdev. The
virtual bdev combines multiple underlying bdevs together. The layout of the underlying
bdevs is one after another. The concat bdev is extendable. When the free space of the
concat bdev is not enough, the user can deconstruct the concat bdev, then reconstruct it
with an additional underlying bdev.
## v22.01 ## v22.01
### accel ### accel

View File

@ -38,7 +38,7 @@ SO_VER := 4
SO_MINOR := 0 SO_MINOR := 0
CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/ CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/
C_SRCS = bdev_raid.c bdev_raid_rpc.c raid0.c C_SRCS = bdev_raid.c bdev_raid_rpc.c raid0.c concat.c
ifeq ($(CONFIG_RAID5),y) ifeq ($(CONFIG_RAID5),y)
C_SRCS += raid5.c C_SRCS += raid5.c

View File

@ -907,6 +907,7 @@ static struct {
{ "0", RAID0 }, { "0", RAID0 },
{ "raid5", RAID5 }, { "raid5", RAID5 },
{ "5", RAID5 }, { "5", RAID5 },
{ "concat", CONCAT },
{ } { }
}; };

View File

@ -40,6 +40,7 @@ enum raid_level {
INVALID_RAID_LEVEL = -1, INVALID_RAID_LEVEL = -1,
RAID0 = 0, RAID0 = 0,
RAID5 = 5, RAID5 = 5,
CONCAT = 99,
}; };
/* /*

350
module/bdev/raid/concat.c Normal file
View File

@ -0,0 +1,350 @@
/*-
* BSD LICENSE
*
* Copyright (c) Peng Yu yupeng0921@gmail.com.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "bdev_raid.h"
#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/log.h"
/* Block range occupied by one base bdev inside the concat bdev's flat
 * logical address space. One entry per member, ordered by position. */
struct concat_block_range {
	uint64_t start;		/* first concat-bdev block served by this member */
	uint64_t length;	/* usable (strip-aligned) block count of this member */
};
/*
 * brief:
 * Completion callback for a child I/O submitted to a base bdev; frees the
 * child bdev_io and propagates its status to the parent raid_bdev_io.
 * params:
 * bdev_io - child bdev_io submitted to the base bdev
 * success - true if the child I/O completed successfully
 * cb_arg - parent raid_bdev_io
 * returns:
 * none
 */
static void
concat_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;
	enum spdk_bdev_io_status status;

	spdk_bdev_free_io(bdev_io);

	status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
	raid_bdev_io_complete(raid_io, status);
}
static void
concat_submit_rw_request(struct raid_bdev_io *raid_io);

/* Trampoline with the spdk_bdev_io_wait_cb (void *) signature, used to
 * re-submit a read/write request after an earlier -ENOMEM. */
static void
_concat_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_rw_request(raid_io);
}
/*
* brief:
* concat_submit_rw_request function is used to submit I/O to the correct
* member disk for concat bdevs.
* params:
* raid_io
* returns:
* none
*/
static void
concat_submit_rw_request(struct raid_bdev_io *raid_io)
{
struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
struct raid_bdev *raid_bdev = raid_io->raid_bdev;
struct concat_block_range *block_range = raid_bdev->module_private;
uint64_t pd_lba;
uint64_t pd_blocks;
int pd_idx;
int ret = 0;
struct raid_base_bdev_info *base_info;
struct spdk_io_channel *base_ch;
int i;
pd_idx = -1;
for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
if (block_range[i].start > bdev_io->u.bdev.offset_blocks) {
break;
}
pd_idx = i;
}
assert(pd_idx >= 0);
assert(bdev_io->u.bdev.offset_blocks >= block_range[pd_idx].start);
pd_lba = bdev_io->u.bdev.offset_blocks - block_range[pd_idx].start;
pd_blocks = bdev_io->u.bdev.num_blocks;
base_info = &raid_bdev->base_bdev_info[pd_idx];
if (base_info->desc == NULL) {
SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
assert(0);
}
/*
* Submit child io to bdev layer with using base bdev descriptors, base
* bdev lba, base bdev child io length in blocks, buffer, completion
* function and function callback context
*/
assert(raid_ch != NULL);
assert(raid_ch->base_channel);
base_ch = raid_ch->base_channel[pd_idx];
if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
ret = spdk_bdev_readv_blocks(base_info->desc, base_ch,
bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
pd_lba, pd_blocks, concat_bdev_io_completion,
raid_io);
} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
ret = spdk_bdev_writev_blocks(base_info->desc, base_ch,
bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
pd_lba, pd_blocks, concat_bdev_io_completion,
raid_io);
} else {
SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type);
assert(0);
}
if (ret == -ENOMEM) {
raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
_concat_submit_rw_request);
} else if (ret != 0) {
SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
assert(false);
raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
}
}
static void
concat_submit_null_payload_request(struct raid_bdev_io *raid_io);

/* Trampoline with the spdk_bdev_io_wait_cb (void *) signature, used to
 * re-submit a FLUSH/UNMAP request after an earlier -ENOMEM. */
static void
_concat_submit_null_payload_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_null_payload_request(raid_io);
}
/* Completion callback for one of possibly several child FLUSH/UNMAP I/Os;
 * counts this child toward the parent's completion tally, then frees it. */
static void
concat_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;
	enum spdk_bdev_io_status status;

	if (success) {
		status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else {
		status = SPDK_BDEV_IO_STATUS_FAILED;
	}
	raid_bdev_io_complete_part(raid_io, 1, status);
	spdk_bdev_free_io(bdev_io);
}
/*
 * brief:
 * concat_submit_null_payload_request function submits the next batch of
 * io requests with range but without payload, like FLUSH and UNMAP, to member disks;
 * it will submit as many as possible unless one base io request fails with -ENOMEM,
 * in which case it will queue itself for later submission.
 * params:
 * raid_io - pointer to parent raid_bdev_io on raid bdev device
 * returns:
 * none
 */
static void
concat_submit_null_payload_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io *bdev_io;
	struct raid_bdev *raid_bdev;
	int ret;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	uint64_t pd_lba;
	uint64_t pd_blocks;
	uint64_t offset_blocks;
	uint64_t num_blocks;
	struct concat_block_range *block_range;
	int i, start_idx, stop_idx;

	bdev_io = spdk_bdev_io_from_ctx(raid_io);
	raid_bdev = raid_io->raid_bdev;
	block_range = raid_bdev->module_private;
	offset_blocks = bdev_io->u.bdev.offset_blocks;
	num_blocks = bdev_io->u.bdev.num_blocks;
	start_idx = -1;
	stop_idx = -1;
	/*
	 * First pass: walk the members to find the first (start_idx) and last
	 * (stop_idx) base bdevs touched by [offset_blocks, offset_blocks + num_blocks).
	 */
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		/* skip the bdevs before the offset_blocks */
		if (offset_blocks >= block_range[i].start + block_range[i].length) {
			continue;
		}
		if (start_idx == -1) {
			start_idx = i;
		} else {
			/*
			 * The offset_blocks might be at the middle of the first bdev.
			 * Besides the first bdev, the offset_blocks should be always
			 * at the start of the bdev.
			 */
			assert(offset_blocks == block_range[i].start);
		}
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		if (num_blocks == 0) {
			stop_idx = i;
			break;
		}
	}
	assert(start_idx >= 0);
	assert(stop_idx >= 0);
	/* Only initialize the remaining count on the first invocation; on an
	 * ENOMEM retry the partial tally must be preserved. */
	if (raid_io->base_bdev_io_remaining == 0) {
		raid_io->base_bdev_io_remaining = stop_idx - start_idx + 1;
	}
	offset_blocks = bdev_io->u.bdev.offset_blocks;
	num_blocks = bdev_io->u.bdev.num_blocks;
	/*
	 * Second pass: recompute each member's (pd_lba, pd_blocks) slice and
	 * submit a child I/O per member, resuming after any already-submitted
	 * children from a previous attempt.
	 */
	for (i = start_idx; i <= stop_idx; i++) {
		assert(offset_blocks >= block_range[i].start);
		assert(offset_blocks < block_range[i].start + block_range[i].length);
		pd_lba = offset_blocks -  block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		/*
		 * Skip the IOs we have submitted
		 */
		if (i < start_idx + raid_io->base_bdev_io_submitted) {
			continue;
		}
		base_info = &raid_bdev->base_bdev_info[i];
		base_ch = raid_io->raid_ch->base_channel[i];
		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_UNMAP:
			ret = spdk_bdev_unmap_blocks(base_info->desc, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		case SPDK_BDEV_IO_TYPE_FLUSH:
			ret = spdk_bdev_flush_blocks(base_info->desc, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		default:
			SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type);
			assert(false);
			ret = -EIO;
		}
		if (ret == 0) {
			raid_io->base_bdev_io_submitted++;
		} else if (ret == -ENOMEM) {
			/* Queue the whole request for retry; base_bdev_io_submitted
			 * remembers how far we got. */
			raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
						_concat_submit_null_payload_request);
			return;
		} else {
			SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
			assert(false);
			raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
	}
}
/*
 * brief:
 * concat_start builds the per-member block range table: each base bdev is
 * rounded down to a whole number of strips and placed one after another in
 * the concat bdev's flat address space. Also sets the exported geometry so
 * that the generic raid layer splits I/O on strip boundaries.
 * params:
 * raid_bdev - raid bdev being brought online
 * returns:
 * 0 on success, -ENOMEM if the block range table cannot be allocated
 */
static int concat_start(struct raid_bdev *raid_bdev)
{
	uint64_t total_blockcnt = 0;
	struct raid_base_bdev_info *base_info;
	struct concat_block_range *block_range;
	int idx = 0;

	block_range = calloc(raid_bdev->num_base_bdevs, sizeof(struct concat_block_range));
	if (!block_range) {
		/* Original message lacked the trailing newline expected by SPDK_ERRLOG. */
		SPDK_ERRLOG("Can not allocate block_range, num_base_bdevs: %u\n",
			    raid_bdev->num_base_bdevs);
		return -ENOMEM;
	}

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		/* Round down to a whole strip count so member boundaries are
		 * always strip-aligned. */
		uint64_t strip_cnt = base_info->bdev->blockcnt >> raid_bdev->strip_size_shift;
		uint64_t pd_block_cnt = strip_cnt << raid_bdev->strip_size_shift;

		block_range[idx].start = total_blockcnt;
		block_range[idx].length = pd_block_cnt;
		total_blockcnt += pd_block_cnt;
		idx++;
	}

	raid_bdev->module_private = block_range;

	SPDK_DEBUGLOG(bdev_concat, "total blockcount %" PRIu64 ", numbasedev %u, strip size shift %u\n",
		      total_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
	raid_bdev->bdev.blockcnt = total_blockcnt;

	/* Splitting on strip boundaries guarantees a request never spans two
	 * member disks (see concat_submit_rw_request). */
	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;

	return 0;
}
/* Teardown hook: release the block range table allocated in concat_start(). */
static void
concat_stop(struct raid_bdev *raid_bdev)
{
	free(raid_bdev->module_private);
}
/* Module descriptor hooked into the generic raid bdev framework. */
static struct raid_bdev_module g_concat_module = {
	.level = CONCAT,
	.base_bdevs_min = 1,	/* a concat of a single member is valid */
	.start = concat_start,
	.stop = concat_stop,
	.submit_rw_request = concat_submit_rw_request,
	.submit_null_payload_request = concat_submit_null_payload_request,
};
RAID_MODULE_REGISTER(&g_concat_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_concat)

View File

@ -1944,7 +1944,7 @@ Format: 'user:u1 secret:s1 muser:mu1 msecret:ms1,user:u2 secret:s2 muser:mu2 mse
help='Create new raid bdev') help='Create new raid bdev')
p.add_argument('-n', '--name', help='raid bdev name', required=True) p.add_argument('-n', '--name', help='raid bdev name', required=True)
p.add_argument('-z', '--strip-size-kb', help='strip size in KB', type=int) p.add_argument('-z', '--strip-size-kb', help='strip size in KB', type=int)
p.add_argument('-r', '--raid-level', help='raid level, only raid level 0 is supported', required=True) p.add_argument('-r', '--raid-level', help='raid level, raid0 and a special level concat are supported', required=True)
p.add_argument('-b', '--base-bdevs', help='base bdevs name, whitespace separated list in quotes', required=True) p.add_argument('-b', '--base-bdevs', help='base bdevs name, whitespace separated list in quotes', required=True)
p.set_defaults(func=bdev_raid_create) p.set_defaults(func=bdev_raid_create)

View File

@ -60,12 +60,13 @@ function on_error_exit() {
} }
function configure_raid_bdev() { function configure_raid_bdev() {
local raid_level=$1
rm -rf $testdir/rpcs.txt rm -rf $testdir/rpcs.txt
cat <<- EOL >> $testdir/rpcs.txt cat <<- EOL >> $testdir/rpcs.txt
bdev_malloc_create 32 512 -b Base_1 bdev_malloc_create 32 512 -b Base_1
bdev_malloc_create 32 512 -b Base_2 bdev_malloc_create 32 512 -b Base_2
bdev_raid_create -z 64 -r 0 -b "Base_1 Base_2" -n raid0 bdev_raid_create -z 64 -r $raid_level -b "Base_1 Base_2" -n raid
EOL EOL
$rpc_py < $testdir/rpcs.txt $rpc_py < $testdir/rpcs.txt
@ -73,6 +74,7 @@ function configure_raid_bdev() {
} }
function raid_function_test() { function raid_function_test() {
local raid_level=$1
if [ $(uname -s) = Linux ] && modprobe -n nbd; then if [ $(uname -s) = Linux ] && modprobe -n nbd; then
local nbd=/dev/nbd0 local nbd=/dev/nbd0
local raid_bdev local raid_bdev
@ -83,7 +85,7 @@ function raid_function_test() {
echo "Process raid pid: $raid_pid" echo "Process raid pid: $raid_pid"
waitforlisten $raid_pid $rpc_server waitforlisten $raid_pid $rpc_server
configure_raid_bdev configure_raid_bdev $raid_level
raid_bdev=$($rpc_py bdev_raid_get_bdevs online | cut -d ' ' -f 1) raid_bdev=$($rpc_py bdev_raid_get_bdevs online | cut -d ' ' -f 1)
if [ $raid_bdev = "" ]; then if [ $raid_bdev = "" ]; then
echo "No raid0 device in SPDK app" echo "No raid0 device in SPDK app"
@ -114,6 +116,7 @@ function raid_function_test() {
trap 'on_error_exit;' ERR trap 'on_error_exit;' ERR
raid_function_test raid_function_test raid0
raid_function_test concat
rm -f $tmp_file rm -f $tmp_file

View File

@ -44,8 +44,11 @@ function setup_bdev_conf() {
bdev_malloc_create -b Malloc3 32 512 bdev_malloc_create -b Malloc3 32 512
bdev_malloc_create -b Malloc4 32 512 bdev_malloc_create -b Malloc4 32 512
bdev_malloc_create -b Malloc5 32 512 bdev_malloc_create -b Malloc5 32 512
bdev_malloc_create -b Malloc6 32 512
bdev_malloc_create -b Malloc7 32 512
bdev_passthru_create -p TestPT -b Malloc3 bdev_passthru_create -p TestPT -b Malloc3
bdev_raid_create -n raid0 -z 64 -r 0 -b "Malloc4 Malloc5" bdev_raid_create -n raid0 -z 64 -r 0 -b "Malloc4 Malloc5"
bdev_raid_create -n concat0 -z 64 -r concat -b "Malloc6 Malloc7"
bdev_set_qos_limit --rw_mbytes_per_sec 100 Malloc3 bdev_set_qos_limit --rw_mbytes_per_sec 100 Malloc3
bdev_set_qos_limit --rw_ios_per_sec 20000 Malloc0 bdev_set_qos_limit --rw_ios_per_sec 20000 Malloc0
RPC RPC

View File

@ -34,7 +34,7 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../../..) SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
DIRS-y = bdev_raid.c DIRS-y = bdev_raid.c concat.c
DIRS-$(CONFIG_RAID5) += raid5.c DIRS-$(CONFIG_RAID5) += raid5.c

View File

@ -0,0 +1,5 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../../../..)
TEST_FILE = concat_ut.c
include $(SPDK_ROOT_DIR)/mk/spdk.unittest.mk

View File

@ -0,0 +1,545 @@
#include "spdk/stdinc.h"
#include "spdk_cunit.h"
#include "spdk/env.h"
#include "thread/thread_internal.h"
#include "spdk_internal/mock.h"
#include "bdev/raid/bdev_raid.h"
#include "bdev/raid/concat.c"
#define BLOCK_LEN (4096)
/* I/O kinds recorded by the stubbed spdk_bdev_*_blocks functions.
 * CONCAT_NONE (0) marks an unused record slot. */
enum CONCAT_IO_TYPE {
	CONCAT_NONE = 0,
	CONCAT_WRITEV,
	CONCAT_READV,
	CONCAT_FLUSH,
	CONCAT_UNMAP,
};
/* Minimal local definition of the otherwise-opaque descriptor type so the
 * test can calloc() fake descriptors for the base bdevs. */
struct spdk_bdev_desc {
	struct spdk_bdev *bdev;
};
#define MAX_RECORDS (10)
/*
 * Store the information of io requests sent to the underlying bdevs.
 * For a single null payload request to the concat bdev,
 * we may send multiple requests to the underlying bdevs,
 * so we store the io request information to arrays.
 */
struct req_records {
	uint64_t offset_blocks[MAX_RECORDS];	/* per-member LBA of each child I/O */
	uint64_t num_blocks[MAX_RECORDS];	/* block count of each child I/O */
	enum CONCAT_IO_TYPE io_type[MAX_RECORDS];
	int count;				/* number of child I/Os recorded */
} g_req_records;
/*
 * Controls the stubbed spdk_bdev_readv/writev/unmap/flush_blocks functions:
 * when true they record the request and return 0; when false they return
 * -ENOMEM.  Tests start with g_succeed == false, so the first submission
 * fails; the raid_bdev_queue_io_wait stub then flips it to true and
 * re-submits, so the retried I/O succeeds.
 */
bool g_succeed;
/* Stubs for raid framework symbols that concat.c references but whose
 * behavior is irrelevant to (or checked differently by) this unit test. */
DEFINE_STUB_V(raid_bdev_module_list_add, (struct raid_bdev_module *raid_module));
DEFINE_STUB_V(raid_bdev_io_complete, (struct raid_bdev_io *raid_io,
				      enum spdk_bdev_io_status status));
DEFINE_STUB_V(spdk_bdev_free_io, (struct spdk_bdev_io *bdev_io));
DEFINE_STUB(raid_bdev_io_complete_part, bool,
	    (struct raid_bdev_io *raid_io, uint64_t completed,
	     enum spdk_bdev_io_status status),
	    true);
/* Stubbed read: fails with -ENOMEM until g_succeed is set, then records the
 * request, invokes the completion callback inline and reports success. */
int
spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		       struct iovec *iov, int iovcnt, uint64_t offset_blocks, uint64_t num_blocks,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	int idx;

	if (!g_succeed) {
		return -ENOMEM;
	}

	idx = g_req_records.count++;
	g_req_records.offset_blocks[idx] = offset_blocks;
	g_req_records.num_blocks[idx] = num_blocks;
	g_req_records.io_type[idx] = CONCAT_READV;
	cb(NULL, true, cb_arg);
	return 0;
}
/* Stubbed write: same contract as the stubbed read, recording CONCAT_WRITEV. */
int
spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			struct iovec *iov, int iovcnt, uint64_t offset_blocks, uint64_t num_blocks,
			spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	int idx;

	if (!g_succeed) {
		return -ENOMEM;
	}

	idx = g_req_records.count++;
	g_req_records.offset_blocks[idx] = offset_blocks;
	g_req_records.num_blocks[idx] = num_blocks;
	g_req_records.io_type[idx] = CONCAT_WRITEV;
	cb(NULL, true, cb_arg);
	return 0;
}
/* Stubbed unmap: same contract as the stubbed read, recording CONCAT_UNMAP. */
int
spdk_bdev_unmap_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		       uint64_t offset_blocks, uint64_t num_blocks,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	int idx;

	if (!g_succeed) {
		return -ENOMEM;
	}

	idx = g_req_records.count++;
	g_req_records.offset_blocks[idx] = offset_blocks;
	g_req_records.num_blocks[idx] = num_blocks;
	g_req_records.io_type[idx] = CONCAT_UNMAP;
	cb(NULL, true, cb_arg);
	return 0;
}
/* Stubbed flush: same contract as the stubbed read, recording CONCAT_FLUSH. */
int
spdk_bdev_flush_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		       uint64_t offset_blocks, uint64_t num_blocks,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	int idx;

	if (!g_succeed) {
		return -ENOMEM;
	}

	idx = g_req_records.count++;
	g_req_records.offset_blocks[idx] = offset_blocks;
	g_req_records.num_blocks[idx] = num_blocks;
	g_req_records.io_type[idx] = CONCAT_FLUSH;
	cb(NULL, true, cb_arg);
	return 0;
}
/*
 * Stub of the raid framework's ENOMEM retry queue: instead of waiting for
 * resources it immediately flips g_succeed to true and re-invokes the
 * request callback, so an I/O that first failed with -ENOMEM succeeds on
 * the retry.
 */
void
raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,
			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn)
{
	g_succeed = true;
	cb_fn(raid_io);
}
/* Reset the request log and the submission-failure switch before each case.
 * CONCAT_NONE and the counters are all zero, so one memset clears every
 * record slot as well as the count. */
static void
init_globals(void)
{
	memset(&g_req_records, 0, sizeof(g_req_records));
	g_succeed = false;
}
/* One combination of geometry parameters to run every test against. */
struct concat_params {
	uint8_t num_base_bdevs;		/* number of member disks */
	uint64_t base_bdev_blockcnt;	/* blocks per member disk */
	uint32_t base_bdev_blocklen;	/* block size in bytes */
	uint32_t strip_size;		/* strip size in blocks */
};

/* Parameter matrix built in test_setup(), freed in test_cleanup(). */
static struct concat_params *g_params;
static size_t g_params_count;
/* Iterate a fixed-size array by element pointer. */
#define ARRAY_FOR_EACH(a, e) \
	for (e = a; e < a + SPDK_COUNTOF(a); e++)

/* Iterate every valid parameter combination in the global matrix. */
#define CONCAT_PARAMS_FOR_EACH(p) \
	for (p = g_params; p < g_params + g_params_count; p++)
/*
 * Suite setup: build the global parameter matrix as the Cartesian product of
 * member count, member block count, block length and strip size (in KB).
 * Combinations whose strip size converts to zero blocks or exceeds a member
 * are dropped: g_params_count is decremented and the write cursor does not
 * advance, so the allocation is an upper bound on the logical count.
 */
static int
test_setup(void)
{
	uint8_t num_base_bdevs_values[] = { 3, 4, 5 };
	uint64_t base_bdev_blockcnt_values[] = { 1, 1024, 1024 * 1024 };
	uint32_t base_bdev_blocklen_values[] = { 512, 4096 };
	uint32_t strip_size_kb_values[] = { 1, 4, 128 };
	uint8_t *num_base_bdevs;
	uint64_t *base_bdev_blockcnt;
	uint32_t *base_bdev_blocklen;
	uint32_t *strip_size_kb;
	struct concat_params *params;

	g_params_count = SPDK_COUNTOF(num_base_bdevs_values) *
			 SPDK_COUNTOF(base_bdev_blockcnt_values) *
			 SPDK_COUNTOF(base_bdev_blocklen_values) *
			 SPDK_COUNTOF(strip_size_kb_values);
	g_params = calloc(g_params_count, sizeof(*g_params));
	if (!g_params) {
		return -ENOMEM;
	}

	params = g_params;
	ARRAY_FOR_EACH(num_base_bdevs_values, num_base_bdevs) {
		ARRAY_FOR_EACH(base_bdev_blockcnt_values, base_bdev_blockcnt) {
			ARRAY_FOR_EACH(base_bdev_blocklen_values, base_bdev_blocklen) {
				ARRAY_FOR_EACH(strip_size_kb_values, strip_size_kb) {
					params->num_base_bdevs = *num_base_bdevs;
					params->base_bdev_blockcnt = *base_bdev_blockcnt;
					params->base_bdev_blocklen = *base_bdev_blocklen;
					/* KB -> blocks conversion */
					params->strip_size = *strip_size_kb * 1024 / *base_bdev_blocklen;
					if (params->strip_size == 0 ||
					    params->strip_size > *base_bdev_blockcnt) {
						g_params_count--;
						continue;
					}
					params++;
				}
			}
		}
	}

	return 0;
}
/* Suite teardown: release the parameter matrix allocated in test_setup(). */
static int
test_cleanup(void)
{
	free(g_params);
	return 0;
}
/*
 * Allocate a fake raid_bdev wired to the concat module, with one calloc'd
 * fake bdev and descriptor per member sized from params.  The caller frees
 * everything via delete_raid_bdev().
 */
static struct raid_bdev *
create_raid_bdev(struct concat_params *params)
{
	struct raid_bdev *raid_bdev;
	struct raid_base_bdev_info *base_info;

	raid_bdev = calloc(1, sizeof(*raid_bdev));
	SPDK_CU_ASSERT_FATAL(raid_bdev != NULL);

	raid_bdev->module = &g_concat_module;
	raid_bdev->num_base_bdevs = params->num_base_bdevs;
	raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs,
					   sizeof(struct raid_base_bdev_info));
	SPDK_CU_ASSERT_FATAL(raid_bdev->base_bdev_info != NULL);

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		base_info->bdev = calloc(1, sizeof(*base_info->bdev));
		SPDK_CU_ASSERT_FATAL(base_info->bdev != NULL);
		base_info->desc = calloc(1, sizeof(*base_info->desc));
		SPDK_CU_ASSERT_FATAL(base_info->desc != NULL);

		base_info->bdev->blockcnt = params->base_bdev_blockcnt;
		base_info->bdev->blocklen = params->base_bdev_blocklen;
	}

	raid_bdev->strip_size = params->strip_size;
	raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size);
	raid_bdev->bdev.blocklen = params->base_bdev_blocklen;

	return raid_bdev;
}
/* Free a fake raid_bdev built by create_raid_bdev(): the per-member fake
 * descriptors and bdevs first, then the containers. */
static void
delete_raid_bdev(struct raid_bdev *raid_bdev)
{
	struct raid_base_bdev_info *base_info;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		free(base_info->desc);
		free(base_info->bdev);
	}
	free(raid_bdev->base_bdev_info);
	free(raid_bdev);
}
/* Build a fake raid_bdev and bring the concat module online on it. */
static struct raid_bdev *
create_concat(struct concat_params *params)
{
	struct raid_bdev *raid_bdev = create_raid_bdev(params);

	CU_ASSERT(concat_start(raid_bdev) == 0);
	return raid_bdev;
}
/* Tear down the concat module state, then free the fake raid_bdev. */
static void
delete_concat(struct raid_bdev *raid_bdev)
{
	concat_stop(raid_bdev);
	delete_raid_bdev(raid_bdev);
}
/*
 * Verify that concat_start() lays the members out back to back: every
 * member's start equals the running total and its length equals the member
 * block count (all test geometries are already strip-aligned).
 */
static void
test_concat_start(void)
{
	struct raid_bdev *raid_bdev;
	struct concat_params *params;
	struct concat_block_range *block_range;
	uint64_t total_blockcnt;
	int i;

	CONCAT_PARAMS_FOR_EACH(params) {
		raid_bdev = create_concat(params);
		block_range = raid_bdev->module_private;
		total_blockcnt = 0;
		for (i = 0; i < params->num_base_bdevs; i++) {
			CU_ASSERT(block_range[i].start == total_blockcnt);
			CU_ASSERT(block_range[i].length == params->base_bdev_blockcnt);
			total_blockcnt += params->base_bdev_blockcnt;
		}
		delete_concat(raid_bdev);
	}
}
/*
 * Release the iovec and payload buffer attached by bdev_io_initialize(),
 * then the bdev_io itself.  free(NULL) is a no-op per the C standard, so
 * the inner guard on iov_base in the original was redundant.
 */
static void
bdev_io_cleanup(struct spdk_bdev_io *bdev_io)
{
	if (bdev_io->u.bdev.iovs) {
		free(bdev_io->u.bdev.iovs->iov_base);
		free(bdev_io->u.bdev.iovs);
	}
	free(bdev_io);
}
/*
 * Fill in the minimal fields of a fake bdev_io for the test.  Null payload
 * requests (UNMAP/FLUSH) carry no iovec; read/write requests get a single
 * iovec with a BLOCK_LEN-sized buffer per block.
 */
static void
bdev_io_initialize(struct spdk_bdev_io *bdev_io, struct spdk_io_channel *ch, struct spdk_bdev *bdev,
		   uint64_t lba, uint64_t blocks, int16_t iotype)
{
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	bdev_io->bdev = bdev;
	bdev_io->u.bdev.offset_blocks = lba;
	bdev_io->u.bdev.num_blocks = blocks;
	bdev_io->type = iotype;

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_UNMAP || bdev_io->type == SPDK_BDEV_IO_TYPE_FLUSH) {
		return;
	}

	bdev_io->u.bdev.iovcnt = 1;
	bdev_io->u.bdev.iovs = calloc(1, sizeof(struct iovec));
	SPDK_CU_ASSERT_FATAL(bdev_io->u.bdev.iovs != NULL);
	/* Use BLOCK_LEN for the allocation too; the original used a bare 4096
	 * here while iov_len below used BLOCK_LEN — keep them consistent. */
	bdev_io->u.bdev.iovs->iov_base = calloc(1, bdev_io->u.bdev.num_blocks * BLOCK_LEN);
	SPDK_CU_ASSERT_FATAL(bdev_io->u.bdev.iovs->iov_base != NULL);
	bdev_io->u.bdev.iovs->iov_len = bdev_io->u.bdev.num_blocks * BLOCK_LEN;
	bdev_io->internal.ch = channel;
}
/*
 * Submit a one-block read or write at the first LBA of each member in turn
 * and verify exactly one child I/O was recorded, mapped to per-member LBA 0.
 * The stubs fail the first submission with -ENOMEM, so this also exercises
 * the queue-and-retry path.
 */
static void
submit_and_verify_rw(enum CONCAT_IO_TYPE io_type, struct concat_params *params)
{
	struct raid_bdev *raid_bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_io_channel *ch;
	struct raid_bdev_io *raid_io;
	struct raid_bdev_io_channel *raid_ch;
	uint64_t lba, blocks;
	int i;

	lba = 0;
	blocks = 1;
	for (i = 0; i < params->num_base_bdevs; i++) {
		init_globals();
		raid_bdev = create_concat(params);
		/* raid_bdev_io lives in the bdev_io's driver context area */
		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
		raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
		raid_ch = calloc(1, sizeof(struct raid_bdev_io_channel));
		SPDK_CU_ASSERT_FATAL(raid_ch != NULL);
		raid_ch->base_channel = calloc(params->num_base_bdevs,
					       sizeof(struct spdk_io_channel));
		SPDK_CU_ASSERT_FATAL(raid_ch->base_channel != NULL);
		raid_io->raid_ch = raid_ch;
		raid_io->raid_bdev = raid_bdev;
		ch = calloc(1, sizeof(struct spdk_io_channel));
		SPDK_CU_ASSERT_FATAL(ch != NULL);

		switch (io_type) {
		case CONCAT_WRITEV:
			bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_WRITE);
			concat_submit_rw_request(raid_io);
			break;
		case CONCAT_READV:
			bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_READ);
			concat_submit_rw_request(raid_io);
			break;
		case CONCAT_UNMAP:
			bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_UNMAP);
			concat_submit_null_payload_request(raid_io);
			break;
		case CONCAT_FLUSH:
			bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_FLUSH);
			concat_submit_null_payload_request(raid_io);
			break;
		default:
			CU_ASSERT(false);
		}

		/*
		 * We submit request to the first lba of each underlying device,
		 * so the offset of the underlying device should always be 0.
		 */
		CU_ASSERT(g_req_records.offset_blocks[0] == 0);
		CU_ASSERT(g_req_records.num_blocks[0] == blocks);
		CU_ASSERT(g_req_records.io_type[0] == io_type);
		CU_ASSERT(g_req_records.count == 1);
		bdev_io_cleanup(bdev_io);
		free(ch);
		free(raid_ch->base_channel);
		free(raid_ch);
		delete_concat(raid_bdev);
		/* advance to the first LBA of the next member */
		lba += params->base_bdev_blockcnt;
	}
}
static void
test_concat_rw(void)
{
struct concat_params *params;
enum CONCAT_IO_TYPE io_type_list[] = {CONCAT_WRITEV, CONCAT_READV};
enum CONCAT_IO_TYPE io_type;
int i;
CONCAT_PARAMS_FOR_EACH(params) {
for (i = 0; i < 2; i ++) {
io_type = io_type_list[i];
submit_and_verify_rw(io_type, params);
}
}
}
/*
 * Submit a FLUSH or UNMAP that starts inside the second member and (when
 * geometry allows) crosses into the third, then verify the per-member split
 * recorded by the stubs.  Adds the io_type check that the original omitted
 * in the single-request branch, for consistency with the other branches.
 */
static void
submit_and_verify_null_payload(enum CONCAT_IO_TYPE io_type, struct concat_params *params)
{
	struct raid_bdev *raid_bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_io_channel *ch;
	struct raid_bdev_io *raid_io;
	struct raid_bdev_io_channel *raid_ch;
	uint64_t lba, blocks;

	/*
	 * In this unittest, all base bdevs have the same blockcnt.
	 * If the base_bdev_blockcnt > 1, the request will start from
	 * the second bdev, and across two bdevs.
	 * If the base_bdev_blockcnt == 1, the request will start from
	 * the third bdev. In this case, if there are only 3 bdevs,
	 * we can not set blocks to base_bdev_blockcnt + 1 because the request
	 * will be beyond the end of the last bdev, so we set the blocks to 1
	 */
	lba = params->base_bdev_blockcnt + 1;
	if (params->base_bdev_blockcnt == 1 && params->num_base_bdevs == 3) {
		blocks = 1;
	} else {
		blocks = params->base_bdev_blockcnt + 1;
	}

	init_globals();
	raid_bdev = create_concat(params);
	bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
	SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
	raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
	raid_ch = calloc(1, sizeof(struct raid_bdev_io_channel));
	SPDK_CU_ASSERT_FATAL(raid_ch != NULL);
	raid_ch->base_channel = calloc(params->num_base_bdevs,
				       sizeof(struct spdk_io_channel));
	SPDK_CU_ASSERT_FATAL(raid_ch->base_channel != NULL);
	raid_io->raid_ch = raid_ch;
	raid_io->raid_bdev = raid_bdev;
	ch = calloc(1, sizeof(struct spdk_io_channel));
	SPDK_CU_ASSERT_FATAL(ch != NULL);

	switch (io_type) {
	case CONCAT_UNMAP:
		bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_UNMAP);
		concat_submit_null_payload_request(raid_io);
		break;
	case CONCAT_FLUSH:
		bdev_io_initialize(bdev_io, ch, &raid_bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_FLUSH);
		concat_submit_null_payload_request(raid_io);
		break;
	default:
		CU_ASSERT(false);
	}

	if (params->base_bdev_blockcnt == 1) {
		if (params->num_base_bdevs == 3) {
			/* one single-block request to the third member */
			CU_ASSERT(g_req_records.count == 1);
			CU_ASSERT(g_req_records.offset_blocks[0] == 0);
			CU_ASSERT(g_req_records.num_blocks[0] == 1);
			/* check the io type here too (missing in the original) */
			CU_ASSERT(g_req_records.io_type[0] == io_type);
		} else {
			/* split across the third and fourth members */
			CU_ASSERT(g_req_records.count == 2);
			CU_ASSERT(g_req_records.offset_blocks[0] == 0);
			CU_ASSERT(g_req_records.num_blocks[0] == 1);
			CU_ASSERT(g_req_records.io_type[0] == io_type);
			CU_ASSERT(g_req_records.offset_blocks[1] == 0);
			CU_ASSERT(g_req_records.num_blocks[1] == 1);
			CU_ASSERT(g_req_records.io_type[1] == io_type);
		}
	} else {
		/* tail of the second member plus two blocks of the third */
		CU_ASSERT(g_req_records.count == 2);
		CU_ASSERT(g_req_records.offset_blocks[0] == 1);
		CU_ASSERT(g_req_records.num_blocks[0] == params->base_bdev_blockcnt - 1);
		CU_ASSERT(g_req_records.io_type[0] == io_type);
		CU_ASSERT(g_req_records.offset_blocks[1] == 0);
		CU_ASSERT(g_req_records.num_blocks[1] == 2);
		CU_ASSERT(g_req_records.io_type[1] == io_type);
	}
	bdev_io_cleanup(bdev_io);
	free(ch);
	free(raid_ch->base_channel);
	free(raid_ch);
	delete_concat(raid_bdev);
}
static void
test_concat_null_payload(void)
{
struct concat_params *params;
enum CONCAT_IO_TYPE io_type_list[] = {CONCAT_FLUSH, CONCAT_UNMAP};
enum CONCAT_IO_TYPE io_type;
int i;
CONCAT_PARAMS_FOR_EACH(params) {
for (i = 0; i < 2; i ++) {
io_type = io_type_list[i];
submit_and_verify_null_payload(io_type, params);
}
}
}
/* Register the concat test suite with CUnit, run it, and report the number
 * of failed assertions as the process exit code. */
int
main(int argc, char **argv)
{
	CU_pSuite suite = NULL;
	unsigned int num_failures;

	CU_set_error_action(CUEA_ABORT);
	CU_initialize_registry();

	/* test_setup/test_cleanup build and free the parameter matrix */
	suite = CU_add_suite("concat", test_setup, test_cleanup);
	CU_ADD_TEST(suite, test_concat_start);
	CU_ADD_TEST(suite, test_concat_rw);
	CU_ADD_TEST(suite, test_concat_null_payload);

	CU_basic_set_mode(CU_BRM_VERBOSE);
	CU_basic_run_tests();
	num_failures = CU_get_number_of_failures();
	CU_cleanup_registry();
	return num_failures;
}

View File

@ -16,6 +16,7 @@ function unittest_bdev() {
$valgrind $testdir/lib/bdev/bdev.c/bdev_ut $valgrind $testdir/lib/bdev/bdev.c/bdev_ut
$valgrind $testdir/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut $valgrind $testdir/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut
$valgrind $testdir/lib/bdev/raid/bdev_raid.c/bdev_raid_ut $valgrind $testdir/lib/bdev/raid/bdev_raid.c/bdev_raid_ut
$valgrind $testdir/lib/bdev/raid/concat.c/concat_ut
$valgrind $testdir/lib/bdev/bdev_zone.c/bdev_zone_ut $valgrind $testdir/lib/bdev/bdev_zone.c/bdev_zone_ut
$valgrind $testdir/lib/bdev/gpt/gpt.c/gpt_ut $valgrind $testdir/lib/bdev/gpt/gpt.c/gpt_ut
$valgrind $testdir/lib/bdev/part.c/part_ut $valgrind $testdir/lib/bdev/part.c/part_ut