From 41586b0f1dd6bb1227f32ad654eee4108d9960b1 Mon Sep 17 00:00:00 2001
From: Kunal Sablok
Date: Tue, 8 May 2018 07:30:29 -0400
Subject: [PATCH] bdev: add raid bdev module

Raid module:
============
- The SPDK raid bdev module is a new bdev module which stripes across
  multiple NVMe devices and exposes a single raid bdev to the bdev layer,
  enhancing both performance and capacity.
- It can theoretically support 256 base devices (currently tested with up
  to 8 base devices).
- Multiple strip sizes such as 32KB, 64KB, 128KB, 256KB and 512KB are
  supported. Most of the current testing is focused on a 64KB strip size.
- New RPC commands ("create raid bdev", "destroy raid bdev" and "get raid
  bdevs") are introduced to configure raid bdevs dynamically in a running
  SPDK system.
- Currently, raid bdev configuration parameters are persisted in the SPDK
  configuration file so that they survive reboot. DDF will be introduced
  later.

High level testing done:
========================
- A raid bdev is created with 8 base NVMe devices via the configuration
  file and is exposed to the initiator via existing methods. The initiator
  is able to see a single NVMe namespace with capacity equal to 8 times the
  smallest base device capacity. The initiator was able to run raw
  read/write workloads and file system workloads (tested with XFS).
- Multiple raid bdevs were also created, exposed to the initiator, and
  tested with file system and other read/write IO workloads.
- LVS / LVOL were created over a raid bdev and exposed to the initiator.
  Testing was done with raw read/write workloads and XFS file system
  workloads.
- RPC testing was done where raid bdevs are created out of NVMe base
  devices on a running SPDK system. These raid bdevs (and LVOLs over raid
  bdevs) were then exposed to the initiator, and IO workloads were tested:
  raw read/write and XFS file system workloads.
- RPC testing was done for deleting raid bdevs, where all raid bdevs are
  deleted on a running SPDK system.
- RPC testing was done for "get raid bdevs", where the existing list of
  raid bdev names is printed (all raid bdevs, or only the online, only the
  configuring, or only the offline ones).
- RPC testing was done where the relationship between raid bdevs and the
  underlying NVMe devices is returned in JSON RPC commands.
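As a usage sketch (the section format below is the one documented and parsed
by raid_bdev_parse_raid() in bdev_raid.c; the bdev names are placeholders),
a raid bdev can be described in the SPDK configuration file as:

    [RAID1]
      Name raid1
      StripSize 64
      NumDevices 2
      RaidLevel 0
      Devices Nvme0n1 Nvme1n1

The same raid bdev can also be created at runtime through the new RPCs
listed above (the exact rpc.py command names live in scripts/rpc.py, which
this patch extends).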
Change-Id: I10ae1266f8f2cca3c106e4df8c1c0993ddf435d8
Signed-off-by: Kunal Sablok
Reviewed-on: https://review.gerrithub.io/410484
Tested-by: SPDK CI Jenkins
Reviewed-by: Ben Walker
Reviewed-by: Jim Harris
Chandler-Test-Pool: SPDK Automated Test System
---
 CHANGELOG.md                                  |    7 +
 CONFIG                                        |    3 +
 configure                                     |   12 +
 lib/bdev/Makefile                             |    1 +
 lib/bdev/raid/Makefile                        |   41 +
 lib/bdev/raid/bdev_raid.c                     | 1321 ++++++++++
 lib/bdev/raid/bdev_raid.h                     |  230 ++
 lib/bdev/raid/bdev_raid_rpc.c                 |  632 +++++
 mk/spdk.modules.mk                            |    4 +
 scripts/rpc.py                                |   39 +
 scripts/rpc/bdev.py                           |   43 +
 test/unit/lib/bdev/Makefile                   |    2 +-
 test/unit/lib/bdev/bdev_raid.c/.gitignore     |    1 +
 test/unit/lib/bdev/bdev_raid.c/Makefile       |   56 +
 test/unit/lib/bdev/bdev_raid.c/bdev_raid_ut.c | 2123 +++++++++++++++++
 test/unit/unittest.sh                         |    1 +
 16 files changed, 4515 insertions(+), 1 deletion(-)
 create mode 100644 lib/bdev/raid/Makefile
 create mode 100644 lib/bdev/raid/bdev_raid.c
 create mode 100644 lib/bdev/raid/bdev_raid.h
 create mode 100644 lib/bdev/raid/bdev_raid_rpc.c
 create mode 100644 test/unit/lib/bdev/bdev_raid.c/.gitignore
 create mode 100644 test/unit/lib/bdev/bdev_raid.c/Makefile
 create mode 100644 test/unit/lib/bdev/bdev_raid.c/bdev_raid_ut.c

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2346913f1..433257d56 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,13 @@

 ## v18.07: (Upcoming Release)

+### RAID module
+A new bdev module called "raid" has been added as an experimental module. It
+aggregates underlying NVMe bdevs and exposes a single raid bdev to the upper
+bdev layers. LVS/LVOL can be created on top of it as use cases require, and
+these can be exposed through NVMe-oF subsystems. Please note that vhost will
+not work with the RAID module yet, as it does not support multiple I/O vectors.
+
 ### Log

 The debug log component flag has been renamed from `-t` to `-L` to prevent confusion

diff --git a/CONFIG b/CONFIG
index d503629ae..7b70731f6 100644
--- a/CONFIG
+++ b/CONFIG
@@ -98,3 +98,6 @@ CONFIG_VPP?=n

 # Requires libiscsi development libraries.
 CONFIG_ISCSI_INITIATOR?=n
+
+# Build with raid
+CONFIG_RAID?=n
diff --git a/configure b/configure
index e1b1eb0bd..6c797d611 100755
--- a/configure
+++ b/configure
@@ -49,6 +49,8 @@ function usage()
 	echo " No path required."
 	echo " iscsi-initiator [disabled]"
 	echo " No path required."
+	echo " raid [disabled]"
+	echo " No path required."
 	echo " vtune Required to profile I/O under Intel VTune Amplifier XE."
echo " example: /opt/intel/vtune_amplifier_xe_version" echo "" @@ -136,6 +138,13 @@ for i in "$@"; do --without-rbd) CONFIG_RBD=n ;; + --with-raid) + CONFIG_RAID=y + echo "Warning: vhost will not work with RAID module as multiple IOV support is not there" + ;; + --without-raid) + CONFIG_RAID=n + ;; --with-rdma) CONFIG_RDMA=y ;; @@ -327,6 +336,9 @@ fi if [ -n "$CONFIG_RBD" ]; then echo "CONFIG_RBD?=$CONFIG_RBD" >> CONFIG.local fi +if [ -n "$CONFIG_RAID" ]; then + echo "CONFIG_RAID?=$CONFIG_RAID" >> CONFIG.local +fi if [ -n "$CONFIG_VTUNE" ]; then echo "CONFIG_VTUNE?=$CONFIG_VTUNE" >> CONFIG.local fi diff --git a/lib/bdev/Makefile b/lib/bdev/Makefile index e5291212d..d29f9a56f 100644 --- a/lib/bdev/Makefile +++ b/lib/bdev/Makefile @@ -52,5 +52,6 @@ DIRS-$(CONFIG_PMDK) += pmem endif DIRS-$(CONFIG_RBD) += rbd +DIRS-$(CONFIG_RAID) += raid include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/bdev/raid/Makefile b/lib/bdev/raid/Makefile new file mode 100644 index 000000000..8332399df --- /dev/null +++ b/lib/bdev/raid/Makefile @@ -0,0 +1,41 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/ +C_SRCS = bdev_raid.c bdev_raid_rpc.c +LIBNAME = vbdev_raid + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/bdev/raid/bdev_raid.c b/lib/bdev/raid/bdev_raid.c new file mode 100644 index 000000000..aeb762af9 --- /dev/null +++ b/lib/bdev/raid/bdev_raid.c @@ -0,0 +1,1321 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "bdev_raid.h" +#include "spdk/env.h" +#include "spdk/io_channel.h" +#include "spdk/conf.h" +#include "spdk_internal/log.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/json.h" +#include "spdk/string.h" + +/* raid bdev config as read from config file */ +struct raid_config g_spdk_raid_config; + +/* + * List of raid bdev in configured list, these raid bdevs are registered with + * bdev layer + */ +struct spdk_raid_configured_tailq g_spdk_raid_bdev_configured_list; + +/* List of raid bdev in configuring list */ +struct spdk_raid_configuring_tailq g_spdk_raid_bdev_configuring_list; + +/* List of all raid bdevs */ +struct spdk_raid_all_tailq g_spdk_raid_bdev_list; + +/* List of all raid bdevs that are offline */ +struct spdk_raid_offline_tailq g_spdk_raid_bdev_offline_list; + +/* Function declarations */ +static void raid_bdev_examine(struct spdk_bdev *bdev); +static int raid_bdev_init(void); +static void raid_bdev_waitq_io_process(void *ctx); + + +/* + * brief: + * raid_bdev_create_cb function is a cb function for raid bdev which creates the + * hierarchy from raid bdev to base bdev io channels. 
It will be called per core + * params: + * io_device - pointer to raid bdev io device represented by raid_bdev + * ctx_buf - pointer to context buffer for raid bdev io channel + * returns: + * 0 - success + * non zero - failure + */ +static int +raid_bdev_create_cb(void *io_device, void *ctx_buf) +{ + struct raid_bdev *raid_bdev = io_device; + struct raid_bdev_io_channel *ch = ctx_buf; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_create_cb, %p\n", ch); + + assert(raid_bdev != NULL); + assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE); + + /* + * Store raid_bdev_ctxt in each channel which is used to get the read only + * raid bdev specific information during io split logic like base bdev + * descriptors, strip size etc + */ + ch->raid_bdev_ctxt = SPDK_CONTAINEROF(raid_bdev, struct raid_bdev_ctxt, raid_bdev); + + ch->base_bdevs_io_channel = calloc(ch->raid_bdev_ctxt->raid_bdev.num_base_bdevs, + sizeof(struct spdk_io_channel *)); + if (!ch->base_bdevs_io_channel) { + SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); + return -1; + } + for (uint32_t iter = 0; iter < ch->raid_bdev_ctxt->raid_bdev.num_base_bdevs; iter++) { + /* + * Get the spdk_io_channel for all the base bdevs. This is used during + * split logic to send the respective child bdev ios to respective base + * bdev io channel. + */ + ch->base_bdevs_io_channel[iter] = spdk_bdev_get_io_channel( + raid_bdev->base_bdev_info[iter].base_bdev_desc); + if (!ch->base_bdevs_io_channel[iter]) { + for (uint32_t iter1 = 0; iter1 < iter ; iter1++) { + spdk_put_io_channel(ch->base_bdevs_io_channel[iter1]); + } + free(ch->base_bdevs_io_channel); + SPDK_ERRLOG("Unable to create io channel for base bdev\n"); + return -1; + } + } + + return 0; +} + +/* + * brief: + * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the + * hierarchy from raid bdev to base bdev io channels. It will be called per core + * params: + * io_device - pointer to raid bdev io device represented by raid_bdev + * ctx_buf - pointer to context buffer for raid bdev io channel + * returns: + * none + */ +static void +raid_bdev_destroy_cb(void *io_device, void *ctx_buf) +{ + struct raid_bdev_io_channel *ch = ctx_buf; + struct raid_bdev *raid_bdev = io_device; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destroy_cb\n"); + + assert(raid_bdev != NULL); + assert(ch != NULL); + assert(ch->base_bdevs_io_channel); + for (uint32_t iter = 0; iter < raid_bdev->num_base_bdevs; iter++) { + /* Free base bdev channels */ + assert(ch->base_bdevs_io_channel[iter] != NULL); + spdk_put_io_channel(ch->base_bdevs_io_channel[iter]); + ch->base_bdevs_io_channel[iter] = NULL; + } + ch->raid_bdev_ctxt = NULL; + free(ch->base_bdevs_io_channel); + ch->base_bdevs_io_channel = NULL; +} + +/* + * brief: + * raid_bdev_cleanup is used to cleanup and free raid_bdev related data + * structures. 
+ * params:
+ * raid_bdev_ctxt - pointer to raid_bdev_ctxt
+ * returns:
+ * none
+ */
+static void
+raid_bdev_cleanup(struct raid_bdev_ctxt *raid_bdev_ctxt)
+{
+	struct raid_bdev *raid_bdev = &raid_bdev_ctxt->raid_bdev;
+
+	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_cleanup, %p name %s, state %u, raid_bdev_config %p\n",
+		      raid_bdev_ctxt,
+		      raid_bdev_ctxt->bdev.name, raid_bdev->state, raid_bdev->raid_bdev_config);
+	if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
+		TAILQ_REMOVE(&g_spdk_raid_bdev_configuring_list, raid_bdev, link_specific_list);
+	} else if (raid_bdev->state == RAID_BDEV_STATE_OFFLINE) {
+		TAILQ_REMOVE(&g_spdk_raid_bdev_offline_list, raid_bdev, link_specific_list);
+	} else {
+		assert(0);
+	}
+	TAILQ_REMOVE(&g_spdk_raid_bdev_list, raid_bdev, link_global_list);
+	assert(raid_bdev_ctxt->bdev.name);
+	free(raid_bdev_ctxt->bdev.name);
+	raid_bdev_ctxt->bdev.name = NULL;
+	assert(raid_bdev->base_bdev_info);
+	free(raid_bdev->base_bdev_info);
+	raid_bdev->base_bdev_info = NULL;
+	if (raid_bdev->raid_bdev_config) {
+		raid_bdev->raid_bdev_config->raid_bdev_ctxt = NULL;
+	}
+	free(raid_bdev_ctxt);
+}
+
+/*
+ * brief:
+ * raid_bdev_destruct is the destruct function table pointer for raid bdev
+ * params:
+ * ctxt - pointer to raid_bdev_ctxt
+ * returns:
+ * 0 - success
+ * non zero - failure
+ */
+static int
+raid_bdev_destruct(void *ctxt)
+{
+	struct raid_bdev_ctxt *raid_bdev_ctxt = ctxt;
+	struct raid_bdev *raid_bdev = &raid_bdev_ctxt->raid_bdev;
+
+	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destruct\n");
+
+	raid_bdev->destruct_called = true;
+	for (uint16_t iter = 0; iter < raid_bdev->num_base_bdevs; iter++) {
+		/*
+		 * Close the descriptors of all base bdevs whose removal was
+		 * requested by the lower layers
+		 */
+		if ((raid_bdev->base_bdev_info[iter].base_bdev_remove_scheduled == true) &&
+		    (raid_bdev->base_bdev_info[iter].base_bdev != NULL)) {
+			spdk_bdev_module_release_bdev(raid_bdev->base_bdev_info[iter].base_bdev);
+			spdk_bdev_close(raid_bdev->base_bdev_info[iter].base_bdev_desc);
+			raid_bdev->base_bdev_info[iter].base_bdev_desc = NULL;
+			raid_bdev->base_bdev_info[iter].base_bdev = NULL;
+			assert(raid_bdev->num_base_bdevs_discovered);
+			raid_bdev->num_base_bdevs_discovered--;
+		}
+	}
+
+	if (raid_bdev->num_base_bdevs_discovered == 0) {
+		/* Free the raid_bdev when there are no base bdevs left */
+		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev base bdevs is 0, going to free all in destruct\n");
+		raid_bdev_cleanup(raid_bdev_ctxt);
+	}
+
+	return 0;
+}
+
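+/*
+ * Illustrative walk-through of the split accounting used by
+ * raid_bdev_io_completion() below (hypothetical numbers, not from a trace):
+ * a parent IO spanning 4 strips starts with splits_pending = 4 and
+ * splits_comp_outstanding = 0. Each child submission moves one unit from
+ * splits_pending to splits_comp_outstanding, and each child completion
+ * decrements splits_comp_outstanding. The parent bdev_io is completed only
+ * once both counters reach zero, i.e. every child has been submitted and
+ * has completed.
+ */
+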
+/*
+ * brief:
+ * raid_bdev_io_completion function is called by the lower layers to notify
+ * the raid module that a particular bdev_io is completed.
+ * params:
+ * bdev_io - pointer to bdev io submitted to lower layers, like child io
+ * success - bdev_io status
+ * cb_arg - function callback context, like parent io pointer
+ * returns:
+ * none
+ */
+static void
+raid_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+	struct spdk_bdev_io *parent_io = cb_arg;
+	struct raid_bdev_io *raid_bdev_io = (struct raid_bdev_io *)parent_io->driver_ctx;
+
+	assert(raid_bdev_io->splits_comp_outstanding);
+	raid_bdev_io->splits_comp_outstanding--;
+	if (raid_bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
+		/*
+		 * Store the failure status if any child bdev io fails. If any child
+		 * fails, the overall parent bdev_io is considered failed, but the
+		 * parent bdev io status is only communicated to the upper layers
+		 * once all children have completed.
+		 */
+		raid_bdev_io->status = success;
+	}
+	/* Free child bdev io */
+	spdk_bdev_free_io(bdev_io);
+
+	if (!raid_bdev_io->splits_pending && !raid_bdev_io->splits_comp_outstanding) {
+		/*
+		 * If all children were submitted and all of them have completed,
+		 * process the parent bdev io completion and complete the parent
+		 * bdev io with the appropriate status. If any child bdev io failed,
+		 * the parent bdev io is considered failed.
+		 */
+		if (raid_bdev_io->status) {
+			spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+		} else {
+			spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_FAILED);
+		}
+	}
+}
+
+/*
+ * brief:
+ * raid_bdev_send_passthru function sends the bdev_io to the underlying
+ * base device, bypassing the splitting logic. This is used as an optimization
+ * when the raid bdev has only one base device.
+ * params:
+ * ch - pointer to io channel for this io
+ * bdev_io - pointer to bdev_io
+ * returns:
+ * 0 - success
+ * non-zero - error
+ */
+static int
+raid_bdev_send_passthru(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+	struct raid_bdev_io_channel *raid_bdev_io_channel;
+	struct raid_bdev_io *raid_bdev_io;
+	struct raid_bdev *raid_bdev;
+	int ret;
+
+	raid_bdev_io_channel = spdk_io_channel_get_ctx(ch);
+	raid_bdev = &raid_bdev_io_channel->raid_bdev_ctxt->raid_bdev;
+	raid_bdev_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
+	raid_bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS;
+
+	if (raid_bdev->base_bdev_info[0].base_bdev_desc == NULL) {
+		SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", 0);
+		assert(0);
+	}
+	raid_bdev_io->splits_pending = 0;
+	raid_bdev_io->splits_comp_outstanding = 1;
+	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
+		ret = spdk_bdev_read_blocks(raid_bdev->base_bdev_info[0].base_bdev_desc,
+					    raid_bdev_io_channel->base_bdevs_io_channel[0],
+					    bdev_io->u.bdev.iovs->iov_base,
+					    bdev_io->u.bdev.offset_blocks,
+					    bdev_io->u.bdev.num_blocks, raid_bdev_io_completion,
+					    bdev_io);
+	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
+		ret = spdk_bdev_write_blocks(raid_bdev->base_bdev_info[0].base_bdev_desc,
+					     raid_bdev_io_channel->base_bdevs_io_channel[0],
+					     bdev_io->u.bdev.iovs->iov_base,
+					     bdev_io->u.bdev.offset_blocks,
+					     bdev_io->u.bdev.num_blocks, raid_bdev_io_completion,
+					     bdev_io);
+	} else {
+		ret = -EINVAL;
+	}
+	if (ret != 0) {
+		/*
+		 * If the child io could not be submitted to the bdev layer, queue
+		 * the parent bdev io, with its current split state, in the wait
+		 * queue of this core. Processing will resume from exactly this
+		 * point. For example, if 4 splits are required and 2 children were
+		 * submitted, the parent io is queued to the io waitq of this core,
+		 * and when it is resumed it will try to submit children 3 and 4.
+		 */
+		raid_bdev_io->splits_pending = 1;
+		raid_bdev_io->splits_comp_outstanding = 0;
+		raid_bdev_io->ch = ch;
+		return ret;
+	}
+
+	return 0;
+}
+
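+/*
+ * Worked example of the RAID0 mapping computed by raid_bdev_submit_children()
+ * below (hypothetical numbers): with strip_size = 128 blocks
+ * (strip_size_shift = 7) and num_base_bdevs = 4, strip 10 of the raid bdev
+ * maps to pd_idx = 10 % 4 = 2 and pd_strip = 10 / 4 = 2. An IO starting at
+ * raid LBA 1300 falls in strip 1300 >> 7 = 10 with
+ * offset_in_strip = 1300 & 127 = 20, so its first child IO targets base
+ * bdev 2 at pd_lba = (2 << 7) + 20 = 276.
+ */
+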
+/*
+ * brief:
+ * raid_bdev_submit_children function is used to split the parent io and
+ * submit the children to the bdev layer. The bdev layer redirects the
+ * children to the appropriate base bdev nvme module.
+ * params:
+ * ch - pointer to spdk_io_channel for the raid bdev
+ * bdev_io - parent bdev io
+ * start_strip - start strip number of this io
+ * end_strip - end strip number of this io
+ * cur_strip - current strip number of this io to start processing
+ * buf - pointer to buffer for this io
+ * returns:
+ * 0 - success
+ * non zero - failure
+ */
+static int
+raid_bdev_submit_children(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
+			  uint64_t start_strip, uint64_t end_strip, uint64_t cur_strip, uint8_t *buf)
+{
+	struct raid_bdev_io_channel *raid_bdev_io_channel = spdk_io_channel_get_ctx(ch);
+	struct raid_bdev_io *raid_bdev_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
+	struct raid_bdev *raid_bdev = &raid_bdev_io_channel->raid_bdev_ctxt->raid_bdev;
+	uint64_t pd_strip;
+	uint32_t offset_in_strip;
+	uint64_t pd_lba;
+	uint64_t pd_blocks;
+	uint32_t pd_idx;
+	int ret;
+
+	for (uint64_t strip = cur_strip; strip <= end_strip; strip++) {
+		/*
+		 * For each strip of the parent bdev io, submit a child io to the
+		 * bdev layer. Calculate the base bdev level start lba, the length
+		 * and the buffer for that child io
+		 */
+		pd_strip = strip / raid_bdev->num_base_bdevs;
+		pd_idx = strip % raid_bdev->num_base_bdevs;
+		if (strip == start_strip) {
+			offset_in_strip = bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1);
+			pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip;
+			if (strip == end_strip) {
+				pd_blocks = bdev_io->u.bdev.num_blocks;
+			} else {
+				pd_blocks = raid_bdev->strip_size - offset_in_strip;
+			}
+		} else if (strip == end_strip) {
+			pd_lba = pd_strip << raid_bdev->strip_size_shift;
+			pd_blocks = ((bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) &
+				     (raid_bdev->strip_size - 1)) + 1;
+		} else {
+			pd_lba = pd_strip << raid_bdev->strip_size_shift;
+			pd_blocks = raid_bdev->strip_size;
+		}
+		raid_bdev_io->splits_comp_outstanding++;
+		assert(raid_bdev_io->splits_pending);
+		raid_bdev_io->splits_pending--;
+		if (raid_bdev->base_bdev_info[pd_idx].base_bdev_desc == NULL) {
+			SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
+			assert(0);
+		}
+
+		/*
+		 * Submit the child io to the bdev layer using the base bdev
+		 * descriptor, the base bdev lba, the child io length in blocks,
+		 * the buffer, the completion function and the callback context
+		 */
+		if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
+			ret = spdk_bdev_read_blocks(raid_bdev->base_bdev_info[pd_idx].base_bdev_desc,
+						    raid_bdev_io_channel->base_bdevs_io_channel[pd_idx],
+						    buf, pd_lba, pd_blocks, raid_bdev_io_completion,
+						    bdev_io);
+
+		} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
+			ret = spdk_bdev_write_blocks(raid_bdev->base_bdev_info[pd_idx].base_bdev_desc,
+						     raid_bdev_io_channel->base_bdevs_io_channel[pd_idx],
+						     buf, pd_lba, pd_blocks, raid_bdev_io_completion,
+						     bdev_io);
+		} else {
+			SPDK_ERRLOG("Received unsupported io type %u\n", bdev_io->type);
+			assert(0);
+		}
+		if (ret != 0) {
+			/*
+			 * If the child io could not be submitted to the bdev layer,
+			 * queue the parent bdev io, with its current split state, in
+			 * the wait queue of this core. Processing will resume from
+			 * exactly this point. For example, if 4 splits are required
+			 * and 2 children were submitted, the parent io is queued to
+			 * the io waitq of this core, and when it is resumed it will
+			 * try to submit children 3 and 4.
+			 */
+			raid_bdev_io->buf = buf;
+			raid_bdev_io->ch = ch;
+			raid_bdev_io->splits_comp_outstanding--;
+			raid_bdev_io->splits_pending++;
+			return ret;
+		}
+		buf += (pd_blocks << raid_bdev->blocklen_shift);
+	}
+
+	return 0;
+}
+
+/*
+ * brief:
+ * get_curr_base_bdev_index function calculates the base bdev index
+ * which should be processed next, based on the splits_pending parameter
+ * params:
+ * raid_bdev - pointer to raid bdev
+ * raid_bdev_io - pointer to parent io context
+ * returns:
+ * base bdev index
+ */
+static uint8_t
+get_curr_base_bdev_index(struct raid_bdev *raid_bdev, struct raid_bdev_io *raid_bdev_io)
+{
+	struct spdk_bdev_io *bdev_io;
+	uint64_t start_strip;
+	uint64_t end_strip;
+	uint64_t cur_strip;
+
+	bdev_io = SPDK_CONTAINEROF(raid_bdev_io, struct spdk_bdev_io, driver_ctx);
+	start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift;
+	end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >>
+		    raid_bdev->strip_size_shift;
+	cur_strip = start_strip + ((end_strip - start_strip + 1) - raid_bdev_io->splits_pending);
+
+	return (cur_strip % raid_bdev->num_base_bdevs);
+}
+
+/*
+ * brief:
+ * raid_bdev_io_terminate function terminates the execution of the IO. If
+ * any children are outstanding it waits for their completion, otherwise it
+ * immediately completes the IO with failure.
+ * params:
+ * bdev_io - pointer to parent io
+ * raid_bdev_io - pointer to parent io context
+ * returns:
+ * none
+ */
+static void
+raid_bdev_io_terminate(struct spdk_bdev_io *bdev_io, struct raid_bdev_io *raid_bdev_io)
+{
+	if (raid_bdev_io->splits_comp_outstanding == 0) {
+		/* If no children are outstanding, immediately fail the parent IO */
+		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+	} else {
+		/* If any children are outstanding,
+		 * wait for them to complete but don't send further IOs */
+		raid_bdev_io->splits_pending = 0;
+		raid_bdev_io->status = SPDK_BDEV_IO_STATUS_FAILED;
+	}
+}
+
+/*
+ * brief:
+ * raid_bdev_io_submit_fail_process function processes an IO which failed to
+ * submit. It stores the IO context and queues it using the bdev wait queue
+ * logic.
+ * params:
+ * raid_bdev - pointer to raid bdev
+ * bdev_io - pointer to bdev_io
+ * raid_bdev_io - pointer to raid bdev io
+ * ret - return code
+ * returns:
+ * none
+ */
+static void
+raid_bdev_io_submit_fail_process(struct raid_bdev *raid_bdev, struct spdk_bdev_io *bdev_io,
+				 struct raid_bdev_io *raid_bdev_io, int ret)
+{
+	struct raid_bdev_io_channel *raid_bdev_io_channel;
+	uint8_t pd_idx;
+
+	if (ret != -ENOMEM) {
+		raid_bdev_io_terminate(bdev_io, raid_bdev_io);
+	} else {
+		/* Queue the IO to the bdev layer wait queue */
+		pd_idx = get_curr_base_bdev_index(raid_bdev, raid_bdev_io);
+		raid_bdev_io->waitq_entry.bdev = raid_bdev->base_bdev_info[pd_idx].base_bdev;
+		raid_bdev_io->waitq_entry.cb_fn = raid_bdev_waitq_io_process;
+		raid_bdev_io->waitq_entry.cb_arg = raid_bdev_io;
+		raid_bdev_io_channel = spdk_io_channel_get_ctx(raid_bdev_io->ch);
+		if (spdk_bdev_queue_io_wait(raid_bdev->base_bdev_info[pd_idx].base_bdev,
+					    raid_bdev_io_channel->base_bdevs_io_channel[pd_idx],
+					    &raid_bdev_io->waitq_entry) != 0) {
+			SPDK_ERRLOG("bdev io waitq error, it should not happen\n");
+			assert(0);
+			raid_bdev_io_terminate(bdev_io, raid_bdev_io);
+		}
+	}
+}
+
+/*
+ * brief:
+ * raid_bdev_waitq_io_process function is the callback function
+ * registered by the raid bdev module with the bdev layer when bdev_io was
+ * unavailable.
+ * params:
+ * ctx - pointer to raid_bdev_io
+ * returns:
+ * none
+ */
+static void
+raid_bdev_waitq_io_process(void *ctx)
+{
+	struct raid_bdev_io *raid_bdev_io = ctx;
+	struct spdk_bdev_io *bdev_io;
+	struct raid_bdev_io_channel *raid_bdev_io_channel;
+	struct raid_bdev *raid_bdev;
+	int ret;
+	uint64_t start_strip;
+	uint64_t end_strip;
+	uint64_t cur_strip;
+
+	bdev_io = SPDK_CONTAINEROF(raid_bdev_io, struct spdk_bdev_io, driver_ctx);
+	/*
+	 * Try to submit the children of the parent bdev io. If a submission
+	 * fails due to a resource crunch, don't try to process other queued IOs.
+	 */
+	raid_bdev_io_channel = spdk_io_channel_get_ctx(raid_bdev_io->ch);
+	raid_bdev = &raid_bdev_io_channel->raid_bdev_ctxt->raid_bdev;
+	if (raid_bdev->num_base_bdevs > 1) {
+		start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift;
+		end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >>
+			    raid_bdev->strip_size_shift;
+		cur_strip = start_strip + ((end_strip - start_strip + 1) - raid_bdev_io->splits_pending);
+		ret = raid_bdev_submit_children(raid_bdev_io->ch, bdev_io, start_strip, end_strip, cur_strip,
+						raid_bdev_io->buf);
+	} else {
+		ret = raid_bdev_send_passthru(raid_bdev_io->ch, bdev_io);
+	}
+	if (ret != 0) {
+		raid_bdev_io_submit_fail_process(raid_bdev, bdev_io, raid_bdev_io, ret);
+	}
+}
+
+/*
+ * brief:
+ * raid_bdev_submit_request function is the submit_request function pointer of
+ * the raid bdev function table. It is used to submit IOs on the raid_bdev to
+ * the lower layers. If the io waitq is not empty, the parent bdev_io is
+ * queued at the end of the queue.
+ * params: + * ch - pointer to raid bdev io channel + * bdev_io - pointer to parent bdev_io on raid bdev device + * returns: + * none + */ +static void +raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct raid_bdev_io_channel *raid_bdev_io_channel; + struct raid_bdev_io *raid_bdev_io; + struct raid_bdev *raid_bdev; + uint64_t start_strip = 0; + uint64_t end_strip = 0; + int ret; + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + if (bdev_io->u.bdev.iovcnt != 1) { + SPDK_ERRLOG("iov vector count is not 1\n"); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + break; + } + /* + * IO parameters used during io split and io completion + */ + raid_bdev_io_channel = spdk_io_channel_get_ctx(ch); + raid_bdev = &raid_bdev_io_channel->raid_bdev_ctxt->raid_bdev; + raid_bdev_io = (struct raid_bdev_io *)bdev_io->driver_ctx; + if (raid_bdev->num_base_bdevs > 1) { + start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; + end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> + raid_bdev->strip_size_shift; + /* + * IO parameters used during io split and io completion + */ + raid_bdev_io->splits_pending = (end_strip - start_strip + 1); + raid_bdev_io->splits_comp_outstanding = 0; + raid_bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS; + ret = raid_bdev_submit_children(ch, bdev_io, start_strip, end_strip, start_strip, + bdev_io->u.bdev.iovs->iov_base); + } else { + ret = raid_bdev_send_passthru(ch, bdev_io); + } + if (ret != 0) { + raid_bdev_io_submit_fail_process(raid_bdev, bdev_io, raid_bdev_io, ret); + } + break; + + case SPDK_BDEV_IO_TYPE_FLUSH: + // TODO: support flush if requirement comes + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); + break; + + default: + SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + break; + } + +} + +/* + * brief: + * raid_bdev_io_type_supported is the io_supported function for bdev function + * table which returns whether the particular io type is supported or not by + * raid bdev module + * params: + * ctx - pointer to raid bdev context + * type - io type + * returns: + * true - io_type is supported + * false - io_type is not supported + */ +static bool +raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + switch (io_type) { + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_FLUSH: + return true; + default: + return false; + } + + return false; +} + +/* + * brief: + * raid_bdev_get_io_channel is the get_io_channel function table pointer for + * raid bdev. 
This is used to return the io channel for this raid bdev + * params: + * ctxt - pointer to raid_bdev_ctxt + * returns: + * pointer to io channel for raid bdev + */ +static struct spdk_io_channel * +raid_bdev_get_io_channel(void *ctxt) +{ + struct raid_bdev_ctxt *raid_bdev_ctxt = ctxt; + + return spdk_get_io_channel(&raid_bdev_ctxt->raid_bdev); +} + +/* + * brief: + * raid_bdev_dump_info_json is the function table pointer for raid bdev + * params: + * ctx - pointer to raid_bdev_ctxt + * w - pointer to json context + * returns: + * 0 - success + * non zero - failure + */ +static int +raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct raid_bdev_ctxt *raid_bdev_ctxt = ctx; + struct raid_bdev *raid_bdev; + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_dump_config_json\n"); + assert(raid_bdev_ctxt != NULL); + raid_bdev = &raid_bdev_ctxt->raid_bdev; + + /* Dump the raid bdev configuration related information */ + spdk_json_write_name(w, "raid"); + spdk_json_write_object_begin(w); + spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size); + spdk_json_write_named_uint32(w, "state", raid_bdev->state); + spdk_json_write_named_uint32(w, "raid_level", raid_bdev->raid_level); + spdk_json_write_named_uint32(w, "destruct_called", raid_bdev->destruct_called); + spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs); + spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered); + spdk_json_write_name(w, "base_bdevs_list"); + spdk_json_write_array_begin(w); + for (uint16_t iter = 0; iter < raid_bdev->num_base_bdevs; iter++) { + if (raid_bdev->base_bdev_info[iter].base_bdev) { + spdk_json_write_string(w, raid_bdev->base_bdev_info[iter].base_bdev->name); + } else { + spdk_json_write_null(w); + } + } + spdk_json_write_array_end(w); + spdk_json_write_object_end(w); + + return 0; +} + +/* g_raid_bdev_fn_table is the function table for raid bdev */ +static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = { + .destruct = raid_bdev_destruct, + .submit_request = raid_bdev_submit_request, + .io_type_supported = raid_bdev_io_type_supported, + .get_io_channel = raid_bdev_get_io_channel, + .dump_info_json = raid_bdev_dump_info_json, +}; + +/* + * brief: + * raid_bdev_free is the raid bdev function table function pointer. 
This is + * called on bdev free path + * params: + * none + * returns: + * none + */ +static void +raid_bdev_free(void) +{ + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_free\n"); + for (uint32_t raid_bdev = 0; raid_bdev < g_spdk_raid_config.total_raid_bdev; raid_bdev++) { + if (g_spdk_raid_config.raid_bdev_config[raid_bdev].base_bdev) { + for (uint32_t iter = 0; iter < g_spdk_raid_config.raid_bdev_config[raid_bdev].num_base_bdevs; + iter++) { + free(g_spdk_raid_config.raid_bdev_config[raid_bdev].base_bdev[iter].bdev_name); + } + free(g_spdk_raid_config.raid_bdev_config[raid_bdev].base_bdev); + g_spdk_raid_config.raid_bdev_config[raid_bdev].base_bdev = NULL; + } + free(g_spdk_raid_config.raid_bdev_config[raid_bdev].name); + } + if (g_spdk_raid_config.raid_bdev_config) { + if (g_spdk_raid_config.raid_bdev_config->raid_bdev_ctxt) { + g_spdk_raid_config.raid_bdev_config->raid_bdev_ctxt->raid_bdev.raid_bdev_config = NULL; + } + free(g_spdk_raid_config.raid_bdev_config); + g_spdk_raid_config.raid_bdev_config = NULL; + g_spdk_raid_config.total_raid_bdev = 0; + } +} + +/* + * brief: + * raid_bdev_parse_raid is used to parse the raid bdev from config file based on + * pre-defined raid bdev format in config file. + * Format of config file: + * [RAID1] + * Name raid1 + * StripSize 64 + * NumDevices 2 + * RaidLevel 0 + * Devices Nvme0n1 Nvme1n1 + * + * [RAID2] + * Name raid2 + * StripSize 64 + * NumDevices 3 + * RaidLevel 0 + * Devices Nvme2n1 Nvme3n1 Nvme4n1 + * + * params: + * conf_section - pointer to config section + * returns: + * 0 - success + * non zero - failure + */ +static int +raid_bdev_parse_raid(struct spdk_conf_section *conf_section) +{ + const char *raid_name; + int strip_size; + int num_base_bdevs; + int raid_level; + const char *base_bdev_name; + uint32_t iter; + void *temp_ptr; + struct raid_bdev_config *raid_bdev_config; + + raid_name = spdk_conf_section_get_val(conf_section, "Name"); + if (raid_name == NULL) { + SPDK_ERRLOG("raid_name %s is null\n", raid_name); + return -1; + } + strip_size = spdk_conf_section_get_intval(conf_section, "StripSize"); + if (spdk_u32_is_pow2(strip_size) == false) { + SPDK_ERRLOG("Invalid strip size %d\n", strip_size); + return -1; + } + num_base_bdevs = spdk_conf_section_get_intval(conf_section, "NumDevices"); + if (num_base_bdevs <= 0) { + SPDK_ERRLOG("Invalid base device count %d\n", num_base_bdevs); + return -1; + } + raid_level = spdk_conf_section_get_intval(conf_section, "RaidLevel"); + if (raid_level != 0) { + SPDK_ERRLOG("invalid raid level %d, only raid level 0 is supported\n", raid_level); + return -1; + } + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "%s %d %d %d\n", raid_name, strip_size, num_base_bdevs, + raid_level); + + for (iter = 0; iter < g_spdk_raid_config.total_raid_bdev; iter++) { + if (!strcmp(g_spdk_raid_config.raid_bdev_config[iter].name, raid_name)) { + SPDK_ERRLOG("Duplicate raid bdev name found in config file %s\n", raid_name); + return -1; + } + } + temp_ptr = realloc(g_spdk_raid_config.raid_bdev_config, + sizeof(struct raid_bdev_config) * (g_spdk_raid_config.total_raid_bdev + 1)); + if (temp_ptr == NULL) { + SPDK_ERRLOG("unable to allocate memory\n"); + return -1; + } + + g_spdk_raid_config.raid_bdev_config = temp_ptr; + raid_bdev_config = &g_spdk_raid_config.raid_bdev_config[g_spdk_raid_config.total_raid_bdev]; + memset(raid_bdev_config, 0, sizeof(*raid_bdev_config)); + raid_bdev_config->name = strdup(raid_name); + if (!raid_bdev_config->name) { + SPDK_ERRLOG("unable to allocate memory\n"); + return -1; + } + 
raid_bdev_config->strip_size = strip_size; + raid_bdev_config->num_base_bdevs = num_base_bdevs; + raid_bdev_config->raid_level = raid_level; + g_spdk_raid_config.total_raid_bdev++; + raid_bdev_config->base_bdev = calloc(num_base_bdevs, sizeof(*raid_bdev_config->base_bdev)); + if (raid_bdev_config->base_bdev == NULL) { + SPDK_ERRLOG("unable to allocate memory\n"); + return -1; + } + + for (iter = 0; true; iter++) { + base_bdev_name = spdk_conf_section_get_nmval(conf_section, "Devices", 0, iter); + if (base_bdev_name == NULL) { + break; + } + if (iter >= raid_bdev_config->num_base_bdevs) { + SPDK_ERRLOG("Number of devices mentioned is more than count\n"); + return -1; + } + for (uint32_t iter2 = 0; iter2 < g_spdk_raid_config.total_raid_bdev; iter2++) { + for (uint32_t iter3 = 0; iter3 < g_spdk_raid_config.raid_bdev_config[iter2].num_base_bdevs; + iter3++) { + if (g_spdk_raid_config.raid_bdev_config[iter2].base_bdev[iter3].bdev_name != NULL) { + if (!strcmp(g_spdk_raid_config.raid_bdev_config[iter2].base_bdev[iter3].bdev_name, + base_bdev_name)) { + SPDK_ERRLOG("duplicate base bdev name %s mentioned\n", base_bdev_name); + return -1; + } + } + } + } + raid_bdev_config->base_bdev[iter].bdev_name = strdup(base_bdev_name); + } + + if (iter != raid_bdev_config->num_base_bdevs) { + SPDK_ERRLOG("Number of devices mentioned is less than count\n"); + return -1; + } + return 0; +} + +/* + * brief: + * raid_bdev_parse_config is used to find the raid bdev config section and parse it + * Format of config file: + * params: + * none + * returns: + * 0 - success + * non zero - failure + */ +static int +raid_bdev_parse_config(void) +{ + int ret; + struct spdk_conf_section *conf_section; + + conf_section = spdk_conf_first_section(NULL); + while (conf_section != NULL) { + if (spdk_conf_section_match_prefix(conf_section, "RAID")) { + ret = raid_bdev_parse_raid(conf_section); + if (ret < 0) { + SPDK_ERRLOG("Unable to parse raid bdev section\n"); + return ret; + } + } + conf_section = spdk_conf_next_section(conf_section); + } + + return 0; +} + +/* + * brief: + * raid_bdev_exit is called on raid bdev module exit time by bdev layer + * params: + * none + * returns: + * none + */ +static void +raid_bdev_exit(void) +{ + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_exit\n"); + raid_bdev_free(); +} + +/* + * brief: + * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid + * module + * params: + * none + * returns: + * size of spdk_bdev_io context for raid + */ +static int +raid_bdev_get_ctx_size(void) +{ + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_get_ctx_size\n"); + return sizeof(struct raid_bdev_io); +} + +/* + * brief: + * raid_bdev_can_claim_bdev is the function to check if this base_bdev can be + * claimed by raid bdev or not. + * params: + * bdev_name - represents base bdev name + * raid_bdev_config - pointer to raid bdev config parsed from config file + * base_bdev_slot - if bdev can be claimed, it represents the base_bdev correct + * slot. 
This field is only valid if return value of this function is true + * returns: + * true - if bdev can be claimed + * false - if bdev can't be claimed + */ +static bool +raid_bdev_can_claim_bdev(const char *bdev_name, struct raid_bdev_config **raid_bdev_config, + uint32_t *base_bdev_slot) +{ + bool rv = false; + + for (uint32_t iter1 = 0; iter1 < g_spdk_raid_config.total_raid_bdev && !rv; iter1++) { + for (uint32_t iter2 = 0; iter2 < g_spdk_raid_config.raid_bdev_config[iter1].num_base_bdevs; + iter2++) { + /* + * Check if the base bdev name is part of raid bdev configuration. + * If match is found then return true and the slot information where + * this base bdev should be inserted in raid bdev + */ + if (!strcmp(bdev_name, g_spdk_raid_config.raid_bdev_config[iter1].base_bdev[iter2].bdev_name)) { + *raid_bdev_config = &g_spdk_raid_config.raid_bdev_config[iter1]; + *base_bdev_slot = iter2; + rv = true; + break; + } + } + } + + return rv; +} + + +static struct spdk_bdev_module g_raid_if = { + .name = "raid", + .module_init = raid_bdev_init, + .module_fini = raid_bdev_exit, + .get_ctx_size = raid_bdev_get_ctx_size, + .examine_config = raid_bdev_examine, + .config_text = NULL, + .async_init = false, + .async_fini = false, +}; +SPDK_BDEV_MODULE_REGISTER(&g_raid_if) + +/* + * brief: + * raid_bdev_init is the initialization function for raid bdev module + * params: + * none + * returns: + * 0 - success + * non zero - failure + */ +static int +raid_bdev_init(void) +{ + int ret; + + memset(&g_spdk_raid_config, 0, sizeof(g_spdk_raid_config)); + TAILQ_INIT(&g_spdk_raid_bdev_configured_list); + TAILQ_INIT(&g_spdk_raid_bdev_configuring_list); + TAILQ_INIT(&g_spdk_raid_bdev_list); + TAILQ_INIT(&g_spdk_raid_bdev_offline_list); + + /* Parse config file for raids */ + ret = raid_bdev_parse_config(); + if (ret < 0) { + SPDK_ERRLOG("raid bdev init failed parsing\n"); + raid_bdev_free(); + return ret; + } + + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_init completed successfully\n"); + + return 0; +} + +/* + * brief: + * raid_bdev_remove_base_bdev function is called by below layers when base_bdev + * is removed. This function checks if this base bdev is part of any raid bdev + * or not. If yes, it takes necessary action on that particular raid bdev. 
+ * params:
+ * ctx - pointer to the base bdev that got removed
+ * returns:
+ * none
+ */
+void
+raid_bdev_remove_base_bdev(void *ctx)
+{
+	struct spdk_bdev *base_bdev = ctx;
+	struct raid_bdev *raid_bdev;
+	struct raid_bdev *next_raid_bdev;
+	struct raid_bdev_ctxt *raid_bdev_ctxt;
+	uint16_t iter;
+	bool found = false;
+
+	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_remove_base_bdev\n");
+
+	/* Find the raid_bdev which has claimed this base_bdev */
+	TAILQ_FOREACH_SAFE(raid_bdev, &g_spdk_raid_bdev_list, link_global_list, next_raid_bdev) {
+		for (iter = 0; iter < raid_bdev->num_base_bdevs; iter++) {
+			if (raid_bdev->base_bdev_info[iter].base_bdev == base_bdev) {
+				found = true;
+				break;
+			}
+		}
+		if (found == true) {
+			break;
+		}
+	}
+
+	if (found == false) {
+		SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name);
+		return;
+	}
+
+	assert(raid_bdev != NULL);
+	assert(raid_bdev->base_bdev_info[iter].base_bdev);
+	assert(raid_bdev->base_bdev_info[iter].base_bdev_desc);
+	raid_bdev_ctxt = SPDK_CONTAINEROF(raid_bdev, struct raid_bdev_ctxt, raid_bdev);
+	raid_bdev->base_bdev_info[iter].base_bdev_remove_scheduled = true;
+
+	if (raid_bdev->destruct_called == true && raid_bdev->base_bdev_info[iter].base_bdev != NULL) {
+		/* As the raid bdev is already unregistered, do the cleanup right here */
+		spdk_bdev_module_release_bdev(raid_bdev->base_bdev_info[iter].base_bdev);
+		spdk_bdev_close(raid_bdev->base_bdev_info[iter].base_bdev_desc);
+		raid_bdev->base_bdev_info[iter].base_bdev_desc = NULL;
+		raid_bdev->base_bdev_info[iter].base_bdev = NULL;
+		assert(raid_bdev->num_base_bdevs_discovered);
+		raid_bdev->num_base_bdevs_discovered--;
+		if (raid_bdev->num_base_bdevs_discovered == 0) {
+			/* There are no base bdevs left for this raid, so free the raid device */
+			raid_bdev_cleanup(raid_bdev_ctxt);
+			return;
+		}
+	}
+
+	if (raid_bdev->state == RAID_BDEV_STATE_ONLINE) {
+		/*
+		 * If the raid bdev is online and registered, change the bdev state
+		 * to offline and unregister this raid device. Queue this raid
+		 * device in the offline list
+		 */
+		assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered);
+		TAILQ_REMOVE(&g_spdk_raid_bdev_configured_list, raid_bdev, link_specific_list);
+		raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
+		assert(raid_bdev->num_base_bdevs_discovered);
+		TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_offline_list, raid_bdev, link_specific_list);
+		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev state changing from online to offline\n");
+		spdk_io_device_unregister(&raid_bdev_ctxt->raid_bdev, NULL);
+		spdk_bdev_unregister(&raid_bdev_ctxt->bdev, NULL, NULL);
+	}
+}
+
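+/*
+ * Worked example of the capacity math in raid_bdev_add_base_device() below
+ * (hypothetical numbers): with blocklen = 512 bytes, a configured 64KB strip
+ * becomes strip_size = (64 * 1024) / 512 = 128 blocks (strip_size_shift = 7).
+ * With 4 base bdevs whose smallest blockcnt is 1000000, each bdev contributes
+ * (1000000 >> 7) << 7 = 999936 blocks (truncated to a strip boundary), so
+ * the raid bdev exposes 999936 * 4 = 3999744 blocks.
+ */
+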
+/*
+ * brief:
+ * raid_bdev_add_base_device function is the actual function which either adds
+ * the nvme base device to an existing raid bdev or creates a new raid bdev.
+ * It also claims the base device and keeps the open descriptor.
+ * params:
+ * bdev - pointer to base bdev
+ * returns:
+ * 0 - success
+ * non zero - failure
+ */
+int
+raid_bdev_add_base_device(struct spdk_bdev *bdev)
+{
+	struct raid_bdev_config *raid_bdev_config = NULL;
+	struct raid_bdev_ctxt *raid_bdev_ctxt;
+	struct raid_bdev *raid_bdev;
+	struct spdk_bdev_desc *desc;
+	struct spdk_bdev *raid_bdev_gen;
+	uint32_t blocklen;
+	uint64_t min_blockcnt;
+	uint32_t base_bdev_slot;
+	bool can_claim;
+
+	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_examine %p\n", bdev);
+
+	can_claim = raid_bdev_can_claim_bdev(bdev->name, &raid_bdev_config, &base_bdev_slot);
+
+	if (!can_claim) {
+		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s can't be claimed\n", bdev->name);
+		return -1;
+	}
+	assert(raid_bdev_config);
+
+	if (spdk_bdev_open(bdev, true, raid_bdev_remove_base_bdev, bdev, &desc)) {
+		SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", bdev->name);
+		return -1;
+	}
+
+	if (spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if)) {
+		SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n");
+		spdk_bdev_close(desc);
+		return -1;
+	}
+	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s is claimed\n", bdev->name);
+	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_config->raid_bdev_ctxt %p\n",
+		      raid_bdev_config->raid_bdev_ctxt);
+
+	if (!raid_bdev_config->raid_bdev_ctxt) {
+		/* Allocate the raid_bdev entity if it is not already allocated */
+		raid_bdev_ctxt = calloc(1, sizeof(*raid_bdev_ctxt));
+		if (!raid_bdev_ctxt) {
+			SPDK_ERRLOG("Unable to allocate memory for raid bdev for bdev '%s'\n", bdev->name);
+			spdk_bdev_module_release_bdev(bdev);
+			spdk_bdev_close(desc);
+			return -1;
+		}
+		raid_bdev = &raid_bdev_ctxt->raid_bdev;
+		raid_bdev->num_base_bdevs = raid_bdev_config->num_base_bdevs;
+		raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, sizeof(struct raid_base_bdev_info));
+		if (!raid_bdev->base_bdev_info) {
+			SPDK_ERRLOG("Unable to allocate base bdev info\n");
+			free(raid_bdev_ctxt);
+			spdk_bdev_module_release_bdev(bdev);
+			spdk_bdev_close(desc);
+			return -1;
+		}
+		raid_bdev_config->raid_bdev_ctxt = raid_bdev_ctxt;
+		raid_bdev->strip_size = raid_bdev_config->strip_size;
+		raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
+		raid_bdev->raid_bdev_config = raid_bdev_config;
+		TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_configuring_list, raid_bdev, link_specific_list);
+		TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_list, raid_bdev, link_global_list);
+	} else {
+		raid_bdev = &raid_bdev_config->raid_bdev_ctxt->raid_bdev;
+	}
+
+	assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE);
+	assert(base_bdev_slot < raid_bdev->num_base_bdevs);
+
+	raid_bdev->base_bdev_info[base_bdev_slot].base_bdev = bdev;
+	raid_bdev->base_bdev_info[base_bdev_slot].base_bdev_desc = desc;
+	raid_bdev->num_base_bdevs_discovered++;
+
+	assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);
+
+	if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) {
+		/* Only once the raid bdev config is complete: register the raid bdev
+		 * with the bdev layer, remove this raid bdev from the configuring
+		 * list and insert it into the configured list
+		 */
+		blocklen = raid_bdev->base_bdev_info[0].base_bdev->blocklen;
+		min_blockcnt = raid_bdev->base_bdev_info[0].base_bdev->blockcnt;
+		for (uint32_t iter = 1; iter < raid_bdev->num_base_bdevs; iter++) {
+			/* Calculate the minimum block count across all base bdevs */
+			if (raid_bdev->base_bdev_info[iter].base_bdev->blockcnt < min_blockcnt) {
+				min_blockcnt = raid_bdev->base_bdev_info[iter].base_bdev->blockcnt;
+			}
+
+			/* Check that the blocklen is the same for all base bdevs */
+			if (blocklen != raid_bdev->base_bdev_info[iter].base_bdev->blocklen) {
+				/*
+				 * The assumption is that all the base bdevs of any raid bdev
+				 * should have the same blocklen
+				 */
+				SPDK_ERRLOG("Blocklen of various bdevs not matching\n");
+				raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
+				TAILQ_REMOVE(&g_spdk_raid_bdev_configuring_list, raid_bdev, link_specific_list);
+				TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_offline_list, raid_bdev, link_specific_list);
+				return -1;
+			}
+		}
+		raid_bdev_ctxt = SPDK_CONTAINEROF(raid_bdev, struct raid_bdev_ctxt, raid_bdev);
+		raid_bdev_gen = &raid_bdev_ctxt->bdev;
+		raid_bdev_gen->name = strdup(raid_bdev_config->name);
+		if (!raid_bdev_gen->name) {
+			SPDK_ERRLOG("Unable to allocate name for raid\n");
+			raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
+			TAILQ_REMOVE(&g_spdk_raid_bdev_configuring_list, raid_bdev, link_specific_list);
+			TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_offline_list, raid_bdev, link_specific_list);
+			return -1;
+		}
+		raid_bdev_gen->product_name = "Pooled Device";
+		raid_bdev_gen->write_cache = 0;
+		raid_bdev_gen->blocklen = blocklen;
+		raid_bdev_gen->optimal_io_boundary = 0;
+		raid_bdev_gen->ctxt = raid_bdev_ctxt;
+		raid_bdev_gen->fn_table = &g_raid_bdev_fn_table;
+		raid_bdev_gen->module = &g_raid_if;
+		raid_bdev->strip_size = (raid_bdev->strip_size * 1024) / blocklen;
+		raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size);
+		raid_bdev->blocklen_shift = spdk_u32log2(blocklen);
+
+		/*
+		 * The RAID bdev logic is striping, so take the minimum-block-count
+		 * based approach: the total block count of the raid bdev is the
+		 * number of base bdevs times the minimum block count of any base
+		 * bdev, truncated to a strip boundary
+		 */
+		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "min blockcount %lu, numbasedev %u, strip size shift %u\n",
+			      min_blockcnt,
+			      raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
+		raid_bdev_gen->blockcnt = ((min_blockcnt >> raid_bdev->strip_size_shift) <<
+					   raid_bdev->strip_size_shift) * raid_bdev->num_base_bdevs;
+		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "io device register %p\n", raid_bdev);
+		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "blockcnt %lu, blocklen %u\n", raid_bdev_gen->blockcnt,
+			      raid_bdev_gen->blocklen);
+		if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
+			raid_bdev->state = RAID_BDEV_STATE_ONLINE;
+			spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb,
+						sizeof(struct raid_bdev_io_channel));
+			if (spdk_bdev_register(raid_bdev_gen)) {
+				/*
+				 * If the raid bdev could not be registered with the bdev
+				 * layer, make the raid bdev offline and add it to the
+				 * offline list
+				 */
+				SPDK_ERRLOG("Unable to register pooled bdev\n");
+				spdk_io_device_unregister(raid_bdev, NULL);
+				raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
+				TAILQ_REMOVE(&g_spdk_raid_bdev_configuring_list, raid_bdev, link_specific_list);
+				TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_offline_list, raid_bdev, link_specific_list);
+				return -1;
+			}
+			SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev generic %p\n", raid_bdev_gen);
+			TAILQ_REMOVE(&g_spdk_raid_bdev_configuring_list, raid_bdev, link_specific_list);
+			TAILQ_INSERT_TAIL(&g_spdk_raid_bdev_configured_list, raid_bdev, link_specific_list);
+			SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev is created with name %s, raid_bdev %p\n",
+				      raid_bdev_gen->name, raid_bdev);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * brief:
+ * raid_bdev_examine function is the examine function called by the lower
+ * layers, such as the bdev_nvme layer. It checks whether this base bdev can
+ * be claimed by a raid bdev or not.
+ * params: + * bdev - pointer to base bdev + * returns: + * none + */ +static void +raid_bdev_examine(struct spdk_bdev *bdev) +{ + raid_bdev_add_base_device(bdev); + spdk_bdev_module_examine_done(&g_raid_if); +} + +/* Log component for bdev raid bdev module */ +SPDK_LOG_REGISTER_COMPONENT("bdev_raid", SPDK_LOG_BDEV_RAID) diff --git a/lib/bdev/raid/bdev_raid.h b/lib/bdev/raid/bdev_raid.h new file mode 100644 index 000000000..260649dae --- /dev/null +++ b/lib/bdev/raid/bdev_raid.h @@ -0,0 +1,230 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_BDEV_RAID_INTERNAL_H +#define SPDK_BDEV_RAID_INTERNAL_H + +#include "spdk/bdev_module.h" + +/* + * Raid state describes the state of the raid. This raid bdev can be either in + * configured list or configuring list + */ +enum raid_bdev_state { + /* raid bdev is ready and is seen by upper layers */ + RAID_BDEV_STATE_ONLINE, + + /* raid bdev is configuring, not all underlying bdevs are present */ + RAID_BDEV_STATE_CONFIGURING, + + /* + * In offline state, raid bdev layer will complete all incoming commands without + * submitting to underlying base nvme bdevs + */ + RAID_BDEV_STATE_OFFLINE, + + /* raid bdev max, new states should be added before this */ + RAID_BDEV_MAX +}; + +/* + * raid_base_bdev_info contains information for the base bdevs which are part of some + * raid. This structure contains the per base bdev information. 
Whatever is + * required per base device for raid bdev will be kept here + */ +struct raid_base_bdev_info { + /* pointer to base spdk bdev */ + struct spdk_bdev *base_bdev; + + /* pointer to base bdev descriptor opened by raid bdev */ + struct spdk_bdev_desc *base_bdev_desc; + + /* + * When underlying base device calls the hot plug function on drive removal, + * this flag will be set and later after doing some processing, base device + * descriptor will be closed + */ + bool base_bdev_remove_scheduled; +}; + +/* + * raid_bdev contains the information related to any raid bdev either configured or + * in configuring list + */ +struct raid_bdev { + /* link of raid bdev to link it to configured, configuring or offline list */ + TAILQ_ENTRY(raid_bdev) link_specific_list; + + /* link of raid bdev to link it to global raid bdev list */ + TAILQ_ENTRY(raid_bdev) link_global_list; + + /* pointer to config file entry */ + struct raid_bdev_config *raid_bdev_config; + + /* array of base bdev info */ + struct raid_base_bdev_info *base_bdev_info; + + /* strip size of raid bdev in blocks */ + uint32_t strip_size; + + /* strip size bit shift for optimized calculation */ + uint32_t strip_size_shift; + + /* block length bit shift for optimized calculation */ + uint32_t blocklen_shift; + + /* state of raid bdev */ + enum raid_bdev_state state; + + /* number of base bdevs comprising raid bdev */ + uint16_t num_base_bdevs; + + /* number of base bdevs discovered */ + uint16_t num_base_bdevs_discovered; + + /* Raid Level of this raid bdev */ + uint8_t raid_level; + + /* Set to true if destruct is called for this raid bdev */ + bool destruct_called; +}; + +/* + * raid_bdev_ctxt is the single entity structure for entire bdev which is + * allocated for any raid bdev + */ +struct raid_bdev_ctxt { + /* raid bdev device, this will get registered in bdev layer */ + struct spdk_bdev bdev; + + /* raid_bdev object, io device will be created on this */ + struct raid_bdev raid_bdev; +}; + +/* + * raid_bdev_io is the context part of bdev_io. 
It contains the information
+ * related to bdev_io for a pooled bdev
+ */
+struct raid_bdev_io {
+    /* WaitQ entry, used only in waitq logic */
+    struct spdk_bdev_io_wait_entry waitq_entry;
+
+    /* Original channel for this IO, used in queueing logic */
+    struct spdk_io_channel *ch;
+
+    /* current buffer location, used in queueing logic */
+    uint8_t *buf;
+
+    /* outstanding child completions */
+    uint16_t splits_comp_outstanding;
+
+    /* pending splits yet to happen */
+    uint16_t splits_pending;
+
+    /* status of parent io */
+    bool status;
+};
+
+/*
+ * raid_base_bdev_config is the per base bdev data structure which holds the
+ * information for each base bdev gathered while parsing the config
+ */
+struct raid_base_bdev_config {
+    /* base bdev name from config file */
+    char *bdev_name;
+};
+
+/*
+ * raid_bdev_config contains the raid bdev config related information after
+ * parsing the config file
+ */
+struct raid_bdev_config {
+    /* base bdev config per underlying bdev */
+    struct raid_base_bdev_config *base_bdev;
+
+    /* Points to already created raid bdev */
+    struct raid_bdev_ctxt *raid_bdev_ctxt;
+
+    char *name;
+
+    /* strip size of this raid bdev in kilobytes */
+    uint32_t strip_size;
+
+    /* number of base bdevs */
+    uint8_t num_base_bdevs;
+
+    /* raid level */
+    uint8_t raid_level;
+};
+
+/*
+ * raid_config is the top level structure representing the raid bdev config as read
+ * from config file for all raids
+ */
+struct raid_config {
+    /* raid bdev config entries from the config file */
+    struct raid_bdev_config *raid_bdev_config;
+
+    /* total number of raid bdevs from the config file */
+    uint8_t total_raid_bdev;
+};
+
+/*
+ * raid_bdev_io_channel is the context of spdk_io_channel for raid bdev device. It
+ * contains the relationship of raid bdev io channel with base bdev io channels.
+ */
+struct raid_bdev_io_channel {
+    /* Array of IO channels of base bdevs */
+    struct spdk_io_channel **base_bdevs_io_channel;
+
+    /* raid bdev context pointer */
+    struct raid_bdev_ctxt *raid_bdev_ctxt;
+};
+
+/* TAILQ heads for the various raid bdev lists */
+TAILQ_HEAD(spdk_raid_configured_tailq, raid_bdev);
+TAILQ_HEAD(spdk_raid_configuring_tailq, raid_bdev);
+TAILQ_HEAD(spdk_raid_all_tailq, raid_bdev);
+TAILQ_HEAD(spdk_raid_offline_tailq, raid_bdev);
+
+extern struct spdk_raid_configured_tailq g_spdk_raid_bdev_configured_list;
+extern struct spdk_raid_configuring_tailq g_spdk_raid_bdev_configuring_list;
+extern struct spdk_raid_all_tailq g_spdk_raid_bdev_list;
+extern struct spdk_raid_offline_tailq g_spdk_raid_bdev_offline_list;
+extern struct raid_config g_spdk_raid_config;
+
+
+void raid_bdev_remove_base_bdev(void *ctx);
+int raid_bdev_add_base_device(struct spdk_bdev *bdev);
+
+#endif // SPDK_BDEV_RAID_INTERNAL_H
diff --git a/lib/bdev/raid/bdev_raid_rpc.c b/lib/bdev/raid/bdev_raid_rpc.c
new file mode 100644
index 000000000..8c6fc0ff9
--- /dev/null
+++ b/lib/bdev/raid/bdev_raid_rpc.c
@@ -0,0 +1,632 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/rpc.h"
+#include "spdk/bdev.h"
+#include "bdev_raid.h"
+#include "spdk/util.h"
+#include "spdk/string.h"
+#include "spdk_internal/log.h"
+#include "spdk/env.h"
+
+#define RPC_MAX_BASE_BDEVS 255
+
+static void raid_bdev_config_destroy(struct raid_bdev_config *raid_bdev_config);
+
+SPDK_LOG_REGISTER_COMPONENT("raidrpc", SPDK_LOG_RAID_RPC)
+
+/*
+ * brief:
+ * check_raid_bdev_present function tells whether the raid bdev with the given
+ * name already exists.
+ * params:
+ * raid_bdev_name - raid bdev name
+ * returns:
+ * NULL - raid bdev not present
+ * non NULL - raid bdev present; pointer to its raid_bdev_ctxt
+ */
+static struct raid_bdev_ctxt *
+check_raid_bdev_present(char *raid_bdev_name)
+{
+    struct raid_bdev *raid_bdev;
+    struct raid_bdev_ctxt *raid_bdev_ctxt;
+
+    TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_list, link_global_list) {
+        raid_bdev_ctxt = SPDK_CONTAINEROF(raid_bdev, struct raid_bdev_ctxt, raid_bdev);
+        if (strcmp(raid_bdev_ctxt->bdev.name, raid_bdev_name) == 0) {
+            /* raid bdev found */
+            return raid_bdev_ctxt;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Input structure for get_raid_bdevs RPC
+ */
+struct rpc_get_raid_bdevs {
+    /* category - all or online or configuring or offline */
+    char *category;
+};
+
+/*
+ * brief:
+ * free_rpc_get_raid_bdevs function frees RPC get_raid_bdevs related parameters
+ * params:
+ * req - pointer to RPC request
+ * returns:
+ * none
+ */
+static void
+free_rpc_get_raid_bdevs(struct rpc_get_raid_bdevs *req)
+{
+    free(req->category);
+}
+
+/*
+ * Decoder object for RPC get_raid_bdevs
+ */
+static const struct spdk_json_object_decoder rpc_get_raid_bdevs_decoders[] = {
+    {"category", offsetof(struct rpc_get_raid_bdevs, category), spdk_json_decode_string},
+};
+
+/*
+ * brief:
+ * spdk_rpc_get_raid_bdevs function is the RPC for get_raid_bdevs. It is used to
+ * list all the raid bdev names based on the requested category. The category must
+ * be one of "all", "online", "configuring" or "offline". "all" means all the raid
+ * bdevs whether they are online, configuring or offline. "online" is a raid bdev
+ * which is registered with the bdev layer. "configuring" is a raid bdev whose full
+ * configuration has not been discovered yet. "offline" is a raid bdev which is not
+ * currently registered with the bdev layer because it has encountered an error or
+ * the user has requested to take the raid bdev offline.
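+ *
+ * Example request (illustrative only; the returned name is hypothetical):
+ *   {"method": "get_raid_bdevs", "params": {"category": "online"}}
+ * The reply is a JSON array of raid bdev names, e.g. ["Raid0"].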
+ * params:
+ * request - pointer to json rpc request
+ * params - pointer to request parameters
+ * returns:
+ * none
+ */
+static void
+spdk_rpc_get_raid_bdevs(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params)
+{
+    struct rpc_get_raid_bdevs req = {};
+    struct spdk_json_write_ctx *w;
+    struct raid_bdev *raid_bdev;
+    struct raid_bdev_ctxt *raid_bdev_ctxt;
+
+    if (spdk_json_decode_object(params, rpc_get_raid_bdevs_decoders,
+                                SPDK_COUNTOF(rpc_get_raid_bdevs_decoders),
+                                &req)) {
+        spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+        return;
+    }
+
+    if (!(strcmp(req.category, "all") == 0 ||
+          strcmp(req.category, "online") == 0 ||
+          strcmp(req.category, "configuring") == 0 ||
+          strcmp(req.category, "offline") == 0)) {
+        spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+        free_rpc_get_raid_bdevs(&req);
+        return;
+    }
+
+    w = spdk_jsonrpc_begin_result(request);
+    if (w == NULL) {
+        free_rpc_get_raid_bdevs(&req);
+        return;
+    }
+
+    spdk_json_write_array_begin(w);
+
+    /* Get raid bdev list based on the category requested */
+    if (strcmp(req.category, "all") == 0) {
+        TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_list, link_global_list) {
+            raid_bdev_ctxt = SPDK_CONTAINEROF(raid_bdev, struct raid_bdev_ctxt, raid_bdev);
+            spdk_json_write_string(w, raid_bdev_ctxt->bdev.name);
+        }
+    } else if (strcmp(req.category, "online") == 0) {
+        TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_configured_list, link_specific_list) {
+            raid_bdev_ctxt = SPDK_CONTAINEROF(raid_bdev, struct raid_bdev_ctxt, raid_bdev);
+            spdk_json_write_string(w, raid_bdev_ctxt->bdev.name);
+        }
+    } else if (strcmp(req.category, "configuring") == 0) {
+        TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_configuring_list, link_specific_list) {
+            raid_bdev_ctxt = SPDK_CONTAINEROF(raid_bdev, struct raid_bdev_ctxt, raid_bdev);
+            spdk_json_write_string(w, raid_bdev_ctxt->bdev.name);
+        }
+    } else {
+        TAILQ_FOREACH(raid_bdev, &g_spdk_raid_bdev_offline_list, link_specific_list) {
+            raid_bdev_ctxt = SPDK_CONTAINEROF(raid_bdev, struct raid_bdev_ctxt, raid_bdev);
+            spdk_json_write_string(w, raid_bdev_ctxt->bdev.name);
+        }
+    }
+    spdk_json_write_array_end(w);
+    spdk_jsonrpc_end_result(request, w);
+    free_rpc_get_raid_bdevs(&req);
+}
+SPDK_RPC_REGISTER("get_raid_bdevs", spdk_rpc_get_raid_bdevs, SPDK_RPC_RUNTIME)
+
+/*
+ * Base bdevs in RPC construct_raid_bdev
+ */
+struct rpc_construct_raid_base_bdevs {
+    /* Number of base bdevs */
+    size_t num_base_bdevs;
+
+    /* List of base bdev names */
+    char *base_bdevs[RPC_MAX_BASE_BDEVS];
+};
+
+/*
+ * Input structure for RPC construct_raid_bdev
+ */
+struct rpc_construct_raid_bdev {
+    /* Raid bdev name */
+    char *name;
+
+    /* RAID strip size */
+    uint32_t strip_size;
+
+    /* RAID level; uint32_t so that it matches the uint32 JSON decoder below */
+    uint32_t raid_level;
+
+    /* Base bdevs information */
+    struct rpc_construct_raid_base_bdevs base_bdevs;
+};
+
+/*
+ * brief:
+ * free_rpc_construct_raid_bdev function frees RPC construct_raid_bdev related parameters
+ * params:
+ * req - pointer to RPC request
+ * returns:
+ * none
+ */
+static void
+free_rpc_construct_raid_bdev(struct rpc_construct_raid_bdev *req)
+{
+    free(req->name);
+    for (size_t iter = 0; iter < req->base_bdevs.num_base_bdevs; iter++) {
+        free(req->base_bdevs.base_bdevs[iter]);
+    }
+}
+
+/*
+ * Decoder function for RPC construct_raid_bdev to decode base bdevs list
+ */
+static int
+decode_base_bdevs(const struct spdk_json_val *val, void *out)
+{
+    struct rpc_construct_raid_base_bdevs *base_bdevs = out;
+    return spdk_json_decode_array(val, spdk_json_decode_string, base_bdevs->base_bdevs,
+                                  RPC_MAX_BASE_BDEVS, &base_bdevs->num_base_bdevs, sizeof(char *));
+}
+
+/*
+ * Decoder object for RPC construct_raid_bdev
+ */
+static const struct spdk_json_object_decoder rpc_construct_raid_bdev_decoders[] = {
+    {"name", offsetof(struct rpc_construct_raid_bdev, name), spdk_json_decode_string},
+    {"strip_size", offsetof(struct rpc_construct_raid_bdev, strip_size), spdk_json_decode_uint32},
+    {"raid_level", offsetof(struct rpc_construct_raid_bdev, raid_level), spdk_json_decode_uint32},
+    {"base_bdevs", offsetof(struct rpc_construct_raid_bdev, base_bdevs), decode_base_bdevs},
+};
+
+/*
+ * brief:
+ * raid_bdev_config_cleanup function frees the most recently added raid bdev
+ * entry in the global configuration array and shrinks the array by one
+ * params:
+ * none
+ * returns:
+ * none
+ */
+static void
+raid_bdev_config_cleanup(void)
+{
+    void *temp_ptr;
+
+    temp_ptr = realloc(g_spdk_raid_config.raid_bdev_config,
+                       sizeof(struct raid_bdev_config) * (g_spdk_raid_config.total_raid_bdev - 1));
+    if (temp_ptr != NULL) {
+        g_spdk_raid_config.raid_bdev_config = temp_ptr;
+    } else {
+        SPDK_ERRLOG("Config memory allocation failed\n");
+        assert(0);
+    }
+    g_spdk_raid_config.total_raid_bdev--;
+}
+
+/*
+ * brief:
+ * check_and_remove_raid_bdev function frees the base bdev descriptors, releases
+ * the claims on the base bdevs and frees the raid bdev. It is used to clean up
+ * when the raid bdev could not be created successfully during the construct RPC
+ * params:
+ * raid_bdev_config - pointer to raid_bdev_config structure
+ * returns:
+ * none
+ */
+static void
+check_and_remove_raid_bdev(struct raid_bdev_config *raid_bdev_config)
+{
+    struct raid_bdev *raid_bdev;
+    struct raid_bdev_ctxt *raid_bdev_ctxt;
+
+    /* Get the raid bdev structure if it was allocated */
+    raid_bdev_ctxt = raid_bdev_config->raid_bdev_ctxt;
+    if (raid_bdev_ctxt == NULL) {
+        return;
+    }
+
+    /*
+     * The raid bdev should be in configuring state as this function is used to
+     * clean up after an unsuccessful construction of the raid bdev
+     */
+    assert(raid_bdev_ctxt->raid_bdev.state == RAID_BDEV_STATE_CONFIGURING);
+    raid_bdev = &raid_bdev_ctxt->raid_bdev;
+    for (uint32_t iter = 0; iter < raid_bdev->num_base_bdevs; iter++) {
+        assert(raid_bdev->base_bdev_info != NULL);
+        if (raid_bdev->base_bdev_info[iter].base_bdev) {
+            /* Release base bdev related resources */
+            spdk_bdev_module_release_bdev(raid_bdev->base_bdev_info[iter].base_bdev);
+            spdk_bdev_close(raid_bdev->base_bdev_info[iter].base_bdev_desc);
+            raid_bdev->base_bdev_info[iter].base_bdev_desc = NULL;
+            raid_bdev->base_bdev_info[iter].base_bdev = NULL;
+            assert(raid_bdev->num_base_bdevs_discovered);
+            raid_bdev->num_base_bdevs_discovered--;
+        }
+    }
+    /* Free the raid bdev */
+    assert(raid_bdev->num_base_bdevs_discovered == 0);
+    TAILQ_REMOVE(&g_spdk_raid_bdev_configuring_list, raid_bdev, link_specific_list);
+    TAILQ_REMOVE(&g_spdk_raid_bdev_list, raid_bdev, link_global_list);
+    free(raid_bdev->base_bdev_info);
+    free(raid_bdev_ctxt);
+    raid_bdev_config->raid_bdev_ctxt = NULL;
+}
+
+/*
+ * brief:
+ * spdk_rpc_construct_raid_bdev function is the RPC for construct_raid_bdev. It
+ * takes the raid bdev name, raid level, strip size in KB and a list of base bdev
+ * names as input.
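+ *
+ * Example request (illustrative only; all names are hypothetical):
+ *   {"method": "construct_raid_bdev", "params": {"name": "Raid0",
+ *    "strip_size": 64, "raid_level": 0,
+ *    "base_bdevs": ["Nvme0n1", "Nvme1n1", "Nvme2n1", "Nvme3n1"]}}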
+ * params:
+ * request - pointer to json rpc request
+ * params - pointer to request parameters
+ * returns:
+ * none
+ */
+static void
+spdk_rpc_construct_raid_bdev(struct spdk_jsonrpc_request *request,
+                             const struct spdk_json_val *params)
+{
+    struct rpc_construct_raid_bdev req = {};
+    struct spdk_json_write_ctx *w;
+    struct raid_bdev_ctxt *raid_bdev_ctxt;
+    void *temp_ptr;
+    struct raid_base_bdev_config *base_bdevs;
+    struct raid_bdev_config *raid_bdev_config;
+    struct spdk_bdev *base_bdev;
+
+    if (spdk_json_decode_object(params, rpc_construct_raid_bdev_decoders,
+                                SPDK_COUNTOF(rpc_construct_raid_bdev_decoders),
+                                &req)) {
+        spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+        return;
+    }
+
+    /* Fail the command if a raid bdev with this name is already present */
+    raid_bdev_ctxt = check_raid_bdev_present(req.name);
+    if (raid_bdev_ctxt != NULL) {
+        spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+                                         "raid bdev already present");
+        free_rpc_construct_raid_bdev(&req);
+        return;
+    }
+
+    /* Fail the command if the input raid level is other than 0 */
+    if (req.raid_level != 0) {
+        spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "invalid raid level");
+        free_rpc_construct_raid_bdev(&req);
+        return;
+    }
+
+    if (spdk_u32_is_pow2(req.strip_size) == false) {
+        spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "invalid strip size");
+        free_rpc_construct_raid_bdev(&req);
+        return;
+    }
+
+    base_bdevs = calloc(req.base_bdevs.num_base_bdevs, sizeof(struct raid_base_bdev_config));
+    if (base_bdevs == NULL) {
+        spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, spdk_strerror(ENOMEM));
+        free_rpc_construct_raid_bdev(&req);
+        return;
+    }
+
+    /* Insert the new raid bdev config entry */
+    temp_ptr = realloc(g_spdk_raid_config.raid_bdev_config,
+                       sizeof(struct raid_bdev_config) * (g_spdk_raid_config.total_raid_bdev + 1));
+    if (temp_ptr == NULL) {
+        free(base_bdevs);
+        spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, spdk_strerror(ENOMEM));
+        free_rpc_construct_raid_bdev(&req);
+        return;
+    }
+    g_spdk_raid_config.raid_bdev_config = temp_ptr;
+    /* realloc may have moved the array, so re-point each raid bdev at its config */
+    for (size_t iter = 0; iter < g_spdk_raid_config.total_raid_bdev; iter++) {
+        if (g_spdk_raid_config.raid_bdev_config[iter].raid_bdev_ctxt != NULL) {
+            g_spdk_raid_config.raid_bdev_config[iter].raid_bdev_ctxt->raid_bdev.raid_bdev_config =
+                &g_spdk_raid_config.raid_bdev_config[iter];
+        }
+    }
+    raid_bdev_config = &g_spdk_raid_config.raid_bdev_config[g_spdk_raid_config.total_raid_bdev];
+    memset(raid_bdev_config, 0, sizeof(*raid_bdev_config));
+    raid_bdev_config->name = req.name;
+    raid_bdev_config->strip_size = req.strip_size;
+    raid_bdev_config->num_base_bdevs = req.base_bdevs.num_base_bdevs;
+    raid_bdev_config->raid_level = req.raid_level;
+    g_spdk_raid_config.total_raid_bdev++;
+    raid_bdev_config->base_bdev = base_bdevs;
+    for (size_t iter = 0; iter < raid_bdev_config->num_base_bdevs; iter++) {
+        raid_bdev_config->base_bdev[iter].bdev_name = req.base_bdevs.base_bdevs[iter];
+    }
+
+    for (size_t iter = 0; iter < raid_bdev_config->num_base_bdevs; iter++) {
+        /* Check if the base_bdev exists already; if not, fail the command */
+        base_bdev = spdk_bdev_get_by_name(req.base_bdevs.base_bdevs[iter]);
+        if (base_bdev == NULL) {
+            check_and_remove_raid_bdev(&g_spdk_raid_config.raid_bdev_config[g_spdk_raid_config.total_raid_bdev -
+                                       1]);
+            raid_bdev_config_cleanup();
+            free(base_bdevs);
+            spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "base bdev not found");
+            free_rpc_construct_raid_bdev(&req);
+            return;
+        }
+
+        /*
+         * Try to add the base_bdev to this raid bdev; if it cannot be added,
+         * fail the command. This can happen when the base_bdev is already
+         * claimed by another module
+         */
+        if (raid_bdev_add_base_device(base_bdev)) {
+            check_and_remove_raid_bdev(&g_spdk_raid_config.raid_bdev_config[g_spdk_raid_config.total_raid_bdev -
+                                       1]);
+            raid_bdev_config_cleanup();
+            free(base_bdevs);
+            spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+                                             "base bdev can't be added: either memory allocation failed or the bdev could not be claimed");
+            free_rpc_construct_raid_bdev(&req);
+            return;
+        }
+    }
+
+    w = spdk_jsonrpc_begin_result(request);
+    if (w == NULL) {
+        return;
+    }
+
+    spdk_json_write_bool(w, true);
+    spdk_jsonrpc_end_result(request, w);
+}
+SPDK_RPC_REGISTER("construct_raid_bdev", spdk_rpc_construct_raid_bdev, SPDK_RPC_RUNTIME)
+
+/*
+ * Input structure for RPC destroy_raid_bdev
+ */
+struct rpc_destroy_raid_bdev {
+    /* raid bdev name */
+    char *name;
+};
+
+/*
+ * brief:
+ * free_rpc_destroy_raid_bdev function frees RPC destroy_raid_bdev related parameters
+ * params:
+ * req - pointer to RPC request
+ * returns:
+ * none
+ */
+static void
+free_rpc_destroy_raid_bdev(struct rpc_destroy_raid_bdev *req)
+{
+    free(req->name);
+}
+
+/*
+ * Decoder object for RPC destroy_raid_bdev
+ */
+static const struct spdk_json_object_decoder rpc_destroy_raid_bdev_decoders[] = {
+    {"name", offsetof(struct rpc_destroy_raid_bdev, name), spdk_json_decode_string},
+};
+
+/*
+ * brief:
+ * Since destroying a raid bdev is an asynchronous operation, this function
+ * checks whether the raid bdev still exists. If it does, it schedules an event
+ * and checks again later; otherwise it proceeds with the config cleanup
+ * params:
+ * arg - pointer to raid bdev cfg
+ * returns:
+ * none
+ */
+static void
+raid_bdev_config_destroy_check_raid_bdev_exists(void *arg)
+{
+    struct raid_bdev_config *raid_cfg = arg;
+
+    assert(raid_cfg != NULL);
+    if (raid_cfg->raid_bdev_ctxt != NULL) {
+        /* If the raid bdev still exists, schedule an event and come back later */
+        spdk_thread_send_msg(spdk_get_thread(), raid_bdev_config_destroy_check_raid_bdev_exists, raid_cfg);
+        return;
+    } else {
+        /* If the raid bdev does not exist any more, go for the raid bdev config cleanup */
+        raid_bdev_config_destroy(raid_cfg);
+    }
+}
+
+/*
+ * brief:
+ * This function destroys the given raid bdev config and compacts the global
+ * config array
+ * params:
+ * raid_cfg - pointer to the raid bdev config to destroy
+ * returns:
+ * none
+ */
+static void
+raid_bdev_config_destroy(struct raid_bdev_config *raid_cfg)
+{
+    void *temp_ptr;
+    uint8_t iter;
+    struct raid_bdev_config *raid_cfg_next;
+    uint8_t slot;
+
+    assert(raid_cfg != NULL);
+    if (raid_cfg->raid_bdev_ctxt != NULL) {
+        /*
+         * If a raid bdev exists for this config, wait for the raid bdev to get
+         * destroyed and come back later
+         */
+        spdk_thread_send_msg(spdk_get_thread(), raid_bdev_config_destroy_check_raid_bdev_exists, raid_cfg);
+        return;
+    }
+
+    /* Destroy the raid bdev config and clean up */
+    for (uint8_t iter2 = 0; iter2 < raid_cfg->num_base_bdevs; iter2++) {
+        free(raid_cfg->base_bdev[iter2].bdev_name);
+    }
+    free(raid_cfg->base_bdev);
+    free(raid_cfg->name);
+    slot = raid_cfg - g_spdk_raid_config.raid_bdev_config;
+    assert(slot < g_spdk_raid_config.total_raid_bdev);
+    if (slot != g_spdk_raid_config.total_raid_bdev - 1) {
+        /* Shift the remaining config entries down to fill the freed slot */
+        iter = slot;
+        while (iter < g_spdk_raid_config.total_raid_bdev - 1) {
+            raid_cfg = &g_spdk_raid_config.raid_bdev_config[iter];
+            raid_cfg_next = &g_spdk_raid_config.raid_bdev_config[iter + 1];
+            raid_cfg->base_bdev = raid_cfg_next->base_bdev;
+            raid_cfg->raid_bdev_ctxt = raid_cfg_next->raid_bdev_ctxt;
+            raid_cfg->name = raid_cfg_next->name;
+            raid_cfg->strip_size = raid_cfg_next->strip_size;
+            raid_cfg->num_base_bdevs = raid_cfg_next->num_base_bdevs;
+            raid_cfg->raid_level = raid_cfg_next->raid_level;
+            iter++;
+        }
+    }
+    temp_ptr = realloc(g_spdk_raid_config.raid_bdev_config,
+                       sizeof(struct raid_bdev_config) * (g_spdk_raid_config.total_raid_bdev - 1));
+    if (temp_ptr != NULL) {
+        g_spdk_raid_config.raid_bdev_config = temp_ptr;
+        g_spdk_raid_config.total_raid_bdev--;
+        /* realloc may have moved the array, so re-point each raid bdev at its config */
+        for (iter = 0; iter < g_spdk_raid_config.total_raid_bdev; iter++) {
+            if (g_spdk_raid_config.raid_bdev_config[iter].raid_bdev_ctxt != NULL) {
+                g_spdk_raid_config.raid_bdev_config[iter].raid_bdev_ctxt->raid_bdev.raid_bdev_config =
+                    &g_spdk_raid_config.raid_bdev_config[iter];
+            }
+        }
+    } else {
+        if (g_spdk_raid_config.total_raid_bdev == 1) {
+            g_spdk_raid_config.total_raid_bdev--;
+            g_spdk_raid_config.raid_bdev_config = NULL;
+        } else {
+            SPDK_ERRLOG("Config memory allocation failed\n");
+            assert(0);
+        }
+    }
+}
+
+/*
+ * brief:
+ * spdk_rpc_destroy_raid_bdev function is the RPC for destroy_raid_bdev. It takes
+ * the raid bdev name as input and destroys that raid bdev, freeing the base bdev
+ * resources.
+ * params:
+ * request - pointer to json rpc request
+ * params - pointer to request parameters
+ * returns:
+ * none
+ */
+static void
+spdk_rpc_destroy_raid_bdev(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params)
+{
+    struct rpc_destroy_raid_bdev req = {};
+    struct spdk_json_write_ctx *w;
+    struct raid_bdev_config *raid_bdev_config = NULL;
+    struct spdk_bdev *base_bdev;
+
+    if (spdk_json_decode_object(params, rpc_destroy_raid_bdev_decoders,
+                                SPDK_COUNTOF(rpc_destroy_raid_bdev_decoders),
+                                &req)) {
+        spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, "Invalid parameters");
+        return;
+    }
+
+    /* Find the raid bdev config for this raid bdev */
+    for (uint32_t iter = 0; iter < g_spdk_raid_config.total_raid_bdev; iter++) {
+        if (strcmp(g_spdk_raid_config.raid_bdev_config[iter].name, req.name) == 0) {
+            raid_bdev_config = &g_spdk_raid_config.raid_bdev_config[iter];
+            break;
+        }
+    }
+
+    if (raid_bdev_config == NULL) {
+        spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+                                         "raid bdev name not found");
+        free_rpc_destroy_raid_bdev(&req);
+        return;
+    }
+
+    /* Remove all the base bdevs from this raid bdev before destroying the raid bdev */
+    for (uint32_t iter = 0; iter < raid_bdev_config->num_base_bdevs; iter++) {
+        base_bdev = spdk_bdev_get_by_name(raid_bdev_config->base_bdev[iter].bdev_name);
+        if (base_bdev != NULL) {
+            raid_bdev_remove_base_bdev(base_bdev);
+        }
+    }
+
+    /*
+     * Ask to destroy the raid bdev; it is actually destroyed only once the
+     * underlying cleanup is done
+     */
+    raid_bdev_config_destroy(raid_bdev_config);
+
+    w = spdk_jsonrpc_begin_result(request);
+    if (w == NULL) {
+        free_rpc_destroy_raid_bdev(&req);
+        return;
+    }
+
+    spdk_json_write_bool(w, true);
+    spdk_jsonrpc_end_result(request, w);
+    free_rpc_destroy_raid_bdev(&req);
+}
+SPDK_RPC_REGISTER("destroy_raid_bdev", spdk_rpc_destroy_raid_bdev, SPDK_RPC_RUNTIME)
diff --git a/mk/spdk.modules.mk b/mk/spdk.modules.mk
index 528a7a973..ef9e8c515 100644
--- a/mk/spdk.modules.mk
+++ b/mk/spdk.modules.mk
@@ -57,6 +57,10 @@ BLOCKDEV_MODULES_LIST += bdev_rbd
 BLOCKDEV_MODULES_DEPS += -lrados -lrbd
 endif
 
+ifeq ($(CONFIG_RAID),y)
+BLOCKDEV_MODULES_LIST += vbdev_raid
+endif
+
 ifeq ($(CONFIG_PMDK),y)
 BLOCKDEV_MODULES_LIST += bdev_pmem
 BLOCKDEV_MODULES_DEPS += -lpmemblk
diff --git a/scripts/rpc.py b/scripts/rpc.py
index e1008f2fa..2f1bb617b 100755
--- a/scripts/rpc.py
+++ b/scripts/rpc.py
@@ -960,6 +960,45 @@ if __name__ == "__main__":
     p.add_argument('-l', '--lvs-name', help='lvol store name', required=False)
     p.set_defaults(func=get_lvol_stores)
 
+    @call_cmd
+    def get_raid_bdevs(args):
+        print_array(rpc.bdev.get_raid_bdevs(args.client,
+                                            category=args.category))
+
+    p = subparsers.add_parser('get_raid_bdevs', help="""This is used to list all the raid bdev names based on the input category
+    requested. Category must be one of 'all', 'online', 'configuring' or 'offline'. 'all' means all the raid bdevs whether
+    they are online, configuring or offline. 'online' is a raid bdev which is registered with the bdev layer. 'configuring'
+    is a raid bdev whose full configuration has not been discovered yet. 'offline' is a raid bdev which is not currently
+    registered with the bdev layer because it has encountered an error or the user has requested to take the raid bdev offline""")
+    p.add_argument('category', help='all or online or configuring or offline')
+    p.set_defaults(func=get_raid_bdevs)
+
+    @call_cmd
+    def construct_raid_bdev(args):
+        base_bdevs = []
+        for u in args.base_bdevs.strip().split(" "):
+            base_bdevs.append(u)
+
+        rpc.bdev.construct_raid_bdev(args.client,
+                                     name=args.name,
+                                     strip_size=args.strip_size,
+                                     raid_level=args.raid_level,
+                                     base_bdevs=base_bdevs)
+    p = subparsers.add_parser('construct_raid_bdev', help='Construct new raid bdev')
+    p.add_argument('-n', '--name', help='raid bdev name', required=True)
+    p.add_argument('-s', '--strip-size', help='strip size in KB', type=int, required=True)
+    p.add_argument('-r', '--raid-level', help='raid level, only raid level 0 is supported', type=int, required=True)
+    p.add_argument('-b', '--base-bdevs', help='base bdev names, whitespace separated list in quotes', required=True)
+    p.set_defaults(func=construct_raid_bdev)
+
+    @call_cmd
+    def destroy_raid_bdev(args):
+        rpc.bdev.destroy_raid_bdev(args.client,
+                                   name=args.name)
+    p = subparsers.add_parser('destroy_raid_bdev', help='Destroy existing raid bdev')
+    p.add_argument('name', help='raid bdev name')
+    p.set_defaults(func=destroy_raid_bdev)
+
     # split
     @call_cmd
     def construct_split_vbdev(args):
diff --git a/scripts/rpc/bdev.py b/scripts/rpc/bdev.py
index 7eb0c8ae7..93487c7c9 100755
--- a/scripts/rpc/bdev.py
+++ b/scripts/rpc/bdev.py
@@ -74,6 +74,49 @@ def delete_null_bdev(client, name):
     return client.call('delete_null_bdev', params)
 
 
+def get_raid_bdevs(client, category):
+    """Get list of raid bdevs based on category
+
+    Args:
+        category: one of 'all', 'online', 'configuring' or 'offline'
+
+    Returns:
+        List of raid bdev names
+    """
+    params = {'category': category}
+    return client.call('get_raid_bdevs', params)
+
+
+def construct_raid_bdev(client, name, strip_size, raid_level, base_bdevs):
+    """Construct a pooled (raid) device
+
+    Args:
+        name: user defined raid bdev name
+        strip_size: strip size of the raid bdev in KB; must be a power of two, e.g. 8, 16, 32, 64, 128, 256, 512 or 1024
+        raid_level: raid level of the raid bdev; only 0 is currently supported
+        base_bdevs: list of base bdev names, e.g. ['Nvme0n1', 'Nvme1n1', 'Nvme2n1']
+
+    Returns:
+        None
+    """
+    params = {'name': name, 'strip_size': strip_size, 'raid_level': raid_level, 'base_bdevs': base_bdevs}
+
+    return client.call('construct_raid_bdev', params)
+
+
+def destroy_raid_bdev(client, name):
+    """Destroy a pooled (raid) device
+
+    Args:
+        name: raid bdev name
+
+    Returns:
+        None
+    """
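+    # Illustrative call (the bdev name is hypothetical):
+    #   destroy_raid_bdev(client, name='Raid0')
+    # sends {"method": "destroy_raid_bdev", "params": {"name": "Raid0"}}
+    # over the JSON-RPC connection.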
params = {'name': name} + return client.call('destroy_raid_bdev', params) + + def construct_aio_bdev(client, filename, name, block_size=None): """Construct a Linux AIO block device. diff --git a/test/unit/lib/bdev/Makefile b/test/unit/lib/bdev/Makefile index d105e8450..3721b5eea 100644 --- a/test/unit/lib/bdev/Makefile +++ b/test/unit/lib/bdev/Makefile @@ -34,7 +34,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -DIRS-y = bdev.c part.c scsi_nvme.c gpt vbdev_lvol.c mt +DIRS-y = bdev.c part.c scsi_nvme.c gpt vbdev_lvol.c mt bdev_raid.c DIRS-$(CONFIG_PMDK) += pmem diff --git a/test/unit/lib/bdev/bdev_raid.c/.gitignore b/test/unit/lib/bdev/bdev_raid.c/.gitignore new file mode 100644 index 000000000..98d1a166e --- /dev/null +++ b/test/unit/lib/bdev/bdev_raid.c/.gitignore @@ -0,0 +1 @@ +bdev_raid_ut diff --git a/test/unit/lib/bdev/bdev_raid.c/Makefile b/test/unit/lib/bdev/bdev_raid.c/Makefile new file mode 100644 index 000000000..239f72187 --- /dev/null +++ b/test/unit/lib/bdev/bdev_raid.c/Makefile @@ -0,0 +1,56 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk + +SPDK_LIB_LIST = log + +CFLAGS += -I$(SPDK_ROOT_DIR)/test +CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev +LIBS += $(SPDK_LIB_LINKER_ARGS) +LIBS += -lcunit + +APP = bdev_raid_ut +C_SRCS = bdev_raid_ut.c + +all: $(APP) + +$(APP): $(OBJS) $(SPDK_LIB_FILES) + $(LINK_C) + +clean: + $(CLEAN_C) $(APP) + +include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk diff --git a/test/unit/lib/bdev/bdev_raid.c/bdev_raid_ut.c b/test/unit/lib/bdev/bdev_raid.c/bdev_raid_ut.c new file mode 100644 index 000000000..02b73c52d --- /dev/null +++ b/test/unit/lib/bdev/bdev_raid.c/bdev_raid_ut.c @@ -0,0 +1,2123 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+#include "spdk_cunit.h"
+#include "spdk/env.h"
+#include "spdk_internal/mock.h"
+#include "raid/bdev_raid.c"
+#include "raid/bdev_raid_rpc.c"
+
+#define MAX_BASE_DRIVES 255
+#define MAX_RAIDS 31
+#define INVALID_IO_SUBMIT 0xFFFF
+
+/* Data structure to capture the output of IO for verification */
+struct io_output {
+    struct spdk_bdev_desc *desc;
+    struct spdk_io_channel *ch;
+    void *buf;
+    uint64_t offset_blocks;
+    uint64_t num_blocks;
+    spdk_bdev_io_completion_cb cb;
+    void *cb_arg;
+    enum spdk_bdev_io_type iotype;
+};
+
+/* Different test options; more options can be added here */
+uint32_t g_blklen_opts[] = {512, 4096};
+uint32_t g_strip_opts[] = {64, 128, 256, 512, 1024, 2048};
+uint32_t g_iosize_opts[] = {256, 512, 1024};
+uint32_t g_max_qd_opts[] = {64, 128, 256, 512, 1024, 2048};
+
+/* Globals */
+int g_bdev_io_submit_status;
+struct io_output *g_io_output = NULL;
+uint32_t g_io_output_index;
+uint32_t g_io_comp_status;
+bool g_child_io_status_flag;
+void *rpc_req;
+uint32_t rpc_req_size;
+TAILQ_HEAD(bdev, spdk_bdev);
+struct bdev g_bdev_list;
+TAILQ_HEAD(waitq, spdk_bdev_io_wait_entry);
+struct waitq g_io_waitq;
+uint32_t g_block_len;
+uint32_t g_strip_size;
+uint32_t g_max_io_size;
+uint32_t g_max_qd;
+uint8_t g_max_base_drives;
+uint8_t g_max_raids;
+uint8_t g_ignore_io_output;
+uint8_t g_rpc_err;
+char *g_get_raids_output[MAX_RAIDS];
+uint32_t g_get_raids_count;
+uint8_t g_json_beg_res_ret_err;
+uint8_t g_json_decode_obj_err;
+uint8_t g_config_level_create = 0;
+uint8_t g_test_multi_raids;
+
+/* Randomly set the test options; every run uses a different combination */
+static void
+set_test_opts(void)
+{
+    uint32_t seed = time(0);
+
+    /* Generate random test options */
+    srand(seed);
+    g_max_base_drives = (rand() % MAX_BASE_DRIVES) + 1;
+    g_max_raids = (rand() % MAX_RAIDS) + 1;
+    g_block_len = g_blklen_opts[rand() % SPDK_COUNTOF(g_blklen_opts)];
+    g_strip_size = g_strip_opts[rand() % SPDK_COUNTOF(g_strip_opts)];
+    g_max_io_size = g_iosize_opts[rand() % SPDK_COUNTOF(g_iosize_opts)];
+    g_max_qd = g_max_qd_opts[rand() % SPDK_COUNTOF(g_max_qd_opts)];
+
+    printf("Test Options, seed = %u\n", seed);
+    printf("blocklen = %u, strip_size = %u, max_io_size = %u, max_qd = %u, g_max_base_drives = %u, g_max_raids = %u\n",
+           g_block_len, g_strip_size, g_max_io_size, g_max_qd, g_max_base_drives, g_max_raids);
+}
+
+/* Set the globals before every test run */
+static void
+set_globals(void)
+{
+    uint32_t max_splits;
+
+    g_bdev_io_submit_status = 0;
+    if (g_max_io_size < g_strip_size) {
+        max_splits = 2;
+    } else {
+        max_splits = (g_max_io_size / g_strip_size) + 1;
+    }
+    g_io_output = calloc(max_splits, sizeof(struct io_output));
+    SPDK_CU_ASSERT_FATAL(g_io_output != NULL);
+    g_io_output_index = 0;
+    memset(g_get_raids_output, 0, sizeof(g_get_raids_output));
+    g_get_raids_count = 0;
+    g_io_comp_status = 0;
+    g_ignore_io_output = 0;
+    g_config_level_create = 0;
+    g_rpc_err = 0;
+    g_test_multi_raids = 0;
+    g_child_io_status_flag = true;
+    TAILQ_INIT(&g_bdev_list);
+    TAILQ_INIT(&g_io_waitq);
+    rpc_req = NULL;
+    rpc_req_size = 0;
+    g_json_beg_res_ret_err = 0;
+    g_json_decode_obj_err = 0;
+}
+
+static void
+base_bdevs_cleanup(void)
+{
+    struct spdk_bdev *bdev;
+    struct spdk_bdev *bdev_next;
+
+    if (!TAILQ_EMPTY(&g_bdev_list)) {
+        TAILQ_FOREACH_SAFE(bdev, &g_bdev_list, internal.link, bdev_next) {
+            free(bdev->name);
+            TAILQ_REMOVE(&g_bdev_list, bdev, internal.link);
+            free(bdev);
+        }
+    }
+}
+
+/* Reset the globals */
+static void
+reset_globals(void)
+{
+    if (g_io_output) {
+        free(g_io_output);
+        g_io_output = NULL;
+    }
+    rpc_req = NULL;
+    rpc_req_size = 0;
+}
+
+/* Store the IO completion status in a global variable so that tests can verify it */
+void
+spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
+{
+    g_io_comp_status = ((status == SPDK_BDEV_IO_STATUS_SUCCESS) ?
true : false); +} + +/* It will cache the split IOs for verification */ +int +spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct io_output *p = &g_io_output[g_io_output_index]; + struct spdk_bdev_io *child_io; + + if (g_ignore_io_output) { + return 0; + } + + if (g_max_io_size < g_strip_size) { + SPDK_CU_ASSERT_FATAL(g_io_output_index < 2); + } else { + SPDK_CU_ASSERT_FATAL(g_io_output_index < (g_max_io_size / g_strip_size) + 1); + } + if (g_bdev_io_submit_status == 0) { + p->desc = desc; + p->ch = ch; + p->buf = buf; + p->offset_blocks = offset_blocks; + p->num_blocks = num_blocks; + p->cb = cb; + p->cb_arg = cb_arg; + p->iotype = SPDK_BDEV_IO_TYPE_WRITE; + g_io_output_index++; + child_io = calloc(1, sizeof(struct spdk_bdev_io)); + SPDK_CU_ASSERT_FATAL(child_io != NULL); + cb(child_io, g_child_io_status_flag, cb_arg); + } + + return g_bdev_io_submit_status; +} + +void +spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg) +{ + bdev->fn_table->destruct(bdev->ctxt); +} + +int +spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_cb, + void *remove_ctx, struct spdk_bdev_desc **_desc) +{ + *_desc = (void *)0x1; + return 0; +} + +void +spdk_put_io_channel(struct spdk_io_channel *ch) +{ + CU_ASSERT(ch == (void *)1); +} + +struct spdk_io_channel * +spdk_get_io_channel(void *io_device) +{ + return NULL; +} + +void +spdk_poller_unregister(struct spdk_poller **ppoller) +{ +} + +struct spdk_poller * +spdk_poller_register(spdk_poller_fn fn, + void *arg, + uint64_t period_microseconds) +{ + return (void *)1; +} + +void +spdk_io_device_unregister(void *io_device, spdk_io_device_unregister_cb unregister_cb) +{ +} + +char * +spdk_sprintf_alloc(const char *format, ...) 
+{ + return strdup(format); +} + +void +spdk_io_device_register(void *io_device, spdk_io_channel_create_cb create_cb, + spdk_io_channel_destroy_cb destroy_cb, uint32_t ctx_size) +{ +} + +int +spdk_json_write_name(struct spdk_json_write_ctx *w, const char *name) +{ + return 0; +} + +int spdk_json_write_named_uint32(struct spdk_json_write_ctx *w, const char *name, uint32_t val) +{ + struct rpc_construct_raid_bdev *req = rpc_req; + if (strcmp(name, "strip_size") == 0) { + CU_ASSERT(req->strip_size * 1024 / g_block_len == val); + } else if (strcmp(name, "blocklen_shift") == 0) { + CU_ASSERT(spdk_u32log2(g_block_len) == val); + } else if (strcmp(name, "raid_level") == 0) { + CU_ASSERT(req->raid_level == val); + } else if (strcmp(name, "num_base_bdevs") == 0) { + CU_ASSERT(req->base_bdevs.num_base_bdevs == val); + } else if (strcmp(name, "state") == 0) { + CU_ASSERT(val == RAID_BDEV_STATE_ONLINE); + } else if (strcmp(name, "destruct_called") == 0) { + CU_ASSERT(val == 0); + } else if (strcmp(name, "num_base_bdevs_discovered") == 0) { + CU_ASSERT(req->base_bdevs.num_base_bdevs == val); + } + return 0; +} + +int spdk_json_write_named_string(struct spdk_json_write_ctx *w, const char *name, const char *val) +{ + return 0; +} + +int +spdk_json_write_object_begin(struct spdk_json_write_ctx *w) +{ + return 0; +} + +int +spdk_json_write_array_end(struct spdk_json_write_ctx *w) +{ + return 0; +} + +int +spdk_json_write_object_end(struct spdk_json_write_ctx *w) +{ + return 0; +} + +int +spdk_json_write_bool(struct spdk_json_write_ctx *w, bool val) +{ + return 0; +} + +int spdk_json_write_null(struct spdk_json_write_ctx *w) +{ + return 0; +} + +struct spdk_io_channel * +spdk_bdev_get_io_channel(struct spdk_bdev_desc *desc) +{ + return (void *)1; +} + +void +spdk_for_each_thread(spdk_thread_fn fn, void *ctx, spdk_thread_fn cpl) +{ + fn(ctx); + cpl(ctx); +} + +struct spdk_thread * +spdk_get_thread(void) +{ + return NULL; +} + +void +spdk_thread_send_msg(const struct spdk_thread *thread, spdk_thread_fn fn, void *ctx) +{ + fn(ctx); +} + +uint32_t +spdk_env_get_current_core(void) +{ + return 0; +} + +void +spdk_bdev_free_io(struct spdk_bdev_io *bdev_io) +{ + if (bdev_io) { + free(bdev_io); + } +} + +/* It will cache split IOs for verification */ +int +spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + struct io_output *p = &g_io_output[g_io_output_index]; + struct spdk_bdev_io *child_io; + + if (g_ignore_io_output) { + return 0; + } + + SPDK_CU_ASSERT_FATAL(g_io_output_index <= (g_max_io_size / g_strip_size) + 1); + if (g_bdev_io_submit_status == 0) { + p->desc = desc; + p->ch = ch; + p->buf = buf; + p->offset_blocks = offset_blocks; + p->num_blocks = num_blocks; + p->cb = cb; + p->cb_arg = cb_arg; + p->iotype = SPDK_BDEV_IO_TYPE_READ; + g_io_output_index++; + child_io = calloc(1, sizeof(struct spdk_bdev_io)); + SPDK_CU_ASSERT_FATAL(child_io != NULL); + cb(child_io, g_child_io_status_flag, cb_arg); + } + + return g_bdev_io_submit_status; +} + +void +spdk_bdev_module_release_bdev(struct spdk_bdev *bdev) +{ + CU_ASSERT(bdev->internal.claim_module != NULL); + bdev->internal.claim_module = NULL; +} + +void +spdk_bdev_module_examine_done(struct spdk_bdev_module *module) +{ +} + +struct spdk_conf_section * +spdk_conf_first_section(struct spdk_conf *cp) +{ + if (g_config_level_create) { + return (void *) 0x1; + } + + return NULL; +} + +bool +spdk_conf_section_match_prefix(const 
struct spdk_conf_section *sp, const char *name_prefix) +{ + if (g_config_level_create) { + return true; + } + + return false; +} + +char * +spdk_conf_section_get_val(struct spdk_conf_section *sp, const char *key) +{ + struct rpc_construct_raid_bdev *req = rpc_req; + + if (g_config_level_create) { + if (strcmp(key, "Name") == 0) { + return req->name; + } + } + + return NULL; +} + +int +spdk_conf_section_get_intval(struct spdk_conf_section *sp, const char *key) +{ + struct rpc_construct_raid_bdev *req = rpc_req; + + if (g_config_level_create) { + if (strcmp(key, "StripSize") == 0) { + return req->strip_size; + } else if (strcmp(key, "NumDevices") == 0) { + return req->base_bdevs.num_base_bdevs; + } else if (strcmp(key, "RaidLevel") == 0) { + return req->raid_level; + } + } + + return 0; +} + +struct spdk_conf_section * +spdk_conf_next_section(struct spdk_conf_section *sp) +{ + return NULL; +} + +char * +spdk_conf_section_get_nmval(struct spdk_conf_section *sp, const char *key, int idx1, int idx2) +{ + struct rpc_construct_raid_bdev *req = rpc_req; + + if (g_config_level_create) { + if (strcmp(key, "Devices") == 0) { + if (idx2 >= g_max_base_drives) { + return NULL; + } + return req->base_bdevs.base_bdevs[idx2]; + } + } + + return NULL; +} + +void +spdk_bdev_close(struct spdk_bdev_desc *desc) +{ +} + +int +spdk_bdev_module_claim_bdev(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, + struct spdk_bdev_module *module) +{ + if (bdev->internal.claim_module != NULL) { + return -1; + } + bdev->internal.claim_module = module; + return 0; +} + +int +spdk_bdev_register(struct spdk_bdev *bdev) +{ + return 0; +} + +uint32_t +spdk_env_get_last_core(void) +{ + return 0; +} + +int +spdk_json_decode_string(const struct spdk_json_val *val, void *out) +{ + return 0; +} + +int +spdk_json_decode_object(const struct spdk_json_val *values, + const struct spdk_json_object_decoder *decoders, size_t num_decoders, void *out) +{ + if (g_json_decode_obj_err) { + return -1; + } else { + memcpy(out, rpc_req, rpc_req_size); + return 0; + } +} + +struct spdk_json_write_ctx * +spdk_jsonrpc_begin_result(struct spdk_jsonrpc_request *request) +{ + if (g_json_beg_res_ret_err) { + return NULL; + } else { + return (void *)1; + } +} + +int +spdk_json_write_array_begin(struct spdk_json_write_ctx *w) +{ + return 0; +} + +int +spdk_json_write_string(struct spdk_json_write_ctx *w, const char *val) +{ + if (g_test_multi_raids) { + g_get_raids_output[g_get_raids_count] = strdup(val); + SPDK_CU_ASSERT_FATAL(g_get_raids_output[g_get_raids_count] != NULL); + g_get_raids_count++; + } + + return 0; +} + +void +spdk_jsonrpc_send_error_response(struct spdk_jsonrpc_request *request, + int error_code, const char *msg) +{ + g_rpc_err = 1; +} + +void +spdk_jsonrpc_end_result(struct spdk_jsonrpc_request *request, struct spdk_json_write_ctx *w) +{ +} + +struct spdk_bdev * +spdk_bdev_get_by_name(const char *bdev_name) +{ + struct spdk_bdev *bdev; + + if (!TAILQ_EMPTY(&g_bdev_list)) { + TAILQ_FOREACH(bdev, &g_bdev_list, internal.link) { + if (strcmp(bdev_name, bdev->name) == 0) { + return bdev; + } + } + } + + return NULL; +} + +const char * +spdk_strerror(int errnum) +{ + return NULL; +} + +int +spdk_json_decode_array(const struct spdk_json_val *values, spdk_json_decode_fn decode_func, + void *out, size_t max_size, size_t *out_size, size_t stride) +{ + return 0; +} + +void +spdk_rpc_register_method(const char *method, spdk_rpc_method_handler func, uint32_t state_mask) +{ +} + +int +spdk_json_decode_uint32(const struct spdk_json_val *val, void 
*out) +{ + return 0; +} + + +void +spdk_bdev_module_list_add(struct spdk_bdev_module *bdev_module) +{ +} + +static void +bdev_io_cleanup(struct spdk_bdev_io *bdev_io) +{ + if (bdev_io->u.bdev.iovs) { + if (bdev_io->u.bdev.iovs->iov_base) { + free(bdev_io->u.bdev.iovs->iov_base); + bdev_io->u.bdev.iovs->iov_base = NULL; + } + free(bdev_io->u.bdev.iovs); + bdev_io->u.bdev.iovs = NULL; + } +} + +static void +bdev_io_initialize(struct spdk_bdev_io *bdev_io, uint64_t lba, uint64_t blocks, int16_t iotype) +{ + bdev_io->u.bdev.offset_blocks = lba; + bdev_io->u.bdev.num_blocks = blocks; + bdev_io->type = iotype; + bdev_io->u.bdev.iovcnt = 1; + bdev_io->u.bdev.iovs = calloc(1, sizeof(struct iovec)); + SPDK_CU_ASSERT_FATAL(bdev_io->u.bdev.iovs != NULL); + bdev_io->u.bdev.iovs->iov_base = calloc(1, bdev_io->u.bdev.num_blocks * g_block_len); + SPDK_CU_ASSERT_FATAL(bdev_io->u.bdev.iovs->iov_base != NULL); + bdev_io->u.bdev.iovs->iov_len = bdev_io->u.bdev.num_blocks * g_block_len; + bdev_io->u.bdev.iovs = bdev_io->u.bdev.iovs; +} + +static void +verify_io(struct spdk_bdev_io *bdev_io, uint8_t num_base_drives, + struct raid_bdev_io_channel *ch_ctx, struct raid_bdev *raid_bdev, uint32_t io_status) +{ + uint32_t strip_shift = spdk_u32log2(g_strip_size); + uint64_t start_strip = bdev_io->u.bdev.offset_blocks >> strip_shift; + uint64_t end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> + strip_shift; + uint32_t splits_reqd = (end_strip - start_strip + 1); + uint32_t strip; + uint64_t pd_strip; + uint64_t pd_idx; + uint32_t offset_in_strip; + uint64_t pd_lba; + uint64_t pd_blocks; + uint32_t index = 0; + uint8_t *buf = bdev_io->u.bdev.iovs->iov_base; + + if (io_status == INVALID_IO_SUBMIT) { + CU_ASSERT(g_io_comp_status == false); + return; + } + SPDK_CU_ASSERT_FATAL(raid_bdev != NULL); + SPDK_CU_ASSERT_FATAL(num_base_drives != 0); + + if (raid_bdev->num_base_bdevs > 1) { + CU_ASSERT(splits_reqd == g_io_output_index); + for (strip = start_strip; strip <= end_strip; strip++, index++) { + pd_strip = strip / num_base_drives; + pd_idx = strip % num_base_drives; + if (strip == start_strip) { + offset_in_strip = bdev_io->u.bdev.offset_blocks & (g_strip_size - 1); + pd_lba = (pd_strip << strip_shift) + offset_in_strip; + if (strip == end_strip) { + pd_blocks = bdev_io->u.bdev.num_blocks; + } else { + pd_blocks = g_strip_size - offset_in_strip; + } + } else if (strip == end_strip) { + pd_lba = pd_strip << strip_shift; + pd_blocks = ((bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) & + (g_strip_size - 1)) + 1; + } else { + pd_lba = pd_strip << raid_bdev->strip_size_shift; + pd_blocks = raid_bdev->strip_size; + } + CU_ASSERT(pd_lba == g_io_output[index].offset_blocks); + CU_ASSERT(pd_blocks == g_io_output[index].num_blocks); + CU_ASSERT(ch_ctx->base_bdevs_io_channel[pd_idx] == g_io_output[index].ch); + CU_ASSERT(raid_bdev->base_bdev_info[pd_idx].base_bdev_desc == g_io_output[index].desc); + CU_ASSERT(buf == g_io_output[index].buf); + CU_ASSERT(bdev_io->type == g_io_output[index].iotype); + buf += (pd_blocks << spdk_u32log2(g_block_len)); + } + } else { + CU_ASSERT(g_io_output_index == 1); + CU_ASSERT(bdev_io->u.bdev.offset_blocks == g_io_output[0].offset_blocks); + CU_ASSERT(bdev_io->u.bdev.num_blocks == g_io_output[0].num_blocks); + CU_ASSERT(ch_ctx->base_bdevs_io_channel[0] == g_io_output[0].ch); + CU_ASSERT(raid_bdev->base_bdev_info[0].base_bdev_desc == g_io_output[0].desc); + CU_ASSERT(buf == g_io_output[index].buf); + } + CU_ASSERT(g_io_comp_status == io_status); +} + 
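+/*
+ * Worked example of the strip mapping that verify_io() checks above (the
+ * numbers are illustrative, not taken from the patch): with a strip size of
+ * 64 blocks and 4 base drives, an IO at offset_blocks 130 for 10 blocks lies
+ * entirely in strip 2 (130 >> 6), which maps to drive 2 % 4 = 2 at
+ * pd_strip 2 / 4 = 0 with offset_in_strip 130 & 63 = 2, so a single child IO
+ * goes to drive 2 at pd_lba 2 for 10 blocks. An IO at offset_blocks 60 for
+ * 10 blocks spans strips 0 and 1 and is split into two child IOs: drive 0 at
+ * pd_lba 60 for 4 blocks and drive 1 at pd_lba 0 for 6 blocks.
+ */
+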
+static void +verify_raid_config_present(const char *name, bool presence) +{ + uint32_t iter; + bool cfg_found; + + cfg_found = false; + for (iter = 0; iter < g_spdk_raid_config.total_raid_bdev; iter++) { + if (strcmp(name, g_spdk_raid_config.raid_bdev_config[iter].name) == 0) { + cfg_found = true; + break; + } + } + + if (presence == true) { + CU_ASSERT(cfg_found == true); + } else { + CU_ASSERT(cfg_found == false); + } +} + +static void +verify_raid_bdev_present(const char *name, bool presence) +{ + struct raid_bdev_ctxt *pbdev_ctxt = NULL; + struct raid_bdev *pbdev; + bool pbdev_found; + + pbdev_found = false; + TAILQ_FOREACH(pbdev, &g_spdk_raid_bdev_list, link_global_list) { + pbdev_ctxt = SPDK_CONTAINEROF(pbdev, struct raid_bdev_ctxt, raid_bdev); + if (strcmp(pbdev_ctxt->bdev.name, name) == 0) { + pbdev_found = true; + break; + } + } + if (presence == true) { + CU_ASSERT(pbdev_found == true); + } else { + CU_ASSERT(pbdev_found == false); + } +} +static void +verify_raid_config(struct rpc_construct_raid_bdev *r, bool presence) +{ + struct raid_bdev_config *raid_cfg = NULL; + uint32_t iter, iter2; + int val; + + for (iter = 0; iter < g_spdk_raid_config.total_raid_bdev; iter++) { + if (strcmp(r->name, g_spdk_raid_config.raid_bdev_config[iter].name) == 0) { + raid_cfg = &g_spdk_raid_config.raid_bdev_config[iter]; + if (presence == false) { + break; + } + CU_ASSERT(raid_cfg->raid_bdev_ctxt != NULL); + CU_ASSERT(raid_cfg->strip_size == r->strip_size); + CU_ASSERT(raid_cfg->num_base_bdevs == r->base_bdevs.num_base_bdevs); + CU_ASSERT(raid_cfg->raid_level == r->raid_level); + for (iter2 = 0; iter2 < raid_cfg->num_base_bdevs; iter2++) { + val = strcmp(raid_cfg->base_bdev[iter2].bdev_name, r->base_bdevs.base_bdevs[iter2]); + CU_ASSERT(val == 0); + } + break; + } + } + + if (presence == true) { + CU_ASSERT(raid_cfg != NULL); + } else { + CU_ASSERT(raid_cfg == NULL); + } +} + +static void +verify_raid_bdev(struct rpc_construct_raid_bdev *r, bool presence, uint32_t raid_state) +{ + struct raid_bdev_ctxt *pbdev_ctxt = NULL; + struct raid_bdev *pbdev; + uint32_t iter; + struct spdk_bdev *bdev = NULL; + bool pbdev_found; + uint64_t min_blockcnt = 0xFFFFFFFFFFFFFFFF; + + pbdev_found = false; + TAILQ_FOREACH(pbdev, &g_spdk_raid_bdev_list, link_global_list) { + pbdev_ctxt = SPDK_CONTAINEROF(pbdev, struct raid_bdev_ctxt, raid_bdev); + if (strcmp(pbdev_ctxt->bdev.name, r->name) == 0) { + pbdev_found = true; + if (presence == false) { + break; + } + CU_ASSERT(pbdev->raid_bdev_config->raid_bdev_ctxt == pbdev_ctxt); + CU_ASSERT(pbdev->base_bdev_info != NULL); + CU_ASSERT(pbdev->strip_size == ((r->strip_size * 1024) / g_block_len)); + CU_ASSERT(pbdev->strip_size_shift == spdk_u32log2(((r->strip_size * 1024) / g_block_len))); + CU_ASSERT(pbdev->blocklen_shift == spdk_u32log2(g_block_len)); + CU_ASSERT(pbdev->state == raid_state); + CU_ASSERT(pbdev->num_base_bdevs == r->base_bdevs.num_base_bdevs); + CU_ASSERT(pbdev->num_base_bdevs_discovered == r->base_bdevs.num_base_bdevs); + CU_ASSERT(pbdev->raid_level == r->raid_level); + CU_ASSERT(pbdev->destruct_called == false); + for (iter = 0; iter < pbdev->num_base_bdevs; iter++) { + if (pbdev->base_bdev_info && pbdev->base_bdev_info[iter].base_bdev) { + bdev = spdk_bdev_get_by_name(pbdev->base_bdev_info[iter].base_bdev->name); + CU_ASSERT(bdev != NULL); + CU_ASSERT(pbdev->base_bdev_info[iter].base_bdev_remove_scheduled == false); + } else { + CU_ASSERT(0); + } + + if (bdev && bdev->blockcnt < min_blockcnt) { + min_blockcnt = bdev->blockcnt; + } + } + 
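+            /*
+             * Expected raid capacity: the smallest base bdev rounded down to
+             * a whole number of strips, multiplied by the number of base
+             * bdevs; this mirrors the blockcnt the module computes.
+             */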
CU_ASSERT((((min_blockcnt / (r->strip_size * 1024 / g_block_len)) * (r->strip_size * 1024 / + g_block_len)) * r->base_bdevs.num_base_bdevs) == pbdev_ctxt->bdev.blockcnt); + CU_ASSERT(strcmp(pbdev_ctxt->bdev.product_name, "Pooled Device") == 0); + CU_ASSERT(pbdev_ctxt->bdev.write_cache == 0); + CU_ASSERT(pbdev_ctxt->bdev.blocklen == g_block_len); + CU_ASSERT(pbdev_ctxt->bdev.optimal_io_boundary == 0); + CU_ASSERT(pbdev_ctxt->bdev.ctxt == pbdev_ctxt); + CU_ASSERT(pbdev_ctxt->bdev.fn_table == &g_raid_bdev_fn_table); + CU_ASSERT(pbdev_ctxt->bdev.module == &g_raid_if); + break; + } + } + if (presence == true) { + CU_ASSERT(pbdev_found == true); + } else { + CU_ASSERT(pbdev_found == false); + } + pbdev_found = false; + if (raid_state == RAID_BDEV_STATE_ONLINE) { + TAILQ_FOREACH(pbdev, &g_spdk_raid_bdev_configured_list, link_specific_list) { + pbdev_ctxt = SPDK_CONTAINEROF(pbdev, struct raid_bdev_ctxt, raid_bdev); + if (strcmp(pbdev_ctxt->bdev.name, r->name) == 0) { + pbdev_found = true; + break; + } + } + } else if (raid_state == RAID_BDEV_STATE_CONFIGURING) { + TAILQ_FOREACH(pbdev, &g_spdk_raid_bdev_configuring_list, link_specific_list) { + pbdev_ctxt = SPDK_CONTAINEROF(pbdev, struct raid_bdev_ctxt, raid_bdev); + if (strcmp(pbdev_ctxt->bdev.name, r->name) == 0) { + pbdev_found = true; + break; + } + } + } else if (raid_state == RAID_BDEV_STATE_OFFLINE) { + TAILQ_FOREACH(pbdev, &g_spdk_raid_bdev_offline_list, link_specific_list) { + pbdev_ctxt = SPDK_CONTAINEROF(pbdev, struct raid_bdev_ctxt, raid_bdev); + if (strcmp(pbdev_ctxt->bdev.name, r->name) == 0) { + pbdev_found = true; + break; + } + } + } + if (presence == true) { + CU_ASSERT(pbdev_found == true); + } else { + CU_ASSERT(pbdev_found == false); + } +} + +int +spdk_bdev_queue_io_wait(struct spdk_bdev *bdev, struct spdk_io_channel *ch, + struct spdk_bdev_io_wait_entry *entry) +{ + CU_ASSERT(bdev == entry->bdev); + CU_ASSERT(entry->cb_fn != NULL); + CU_ASSERT(entry->cb_arg != NULL); + TAILQ_INSERT_TAIL(&g_io_waitq, entry, link); + return 0; +} + + +static uint32_t +get_num_elts_in_waitq(void) +{ + struct spdk_bdev_io_wait_entry *ele; + uint32_t count = 0; + + TAILQ_FOREACH(ele, &g_io_waitq, link) { + count++; + } + + return count; +} + +static void +process_io_waitq(void) +{ + struct spdk_bdev_io_wait_entry *ele; + struct spdk_bdev_io_wait_entry *next_ele; + + TAILQ_FOREACH_SAFE(ele, &g_io_waitq, link, next_ele) { + TAILQ_REMOVE(&g_io_waitq, ele, link); + ele->cb_fn(ele->cb_arg); + } +} + +static void +verify_get_raids(struct rpc_construct_raid_bdev *construct_req, + uint8_t g_max_raids, + char **g_get_raids_output, uint32_t g_get_raids_count) +{ + uint32_t iter, iter2; + bool found; + + CU_ASSERT(g_max_raids == g_get_raids_count); + if (g_max_raids == g_get_raids_count) { + for (iter = 0; iter < g_max_raids; iter++) { + found = false; + for (iter2 = 0; iter2 < g_max_raids; iter2++) { + if (construct_req[iter].name && strcmp(construct_req[iter].name, g_get_raids_output[iter]) == 0) { + found = true; + break; + } + } + CU_ASSERT(found == true); + } + } +} + +static void +create_base_bdevs(uint32_t bbdev_start_idx) +{ + uint32_t iter; + struct spdk_bdev *base_bdev; + char name[16]; + uint16_t num_chars; + + for (iter = 0; iter < g_max_base_drives; iter++, bbdev_start_idx++) { + num_chars = snprintf(name, 16, "%s%u%s", "Nvme", bbdev_start_idx, "n1"); + name[num_chars] = '\0'; + base_bdev = calloc(1, sizeof(struct spdk_bdev)); + SPDK_CU_ASSERT_FATAL(base_bdev != NULL); + base_bdev->name = strdup(name); + SPDK_CU_ASSERT_FATAL(base_bdev->name != 
NULL); + base_bdev->blocklen = g_block_len; + base_bdev->blockcnt = (uint64_t)1024 * 1024 * 1024 * 1024; + TAILQ_INSERT_TAIL(&g_bdev_list, base_bdev, internal.link); + } +} + +static void +create_test_req(struct rpc_construct_raid_bdev *r, const char *raid_name, uint32_t bbdev_start_idx, + bool create_base_bdev) +{ + uint32_t iter; + char name[16]; + uint16_t num_chars; + uint32_t bbdev_idx = bbdev_start_idx; + + r->name = strdup(raid_name); + SPDK_CU_ASSERT_FATAL(r->name != NULL); + r->strip_size = (g_strip_size * g_block_len) / 1024; + r->raid_level = 0; + r->base_bdevs.num_base_bdevs = g_max_base_drives; + for (iter = 0; iter < g_max_base_drives; iter++, bbdev_idx++) { + num_chars = snprintf(name, 16, "%s%u%s", "Nvme", bbdev_idx, "n1"); + name[num_chars] = '\0'; + r->base_bdevs.base_bdevs[iter] = strdup(name); + SPDK_CU_ASSERT_FATAL(r->base_bdevs.base_bdevs[iter] != NULL); + } + if (create_base_bdev == true) { + create_base_bdevs(bbdev_start_idx); + } +} + +static void +free_test_req(struct rpc_construct_raid_bdev *r) +{ + uint8_t iter; + + free(r->name); + for (iter = 0; iter < r->base_bdevs.num_base_bdevs; iter++) { + free(r->base_bdevs.base_bdevs[iter]); + } +} + +static void +test_construct_raid(void) +{ + struct rpc_construct_raid_bdev req; + struct rpc_destroy_raid_bdev destroy_req; + + set_globals(); + create_test_req(&req, "raid1", 0, true); + rpc_req = &req; + rpc_req_size = sizeof(req); + CU_ASSERT(raid_bdev_init() == 0); + + verify_raid_config_present(req.name, false); + verify_raid_bdev_present(req.name, false); + g_rpc_err = 0; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config(&req, true); + verify_raid_bdev(&req, true, RAID_BDEV_STATE_ONLINE); + + destroy_req.name = strdup("raid1"); + rpc_req = &destroy_req; + rpc_req_size = sizeof(destroy_req); + g_rpc_err = 0; + spdk_rpc_destroy_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + raid_bdev_exit(); + base_bdevs_cleanup(); + reset_globals(); +} + +static void +test_destroy_raid(void) +{ + struct rpc_construct_raid_bdev construct_req; + struct rpc_destroy_raid_bdev destroy_req; + + set_globals(); + create_test_req(&construct_req, "raid1", 0, true); + rpc_req = &construct_req; + rpc_req_size = sizeof(construct_req); + CU_ASSERT(raid_bdev_init() == 0); + verify_raid_config_present(construct_req.name, false); + verify_raid_bdev_present(construct_req.name, false); + g_rpc_err = 0; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config(&construct_req, true); + verify_raid_bdev(&construct_req, true, RAID_BDEV_STATE_ONLINE); + + destroy_req.name = strdup("raid1"); + rpc_req = &destroy_req; + rpc_req_size = sizeof(destroy_req); + g_rpc_err = 0; + spdk_rpc_destroy_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config_present("raid1", false); + verify_raid_bdev_present("raid1", false); + + raid_bdev_exit(); + base_bdevs_cleanup(); + reset_globals(); +} + +static void +test_construct_raid_invalid_args(void) +{ + struct rpc_construct_raid_bdev req; + struct rpc_destroy_raid_bdev destroy_req; + + set_globals(); + rpc_req = &req; + rpc_req_size = sizeof(req); + CU_ASSERT(raid_bdev_init() == 0); + + create_test_req(&req, "raid1", 0, true); + verify_raid_config_present(req.name, false); + verify_raid_bdev_present(req.name, false); + req.raid_level = 1; + g_rpc_err = 0; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 1); + verify_raid_config_present("raid1", false); + verify_raid_bdev_present("raid1", false); + + 
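+    /*
+     * Next scenario: a JSON decode failure must make the construct RPC fail
+     * and leave no raid config or raid bdev behind.
+     */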
create_test_req(&req, "raid1", 0, false); + verify_raid_config_present(req.name, false); + verify_raid_bdev_present(req.name, false); + g_rpc_err = 0; + g_json_decode_obj_err = 1; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 1); + g_json_decode_obj_err = 0; + free_test_req(&req); + verify_raid_config_present("raid1", false); + verify_raid_bdev_present("raid1", false); + + create_test_req(&req, "raid1", 0, false); + req.strip_size = 1231; + g_rpc_err = 0; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 1); + verify_raid_config_present("raid1", false); + verify_raid_bdev_present("raid1", false); + + create_test_req(&req, "raid1", 0, false); + g_rpc_err = 0; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config(&req, true); + verify_raid_bdev(&req, true, RAID_BDEV_STATE_ONLINE); + + create_test_req(&req, "raid1", 0, false); + g_rpc_err = 0; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 1); + + create_test_req(&req, "raid2", 0, false); + g_rpc_err = 0; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 1); + verify_raid_config_present("raid2", false); + verify_raid_bdev_present("raid2", false); + + create_test_req(&req, "raid2", g_max_base_drives, true); + free(req.base_bdevs.base_bdevs[g_max_base_drives - 1]); + req.base_bdevs.base_bdevs[g_max_base_drives - 1] = strdup("Nvme0n1"); + SPDK_CU_ASSERT_FATAL(req.base_bdevs.base_bdevs[g_max_base_drives - 1] != NULL); + g_rpc_err = 0; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 1); + verify_raid_config_present("raid2", false); + verify_raid_bdev_present("raid2", false); + + create_test_req(&req, "raid2", g_max_base_drives, true); + free(req.base_bdevs.base_bdevs[g_max_base_drives - 1]); + req.base_bdevs.base_bdevs[g_max_base_drives - 1] = strdup("Nvme100000n1"); + SPDK_CU_ASSERT_FATAL(req.base_bdevs.base_bdevs[g_max_base_drives - 1] != NULL); + g_rpc_err = 0; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 1); + verify_raid_config_present("raid2", false); + verify_raid_bdev_present("raid2", false); + + create_test_req(&req, "raid2", g_max_base_drives, false); + g_rpc_err = 0; + g_json_beg_res_ret_err = 1; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config_present("raid2", true); + verify_raid_bdev_present("raid2", true); + verify_raid_config_present("raid1", true); + verify_raid_bdev_present("raid1", true); + g_json_beg_res_ret_err = 0; + + destroy_req.name = strdup("raid1"); + rpc_req = &destroy_req; + rpc_req_size = sizeof(destroy_req); + spdk_rpc_destroy_raid_bdev(NULL, NULL); + destroy_req.name = strdup("raid2"); + rpc_req = &destroy_req; + rpc_req_size = sizeof(destroy_req); + spdk_rpc_destroy_raid_bdev(NULL, NULL); + raid_bdev_exit(); + base_bdevs_cleanup(); + reset_globals(); +} + +static void +test_destroy_raid_invalid_args(void) +{ + struct rpc_construct_raid_bdev construct_req; + struct rpc_destroy_raid_bdev destroy_req; + + set_globals(); + create_test_req(&construct_req, "raid1", 0, true); + rpc_req = &construct_req; + rpc_req_size = sizeof(construct_req); + CU_ASSERT(raid_bdev_init() == 0); + verify_raid_config_present(construct_req.name, false); + verify_raid_bdev_present(construct_req.name, false); + g_rpc_err = 0; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config(&construct_req, true); + verify_raid_bdev(&construct_req, true, RAID_BDEV_STATE_ONLINE); + + destroy_req.name = 
strdup("raid2"); + rpc_req = &destroy_req; + rpc_req_size = sizeof(destroy_req); + g_rpc_err = 0; + spdk_rpc_destroy_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 1); + + destroy_req.name = strdup("raid1"); + g_rpc_err = 0; + g_json_decode_obj_err = 1; + spdk_rpc_destroy_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 1); + g_json_decode_obj_err = 0; + g_rpc_err = 0; + free(destroy_req.name); + verify_raid_config_present("raid1", true); + verify_raid_bdev_present("raid1", true); + + destroy_req.name = strdup("raid1"); + rpc_req = &destroy_req; + rpc_req_size = sizeof(destroy_req); + g_rpc_err = 0; + spdk_rpc_destroy_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config_present("raid1", false); + verify_raid_bdev_present("raid1", false); + + raid_bdev_exit(); + base_bdevs_cleanup(); + reset_globals(); +} + +static void +test_io_channel(void) +{ + struct rpc_construct_raid_bdev req; + struct rpc_destroy_raid_bdev destroy_req; + struct raid_bdev *pbdev; + struct raid_bdev_ctxt *pbdev_ctxt = NULL; + struct raid_bdev_io_channel *ch_ctx; + uint32_t iter; + + set_globals(); + create_test_req(&req, "raid1", 0, true); + rpc_req = &req; + rpc_req_size = sizeof(req); + CU_ASSERT(raid_bdev_init() == 0); + + verify_raid_config_present(req.name, false); + verify_raid_bdev_present(req.name, false); + g_rpc_err = 0; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config(&req, true); + verify_raid_bdev(&req, true, RAID_BDEV_STATE_ONLINE); + + TAILQ_FOREACH(pbdev, &g_spdk_raid_bdev_list, link_global_list) { + pbdev_ctxt = SPDK_CONTAINEROF(pbdev, struct raid_bdev_ctxt, raid_bdev); + if (strcmp(pbdev_ctxt->bdev.name, req.name) == 0) { + break; + } + } + CU_ASSERT(pbdev_ctxt != NULL); + ch_ctx = calloc(1, sizeof(struct raid_bdev_io_channel)); + SPDK_CU_ASSERT_FATAL(ch_ctx != NULL); + + CU_ASSERT(raid_bdev_create_cb(&pbdev_ctxt->raid_bdev, ch_ctx) == 0); + CU_ASSERT(ch_ctx->raid_bdev_ctxt == pbdev_ctxt); + for (iter = 0; iter < req.base_bdevs.num_base_bdevs; iter++) { + CU_ASSERT(ch_ctx->base_bdevs_io_channel && ch_ctx->base_bdevs_io_channel[iter] == (void *)0x1); + } + raid_bdev_destroy_cb(&pbdev_ctxt->raid_bdev, ch_ctx); + CU_ASSERT(ch_ctx->raid_bdev_ctxt == NULL); + CU_ASSERT(ch_ctx->base_bdevs_io_channel == NULL); + + destroy_req.name = strdup("raid1"); + rpc_req = &destroy_req; + rpc_req_size = sizeof(destroy_req); + g_rpc_err = 0; + spdk_rpc_destroy_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config_present("raid1", false); + verify_raid_bdev_present("raid1", false); + + free(ch_ctx); + raid_bdev_exit(); + base_bdevs_cleanup(); + reset_globals(); +} + +static void +test_write_io(void) +{ + struct rpc_construct_raid_bdev req; + struct rpc_destroy_raid_bdev destroy_req; + struct raid_bdev *pbdev; + struct raid_bdev_ctxt *pbdev_ctxt = NULL; + struct spdk_io_channel *ch; + struct raid_bdev_io_channel *ch_ctx; + uint32_t iter; + struct spdk_bdev_io *bdev_io; + uint32_t count; + uint64_t io_len; + uint64_t lba; + + set_globals(); + create_test_req(&req, "raid1", 0, true); + rpc_req = &req; + rpc_req_size = sizeof(req); + CU_ASSERT(raid_bdev_init() == 0); + verify_raid_config_present(req.name, false); + verify_raid_bdev_present(req.name, false); + g_rpc_err = 0; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config(&req, true); + verify_raid_bdev(&req, true, RAID_BDEV_STATE_ONLINE); + TAILQ_FOREACH(pbdev, &g_spdk_raid_bdev_list, link_global_list) { + pbdev_ctxt = SPDK_CONTAINEROF(pbdev, 
struct raid_bdev_ctxt, raid_bdev);
+		if (strcmp(pbdev_ctxt->bdev.name, req.name) == 0) {
+			break;
+		}
+	}
+	CU_ASSERT(pbdev_ctxt != NULL);
+	ch = calloc(1, sizeof(struct spdk_io_channel) + sizeof(struct raid_bdev_io_channel));
+	SPDK_CU_ASSERT_FATAL(ch != NULL);
+	ch_ctx = spdk_io_channel_get_ctx(ch);
+	SPDK_CU_ASSERT_FATAL(ch_ctx != NULL);
+
+	CU_ASSERT(raid_bdev_create_cb(&pbdev_ctxt->raid_bdev, ch_ctx) == 0);
+	CU_ASSERT(ch_ctx->raid_bdev_ctxt == pbdev_ctxt);
+	for (iter = 0; iter < req.base_bdevs.num_base_bdevs; iter++) {
+		CU_ASSERT(ch_ctx->base_bdevs_io_channel && ch_ctx->base_bdevs_io_channel[iter] == (void *)0x1);
+	}
+
+	lba = 0;
+	for (count = 0; count < g_max_qd; count++) {
+		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
+		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
+		io_len = (rand() % g_max_io_size) + 1;
+		bdev_io_initialize(bdev_io, lba, io_len, SPDK_BDEV_IO_TYPE_WRITE);
+		lba += io_len;
+		memset(g_io_output, 0, ((g_max_io_size / g_strip_size) + 1) * sizeof(struct io_output));
+		g_io_output_index = 0;
+		raid_bdev_submit_request(ch, bdev_io);
+		verify_io(bdev_io, req.base_bdevs.num_base_bdevs, ch_ctx, &pbdev_ctxt->raid_bdev,
+			  g_child_io_status_flag);
+		bdev_io_cleanup(bdev_io);
+		free(bdev_io);
+	}
+
+	raid_bdev_destroy_cb(&pbdev_ctxt->raid_bdev, ch_ctx);
+	CU_ASSERT(ch_ctx->raid_bdev_ctxt == NULL);
+	CU_ASSERT(ch_ctx->base_bdevs_io_channel == NULL);
+	free(ch);
+	destroy_req.name = strdup("raid1");
+	rpc_req = &destroy_req;
+	rpc_req_size = sizeof(destroy_req);
+	g_rpc_err = 0;
+	spdk_rpc_destroy_raid_bdev(NULL, NULL);
+	CU_ASSERT(g_rpc_err == 0);
+	verify_raid_config_present("raid1", false);
+	verify_raid_bdev_present("raid1", false);
+
+	raid_bdev_exit();
+	base_bdevs_cleanup();
+	reset_globals();
+}
+
+static void
+test_read_io(void)
+{
+	struct rpc_construct_raid_bdev req;
+	struct rpc_destroy_raid_bdev destroy_req;
+	struct raid_bdev *pbdev;
+	struct raid_bdev_ctxt *pbdev_ctxt = NULL;
+	struct spdk_io_channel *ch;
+	struct raid_bdev_io_channel *ch_ctx;
+	uint32_t iter;
+	struct spdk_bdev_io *bdev_io;
+	uint32_t count;
+	uint64_t io_len;
+	uint64_t lba;
+
+	set_globals();
+	create_test_req(&req, "raid1", 0, true);
+	rpc_req = &req;
+	rpc_req_size = sizeof(req);
+	CU_ASSERT(raid_bdev_init() == 0);
+	verify_raid_config_present(req.name, false);
+	verify_raid_bdev_present(req.name, false);
+	g_rpc_err = 0;
+	spdk_rpc_construct_raid_bdev(NULL, NULL);
+	CU_ASSERT(g_rpc_err == 0);
+	verify_raid_config(&req, true);
+	verify_raid_bdev(&req, true, RAID_BDEV_STATE_ONLINE);
+	TAILQ_FOREACH(pbdev, &g_spdk_raid_bdev_list, link_global_list) {
+		pbdev_ctxt = SPDK_CONTAINEROF(pbdev, struct raid_bdev_ctxt, raid_bdev);
+		if (strcmp(pbdev_ctxt->bdev.name, req.name) == 0) {
+			break;
+		}
+	}
+	CU_ASSERT(pbdev_ctxt != NULL);
+	ch = calloc(1, sizeof(struct spdk_io_channel) + sizeof(struct raid_bdev_io_channel));
+	SPDK_CU_ASSERT_FATAL(ch != NULL);
+	ch_ctx = spdk_io_channel_get_ctx(ch);
+	SPDK_CU_ASSERT_FATAL(ch_ctx != NULL);
+
+	CU_ASSERT(raid_bdev_create_cb(&pbdev_ctxt->raid_bdev, ch_ctx) == 0);
+	CU_ASSERT(ch_ctx->raid_bdev_ctxt == pbdev_ctxt);
+	for (iter = 0; iter < req.base_bdevs.num_base_bdevs; iter++) {
+		CU_ASSERT(ch_ctx->base_bdevs_io_channel && ch_ctx->base_bdevs_io_channel[iter] == (void *)0x1);
+	}
+
+	lba = 0;
+	for (count = 0; count < g_max_qd; count++) {
+		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
+		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
+		io_len = (rand() % g_max_io_size) + 1;
+		bdev_io_initialize(bdev_io, lba, io_len, SPDK_BDEV_IO_TYPE_READ);
+		lba += io_len;
+		memset(g_io_output, 0, ((g_max_io_size / g_strip_size) + 1) * sizeof(struct io_output));
+		g_io_output_index = 0;
+		raid_bdev_submit_request(ch, bdev_io);
+		verify_io(bdev_io, req.base_bdevs.num_base_bdevs, ch_ctx, &pbdev_ctxt->raid_bdev,
+			  g_child_io_status_flag);
+		bdev_io_cleanup(bdev_io);
+		free(bdev_io);
+	}
+
+	raid_bdev_destroy_cb(&pbdev_ctxt->raid_bdev, ch_ctx);
+	CU_ASSERT(ch_ctx->raid_bdev_ctxt == NULL);
+	CU_ASSERT(ch_ctx->base_bdevs_io_channel == NULL);
+	free(ch);
+	destroy_req.name = strdup("raid1");
+	rpc_req = &destroy_req;
+	rpc_req_size = sizeof(destroy_req);
+	g_rpc_err = 0;
+	spdk_rpc_destroy_raid_bdev(NULL, NULL);
+	CU_ASSERT(g_rpc_err == 0);
+	verify_raid_config_present("raid1", false);
+	verify_raid_bdev_present("raid1", false);
+
+	raid_bdev_exit();
+	base_bdevs_cleanup();
+	reset_globals();
+}
+
+/* Test IO failures */
+static void
+test_io_failure(void)
+{
+	struct rpc_construct_raid_bdev req;
+	struct rpc_destroy_raid_bdev destroy_req;
+	struct raid_bdev *pbdev;
+	struct raid_bdev_ctxt *pbdev_ctxt = NULL;
+	struct spdk_io_channel *ch;
+	struct raid_bdev_io_channel *ch_ctx;
+	uint32_t iter;
+	struct spdk_bdev_io *bdev_io;
+	uint32_t count;
+	uint64_t io_len;
+	uint64_t lba;
+
+	set_globals();
+	create_test_req(&req, "raid1", 0, true);
+	rpc_req = &req;
+	rpc_req_size = sizeof(req);
+	CU_ASSERT(raid_bdev_init() == 0);
+	verify_raid_config_present(req.name, false);
+	verify_raid_bdev_present(req.name, false);
+	g_rpc_err = 0;
+	spdk_rpc_construct_raid_bdev(NULL, NULL);
+	CU_ASSERT(g_rpc_err == 0);
+	verify_raid_config(&req, true);
+	verify_raid_bdev(&req, true, RAID_BDEV_STATE_ONLINE);
+	TAILQ_FOREACH(pbdev, &g_spdk_raid_bdev_list, link_global_list) {
+		pbdev_ctxt = SPDK_CONTAINEROF(pbdev, struct raid_bdev_ctxt, raid_bdev);
+		if (strcmp(pbdev_ctxt->bdev.name, req.name) == 0) {
+			break;
+		}
+	}
+	CU_ASSERT(pbdev_ctxt != NULL);
+	ch = calloc(1, sizeof(struct spdk_io_channel) + sizeof(struct raid_bdev_io_channel));
+	SPDK_CU_ASSERT_FATAL(ch != NULL);
+	ch_ctx = spdk_io_channel_get_ctx(ch);
+	SPDK_CU_ASSERT_FATAL(ch_ctx != NULL);
+
+	CU_ASSERT(raid_bdev_create_cb(&pbdev_ctxt->raid_bdev, ch_ctx) == 0);
+	CU_ASSERT(ch_ctx->raid_bdev_ctxt == pbdev_ctxt);
+	for (iter = 0; iter < req.base_bdevs.num_base_bdevs; iter++) {
+		CU_ASSERT(ch_ctx->base_bdevs_io_channel && ch_ctx->base_bdevs_io_channel[iter] == (void *)0x1);
+	}
+
+	lba = 0;
+	for (count = 0; count < 1; count++) {
+		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
+		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
+		io_len = (rand() % g_max_io_size) + 1;
+		bdev_io_initialize(bdev_io, lba, io_len, SPDK_BDEV_IO_TYPE_INVALID);
+		lba += io_len;
+		memset(g_io_output, 0, ((g_max_io_size / g_strip_size) + 1) * sizeof(struct io_output));
+		g_io_output_index = 0;
+		raid_bdev_submit_request(ch, bdev_io);
+		verify_io(bdev_io, req.base_bdevs.num_base_bdevs, ch_ctx, &pbdev_ctxt->raid_bdev,
+			  INVALID_IO_SUBMIT);
+		bdev_io_cleanup(bdev_io);
+		free(bdev_io);
+	}
+
+
+	lba = 0;
+	g_child_io_status_flag = false;
+	for (count = 0; count < 1; count++) {
+		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
+		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
+		io_len = (rand() % g_max_io_size) + 1;
+		bdev_io_initialize(bdev_io, lba, io_len, SPDK_BDEV_IO_TYPE_WRITE);
+		lba += io_len;
+		memset(g_io_output, 0, ((g_max_io_size / g_strip_size) + 1) * sizeof(struct io_output));
+		g_io_output_index = 0;
+		raid_bdev_submit_request(ch, bdev_io);
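+		/* Child IOs were completed with failure (g_child_io_status_flag == false), so the parent write IO must also complete with failure status. */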
verify_io(bdev_io, req.base_bdevs.num_base_bdevs, ch_ctx, &pbdev_ctxt->raid_bdev, + g_child_io_status_flag); + bdev_io_cleanup(bdev_io); + free(bdev_io); + } + + raid_bdev_destroy_cb(&pbdev_ctxt->raid_bdev, ch_ctx); + CU_ASSERT(ch_ctx->raid_bdev_ctxt == NULL); + CU_ASSERT(ch_ctx->base_bdevs_io_channel == NULL); + free(ch); + destroy_req.name = strdup("raid1"); + rpc_req = &destroy_req; + rpc_req_size = sizeof(destroy_req); + g_rpc_err = 0; + spdk_rpc_destroy_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config_present("raid1", false); + verify_raid_bdev_present("raid1", false); + + raid_bdev_exit(); + base_bdevs_cleanup(); + reset_globals(); +} + +/* Test waitq logic */ +static void +test_io_waitq(void) +{ + struct rpc_construct_raid_bdev req; + struct rpc_destroy_raid_bdev destroy_req; + struct raid_bdev *pbdev; + struct raid_bdev_ctxt *pbdev_ctxt = NULL; + struct spdk_io_channel *ch; + struct raid_bdev_io_channel *ch_ctx; + uint32_t iter; + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_io *bdev_io_next; + uint32_t count; + uint64_t io_len; + uint64_t lba; + TAILQ_HEAD(, spdk_bdev_io) head_io; + + set_globals(); + create_test_req(&req, "raid1", 0, true); + rpc_req = &req; + rpc_req_size = sizeof(req); + CU_ASSERT(raid_bdev_init() == 0); + verify_raid_config_present(req.name, false); + verify_raid_bdev_present(req.name, false); + g_rpc_err = 0; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config(&req, true); + verify_raid_bdev(&req, true, RAID_BDEV_STATE_ONLINE); + TAILQ_FOREACH(pbdev, &g_spdk_raid_bdev_list, link_global_list) { + pbdev_ctxt = SPDK_CONTAINEROF(pbdev, struct raid_bdev_ctxt, raid_bdev); + if (strcmp(pbdev_ctxt->bdev.name, req.name) == 0) { + break; + } + } + SPDK_CU_ASSERT_FATAL(pbdev_ctxt != NULL); + ch = calloc(1, sizeof(struct spdk_io_channel) + sizeof(struct raid_bdev_io_channel)); + SPDK_CU_ASSERT_FATAL(ch != NULL); + ch_ctx = spdk_io_channel_get_ctx(ch); + SPDK_CU_ASSERT_FATAL(ch_ctx != NULL); + + CU_ASSERT(raid_bdev_create_cb(&pbdev_ctxt->raid_bdev, ch_ctx) == 0); + CU_ASSERT(ch_ctx->raid_bdev_ctxt == pbdev_ctxt); + SPDK_CU_ASSERT_FATAL(ch_ctx->base_bdevs_io_channel != NULL); + for (iter = 0; iter < req.base_bdevs.num_base_bdevs; iter++) { + CU_ASSERT(ch_ctx->base_bdevs_io_channel[iter] == (void *)0x1); + } + + lba = 0; + TAILQ_INIT(&head_io); + for (count = 0; count < g_max_qd; count++) { + bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io)); + SPDK_CU_ASSERT_FATAL(bdev_io != NULL); + TAILQ_INSERT_TAIL(&head_io, bdev_io, module_link); + io_len = (rand() % g_max_io_size) + 1; + bdev_io_initialize(bdev_io, lba, io_len, SPDK_BDEV_IO_TYPE_WRITE); + g_bdev_io_submit_status = -ENOMEM; + lba += io_len; + raid_bdev_submit_request(ch, bdev_io); + } + + g_ignore_io_output = 1; + + count = get_num_elts_in_waitq(); + CU_ASSERT(count == g_max_qd); + g_bdev_io_submit_status = 0; + process_io_waitq(); + CU_ASSERT(TAILQ_EMPTY(&g_io_waitq)); + + TAILQ_FOREACH_SAFE(bdev_io, &head_io, module_link, bdev_io_next) { + bdev_io_cleanup(bdev_io); + free(bdev_io); + } + + raid_bdev_destroy_cb(&pbdev_ctxt->raid_bdev, ch_ctx); + CU_ASSERT(ch_ctx->raid_bdev_ctxt == NULL); + CU_ASSERT(ch_ctx->base_bdevs_io_channel == NULL); + g_ignore_io_output = 0; + free(ch); + destroy_req.name = strdup("raid1"); + rpc_req = &destroy_req; + rpc_req_size = sizeof(destroy_req); + g_rpc_err = 0; + spdk_rpc_destroy_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config_present("raid1", false); + 
verify_raid_bdev_present("raid1", false); + + raid_bdev_exit(); + base_bdevs_cleanup(); + reset_globals(); +} + +/* Create multiple raids, destroy raids without IO, get_raids related tests */ +static void +test_multi_raid_no_io(void) +{ + struct rpc_construct_raid_bdev *construct_req; + struct rpc_destroy_raid_bdev destroy_req; + struct rpc_get_raid_bdevs get_raids_req; + uint32_t iter; + char name[16]; + uint32_t count; + uint32_t bbdev_idx = 0; + + set_globals(); + construct_req = calloc(MAX_RAIDS, sizeof(struct rpc_construct_raid_bdev)); + SPDK_CU_ASSERT_FATAL(construct_req != NULL); + CU_ASSERT(raid_bdev_init() == 0); + for (iter = 0; iter < g_max_raids; iter++) { + count = snprintf(name, 16, "%s%u", "raid", iter); + name[count] = '\0'; + create_test_req(&construct_req[iter], name, bbdev_idx, true); + verify_raid_config_present(name, false); + verify_raid_bdev_present(name, false); + bbdev_idx += g_max_base_drives; + rpc_req = &construct_req[iter]; + rpc_req_size = sizeof(construct_req[0]); + g_rpc_err = 0; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config(&construct_req[iter], true); + verify_raid_bdev(&construct_req[iter], true, RAID_BDEV_STATE_ONLINE); + } + + get_raids_req.category = strdup("all"); + rpc_req = &get_raids_req; + rpc_req_size = sizeof(get_raids_req); + g_rpc_err = 0; + g_test_multi_raids = 1; + spdk_rpc_get_raid_bdevs(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_get_raids(construct_req, g_max_raids, g_get_raids_output, g_get_raids_count); + for (iter = 0; iter < g_get_raids_count; iter++) { + free(g_get_raids_output[iter]); + } + g_get_raids_count = 0; + + get_raids_req.category = strdup("online"); + rpc_req = &get_raids_req; + rpc_req_size = sizeof(get_raids_req); + g_rpc_err = 0; + spdk_rpc_get_raid_bdevs(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_get_raids(construct_req, g_max_raids, g_get_raids_output, g_get_raids_count); + for (iter = 0; iter < g_get_raids_count; iter++) { + free(g_get_raids_output[iter]); + } + g_get_raids_count = 0; + + get_raids_req.category = strdup("configuring"); + rpc_req = &get_raids_req; + rpc_req_size = sizeof(get_raids_req); + g_rpc_err = 0; + spdk_rpc_get_raid_bdevs(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + CU_ASSERT(g_get_raids_count == 0); + + get_raids_req.category = strdup("offline"); + rpc_req = &get_raids_req; + rpc_req_size = sizeof(get_raids_req); + g_rpc_err = 0; + spdk_rpc_get_raid_bdevs(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + CU_ASSERT(g_get_raids_count == 0); + + get_raids_req.category = strdup("invalid_category"); + rpc_req = &get_raids_req; + rpc_req_size = sizeof(get_raids_req); + g_rpc_err = 0; + spdk_rpc_get_raid_bdevs(NULL, NULL); + CU_ASSERT(g_rpc_err == 1); + CU_ASSERT(g_get_raids_count == 0); + + get_raids_req.category = strdup("all"); + rpc_req = &get_raids_req; + rpc_req_size = sizeof(get_raids_req); + g_rpc_err = 0; + g_json_decode_obj_err = 1; + spdk_rpc_get_raid_bdevs(NULL, NULL); + CU_ASSERT(g_rpc_err == 1); + g_json_decode_obj_err = 0; + free(get_raids_req.category); + CU_ASSERT(g_get_raids_count == 0); + + get_raids_req.category = strdup("all"); + rpc_req = &get_raids_req; + rpc_req_size = sizeof(get_raids_req); + g_rpc_err = 0; + g_json_beg_res_ret_err = 1; + spdk_rpc_get_raid_bdevs(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + g_json_beg_res_ret_err = 0; + CU_ASSERT(g_get_raids_count == 0); + + for (iter = 0; iter < g_max_raids; iter++) { + SPDK_CU_ASSERT_FATAL(construct_req[iter].name != NULL); + destroy_req.name = 
strdup(construct_req[iter].name); + count = snprintf(name, 16, "%s", destroy_req.name); + name[count] = '\0'; + rpc_req = &destroy_req; + rpc_req_size = sizeof(destroy_req); + g_rpc_err = 0; + spdk_rpc_destroy_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config_present(name, false); + verify_raid_bdev_present(name, false); + } + g_test_multi_raids = 0; + raid_bdev_exit(); + free(construct_req); + base_bdevs_cleanup(); + reset_globals(); +} + +/* Create multiple raids, fire IOs randomly on various raids */ +static void +test_multi_raid_with_io(void) +{ + struct rpc_construct_raid_bdev *construct_req; + struct rpc_destroy_raid_bdev destroy_req; + uint32_t iter, iter2; + char name[16]; + uint32_t count; + uint32_t bbdev_idx = 0; + struct raid_bdev *pbdev; + struct raid_bdev_ctxt *pbdev_ctxt = NULL; + struct spdk_io_channel *ch; + struct raid_bdev_io_channel *ch_ctx; + struct spdk_bdev_io *bdev_io; + uint64_t io_len; + uint64_t lba; + struct spdk_io_channel *ch_random; + struct raid_bdev_io_channel *ch_ctx_random; + int16_t iotype; + uint32_t raid_random; + + set_globals(); + construct_req = calloc(g_max_raids, sizeof(struct rpc_construct_raid_bdev)); + SPDK_CU_ASSERT_FATAL(construct_req != NULL); + CU_ASSERT(raid_bdev_init() == 0); + ch = calloc(g_max_raids, sizeof(struct spdk_io_channel) + sizeof(struct raid_bdev_io_channel)); + SPDK_CU_ASSERT_FATAL(ch != NULL); + for (iter = 0; iter < g_max_raids; iter++) { + count = snprintf(name, 16, "%s%u", "raid", iter); + name[count] = '\0'; + create_test_req(&construct_req[iter], name, bbdev_idx, true); + verify_raid_config_present(name, false); + verify_raid_bdev_present(name, false); + bbdev_idx += g_max_base_drives; + rpc_req = &construct_req[iter]; + rpc_req_size = sizeof(construct_req[0]); + g_rpc_err = 0; + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config(&construct_req[iter], true); + verify_raid_bdev(&construct_req[iter], true, RAID_BDEV_STATE_ONLINE); + TAILQ_FOREACH(pbdev, &g_spdk_raid_bdev_list, link_global_list) { + pbdev_ctxt = SPDK_CONTAINEROF(pbdev, struct raid_bdev_ctxt, raid_bdev); + if (strcmp(pbdev_ctxt->bdev.name, construct_req[iter].name) == 0) { + break; + } + } + CU_ASSERT(pbdev_ctxt != NULL); + ch_ctx = spdk_io_channel_get_ctx(&ch[iter]); + SPDK_CU_ASSERT_FATAL(ch_ctx != NULL); + CU_ASSERT(raid_bdev_create_cb(&pbdev_ctxt->raid_bdev, ch_ctx) == 0); + CU_ASSERT(ch_ctx->raid_bdev_ctxt == pbdev_ctxt); + CU_ASSERT(ch_ctx->base_bdevs_io_channel != NULL); + for (iter2 = 0; iter2 < construct_req[iter].base_bdevs.num_base_bdevs; iter2++) { + CU_ASSERT(ch_ctx->base_bdevs_io_channel[iter2] == (void *)0x1); + } + } + + lba = 0; + for (count = 0; count < g_max_qd; count++) { + bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io)); + SPDK_CU_ASSERT_FATAL(bdev_io != NULL); + io_len = (rand() % g_max_io_size) + 1; + iotype = (rand() % 2) ? 
SPDK_BDEV_IO_TYPE_WRITE : SPDK_BDEV_IO_TYPE_READ;
+		bdev_io_initialize(bdev_io, lba, io_len, iotype);
+		lba += io_len;
+		memset(g_io_output, 0, ((g_max_io_size / g_strip_size) + 1) * sizeof(struct io_output));
+		g_io_output_index = 0;
+		raid_random = rand() % g_max_raids;
+		ch_random = &ch[raid_random];
+		ch_ctx_random = spdk_io_channel_get_ctx(ch_random);
+		TAILQ_FOREACH(pbdev, &g_spdk_raid_bdev_list, link_global_list) {
+			pbdev_ctxt = SPDK_CONTAINEROF(pbdev, struct raid_bdev_ctxt, raid_bdev);
+			if (strcmp(pbdev_ctxt->bdev.name, construct_req[raid_random].name) == 0) {
+				break;
+			}
+		}
+		CU_ASSERT(pbdev_ctxt != NULL);
+		raid_bdev_submit_request(ch_random, bdev_io);
+		verify_io(bdev_io, g_max_base_drives, ch_ctx_random, &pbdev_ctxt->raid_bdev,
+			  g_child_io_status_flag);
+		bdev_io_cleanup(bdev_io);
+		free(bdev_io);
+	}
+
+	for (iter = 0; iter < g_max_raids; iter++) {
+		TAILQ_FOREACH(pbdev, &g_spdk_raid_bdev_list, link_global_list) {
+			pbdev_ctxt = SPDK_CONTAINEROF(pbdev, struct raid_bdev_ctxt, raid_bdev);
+			if (strcmp(pbdev_ctxt->bdev.name, construct_req[iter].name) == 0) {
+				break;
+			}
+		}
+		CU_ASSERT(pbdev_ctxt != NULL);
+		ch_ctx = spdk_io_channel_get_ctx(&ch[iter]);
+		SPDK_CU_ASSERT_FATAL(ch_ctx != NULL);
+		raid_bdev_destroy_cb(&pbdev_ctxt->raid_bdev, ch_ctx);
+		CU_ASSERT(ch_ctx->raid_bdev_ctxt == NULL);
+		CU_ASSERT(ch_ctx->base_bdevs_io_channel == NULL);
+		destroy_req.name = strdup(construct_req[iter].name);
+		count = snprintf(name, 16, "%s", destroy_req.name);
+		name[count] = '\0';
+		rpc_req = &destroy_req;
+		rpc_req_size = sizeof(destroy_req);
+		g_rpc_err = 0;
+		spdk_rpc_destroy_raid_bdev(NULL, NULL);
+		CU_ASSERT(g_rpc_err == 0);
+		verify_raid_config_present(name, false);
+		verify_raid_bdev_present(name, false);
+	}
+	raid_bdev_exit();
+	free(construct_req);
+	free(ch);
+	base_bdevs_cleanup();
+	reset_globals();
+}
+
+static void
+test_io_type_supported(void)
+{
+	CU_ASSERT(raid_bdev_io_type_supported(NULL, SPDK_BDEV_IO_TYPE_READ) == true);
+	CU_ASSERT(raid_bdev_io_type_supported(NULL, SPDK_BDEV_IO_TYPE_WRITE) == true);
+	CU_ASSERT(raid_bdev_io_type_supported(NULL, SPDK_BDEV_IO_TYPE_FLUSH) == true);
+	CU_ASSERT(raid_bdev_io_type_supported(NULL, SPDK_BDEV_IO_TYPE_INVALID) == false);
+}
+
+static void
+test_create_raid_from_config(void)
+{
+	struct rpc_construct_raid_bdev req;
+	struct spdk_bdev *bdev;
+	struct rpc_destroy_raid_bdev destroy_req;
+
+	set_globals();
+	create_test_req(&req, "raid1", 0, true);
+	rpc_req = &req;
+	rpc_req_size = sizeof(req);
+	g_config_level_create = 1;
+	CU_ASSERT(raid_bdev_init() == 0);
+	g_config_level_create = 0;
+
+	verify_raid_config_present("raid1", true);
+	verify_raid_bdev_present("raid1", false);
+
+	TAILQ_FOREACH(bdev, &g_bdev_list, internal.link) {
+		raid_bdev_examine(bdev);
+	}
+
+	bdev = calloc(1, sizeof(struct spdk_bdev));
+	SPDK_CU_ASSERT_FATAL(bdev != NULL);
+	bdev->name = strdup("Invalid");
+	SPDK_CU_ASSERT_FATAL(bdev->name != NULL);
+	CU_ASSERT(raid_bdev_add_base_device(bdev) != 0);
+	free(bdev->name);
+	free(bdev);
+
+	verify_raid_config(&req, true);
+	verify_raid_bdev(&req, true, RAID_BDEV_STATE_ONLINE);
+
+	destroy_req.name = strdup("raid1");
+	rpc_req = &destroy_req;
+	rpc_req_size = sizeof(destroy_req);
+	g_rpc_err = 0;
+	spdk_rpc_destroy_raid_bdev(NULL, NULL);
+	CU_ASSERT(g_rpc_err == 0);
+	verify_raid_config_present("raid1", false);
+	verify_raid_bdev_present("raid1", false);
+
+	raid_bdev_exit();
+	free_test_req(&req);
+	base_bdevs_cleanup();
+	reset_globals();
+}
+
+static void
+test_create_raid_from_config_invalid_params(void)
+{
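+	/* Each malformed request below must make raid_bdev_init() fail during config-file processing (g_config_level_create == 1) and leave no raid config or bdev behind. */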
+	struct rpc_construct_raid_bdev req;
+	uint8_t count;
+
+	set_globals();
+	rpc_req = &req;
+	rpc_req_size = sizeof(req);
+	g_config_level_create = 1;
+
+	create_test_req(&req, "raid1", 0, true);
+	free(req.name);
+	req.name = NULL;
+	CU_ASSERT(raid_bdev_init() != 0);
+	free_test_req(&req);
+	verify_raid_config_present("raid1", false);
+	verify_raid_bdev_present("raid1", false);
+
+	create_test_req(&req, "raid1", 0, false);
+	req.strip_size = 1234;
+	CU_ASSERT(raid_bdev_init() != 0);
+	free_test_req(&req);
+	verify_raid_config_present("raid1", false);
+	verify_raid_bdev_present("raid1", false);
+
+	create_test_req(&req, "raid1", 0, false);
+	req.raid_level = 1;
+	CU_ASSERT(raid_bdev_init() != 0);
+	free_test_req(&req);
+	verify_raid_config_present("raid1", false);
+	verify_raid_bdev_present("raid1", false);
+
+	create_test_req(&req, "raid1", 0, false);
+	req.base_bdevs.num_base_bdevs++;
+	CU_ASSERT(raid_bdev_init() != 0);
+	req.base_bdevs.num_base_bdevs--;
+	free_test_req(&req);
+	verify_raid_config_present("raid1", false);
+	verify_raid_bdev_present("raid1", false);
+
+	create_test_req(&req, "raid1", 0, false);
+	req.base_bdevs.num_base_bdevs--;
+	CU_ASSERT(raid_bdev_init() != 0);
+	req.base_bdevs.num_base_bdevs++;
+	free_test_req(&req);
+	verify_raid_config_present("raid1", false);
+	verify_raid_bdev_present("raid1", false);
+
+	if (g_max_base_drives > 1) {
+		create_test_req(&req, "raid1", 0, false);
+		count = snprintf(req.base_bdevs.base_bdevs[g_max_base_drives - 1], 15, "%s", "Nvme0n1");
+		req.base_bdevs.base_bdevs[g_max_base_drives - 1][count] = '\0';
+		CU_ASSERT(raid_bdev_init() != 0);
+		free_test_req(&req);
+		verify_raid_config_present("raid1", false);
+		verify_raid_bdev_present("raid1", false);
+	}
+
+	raid_bdev_exit();
+	base_bdevs_cleanup();
+	reset_globals();
+}
+
+static void
+test_raid_json_dump_info(void)
+{
+	struct rpc_construct_raid_bdev req;
+	struct rpc_destroy_raid_bdev destroy_req;
+	struct raid_bdev *pbdev;
+	struct raid_bdev_ctxt *pbdev_ctxt = NULL;
+
+	set_globals();
+	create_test_req(&req, "raid1", 0, true);
+	rpc_req = &req;
+	rpc_req_size = sizeof(req);
+	CU_ASSERT(raid_bdev_init() == 0);
+
+	verify_raid_config_present(req.name, false);
+	verify_raid_bdev_present(req.name, false);
+	g_rpc_err = 0;
+	spdk_rpc_construct_raid_bdev(NULL, NULL);
+	CU_ASSERT(g_rpc_err == 0);
+	verify_raid_bdev(&req, true, RAID_BDEV_STATE_ONLINE);
+
+	TAILQ_FOREACH(pbdev, &g_spdk_raid_bdev_list, link_global_list) {
+		pbdev_ctxt = SPDK_CONTAINEROF(pbdev, struct raid_bdev_ctxt, raid_bdev);
+		if (strcmp(pbdev_ctxt->bdev.name, req.name) == 0) {
+			break;
+		}
+	}
+	CU_ASSERT(pbdev_ctxt != NULL);
+
+	CU_ASSERT(raid_bdev_dump_info_json(pbdev_ctxt, NULL) == 0);
+
+	destroy_req.name = strdup("raid1");
+	rpc_req = &destroy_req;
+	rpc_req_size = sizeof(destroy_req);
+	g_rpc_err = 0;
+	spdk_rpc_destroy_raid_bdev(NULL, NULL);
+	CU_ASSERT(g_rpc_err == 0);
+	verify_raid_config_present("raid1", false);
+	verify_raid_bdev_present("raid1", false);
+
+	raid_bdev_exit();
+	base_bdevs_cleanup();
+	reset_globals();
+}
+
+static void
+test_context_size(void)
+{
+	CU_ASSERT(raid_bdev_get_ctx_size() == sizeof(struct raid_bdev_io));
+}
+
+static void
+test_asym_base_drives_blockcnt(void)
+{
+	struct rpc_construct_raid_bdev construct_req;
+	struct rpc_destroy_raid_bdev destroy_req;
+	struct spdk_bdev 
*bbdev; + uint32_t iter; + + set_globals(); + create_test_req(&construct_req, "raid1", 0, true); + rpc_req = &construct_req; + rpc_req_size = sizeof(construct_req); + CU_ASSERT(raid_bdev_init() == 0); + verify_raid_config_present(construct_req.name, false); + verify_raid_bdev_present(construct_req.name, false); + g_rpc_err = 0; + for (iter = 0; iter < construct_req.base_bdevs.num_base_bdevs; iter++) { + bbdev = spdk_bdev_get_by_name(construct_req.base_bdevs.base_bdevs[iter]); + SPDK_CU_ASSERT_FATAL(bbdev != NULL); + bbdev->blockcnt = rand() + 1; + } + spdk_rpc_construct_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config(&construct_req, true); + verify_raid_bdev(&construct_req, true, RAID_BDEV_STATE_ONLINE); + + destroy_req.name = strdup("raid1"); + rpc_req = &destroy_req; + rpc_req_size = sizeof(destroy_req); + g_rpc_err = 0; + spdk_rpc_destroy_raid_bdev(NULL, NULL); + CU_ASSERT(g_rpc_err == 0); + verify_raid_config_present("raid1", false); + verify_raid_bdev_present("raid1", false); + + raid_bdev_exit(); + base_bdevs_cleanup(); + reset_globals(); +} + +int main(int argc, char **argv) +{ + CU_pSuite suite = NULL; + unsigned int num_failures; + + if (CU_initialize_registry() != CUE_SUCCESS) { + return CU_get_error(); + } + + suite = CU_add_suite("raid", NULL, NULL); + if (suite == NULL) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if ( + CU_add_test(suite, "test_construct_raid", test_construct_raid) == NULL || + CU_add_test(suite, "test_destroy_raid", test_destroy_raid) == NULL || + CU_add_test(suite, "test_construct_raid_invalid_args", test_construct_raid_invalid_args) == NULL || + CU_add_test(suite, "test_destroy_raid_invalid_args", test_destroy_raid_invalid_args) == NULL || + CU_add_test(suite, "test_io_channel", test_io_channel) == NULL || + CU_add_test(suite, "test_write_io", test_write_io) == NULL || + CU_add_test(suite, "test_read_io", test_read_io) == NULL || + CU_add_test(suite, "test_io_failure", test_io_failure) == NULL || + CU_add_test(suite, "test_io_waitq", test_io_waitq) == NULL || + CU_add_test(suite, "test_multi_raid_no_io", test_multi_raid_no_io) == NULL || + CU_add_test(suite, "test_multi_raid_with_io", test_multi_raid_with_io) == NULL || + CU_add_test(suite, "test_io_type_supported", test_io_type_supported) == NULL || + CU_add_test(suite, "test_create_raid_from_config", test_create_raid_from_config) == NULL || + CU_add_test(suite, "test_create_raid_from_config_invalid_params", + test_create_raid_from_config_invalid_params) == NULL || + CU_add_test(suite, "test_raid_json_dump_info", test_raid_json_dump_info) == NULL || + CU_add_test(suite, "test_context_size", test_context_size) == NULL || + CU_add_test(suite, "test_asym_base_drives_blockcnt", test_asym_base_drives_blockcnt) == NULL + ) { + CU_cleanup_registry(); + return CU_get_error(); + } + + CU_basic_set_mode(CU_BRM_VERBOSE); + set_test_opts(); + CU_basic_run_tests(); + num_failures = CU_get_number_of_failures(); + CU_cleanup_registry(); + return num_failures; +} diff --git a/test/unit/unittest.sh b/test/unit/unittest.sh index b1a545086..ef003e7b6 100755 --- a/test/unit/unittest.sh +++ b/test/unit/unittest.sh @@ -48,6 +48,7 @@ fi $valgrind $testdir/include/spdk/histogram_data.h/histogram_ut $valgrind $testdir/lib/bdev/bdev.c/bdev_ut +$valgrind $testdir/lib/bdev/bdev_raid.c/bdev_raid_ut $valgrind $testdir/lib/bdev/part.c/part_ut $valgrind $testdir/lib/bdev/scsi_nvme.c/scsi_nvme_ut $valgrind $testdir/lib/bdev/gpt/gpt.c/gpt_ut