diff --git a/CONFIG b/CONFIG index e77332bfd..25be84fd7 100644 --- a/CONFIG +++ b/CONFIG @@ -160,3 +160,6 @@ CONFIG_URING_PATH= # Build with FUSE support CONFIG_FUSE=n + +# Build with RAID5 support +CONFIG_RAID5=n diff --git a/configure b/configure index 095fb7924..f99f12aa6 100755 --- a/configure +++ b/configure @@ -96,6 +96,8 @@ function usage() echo " No path required." echo " nvme-cuse Build NVMe driver with support for CUSE-based character devices." echo " No path required." + echo " raid5 Build with bdev_raid module RAID5 support." + echo " No path required." echo "" echo "Environment variables:" echo "" @@ -409,6 +411,12 @@ for i in "$@"; do --without-nvme-cuse) CONFIG[NVME_CUSE]=n ;; + --with-raid5) + CONFIG[RAID5]=y + ;; + --without-raid5) + CONFIG[RAID5]=n + ;; --) break ;; diff --git a/module/bdev/raid/Makefile b/module/bdev/raid/Makefile index d8114517f..5e503baa9 100644 --- a/module/bdev/raid/Makefile +++ b/module/bdev/raid/Makefile @@ -36,6 +36,11 @@ include $(SPDK_ROOT_DIR)/mk/spdk.common.mk CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/ C_SRCS = bdev_raid.c bdev_raid_rpc.c raid0.c + +ifeq ($(CONFIG_RAID5),y) +C_SRCS += raid5.c +endif + LIBNAME = bdev_raid include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/module/bdev/raid/bdev_raid.c b/module/bdev/raid/bdev_raid.c index 1f5b54c6d..5eb4da557 100644 --- a/module/bdev/raid/bdev_raid.c +++ b/module/bdev/raid/bdev_raid.c @@ -846,6 +846,8 @@ static struct { } g_raid_level_names[] = { { "raid0", RAID0 }, { "0", RAID0 }, + { "raid5", RAID5 }, + { "5", RAID5 }, { } }; diff --git a/module/bdev/raid/bdev_raid.h b/module/bdev/raid/bdev_raid.h index 767452761..a55092cd7 100644 --- a/module/bdev/raid/bdev_raid.h +++ b/module/bdev/raid/bdev_raid.h @@ -39,6 +39,7 @@ enum raid_level { INVALID_RAID_LEVEL = -1, RAID0 = 0, + RAID5 = 5, }; /* @@ -162,6 +163,9 @@ struct raid_bdev { /* Module for RAID-level specific operations */ struct raid_bdev_module *module; + + /* Private data for the raid module */ + void *module_private; 
}; #define RAID_FOR_EACH_BASE_BDEV(r, i) \ @@ -262,6 +266,12 @@ struct raid_bdev_module { /* Minimum required number of base bdevs. Must be > 0. */ uint8_t base_bdevs_min; + /* + * Maximum number of base bdevs that can be removed without failing + * the array. + */ + uint8_t base_bdevs_max_degraded; + /* * Called when the raid is starting, right before changing the state to * online and registering the bdev. Parameters of the bdev like blockcnt diff --git a/module/bdev/raid/raid5.c b/module/bdev/raid/raid5.c new file mode 100644 index 000000000..a876381e3 --- /dev/null +++ b/module/bdev/raid/raid5.c @@ -0,0 +1,114 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "bdev_raid.h" + +#include "spdk/env.h" +#include "spdk/io_channel.h" +#include "spdk/string.h" +#include "spdk/util.h" + +#include "spdk_internal/log.h" + +struct raid5_info { + /* The parent raid bdev */ + struct raid_bdev *raid_bdev; + + /* Number of data blocks in a stripe (without parity) */ + uint64_t stripe_blocks; + + /* Number of stripes on this array */ + uint64_t total_stripes; +}; + +static inline uint8_t +raid5_stripe_data_chunks_num(const struct raid_bdev *raid_bdev) +{ + return raid_bdev->num_base_bdevs - raid_bdev->module->base_bdevs_max_degraded; +} + +static void +raid5_submit_rw_request(struct raid_bdev_io *raid_io) +{ + raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); +} + +static int +raid5_start(struct raid_bdev *raid_bdev) +{ + uint64_t min_blockcnt = UINT64_MAX; + struct raid_base_bdev_info *base_info; + struct raid5_info *r5info; + + r5info = calloc(1, sizeof(*r5info)); + if (!r5info) { + SPDK_ERRLOG("Failed to allocate r5info\n"); + return -ENOMEM; + } + r5info->raid_bdev = raid_bdev; + + RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { + min_blockcnt = spdk_min(min_blockcnt, base_info->bdev->blockcnt); + } + + r5info->total_stripes = min_blockcnt / raid_bdev->strip_size; + r5info->stripe_blocks = raid_bdev->strip_size * raid5_stripe_data_chunks_num(raid_bdev); + + raid_bdev->bdev.blockcnt = r5info->stripe_blocks * r5info->total_stripes; + raid_bdev->bdev.optimal_io_boundary = 
r5info->stripe_blocks; + raid_bdev->bdev.split_on_optimal_io_boundary = true; + + raid_bdev->module_private = r5info; + + return 0; +} + +static void +raid5_stop(struct raid_bdev *raid_bdev) +{ + struct raid5_info *r5info = raid_bdev->module_private; + + free(r5info); +} + +static struct raid_bdev_module g_raid5_module = { + .level = RAID5, + .base_bdevs_min = 3, + .base_bdevs_max_degraded = 1, + .start = raid5_start, + .stop = raid5_stop, + .submit_rw_request = raid5_submit_rw_request, +}; +RAID_MODULE_REGISTER(&g_raid5_module) + +SPDK_LOG_REGISTER_COMPONENT("bdev_raid5", SPDK_LOG_BDEV_RAID5) diff --git a/test/common/autotest_common.sh b/test/common/autotest_common.sh index 653295e25..2449a8557 100644 --- a/test/common/autotest_common.sh +++ b/test/common/autotest_common.sh @@ -87,6 +87,7 @@ export RUN_NIGHTLY_FAILING : ${SPDK_TEST_VMD=0}; export SPDK_TEST_VMD : ${SPDK_TEST_OPAL=0}; export SPDK_TEST_OPAL : ${SPDK_AUTOTEST_X=true}; export SPDK_AUTOTEST_X +: ${SPDK_TEST_RAID5=0}; export SPDK_TEST_RAID5 # Export PYTHONPATH with addition of RPC framework. New scripts can be created # specific use cases for tests. @@ -253,6 +254,10 @@ if [ $SPDK_TEST_BLOBFS -eq 1 ]; then fi fi +if [ $SPDK_TEST_RAID5 -eq 1 ]; then + config_params+=' --with-raid5' +fi + # By default, --with-dpdk is not set meaning the SPDK build will use the DPDK submodule. # If a DPDK installation is found in a well-known location though, WITH_DPDK_DIR will be # set which will override the default and use that DPDK installation instead. 
diff --git a/test/unit/lib/bdev/raid/Makefile b/test/unit/lib/bdev/raid/Makefile index e20f4a82f..0090a85ce 100644 --- a/test/unit/lib/bdev/raid/Makefile +++ b/test/unit/lib/bdev/raid/Makefile @@ -36,6 +36,8 @@ include $(SPDK_ROOT_DIR)/mk/spdk.common.mk DIRS-y = bdev_raid.c +DIRS-$(CONFIG_RAID5) += raid5.c + .PHONY: all clean $(DIRS-y) all: $(DIRS-y) diff --git a/test/unit/lib/bdev/raid/raid5.c/.gitignore b/test/unit/lib/bdev/raid/raid5.c/.gitignore new file mode 100644 index 000000000..946026bf5 --- /dev/null +++ b/test/unit/lib/bdev/raid/raid5.c/.gitignore @@ -0,0 +1 @@ +raid5_ut diff --git a/test/unit/lib/bdev/raid/raid5.c/Makefile b/test/unit/lib/bdev/raid/raid5.c/Makefile new file mode 100644 index 000000000..ddb733333 --- /dev/null +++ b/test/unit/lib/bdev/raid/raid5.c/Makefile @@ -0,0 +1,38 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../../../..) + +TEST_FILE = raid5_ut.c + +include $(SPDK_ROOT_DIR)/mk/spdk.unittest.mk diff --git a/test/unit/lib/bdev/raid/raid5.c/raid5_ut.c b/test/unit/lib/bdev/raid/raid5.c/raid5_ut.c new file mode 100644 index 000000000..22d560d7a --- /dev/null +++ b/test/unit/lib/bdev/raid/raid5.c/raid5_ut.c @@ -0,0 +1,223 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" +#include "spdk_cunit.h" +#include "spdk/env.h" +#include "spdk_internal/mock.h" + +#include "bdev/raid/raid5.c" + +DEFINE_STUB_V(raid_bdev_module_list_add, (struct raid_bdev_module *raid_module)); +DEFINE_STUB_V(raid_bdev_io_complete, (struct raid_bdev_io *raid_io, + enum spdk_bdev_io_status status)); + +struct raid5_params { + uint8_t num_base_bdevs; + uint64_t base_bdev_blockcnt; + uint32_t base_bdev_blocklen; + uint32_t strip_size; +}; + +static struct raid5_params *g_params; +static size_t g_params_count; + +#define ARRAY_FOR_EACH(a, e) \ + for (e = a; e < a + SPDK_COUNTOF(a); e++) + +#define RAID5_PARAMS_FOR_EACH(p) \ + for (p = g_params; p < g_params + g_params_count; p++) + +static int +test_setup(void) +{ + uint8_t num_base_bdevs_values[] = { 3, 4, 5 }; + uint64_t base_bdev_blockcnt_values[] = { 1, 1024, 1024 * 1024 }; + uint32_t base_bdev_blocklen_values[] = { 512, 4096 }; + uint32_t strip_size_kb_values[] = { 1, 4, 128 }; + uint8_t *num_base_bdevs; + uint64_t *base_bdev_blockcnt; + uint32_t *base_bdev_blocklen; + uint32_t *strip_size_kb; + struct raid5_params *params; + + g_params_count = SPDK_COUNTOF(num_base_bdevs_values) * + SPDK_COUNTOF(base_bdev_blockcnt_values) * + SPDK_COUNTOF(base_bdev_blocklen_values) * + SPDK_COUNTOF(strip_size_kb_values); + g_params = calloc(g_params_count, sizeof(*g_params)); + if (!g_params) { + return -ENOMEM; + } + + params = g_params; + + 
ARRAY_FOR_EACH(num_base_bdevs_values, num_base_bdevs) { + ARRAY_FOR_EACH(base_bdev_blockcnt_values, base_bdev_blockcnt) { + ARRAY_FOR_EACH(base_bdev_blocklen_values, base_bdev_blocklen) { + ARRAY_FOR_EACH(strip_size_kb_values, strip_size_kb) { + params->num_base_bdevs = *num_base_bdevs; + params->base_bdev_blockcnt = *base_bdev_blockcnt; + params->base_bdev_blocklen = *base_bdev_blocklen; + params->strip_size = *strip_size_kb * 1024 / *base_bdev_blocklen; + if (params->strip_size == 0 || + params->strip_size > *base_bdev_blockcnt) { + g_params_count--; + continue; + } + params++; + } + } + } + } + + return 0; +} + +static int +test_cleanup(void) +{ + free(g_params); + return 0; +} + +static struct raid_bdev * +create_raid_bdev(struct raid5_params *params) +{ + struct raid_bdev *raid_bdev; + struct raid_base_bdev_info *base_info; + + raid_bdev = calloc(1, sizeof(*raid_bdev)); + SPDK_CU_ASSERT_FATAL(raid_bdev != NULL); + + raid_bdev->module = &g_raid5_module; + raid_bdev->num_base_bdevs = params->num_base_bdevs; + raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs, + sizeof(struct raid_base_bdev_info)); + SPDK_CU_ASSERT_FATAL(raid_bdev->base_bdev_info != NULL); + + RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { + base_info->bdev = calloc(1, sizeof(*base_info->bdev)); + SPDK_CU_ASSERT_FATAL(base_info->bdev != NULL); + + base_info->bdev->blockcnt = params->base_bdev_blockcnt; + base_info->bdev->blocklen = params->base_bdev_blocklen; + } + + raid_bdev->strip_size = params->strip_size; + raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size); + raid_bdev->bdev.blocklen = params->base_bdev_blocklen; + + return raid_bdev; +} + +static void +delete_raid_bdev(struct raid_bdev *raid_bdev) +{ + struct raid_base_bdev_info *base_info; + + RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { + free(base_info->bdev); + } + free(raid_bdev->base_bdev_info); + free(raid_bdev); +} + +static struct raid5_info * +create_raid5(struct raid5_params *params) +{ + struct 
raid_bdev *raid_bdev = create_raid_bdev(params); + + SPDK_CU_ASSERT_FATAL(raid5_start(raid_bdev) == 0); + + return raid_bdev->module_private; +} + +static void +delete_raid5(struct raid5_info *r5info) +{ + struct raid_bdev *raid_bdev = r5info->raid_bdev; + + raid5_stop(raid_bdev); + + delete_raid_bdev(raid_bdev); +} + +static void +test_raid5_start(void) +{ + struct raid5_params *params; + + RAID5_PARAMS_FOR_EACH(params) { + struct raid5_info *r5info; + + r5info = create_raid5(params); + + CU_ASSERT_EQUAL(r5info->stripe_blocks, params->strip_size * (params->num_base_bdevs - 1)); + CU_ASSERT_EQUAL(r5info->total_stripes, params->base_bdev_blockcnt / params->strip_size); + CU_ASSERT_EQUAL(r5info->raid_bdev->bdev.blockcnt, + (params->base_bdev_blockcnt - params->base_bdev_blockcnt % params->strip_size) * + (params->num_base_bdevs - 1)); + CU_ASSERT_EQUAL(r5info->raid_bdev->bdev.optimal_io_boundary, r5info->stripe_blocks); + + delete_raid5(r5info); + } +} + +int +main(int argc, char **argv) +{ + CU_pSuite suite = NULL; + unsigned int num_failures; + + if (CU_initialize_registry() != CUE_SUCCESS) { + return CU_get_error(); + } + + suite = CU_add_suite("raid5", test_setup, test_cleanup); + if (suite == NULL) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if (CU_add_test(suite, "test_raid5_start", test_raid5_start) == NULL) { + CU_cleanup_registry(); + return CU_get_error(); + } + + CU_basic_set_mode(CU_BRM_VERBOSE); + CU_basic_run_tests(); + num_failures = CU_get_number_of_failures(); + CU_cleanup_registry(); + return num_failures; +} diff --git a/test/unit/unittest.sh b/test/unit/unittest.sh index 20524dec5..b0d35c670 100755 --- a/test/unit/unittest.sh +++ b/test/unit/unittest.sh @@ -168,6 +168,10 @@ if [ $SPDK_TEST_PMDK -eq 1 ]; then run_test "unittest_bdev_pmem" $valgrind $testdir/lib/bdev/pmem/bdev_pmem_ut fi +if [ $SPDK_TEST_RAID5 -eq 1 ]; then + run_test "unittest_bdev_raid5" $valgrind $testdir/lib/bdev/raid/raid5.c/raid5_ut +fi + run_test 
"unittest_blob_blobfs" unittest_blob run_test "unittest_event" unittest_event if [ $(uname -s) = Linux ]; then