module/raid: allow assembly of a degraded raid
Add num_base_bdevs_operational to raid_bdev and use it to determine the required number of base bdevs.

Change-Id: I31b39cc8ea708b6cdce748f015949e4c9fdeb3cd
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
This commit is contained in:
parent
ea4b2f6d75
commit
e325fbafec
@ -10045,6 +10045,7 @@ Example response:
|
||||
"raid_level": "raid0",
|
||||
"num_base_bdevs": 2,
|
||||
"num_base_bdevs_discovered": 2,
|
||||
"num_base_bdevs_operational": 2,
|
||||
"base_bdevs_list": [
|
||||
{
|
||||
"name": "malloc0",
|
||||
@ -10070,6 +10071,7 @@ Example response:
|
||||
"raid_level": "raid0",
|
||||
"num_base_bdevs": 2,
|
||||
"num_base_bdevs_discovered": 1,
|
||||
"num_base_bdevs_operational": 2,
|
||||
"base_bdevs_list": [
|
||||
{
|
||||
"name": "malloc2",
|
||||
|
@ -640,6 +640,8 @@ raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ct
|
||||
spdk_json_write_named_bool(w, "superblock", raid_bdev->sb != NULL);
|
||||
spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs);
|
||||
spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered);
|
||||
spdk_json_write_named_uint32(w, "num_base_bdevs_operational",
|
||||
raid_bdev->num_base_bdevs_operational);
|
||||
spdk_json_write_name(w, "base_bdevs_list");
|
||||
spdk_json_write_array_begin(w);
|
||||
RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
|
||||
@ -1142,6 +1144,8 @@ raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs,
|
||||
return rc;
|
||||
}
|
||||
|
||||
raid_bdev->num_base_bdevs_operational = num_base_bdevs;
|
||||
|
||||
if (superblock) {
|
||||
spdk_uuid_generate(&raid_bdev->bdev.uuid);
|
||||
}
|
||||
@ -1170,6 +1174,10 @@ raid_bdev_configure_md(struct raid_bdev *raid_bdev)
|
||||
for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
|
||||
base_bdev = raid_bdev->base_bdev_info[i].bdev;
|
||||
|
||||
if (base_bdev == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (i == 0) {
|
||||
raid_bdev->bdev.md_len = spdk_bdev_get_md_size(base_bdev);
|
||||
raid_bdev->bdev.md_interleave = spdk_bdev_is_md_interleaved(base_bdev);
|
||||
@ -1350,10 +1358,12 @@ raid_bdev_configure(struct raid_bdev *raid_bdev)
|
||||
int rc = 0;
|
||||
|
||||
assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING);
|
||||
assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs);
|
||||
assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs_operational);
|
||||
|
||||
RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
|
||||
assert(base_info->bdev != NULL);
|
||||
if (base_info->bdev == NULL) {
|
||||
continue;
|
||||
}
|
||||
/* Check blocklen for all base bdevs that it should be same */
|
||||
if (blocklen == 0) {
|
||||
blocklen = base_info->bdev->blocklen;
|
||||
@ -1739,7 +1749,7 @@ raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev)
|
||||
/* There is no base bdev for this raid, so free the raid device. */
|
||||
raid_bdev_cleanup_and_free(raid_bdev);
|
||||
}
|
||||
} else if (raid_bdev->num_base_bdevs_discovered == raid_bdev->min_base_bdevs_operational) {
|
||||
} else if (raid_bdev->num_base_bdevs_operational-- == raid_bdev->min_base_bdevs_operational) {
|
||||
raid_bdev_deconfigure(raid_bdev, NULL, NULL);
|
||||
} else {
|
||||
return raid_bdev_suspend(raid_bdev, raid_bdev_remove_base_bdev_on_suspended, base_info);
|
||||
@ -1878,8 +1888,10 @@ raid_bdev_configure_base_bdev_cont(struct raid_base_bdev_info *base_info)
|
||||
|
||||
raid_bdev->num_base_bdevs_discovered++;
|
||||
assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);
|
||||
assert(raid_bdev->num_base_bdevs_operational <= raid_bdev->num_base_bdevs);
|
||||
assert(raid_bdev->num_base_bdevs_operational >= raid_bdev->min_base_bdevs_operational);
|
||||
|
||||
if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) {
|
||||
if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs_operational) {
|
||||
rc = raid_bdev_configure(raid_bdev);
|
||||
if (rc != 0) {
|
||||
SPDK_ERRLOG("Failed to configure raid bdev: %s\n", spdk_strerror(-rc));
|
||||
@ -2047,8 +2059,6 @@ _raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name,
|
||||
struct raid_base_bdev_info *base_info;
|
||||
int rc;
|
||||
|
||||
assert(name != NULL || uuid != NULL);
|
||||
|
||||
if (slot >= raid_bdev->num_base_bdevs) {
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -2084,6 +2094,10 @@ _raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name,
|
||||
base_info->data_offset = data_offset;
|
||||
base_info->data_size = data_size;
|
||||
|
||||
if (name == NULL && uuid == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
rc = raid_bdev_configure_base_bdev(base_info);
|
||||
if (rc != 0) {
|
||||
if (rc != -ENODEV) {
|
||||
@ -2111,6 +2125,8 @@ _raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name,
|
||||
int
|
||||
raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t slot)
|
||||
{
|
||||
assert(name != NULL);
|
||||
|
||||
return _raid_bdev_add_base_device(raid_bdev, name, NULL, slot, 0, 0);
|
||||
}
|
||||
|
||||
@ -2119,8 +2135,15 @@ raid_bdev_add_base_device_from_sb(struct raid_bdev *raid_bdev,
|
||||
const struct raid_bdev_sb_base_bdev *sb_base_bdev)
|
||||
{
|
||||
int rc;
|
||||
const struct spdk_uuid *uuid;
|
||||
|
||||
rc = _raid_bdev_add_base_device(raid_bdev, NULL, &sb_base_bdev->uuid, sb_base_bdev->slot,
|
||||
if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED) {
|
||||
uuid = &sb_base_bdev->uuid;
|
||||
} else {
|
||||
uuid = NULL;
|
||||
}
|
||||
|
||||
rc = _raid_bdev_add_base_device(raid_bdev, NULL, uuid, sb_base_bdev->slot,
|
||||
sb_base_bdev->data_offset, sb_base_bdev->data_size);
|
||||
|
||||
if (rc == -ENODEV) {
|
||||
@ -2149,15 +2172,17 @@ raid_bdev_create_from_sb(const struct raid_bdev_superblock *sb)
|
||||
memcpy(raid_bdev->sb, sb, sb->length);
|
||||
|
||||
for (i = 0; i < sb->base_bdevs_size; i++) {
|
||||
const struct raid_bdev_sb_base_bdev *sb_base_bdev = &sb->base_bdevs[i];
|
||||
if (sb->base_bdevs[i].state == RAID_SB_BASE_BDEV_CONFIGURED) {
|
||||
raid_bdev->num_base_bdevs_operational++;
|
||||
}
|
||||
}
|
||||
|
||||
if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED) {
|
||||
rc = raid_bdev_add_base_device_from_sb(raid_bdev, sb_base_bdev);
|
||||
for (i = 0; i < sb->base_bdevs_size; i++) {
|
||||
rc = raid_bdev_add_base_device_from_sb(raid_bdev, &sb->base_bdevs[i]);
|
||||
if (rc != 0) {
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
err:
|
||||
|
@ -153,6 +153,9 @@ struct raid_bdev {
|
||||
/* number of base bdevs discovered */
|
||||
uint8_t num_base_bdevs_discovered;
|
||||
|
||||
/* number of operational base bdevs */
|
||||
uint8_t num_base_bdevs_operational;
|
||||
|
||||
/* minimum number of viable base bdevs that are required by array to operate */
|
||||
uint8_t min_base_bdevs_operational;
|
||||
|
||||
|
@ -1089,8 +1089,10 @@ raid5f_start(struct raid_bdev *raid_bdev)
|
||||
|
||||
RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
|
||||
min_blockcnt = spdk_min(min_blockcnt, base_info->data_size);
|
||||
if (base_info->bdev) {
|
||||
alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_info->bdev));
|
||||
}
|
||||
}
|
||||
|
||||
base_bdev_data_size = (min_blockcnt / raid_bdev->strip_size) * raid_bdev->strip_size;
|
||||
|
||||
|
@ -124,6 +124,7 @@ function verify_raid_bdev_state() (
|
||||
local expected_state=$2
|
||||
local raid_level=$3
|
||||
local strip_size=$4
|
||||
local num_base_bdevs_operational=$5
|
||||
local raid_bdev
|
||||
local raid_bdev_info
|
||||
local num_base_bdevs
|
||||
@ -173,6 +174,12 @@ function verify_raid_bdev_state() (
|
||||
echo "incorrect num_base_bdevs_discovered: $tmp, expected: $num_base_bdevs_discovered"
|
||||
return 1
|
||||
fi
|
||||
|
||||
tmp=$(echo $raid_bdev_info | jq -r '.num_base_bdevs_operational')
|
||||
if [ "$num_base_bdevs_operational" != "$tmp" ]; then
|
||||
echo "incorrect num_base_bdevs_operational $tmp, expected: $num_base_bdevs_operational"
|
||||
return 1
|
||||
fi
|
||||
)
|
||||
|
||||
function has_redundancy() {
|
||||
@ -206,7 +213,7 @@ function raid_state_function_test() {
|
||||
# Step1: create a RAID bdev with no base bdevs
|
||||
# Expect state: CONFIGURING
|
||||
$rpc_py bdev_raid_create $strip_size_create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size; then
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs; then
|
||||
return 1
|
||||
fi
|
||||
$rpc_py bdev_raid_delete $raid_bdev_name
|
||||
@ -216,7 +223,7 @@ function raid_state_function_test() {
|
||||
$rpc_py bdev_raid_create $strip_size_create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name
|
||||
$rpc_py bdev_malloc_create 32 512 -b ${base_bdevs[0]}
|
||||
waitforbdev ${base_bdevs[0]}
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size; then
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs; then
|
||||
return 1
|
||||
fi
|
||||
$rpc_py bdev_raid_delete $raid_bdev_name
|
||||
@ -225,13 +232,13 @@ function raid_state_function_test() {
|
||||
# Expect state: ONLINE
|
||||
$rpc_py bdev_raid_create $strip_size_create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name
|
||||
for ((i = 1; i < num_base_bdevs; i++)); do
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size; then
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs; then
|
||||
return 1
|
||||
fi
|
||||
$rpc_py bdev_malloc_create 32 512 -b ${base_bdevs[$i]}
|
||||
waitforbdev ${base_bdevs[$i]}
|
||||
done
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size; then
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
@ -243,7 +250,7 @@ function raid_state_function_test() {
|
||||
else
|
||||
expected_state="online"
|
||||
fi
|
||||
if ! verify_raid_bdev_state $raid_bdev_name $expected_state $raid_level $strip_size; then
|
||||
if ! verify_raid_bdev_state $raid_bdev_name $expected_state $raid_level $strip_size $((num_base_bdevs - 1)); then
|
||||
return 1
|
||||
fi
|
||||
|
||||
@ -353,7 +360,7 @@ function raid_superblock_test() {
|
||||
|
||||
# Create RAID bdev with superblock
|
||||
$rpc_py bdev_raid_create $strip_size_create_arg -r $raid_level -b "${base_bdevs_pt[*]}" -n $raid_bdev_name -s
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size; then
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
@ -381,7 +388,7 @@ function raid_superblock_test() {
|
||||
# Try to create new RAID bdev from malloc bdevs
|
||||
# Should not reach online state due to superblock still present on base bdevs
|
||||
$rpc_py bdev_raid_create $strip_size_create_arg -r $raid_level -b "${base_bdevs_malloc[*]}" -n $raid_bdev_name
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size; then
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
@ -396,7 +403,7 @@ function raid_superblock_test() {
|
||||
$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[0]} -p ${base_bdevs_pt[0]} -u ${base_bdevs_pt_uuid[0]}
|
||||
|
||||
# Check if the RAID bdev was assembled from superblock
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size; then
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
@ -406,7 +413,7 @@ function raid_superblock_test() {
|
||||
done
|
||||
|
||||
# Check if the RAID bdev is in online state
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size; then
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
@ -415,6 +422,95 @@ function raid_superblock_test() {
|
||||
return 1
|
||||
fi
|
||||
|
||||
if has_redundancy $raid_level; then
|
||||
# Delete one base bdev
|
||||
$rpc_py bdev_passthru_delete ${base_bdevs_pt[0]}
|
||||
|
||||
# Check if the RAID bdev is in online state (degraded)
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1)); then
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Stop the RAID bdev
|
||||
$rpc_py bdev_raid_delete $raid_bdev_name
|
||||
raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
|
||||
if [ -n "$raid_bdev" ]; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Delete remaining base bdevs
|
||||
for ((i = 1; i < num_base_bdevs; i++)); do
|
||||
$rpc_py bdev_passthru_delete ${base_bdevs_pt[$i]}
|
||||
done
|
||||
|
||||
# Re-add base bdevs from the second up to (not including) the last one
|
||||
for ((i = 1; i < num_base_bdevs - 1; i++)); do
|
||||
$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}
|
||||
|
||||
# Check if the RAID bdev is in configuring state
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $((num_base_bdevs - 1)); then
|
||||
return 1
|
||||
fi
|
||||
done
|
||||
|
||||
# Re-add the last base bdev
|
||||
i=$((num_base_bdevs - 1))
|
||||
$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}
|
||||
|
||||
# Check if the RAID bdev is in online state (degraded)
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1)); then
|
||||
return 1
|
||||
fi
|
||||
|
||||
if [ $num_base_bdevs -gt 2 ]; then
|
||||
# Stop the RAID bdev
|
||||
$rpc_py bdev_raid_delete $raid_bdev_name
|
||||
raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
|
||||
if [ -n "$raid_bdev" ]; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Delete remaining base bdevs
|
||||
for ((i = 1; i < num_base_bdevs; i++)); do
|
||||
$rpc_py bdev_passthru_delete ${base_bdevs_pt[$i]}
|
||||
done
|
||||
|
||||
# Re-add first base bdev
|
||||
# This is the "failed" device and contains the "old" version of the superblock
|
||||
$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[0]} -p ${base_bdevs_pt[0]} -u ${base_bdevs_pt_uuid[0]}
|
||||
|
||||
# Check if the RAID bdev is in configuring state
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Re-add the last base bdev
|
||||
i=$((num_base_bdevs - 1))
|
||||
$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}
|
||||
|
||||
# Check if the RAID bdev is in configuring state
|
||||
# This should use the newer superblock version and have n-1 online base bdevs
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $((num_base_bdevs - 1)); then
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Re-add remaining base bdevs
|
||||
for ((i = 1; i < num_base_bdevs - 1; i++)); do
|
||||
$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}
|
||||
done
|
||||
|
||||
# Check if the RAID bdev is in online state (degraded)
|
||||
if ! verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1)); then
|
||||
return 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check if the RAID bdev has the same UUID as when first created
|
||||
if [ "$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')" != "$raid_bdev_uuid" ]; then
|
||||
return 1
|
||||
fi
|
||||
fi
|
||||
|
||||
killprocess $raid_pid
|
||||
|
||||
return 0
|
||||
|
Loading…
Reference in New Issue
Block a user