From 957acd43b9b8ec17998de29f34fbadcf42fecc3e Mon Sep 17 00:00:00 2001 From: Artur Paszkiewicz Date: Thu, 15 Dec 2022 15:43:09 +0100 Subject: [PATCH] module/raid: assemble raid bdev from superblock Change the bdev_raid examine procedure to read the superblock from the examined base bdev. If a valid superblock is found, re-create the raid_bdev from it. Change-Id: I4bd589647a207a216ecf0dec9baf11c5d691f5d5 Signed-off-by: Artur Paszkiewicz --- doc/jsonrpc.md | 2 + module/bdev/raid/bdev_raid.c | 466 +++++++++++++++--- module/bdev/raid/bdev_raid.h | 3 + module/bdev/raid/bdev_raid_rpc.c | 4 + module/bdev/raid/bdev_raid_sb.c | 162 ++++++ module/bdev/raid/bdev_raid_sb.h | 3 + test/bdev/bdev_raid.sh | 95 ++++ .../lib/bdev/raid/bdev_raid.c/bdev_raid_ut.c | 13 + .../raid/bdev_raid_sb.c/bdev_raid_sb_ut.c | 140 ++++++ 9 files changed, 820 insertions(+), 68 deletions(-) diff --git a/doc/jsonrpc.md b/doc/jsonrpc.md index e3a0da070..b6020bb75 100644 --- a/doc/jsonrpc.md +++ b/doc/jsonrpc.md @@ -10039,6 +10039,7 @@ Example response: "result": [ { "name": "RaidBdev0", + "uuid": "a0bf80ba-96c1-4a81-a008-ad2d1b4b814c", "strip_size_kb": 128, "state": "online", "raid_level": "raid0", @@ -10051,6 +10052,7 @@ Example response: }, { "name": "RaidBdev1", + "uuid": "f7cb71ed-2d0e-4240-979e-27b0b7735f36", "strip_size_kb": 128, "state": "configuring", "raid_level": "raid0", diff --git a/module/bdev/raid/bdev_raid.c b/module/bdev/raid/bdev_raid.c index c1d880fae..fb793054c 100644 --- a/module/bdev/raid/bdev_raid.c +++ b/module/bdev/raid/bdev_raid.c @@ -225,23 +225,27 @@ raid_bdev_free_base_bdev_resource(struct raid_base_bdev_info *base_info) assert(spdk_get_thread() == spdk_thread_get_app_thread()); - free(base_info->name); - base_info->name = NULL; - - if (base_info->bdev == NULL) { - return; + if (base_info->bdev != NULL) { + assert(base_info->desc); + assert(raid_bdev->num_base_bdevs_discovered); + raid_bdev->num_base_bdevs_discovered--; + spdk_bdev_module_release_bdev(base_info->bdev); + base_info->bdev = NULL; } - assert(base_info->desc); - spdk_bdev_module_release_bdev(base_info->bdev); - spdk_bdev_close(base_info->desc); - base_info->desc = NULL; - base_info->bdev = NULL; - spdk_put_io_channel(base_info->app_thread_ch); - base_info->app_thread_ch = NULL; + if (base_info->app_thread_ch != NULL) { + spdk_put_io_channel(base_info->app_thread_ch); + base_info->app_thread_ch = NULL; + } - assert(raid_bdev->num_base_bdevs_discovered); - raid_bdev->num_base_bdevs_discovered--; + if (base_info->desc != NULL) { + spdk_bdev_close(base_info->desc); + base_info->desc = NULL; + } + + free(base_info->name); + base_info->name = NULL; + spdk_uuid_set_null(&base_info->uuid); } static void @@ -932,7 +936,7 @@ static struct spdk_bdev_module g_raid_if = { .fini_start = raid_bdev_fini_start, .module_fini = raid_bdev_exit, .get_ctx_size = raid_bdev_get_ctx_size, - .examine_config = raid_bdev_examine, + .examine_disk = raid_bdev_examine, .async_init = false, .async_fini = false, }; @@ -1371,7 +1375,18 @@ raid_bdev_configure(struct raid_bdev *raid_bdev) } if (raid_bdev->sb != NULL) { - raid_bdev_init_superblock(raid_bdev); + if (spdk_uuid_is_null(&raid_bdev->sb->uuid)) { + raid_bdev_init_superblock(raid_bdev); + } else { + if (raid_bdev->sb->block_size != blocklen) { + SPDK_ERRLOG("blocklen does not match value in superblock\n"); + return -EINVAL; + } + if (raid_bdev->sb->raid_size != raid_bdev->bdev.blockcnt) { + SPDK_ERRLOG("blockcnt does not match value in superblock\n"); + return -EINVAL; + } + } return raid_bdev_write_superblock(raid_bdev, raid_bdev_configure_write_sb_cb); } @@ -1803,12 +1818,41 @@ raid_bdev_configure_base_bdev(struct raid_base_bdev_info *base_info) struct raid_bdev *raid_bdev = base_info->raid_bdev; struct spdk_bdev_desc *desc; struct spdk_bdev *bdev; + const struct spdk_uuid *bdev_uuid; int rc; assert(spdk_get_thread() == spdk_thread_get_app_thread()); - assert(base_info->name != NULL); assert(base_info->bdev == NULL); + if (!spdk_uuid_is_null(&base_info->uuid)) { + const char *bdev_name; + + for (bdev = spdk_bdev_first(); bdev != NULL; bdev = spdk_bdev_next(bdev)) { + if (spdk_uuid_compare(&base_info->uuid, spdk_bdev_get_uuid(bdev)) == 0) { + break; + } + } + + if (bdev == NULL) { + return -ENODEV; + } + + bdev_name = spdk_bdev_get_name(bdev); + + if (base_info->name == NULL) { + base_info->name = strdup(bdev_name); + if (base_info->name == NULL) { + return -ENOMEM; + } + } else if (strcmp(base_info->name, bdev_name) != 0) { + SPDK_ERRLOG("Name mismatch for base bdev '%s' - expected '%s'\n", + bdev_name, base_info->name); + return -EINVAL; + } + } + + assert(base_info->name != NULL); + rc = spdk_bdev_open_ext(base_info->name, true, raid_bdev_event_base_bdev, NULL, &desc); if (rc != 0) { if (rc != -ENODEV) { @@ -1818,13 +1862,22 @@ raid_bdev_configure_base_bdev(struct raid_base_bdev_info *base_info) } bdev = spdk_bdev_desc_get_bdev(desc); + bdev_uuid = spdk_bdev_get_uuid(bdev); - if (raid_bdev->sb != NULL && spdk_uuid_is_null(spdk_bdev_get_uuid(bdev))) { + if (raid_bdev->sb != NULL && spdk_uuid_is_null(bdev_uuid)) { SPDK_ERRLOG("Base bdev '%s' does not have a valid UUID\n", base_info->name); spdk_bdev_close(desc); return -EINVAL; } + if (spdk_uuid_is_null(&base_info->uuid)) { + spdk_uuid_copy(&base_info->uuid, bdev_uuid); + } else if (spdk_uuid_compare(&base_info->uuid, bdev_uuid) != 0) { + SPDK_ERRLOG("UUID mismatch for base bdev '%s'\n", base_info->name); + spdk_bdev_close(desc); + return -EINVAL; + } + rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if); if (rc != 0) { SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); @@ -1844,39 +1897,103 @@ raid_bdev_configure_base_bdev(struct raid_base_bdev_info *base_info) return -ENOMEM; } + if (base_info->data_offset == 0 && raid_bdev->sb != NULL) { + assert((RAID_BDEV_MIN_DATA_OFFSET_SIZE % bdev->blocklen) == 0); + base_info->data_offset = RAID_BDEV_MIN_DATA_OFFSET_SIZE / bdev->blocklen; + } + + if (bdev->optimal_io_boundary) { + base_info->data_offset = spdk_divide_round_up(base_info->data_offset, + bdev->optimal_io_boundary) * bdev->optimal_io_boundary; + } + + if (base_info->data_offset > bdev->blockcnt) { + SPDK_ERRLOG("Data offset %lu exceeds base bdev capacity %lu on bdev '%s'\n", + base_info->data_offset, bdev->blockcnt, base_info->name); + rc = -EINVAL; + goto out; + } + + if (base_info->data_size == 0) { + base_info->data_size = bdev->blockcnt - base_info->data_offset; + } else if (base_info->data_offset + base_info->data_size > bdev->blockcnt) { + SPDK_ERRLOG("Data offset and size exceeds base bdev capacity %lu on bdev '%s'\n", + bdev->blockcnt, base_info->name); + rc = -EINVAL; + goto out; + } + base_info->bdev = bdev; base_info->desc = desc; base_info->blockcnt = bdev->blockcnt; - base_info->data_offset = 0; - base_info->data_size = base_info->bdev->blockcnt; + raid_bdev->num_base_bdevs_discovered++; assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs); - if (raid_bdev->sb != NULL) { - assert((RAID_BDEV_MIN_DATA_OFFSET_SIZE % bdev->blocklen) == 0); - base_info->data_offset = RAID_BDEV_MIN_DATA_OFFSET_SIZE / bdev->blocklen; - - if (bdev->optimal_io_boundary) { - base_info->data_offset = spdk_divide_round_up(base_info->data_offset, - bdev->optimal_io_boundary) * bdev->optimal_io_boundary; - } - - base_info->data_size = base_info->bdev->blockcnt - base_info->data_offset; - - if (base_info->data_offset > bdev->blockcnt) { - SPDK_ERRLOG("Data offset %lu exceeds base bdev capacity %lu on bdev '%s'\n", - base_info->data_offset, bdev->blockcnt, base_info->name); - return -EINVAL; - } - } - if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) { rc = raid_bdev_configure(raid_bdev); if (rc != 0) { - SPDK_ERRLOG("Failed to configure raid bdev\n"); - return rc; + SPDK_ERRLOG("Failed to configure raid bdev: %s\n", spdk_strerror(-rc)); } } +out: + if (rc != 0) { + raid_bdev_free_base_bdev_resource(base_info); + } + return rc; +} + +static int +_raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, + const struct spdk_uuid *uuid, uint8_t slot, uint64_t data_offset, uint64_t data_size) +{ + struct raid_base_bdev_info *base_info; + int rc; + + assert(name != NULL || uuid != NULL); + + if (slot >= raid_bdev->num_base_bdevs) { + return -EINVAL; + } + + base_info = &raid_bdev->base_bdev_info[slot]; + + if (base_info->name != NULL) { + SPDK_ERRLOG("Slot %u on raid bdev '%s' already assigned to bdev '%s'\n", + slot, raid_bdev->bdev.name, base_info->name); + return -EBUSY; + } + + if (!spdk_uuid_is_null(&base_info->uuid)) { + char uuid_str[SPDK_UUID_STRING_LEN]; + + spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &base_info->uuid); + SPDK_ERRLOG("Slot %u on raid bdev '%s' already assigned to bdev with uuid %s\n", + slot, raid_bdev->bdev.name, uuid_str); + return -EBUSY; + } + + if (name != NULL) { + base_info->name = strdup(name); + if (base_info->name == NULL) { + return -ENOMEM; + } + } + + if (uuid != NULL) { + spdk_uuid_copy(&base_info->uuid, uuid); + } + + base_info->data_offset = data_offset; + base_info->data_size = data_size; + + rc = raid_bdev_configure_base_bdev(base_info); + if (rc != 0) { + if (rc != -ENODEV) { + SPDK_ERRLOG("Failed to allocate resource for base bdev\n"); + } + return rc; + } return 0; } @@ -1897,35 +2014,227 @@ raid_bdev_configure_base_bdev(struct raid_base_bdev_info *base_info) int raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t slot) { - struct raid_base_bdev_info *base_info; + return _raid_bdev_add_base_device(raid_bdev, name, NULL, slot, 0, 0); +} + +static int +raid_bdev_add_base_device_from_sb(struct raid_bdev *raid_bdev, + const struct raid_bdev_sb_base_bdev *sb_base_bdev) +{ int rc; - if (slot >= raid_bdev->num_base_bdevs) { - return -EINVAL; + rc = _raid_bdev_add_base_device(raid_bdev, NULL, &sb_base_bdev->uuid, sb_base_bdev->slot, + sb_base_bdev->data_offset, sb_base_bdev->data_size); + + if (rc == -ENODEV) { + rc = 0; } - base_info = &raid_bdev->base_bdev_info[slot]; + return rc; +} - if (base_info->name != NULL) { - SPDK_ERRLOG("Slot %u on raid bdev '%s' already assigned to bdev '%s'\n", - slot, raid_bdev->bdev.name, base_info->name); - return -EBUSY; - } +static int +raid_bdev_create_from_sb(const struct raid_bdev_superblock *sb) +{ + struct raid_bdev *raid_bdev; + uint8_t i; + int rc; - base_info->name = strdup(name); - if (base_info->name == NULL) { - return -ENOMEM; - } - - rc = raid_bdev_configure_base_bdev(base_info); + rc = _raid_bdev_create(sb->name, (sb->strip_size * sb->block_size) / 1024, sb->num_base_bdevs, + sb->level, true, &raid_bdev); if (rc != 0) { - if (rc != -ENODEV) { - SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", name); - } return rc; } + spdk_uuid_copy(&raid_bdev->bdev.uuid, &sb->uuid); + + assert(sb->length <= RAID_BDEV_SB_MAX_LENGTH); + memcpy(raid_bdev->sb, sb, sb->length); + + for (i = 0; i < sb->base_bdevs_size; i++) { + const struct raid_bdev_sb_base_bdev *sb_base_bdev = &sb->base_bdevs[i]; + + if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED) { + rc = raid_bdev_add_base_device_from_sb(raid_bdev, sb_base_bdev); + if (rc != 0) { + goto err; + } + } + } + return 0; +err: + raid_bdev_delete(raid_bdev, NULL, NULL); + return rc; +} + +static void +raid_bdev_examine_no_sb(struct spdk_bdev *bdev) +{ + struct raid_bdev *raid_bdev; + struct raid_base_bdev_info *base_info; + + TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { + RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { + if (base_info->bdev == NULL && base_info->name != NULL && + strcmp(bdev->name, base_info->name) == 0) { + assert(raid_bdev->sb == NULL); + raid_bdev_configure_base_bdev(base_info); + break; + } + } + } +} + +static void +raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bdev) +{ + const struct raid_bdev_sb_base_bdev *sb_base_bdev; + struct raid_bdev *raid_bdev; + uint8_t i; + int rc; + + if (sb->block_size != bdev->blocklen) { + SPDK_ERRLOG("Bdev %s block size does not match the value in superblock\n", + bdev->name); + return; + } + + if (spdk_uuid_is_null(&sb->uuid)) { + SPDK_ERRLOG("Invalid raid bdev UUID in superblock on bdev %s\n", bdev->name); + return; + } + + TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { + if (spdk_uuid_compare(&raid_bdev->bdev.uuid, &sb->uuid) == 0) { + break; + } + } + + if (raid_bdev) { + if (sb->seq_number > raid_bdev->sb->seq_number) { + SPDK_DEBUGLOG(bdev_raid, + "raid superblock seq_number on bdev %s (%lu) greater than existing raid bdev %s (%lu)\n", + bdev->name, sb->seq_number, raid_bdev->bdev.name, raid_bdev->sb->seq_number); + + if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) { + SPDK_WARNLOG("Newer version of raid bdev %s superblock found on bdev %s but raid bdev is not in configuring state.\n", + raid_bdev->bdev.name, bdev->name); + return; + } + + /* remove and then recreate the raid bdev using the newer superblock */ + raid_bdev_delete(raid_bdev, NULL, NULL); + raid_bdev = NULL; + } else if (sb->seq_number < raid_bdev->sb->seq_number) { + SPDK_DEBUGLOG(bdev_raid, + "raid superblock seq_number on bdev %s (%lu) smaller than existing raid bdev %s (%lu)\n", + bdev->name, sb->seq_number, raid_bdev->bdev.name, raid_bdev->sb->seq_number); + /* use the current raid bdev superblock */ + sb = raid_bdev->sb; + } + } + + for (i = 0; i < sb->base_bdevs_size; i++) { + sb_base_bdev = &sb->base_bdevs[i]; + + assert(spdk_uuid_is_null(&sb_base_bdev->uuid) == false); + + if (spdk_uuid_compare(&sb_base_bdev->uuid, spdk_bdev_get_uuid(bdev)) == 0) { + break; + } + } + + if (i == sb->base_bdevs_size) { + SPDK_DEBUGLOG(bdev_raid, "raid superblock does not contain this bdev's uuid\n"); + return; + } + + if (!raid_bdev) { + rc = raid_bdev_create_from_sb(sb); + if (rc != 0) { + SPDK_ERRLOG("Failed to create raid bdev %s: %s\n", + sb->name, spdk_strerror(-rc)); + } + return; + } + + if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED) { + struct raid_base_bdev_info *iter, *base_info = NULL; + + RAID_FOR_EACH_BASE_BDEV(raid_bdev, iter) { + if (!spdk_uuid_is_null(&iter->uuid) && + spdk_uuid_compare(&iter->uuid, spdk_bdev_get_uuid(bdev)) == 0) { + base_info = iter; + break; + } + } + + if (base_info == NULL) { + SPDK_ERRLOG("Bdev %s is not a member of raid bdev %s\n", + bdev->name, raid_bdev->bdev.name); + return; + } + + rc = raid_bdev_configure_base_bdev(base_info); + if (rc != 0) { + SPDK_ERRLOG("Failed to configure bdev %s as base bdev of raid %s: %s\n", + bdev->name, raid_bdev->bdev.name, spdk_strerror(-rc)); + } + } else { + SPDK_NOTICELOG("Bdev %s is not an active member of raid bdev %s. Ignoring.\n", + bdev->name, raid_bdev->bdev.name); + } +} + +struct raid_bdev_examine_ctx { + struct spdk_bdev_desc *desc; + struct spdk_io_channel *ch; +}; + +static void +raid_bdev_examine_ctx_free(struct raid_bdev_examine_ctx *ctx) +{ + if (ctx->ch) { + spdk_put_io_channel(ctx->ch); + } + + if (ctx->desc) { + spdk_bdev_close(ctx->desc); + } + + free(ctx); +} + +static void +raid_bdev_examine_load_sb_cb(const struct raid_bdev_superblock *sb, int status, void *_ctx) +{ + struct raid_bdev_examine_ctx *ctx = _ctx; + struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(ctx->desc); + + switch (status) { + case 0: + /* valid superblock found */ + SPDK_DEBUGLOG(bdev_raid, "raid superblock found on bdev %s\n", bdev->name); + raid_bdev_examine_sb(sb, bdev); + break; + case -EINVAL: + /* no valid superblock, check if it can be claimed anyway */ + raid_bdev_examine_no_sb(bdev); + break; + default: + SPDK_ERRLOG("Failed to examine bdev %s: %s\n", + bdev->name, spdk_strerror(-status)); + break; + } + + raid_bdev_examine_ctx_free(ctx); + spdk_bdev_module_examine_done(&g_raid_if); +} + +static void +raid_bdev_examine_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx) +{ } /* @@ -1941,19 +2250,40 @@ raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t static void raid_bdev_examine(struct spdk_bdev *bdev) { - struct raid_bdev *raid_bdev; - struct raid_base_bdev_info *base_info; + struct raid_bdev_examine_ctx *ctx; + int rc; - TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { - RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { - if (base_info->bdev == NULL && base_info->name != NULL && - strcmp(bdev->name, base_info->name) == 0) { - raid_bdev_configure_base_bdev(base_info); - break; - } - } + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + SPDK_ERRLOG("Failed to examine bdev %s: %s\n", + bdev->name, spdk_strerror(ENOMEM)); + return; } + rc = spdk_bdev_open_ext(spdk_bdev_get_name(bdev), false, raid_bdev_examine_event_cb, NULL, + &ctx->desc); + if (rc) { + SPDK_ERRLOG("Failed to open bdev %s: %s\n", + bdev->name, spdk_strerror(-rc)); + goto err; + } + + ctx->ch = spdk_bdev_get_io_channel(ctx->desc); + if (!ctx->ch) { + SPDK_ERRLOG("Failed to get io channel for bdev %s\n", bdev->name); + goto err; + } + + rc = raid_bdev_load_base_bdev_superblock(ctx->desc, ctx->ch, raid_bdev_examine_load_sb_cb, ctx); + if (rc) { + SPDK_ERRLOG("Failed to read bdev %s superblock: %s\n", + bdev->name, spdk_strerror(-rc)); + goto err; + } + + return; +err: + raid_bdev_examine_ctx_free(ctx); spdk_bdev_module_examine_done(&g_raid_if); } diff --git a/module/bdev/raid/bdev_raid.h b/module/bdev/raid/bdev_raid.h index 9600d978f..5067fe5aa 100644 --- a/module/bdev/raid/bdev_raid.h +++ b/module/bdev/raid/bdev_raid.h @@ -60,6 +60,9 @@ struct raid_base_bdev_info { /* name of the bdev */ char *name; + /* uuid of the bdev */ + struct spdk_uuid uuid; + /* pointer to base spdk bdev */ struct spdk_bdev *bdev; diff --git a/module/bdev/raid/bdev_raid_rpc.c b/module/bdev/raid/bdev_raid_rpc.c index c71089fbc..d38dbba20 100644 --- a/module/bdev/raid/bdev_raid_rpc.c +++ b/module/bdev/raid/bdev_raid_rpc.c @@ -87,8 +87,12 @@ rpc_bdev_raid_get_bdevs(struct spdk_jsonrpc_request *request, /* Get raid bdev list based on the category requested */ TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { if (raid_bdev->state == state || state == RAID_BDEV_STATE_MAX) { + char uuid_str[SPDK_UUID_STRING_LEN]; + spdk_json_write_object_begin(w); spdk_json_write_named_string(w, "name", raid_bdev->bdev.name); + spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &raid_bdev->bdev.uuid); + spdk_json_write_named_string(w, "uuid", uuid_str); raid_bdev_write_info_json(raid_bdev, w); spdk_json_write_object_end(w); } diff --git a/module/bdev/raid/bdev_raid_sb.c b/module/bdev/raid/bdev_raid_sb.c index 7e9741906..267d7e956 100644 --- a/module/bdev/raid/bdev_raid_sb.c +++ b/module/bdev/raid/bdev_raid_sb.c @@ -12,11 +12,173 @@ #include "bdev_raid_sb.h" +struct raid_bdev_read_sb_ctx { + struct spdk_bdev_desc *desc; + struct spdk_io_channel *ch; + raid_bdev_load_sb_cb cb; + void *cb_ctx; + void *buf; + uint32_t buf_size; +}; + struct raid_bdev_save_sb_ctx { raid_bdev_save_sb_cb cb; void *cb_ctx; }; +static void raid_bdev_read_sb_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg); + +static int +raid_bdev_parse_superblock(struct raid_bdev_read_sb_ctx *ctx) +{ + struct raid_bdev_superblock *sb = ctx->buf; + struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(ctx->desc); + uint32_t crc; + + if (memcmp(sb->signature, RAID_BDEV_SB_SIG, sizeof(sb->signature))) { + SPDK_DEBUGLOG(bdev_raid_sb, "invalid signature\n"); + return -EINVAL; + } + + if (sb->length > ctx->buf_size) { + if (sb->length > RAID_BDEV_SB_MAX_LENGTH) { + SPDK_DEBUGLOG(bdev_raid_sb, "invalid length\n"); + return -EINVAL; + } + + return -EAGAIN; + } + + crc = sb->crc; + raid_bdev_sb_update_crc(sb); + if (sb->crc != crc) { + SPDK_WARNLOG("Incorrect superblock crc on bdev %s\n", spdk_bdev_get_name(bdev)); + sb->crc = crc; + return -EINVAL; + } + + if (sb->version.major > RAID_BDEV_SB_VERSION_MAJOR) { + SPDK_ERRLOG("Not supported superblock major version %d on bdev %s\n", + sb->version.major, spdk_bdev_get_name(bdev)); + return -EINVAL; + } + + if (sb->version.major == RAID_BDEV_SB_VERSION_MAJOR && + sb->version.minor > RAID_BDEV_SB_VERSION_MINOR) { + SPDK_WARNLOG("Superblock minor version %d on bdev %s is higher than the currently supported: %d\n", + sb->version.minor, spdk_bdev_get_name(bdev), RAID_BDEV_SB_VERSION_MINOR); + } + + return 0; +} + +static void +raid_bdev_read_sb_ctx_free(struct raid_bdev_read_sb_ctx *ctx) +{ + spdk_dma_free(ctx->buf); + + free(ctx); +} + +static int +raid_bdev_read_sb_remainder(struct raid_bdev_read_sb_ctx *ctx) +{ + struct raid_bdev_superblock *sb = ctx->buf; + struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(ctx->desc); + uint32_t buf_size_prev; + void *buf; + int rc; + + buf_size_prev = ctx->buf_size; + ctx->buf_size = SPDK_ALIGN_CEIL(sb->length, spdk_bdev_get_block_size(bdev)); + buf = spdk_dma_realloc(ctx->buf, ctx->buf_size, spdk_bdev_get_buf_align(bdev), NULL); + if (buf == NULL) { + SPDK_ERRLOG("Failed to reallocate buffer\n"); + return -ENOMEM; + } + ctx->buf = buf; + + rc = spdk_bdev_read(ctx->desc, ctx->ch, ctx->buf + buf_size_prev, buf_size_prev, + ctx->buf_size - buf_size_prev, raid_bdev_read_sb_cb, ctx); + if (rc != 0) { + SPDK_ERRLOG("Failed to read bdev %s superblock remainder: %s\n", + spdk_bdev_get_name(bdev), spdk_strerror(-rc)); + return rc; + } + + return 0; +} + +static void +raid_bdev_read_sb_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct raid_bdev_read_sb_ctx *ctx = cb_arg; + struct raid_bdev_superblock *sb = NULL; + int status; + + spdk_bdev_free_io(bdev_io); + + if (success) { + status = raid_bdev_parse_superblock(ctx); + if (status == -EAGAIN) { + status = raid_bdev_read_sb_remainder(ctx); + if (status == 0) { + return; + } + } else if (status != 0) { + SPDK_DEBUGLOG(bdev_raid_sb, "failed to parse bdev %s superblock\n", + spdk_bdev_get_name(spdk_bdev_desc_get_bdev(ctx->desc))); + } else { + sb = ctx->buf; + } + } else { + status = -EIO; + } + + if (ctx->cb) { + ctx->cb(sb, status, ctx->cb_ctx); + } + + raid_bdev_read_sb_ctx_free(ctx); +} + +int +raid_bdev_load_base_bdev_superblock(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + raid_bdev_load_sb_cb cb, void *cb_ctx) +{ + struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc); + struct raid_bdev_read_sb_ctx *ctx; + int rc; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + return -ENOMEM; + } + + ctx->desc = desc; + ctx->ch = ch; + ctx->cb = cb; + ctx->cb_ctx = cb_ctx; + ctx->buf_size = SPDK_ALIGN_CEIL(sizeof(struct raid_bdev_superblock), + spdk_bdev_get_block_size(bdev)); + ctx->buf = spdk_dma_malloc(ctx->buf_size, spdk_bdev_get_buf_align(bdev), NULL); + if (!ctx->buf) { + rc = -ENOMEM; + goto err; + } + + rc = spdk_bdev_read(desc, ch, ctx->buf, 0, ctx->buf_size, raid_bdev_read_sb_cb, ctx); + if (rc) { + goto err; + } + + return 0; +err: + raid_bdev_read_sb_ctx_free(ctx); + + return rc; +} + static void raid_bdev_write_sb_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { diff --git a/module/bdev/raid/bdev_raid_sb.h b/module/bdev/raid/bdev_raid_sb.h index 977b901fc..3edb92a0e 100644 --- a/module/bdev/raid/bdev_raid_sb.h +++ b/module/bdev/raid/bdev_raid_sb.h @@ -86,8 +86,11 @@ struct raid_bdev_superblock { }; SPDK_STATIC_ASSERT(sizeof(struct raid_bdev_superblock) == 192, "incorrect size"); +typedef void (*raid_bdev_load_sb_cb)(const struct raid_bdev_superblock *sb, int status, void *ctx); typedef void (*raid_bdev_save_sb_cb)(int status, void *ctx); +int raid_bdev_load_base_bdev_superblock(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + raid_bdev_load_sb_cb cb, void *cb_ctx); int raid_bdev_save_base_bdev_superblock(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, const struct raid_bdev_superblock *sb, raid_bdev_save_sb_cb cb, void *cb_ctx); void raid_bdev_sb_update_crc(struct raid_bdev_superblock *sb); diff --git a/test/bdev/bdev_raid.sh b/test/bdev/bdev_raid.sh index 96322f7d8..51cb5e445 100755 --- a/test/bdev/bdev_raid.sh +++ b/test/bdev/bdev_raid.sh @@ -313,6 +313,99 @@ function raid0_resize_test() { return 0 } +function raid_superblock_test() { + local raid_level=$1 + local num_base_bdevs=$2 + local base_bdevs_malloc=() + local base_bdevs_pt=() + local base_bdevs_pt_uuid=() + local raid_bdev_name="raid_bdev1" + local raid_bdev_uuid + local raid_bdev + local strip_size + local strip_size_create_arg + + if [ $raid_level != "raid1" ]; then + strip_size=64 + strip_size_create_arg="-z $strip_size" + else + strip_size=0 + fi + + $rootdir/test/app/bdev_svc/bdev_svc -r $rpc_server -i 0 -L bdev_raid & + raid_pid=$! + echo "Process raid pid: $raid_pid" + waitforlisten $raid_pid $rpc_server + + # Create base bdevs + for ((i = 1; i <= num_base_bdevs; i++)); do + local bdev_malloc="malloc$i" + local bdev_pt="pt$i" + local bdev_pt_uuid="00000000-0000-0000-0000-00000000000$i" + + base_bdevs_malloc+=($bdev_malloc) + base_bdevs_pt+=($bdev_pt) + base_bdevs_pt_uuid+=($bdev_pt_uuid) + + $rpc_py bdev_malloc_create 32 512 -b $bdev_malloc + $rpc_py bdev_passthru_create -b $bdev_malloc -p $bdev_pt -u $bdev_pt_uuid + done + + # Create RAID bdev with superblock + $rpc_py bdev_raid_create $strip_size_create_arg -r $raid_level -b "${base_bdevs_pt[*]}" -n $raid_bdev_name -s + if ! verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size; then + return 1 + fi + + # Get RAID bdev's UUID + raid_bdev_uuid=$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid | select(.)') + if [ -z "$raid_bdev_uuid" ]; then + return 1 + fi + + # Stop the RAID bdev + $rpc_py bdev_raid_delete $raid_bdev_name + raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]') + if [ -n "$raid_bdev" ]; then + return 1 + fi + + # Delete the passthru bdevs + for i in "${base_bdevs_pt[@]}"; do + $rpc_py bdev_passthru_delete $i + done + if [ "$($rpc_py bdev_get_bdevs | jq -r '[.[] | select(.product_name == "passthru")] | any')" == "true" ]; then + return 1 + fi + + # Re-add first base bdev + $rpc_py bdev_passthru_create -b ${base_bdevs_malloc[0]} -p ${base_bdevs_pt[0]} -u ${base_bdevs_pt_uuid[0]} + + # Check if the RAID bdev was assembled from superblock + if ! verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size; then + return 1 + fi + + # Re-add remaining base bdevs + for ((i = 1; i < num_base_bdevs; i++)); do + $rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]} + done + + # Check if the RAID bdev is in online state + if ! verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size; then + return 1 + fi + + # Check if the RAID bdev has the same UUID as when first created + if [ "$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')" != "$raid_bdev_uuid" ]; then + return 1 + fi + + killprocess $raid_pid + + return 0 +} + trap 'on_error_exit;' ERR raid_function_test raid0 @@ -322,12 +415,14 @@ raid0_resize_test for n in {2..4}; do for level in raid0 concat raid1; do raid_state_function_test $level $n + raid_superblock_test $level $n done done if [ "$CONFIG_RAID5F" == y ]; then for n in {3..4}; do raid_state_function_test raid5f $n + raid_superblock_test raid5f $n done fi diff --git a/test/unit/lib/bdev/raid/bdev_raid.c/bdev_raid_ut.c b/test/unit/lib/bdev/raid/bdev_raid.c/bdev_raid_ut.c index 49af1c4f9..d57a3bcc0 100644 --- a/test/unit/lib/bdev/raid/bdev_raid.c/bdev_raid_ut.c +++ b/test/unit/lib/bdev/raid/bdev_raid.c/bdev_raid_ut.c @@ -125,8 +125,21 @@ DEFINE_STUB(spdk_bdev_get_dif_type, enum spdk_dif_type, (const struct spdk_bdev SPDK_DIF_DISABLE); DEFINE_STUB(spdk_bdev_is_dif_head_of_md, bool, (const struct spdk_bdev *bdev), false); DEFINE_STUB(spdk_bdev_notify_blockcnt_change, int, (struct spdk_bdev *bdev, uint64_t size), 0); +DEFINE_STUB(spdk_bdev_first, struct spdk_bdev *, (void), NULL); +DEFINE_STUB(spdk_bdev_next, struct spdk_bdev *, (struct spdk_bdev *prev), NULL); DEFINE_STUB_V(raid_bdev_sb_update_crc, (struct raid_bdev_superblock *sb)); +int +raid_bdev_load_base_bdev_superblock(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + raid_bdev_load_sb_cb cb, void *cb_ctx) +{ + if (cb) { + cb(NULL, -EINVAL, cb_ctx); + } + + return 0; +} + int raid_bdev_save_base_bdev_superblock(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, const struct raid_bdev_superblock *sb, raid_bdev_save_sb_cb cb, void *cb_ctx) diff --git a/test/unit/lib/bdev/raid/bdev_raid_sb.c/bdev_raid_sb_ut.c b/test/unit/lib/bdev/raid/bdev_raid_sb.c/bdev_raid_sb_ut.c index 2fdc7e0e8..35f8876d0 100644 --- a/test/unit/lib/bdev/raid/bdev_raid_sb.c/bdev_raid_sb_ut.c +++ b/test/unit/lib/bdev/raid/bdev_raid_sb.c/bdev_raid_sb_ut.c @@ -15,10 +15,13 @@ #define TEST_BLOCK_SIZE 512 DEFINE_STUB(spdk_bdev_desc_get_bdev, struct spdk_bdev *, (struct spdk_bdev_desc *desc), NULL); +DEFINE_STUB(spdk_bdev_get_name, const char *, (const struct spdk_bdev *bdev), "test_bdev"); +DEFINE_STUB(spdk_bdev_get_buf_align, size_t, (const struct spdk_bdev *bdev), TEST_BUF_ALIGN); DEFINE_STUB(spdk_bdev_get_block_size, uint32_t, (const struct spdk_bdev *bdev), TEST_BLOCK_SIZE); DEFINE_STUB_V(spdk_bdev_free_io, (struct spdk_bdev_io *g_bdev_io)); void *g_buf; +int g_read_counter; static int test_setup(void) @@ -39,6 +42,17 @@ test_cleanup(void) return 0; } +int +spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + void *buf, uint64_t offset, uint64_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg) +{ + g_read_counter++; + memcpy(buf, g_buf + offset, nbytes); + cb(NULL, true, cb_arg); + return 0; +} + int spdk_bdev_write(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, void *buf, uint64_t offset, uint64_t nbytes, @@ -88,6 +102,130 @@ test_raid_bdev_save_base_bdev_superblock(void) CU_ASSERT(status == 0); } +static void +load_sb_cb(const struct raid_bdev_superblock *sb, int status, void *ctx) +{ + int *status_out = ctx; + + if (status == 0) { + CU_ASSERT(memcmp(sb, g_buf, sb->length) == 0); + } + + *status_out = status; +} + +static void +test_raid_bdev_load_base_bdev_superblock(void) +{ + struct raid_bdev_superblock *sb = g_buf; + int rc; + int status; + + /* valid superblock */ + prepare_sb(sb); + + g_read_counter = 0; + status = INT_MAX; + rc = raid_bdev_load_base_bdev_superblock(NULL, NULL, load_sb_cb, &status); + CU_ASSERT(rc == 0); + CU_ASSERT(status == 0); + CU_ASSERT(g_read_counter == 1); + + /* invalid signature */ + prepare_sb(sb); + sb->signature[3] = 'Z'; + raid_bdev_sb_update_crc(sb); + + g_read_counter = 0; + status = INT_MAX; + rc = raid_bdev_load_base_bdev_superblock(NULL, NULL, load_sb_cb, &status); + CU_ASSERT(rc == 0); + CU_ASSERT(status == -EINVAL); + CU_ASSERT(g_read_counter == 1); + + /* make the sb longer than 1 bdev block - expect 2 reads */ + prepare_sb(sb); + sb->length = TEST_BLOCK_SIZE * 3; + raid_bdev_sb_update_crc(sb); + + g_read_counter = 0; + status = INT_MAX; + rc = raid_bdev_load_base_bdev_superblock(NULL, NULL, load_sb_cb, &status); + CU_ASSERT(rc == 0); + CU_ASSERT(status == 0); + CU_ASSERT(g_read_counter == 2); + + /* corrupted sb contents, length > 1 bdev block - expect 2 reads */ + prepare_sb(sb); + sb->length = TEST_BLOCK_SIZE * 3; + raid_bdev_sb_update_crc(sb); + sb->reserved[0] = 0xff; + + g_read_counter = 0; + status = INT_MAX; + rc = raid_bdev_load_base_bdev_superblock(NULL, NULL, load_sb_cb, &status); + CU_ASSERT(rc == 0); + CU_ASSERT(status == -EINVAL); + CU_ASSERT(g_read_counter == 2); + + /* invalid signature, length > 1 bdev block - expect 1 read */ + prepare_sb(sb); + sb->signature[3] = 'Z'; + sb->length = TEST_BLOCK_SIZE * 3; + raid_bdev_sb_update_crc(sb); + + g_read_counter = 0; + status = INT_MAX; + rc = raid_bdev_load_base_bdev_superblock(NULL, NULL, load_sb_cb, &status); + CU_ASSERT(rc == 0); + CU_ASSERT(status == -EINVAL); + CU_ASSERT(g_read_counter == 1); +} + +static void +test_raid_bdev_parse_superblock(void) +{ + struct raid_bdev_superblock *sb = g_buf; + struct raid_bdev_read_sb_ctx ctx = { + .buf = g_buf, + .buf_size = TEST_BLOCK_SIZE, + }; + + /* valid superblock */ + prepare_sb(sb); + CU_ASSERT(raid_bdev_parse_superblock(&ctx) == 0); + + /* invalid signature */ + prepare_sb(sb); + sb->signature[3] = 'Z'; + raid_bdev_sb_update_crc(sb); + CU_ASSERT(raid_bdev_parse_superblock(&ctx) == -EINVAL); + + /* invalid crc */ + prepare_sb(sb); + sb->crc = 0xdeadbeef; + CU_ASSERT(raid_bdev_parse_superblock(&ctx) == -EINVAL); + + /* corrupted sb contents */ + prepare_sb(sb); + sb->reserved[0] = 0xff; + CU_ASSERT(raid_bdev_parse_superblock(&ctx) == -EINVAL); + + /* invalid major version */ + prepare_sb(sb); + sb->version.major = 9999; + raid_bdev_sb_update_crc(sb); + CU_ASSERT(raid_bdev_parse_superblock(&ctx) == -EINVAL); + + /* sb longer than 1 bdev block */ + prepare_sb(sb); + sb->length = TEST_BLOCK_SIZE * 3; + raid_bdev_sb_update_crc(sb); + CU_ASSERT(raid_bdev_parse_superblock(&ctx) == -EAGAIN); + ctx.buf_size = sb->length; + CU_ASSERT(raid_bdev_parse_superblock(&ctx) == 0); +} + int main(int argc, char **argv) { @@ -99,6 +237,8 @@ main(int argc, char **argv) suite = CU_add_suite("raid_sb", test_setup, test_cleanup); CU_ADD_TEST(suite, test_raid_bdev_save_base_bdev_superblock); + CU_ADD_TEST(suite, test_raid_bdev_load_base_bdev_superblock); + CU_ADD_TEST(suite, test_raid_bdev_parse_superblock); CU_basic_set_mode(CU_BRM_VERBOSE); CU_basic_run_tests();