From 65f2af1dcbec9584b3ebaa3f575a0963ef857c81 Mon Sep 17 00:00:00 2001 From: Artur Paszkiewicz Date: Wed, 28 Sep 2022 12:06:31 +0200 Subject: [PATCH] module/raid: continue operation after base bdev removal Don't stop the raid bdev if the minimum number of base bdevs are available. When removing a base bdev, first suspend the raid bdev and then perform the actual removal/cleanup. Finally, resume the raid bdev. Change-Id: Ie010d3760c32b0dad455a5a2a0ab7adcc602edf9 Signed-off-by: Artur Paszkiewicz --- module/bdev/raid/bdev_raid.c | 126 +++++++++++++++++++++++++++-------- 1 file changed, 98 insertions(+), 28 deletions(-) diff --git a/module/bdev/raid/bdev_raid.c b/module/bdev/raid/bdev_raid.c index 7af243116..c606ff7f7 100644 --- a/module/bdev/raid/bdev_raid.c +++ b/module/bdev/raid/bdev_raid.c @@ -89,7 +89,6 @@ raid_bdev_create_cb(void *io_device, void *ctx_buf) pthread_mutex_lock(&raid_bdev->mutex); raid_ch->is_suspended = (raid_bdev->suspend_cnt > 0); - pthread_mutex_unlock(&raid_bdev->mutex); for (i = 0; i < raid_ch->num_channels; i++) { /* @@ -97,6 +96,9 @@ raid_bdev_create_cb(void *io_device, void *ctx_buf) * split logic to send the respective child bdev ios to respective base * bdev io channel. */ + if (raid_bdev->base_bdev_info[i].desc == NULL) { + continue; + } raid_ch->base_channel[i] = spdk_bdev_get_io_channel( raid_bdev->base_bdev_info[i].desc); if (!raid_ch->base_channel[i]) { @@ -105,6 +107,7 @@ raid_bdev_create_cb(void *io_device, void *ctx_buf) break; } } + pthread_mutex_unlock(&raid_bdev->mutex); if (!ret && raid_bdev->module->get_io_channel) { raid_ch->module_channel = raid_bdev->module->get_io_channel(raid_bdev); @@ -115,10 +118,10 @@ raid_bdev_create_cb(void *io_device, void *ctx_buf) } if (ret) { - uint8_t j; - - for (j = 0; j < i; j++) { - spdk_put_io_channel(raid_ch->base_channel[j]); + for (i = 0; i < raid_ch->num_channels; i++) { + if (raid_ch->base_channel[i] != NULL) { + spdk_put_io_channel(raid_ch->base_channel[i]); + } } free(raid_ch->base_channel); raid_ch->base_channel = NULL; @@ -154,8 +157,9 @@ raid_bdev_destroy_cb(void *io_device, void *ctx_buf) for (i = 0; i < raid_ch->num_channels; i++) { /* Free base bdev channels */ - assert(raid_ch->base_channel[i] != NULL); - spdk_put_io_channel(raid_ch->base_channel[i]); + if (raid_ch->base_channel[i] != NULL) { + spdk_put_io_channel(raid_ch->base_channel[i]); + } } free(raid_ch->base_channel); raid_ch->base_channel = NULL; @@ -420,10 +424,14 @@ raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io) raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; } - while (raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) { - i = raid_io->base_bdev_io_submitted; + for (i = raid_io->base_bdev_io_submitted; i < raid_bdev->num_base_bdevs; i++) { base_info = &raid_bdev->base_bdev_info[i]; base_ch = raid_io->raid_ch->base_channel[i]; + if (base_ch == NULL) { + raid_io->base_bdev_io_submitted++; + raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS); + continue; + } ret = spdk_bdev_reset(base_info->desc, base_ch, raid_base_bdev_reset_complete, raid_io); if (ret == 0) { @@ -547,7 +555,6 @@ _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { if (base_info->bdev == NULL) { - assert(false); continue; } @@ -704,7 +711,9 @@ raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int struct raid_bdev *raid_bdev = ctx; struct spdk_bdev *base_bdev; uint32_t i; - int domains_count = 0, rc; + int domains_count = 0, rc = 0; + + pthread_mutex_lock(&raid_bdev->mutex); if (raid_bdev->module->memory_domains_supported == false) { return 0; @@ -713,26 +722,38 @@ raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int /* First loop to get the number of memory domains */ for (i = 0; i < raid_bdev->num_base_bdevs; i++) { base_bdev = raid_bdev->base_bdev_info[i].bdev; + if (base_bdev == NULL) { + continue; + } rc = spdk_bdev_get_memory_domains(base_bdev, NULL, 0); if (rc < 0) { - return rc; + goto out; } domains_count += rc; } if (!domains || array_size < domains_count) { - return domains_count; + goto out; } for (i = 0; i < raid_bdev->num_base_bdevs; i++) { base_bdev = raid_bdev->base_bdev_info[i].bdev; + if (base_bdev == NULL) { + continue; + } rc = spdk_bdev_get_memory_domains(base_bdev, domains, array_size); if (rc < 0) { - return rc; + goto out; } domains += rc; array_size -= rc; } +out: + pthread_mutex_unlock(&raid_bdev->mutex); + + if (rc < 0) { + return rc; + } return domains_count; } @@ -1229,7 +1250,6 @@ raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, return; } - assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered); raid_bdev->state = RAID_BDEV_STATE_OFFLINE; assert(raid_bdev->num_base_bdevs_discovered); SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n"); @@ -1336,9 +1356,6 @@ raid_bdev_suspend_continue(struct spdk_io_channel_iter *i, int status) raid_bdev_dec_suspend_num_channels(raid_bdev); } -static int raid_bdev_suspend(struct raid_bdev *raid_bdev, raid_bdev_suspended_cb cb, - void *cb_ctx) __attribute__((unused)); - static int raid_bdev_suspend(struct raid_bdev *raid_bdev, raid_bdev_suspended_cb cb, void *cb_ctx) { @@ -1396,8 +1413,6 @@ raid_bdev_channel_resume(struct spdk_io_channel_iter *i) spdk_for_each_channel_continue(i, 0); } -static void raid_bdev_resume(struct raid_bdev *raid_bdev) __attribute__((unused)); - static void raid_bdev_resume(struct raid_bdev *raid_bdev) { @@ -1413,6 +1428,46 @@ raid_bdev_resume(struct raid_bdev *raid_bdev) } } +static void +raid_bdev_channel_remove_base_bdev(struct spdk_io_channel_iter *i) +{ + struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i); + struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); + struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch); + uint8_t idx = base_info - base_info->raid_bdev->base_bdev_info; + + SPDK_DEBUGLOG(bdev_raid, "slot: %u raid_ch: %p\n", idx, raid_ch); + + if (raid_ch->base_channel[idx] != NULL) { + spdk_put_io_channel(raid_ch->base_channel[idx]); + raid_ch->base_channel[idx] = NULL; + } + + spdk_for_each_channel_continue(i, 0); +} + +static void +raid_bdev_remove_base_bdev_done(struct spdk_io_channel_iter *i, int status) +{ + struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i); + + raid_bdev_resume(base_info->raid_bdev); +} + +static void +raid_bdev_remove_base_bdev_on_suspended(struct raid_bdev *raid_bdev, void *ctx) +{ + struct raid_base_bdev_info *base_info = ctx; + + pthread_mutex_lock(&raid_bdev->mutex); + base_info->remove_scheduled = false; + raid_bdev_free_base_bdev_resource(base_info); + pthread_mutex_unlock(&raid_bdev->mutex); + + spdk_for_each_channel(raid_bdev, raid_bdev_channel_remove_base_bdev, base_info, + raid_bdev_remove_base_bdev_done); +} + /* * brief: * raid_bdev_remove_base_bdev function is called by below layers when base_bdev @@ -1421,26 +1476,31 @@ raid_bdev_resume(struct raid_bdev *raid_bdev) * params: * base_bdev - pointer to base bdev which got removed * returns: - * none + * 0 - success + * non zero - failure */ -static void +static int raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev) { struct raid_bdev *raid_bdev; struct raid_base_bdev_info *base_info; - SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_bdev\n"); + SPDK_DEBUGLOG(bdev_raid, "%s\n", base_bdev->name); /* Find the raid_bdev which has claimed this base_bdev */ base_info = raid_bdev_find_base_info_by_bdev(base_bdev); if (!base_info) { SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); - return; + return -ENODEV; } raid_bdev = base_info->raid_bdev; assert(spdk_get_thread() == spdk_thread_get_app_thread()); + if (base_info->remove_scheduled) { + return 0; + } + assert(base_info->desc); base_info->remove_scheduled = true; @@ -1453,11 +1513,14 @@ raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev) if (raid_bdev->num_base_bdevs_discovered == 0) { /* There is no base bdev for this raid, so free the raid device. */ raid_bdev_cleanup_and_free(raid_bdev); - return; } + } else if (raid_bdev->num_base_bdevs_discovered == raid_bdev->min_base_bdevs_operational) { + raid_bdev_deconfigure(raid_bdev, NULL, NULL); + } else { + return raid_bdev_suspend(raid_bdev, raid_bdev_remove_base_bdev_on_suspended, base_info); } - raid_bdev_deconfigure(raid_bdev, NULL, NULL); + return 0; } /* @@ -1512,9 +1575,15 @@ static void raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx) { + int rc; + switch (type) { case SPDK_BDEV_EVENT_REMOVE: - raid_bdev_remove_base_bdev(bdev); + rc = raid_bdev_remove_base_bdev(bdev); + if (rc != 0) { + SPDK_ERRLOG("Failed to remove base bdev %s: %s\n", + spdk_bdev_get_name(bdev), spdk_strerror(-rc)); + } break; case SPDK_BDEV_EVENT_RESIZE: raid_bdev_resize_base_bdev(bdev); @@ -1709,7 +1778,8 @@ raid_bdev_examine(struct spdk_bdev *bdev) TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { - if (base_info->bdev == NULL && strcmp(bdev->name, base_info->name) == 0) { + if (base_info->bdev == NULL && base_info->name != NULL && + strcmp(bdev->name, base_info->name) == 0) { raid_bdev_configure_base_bdev(base_info); break; }