module/raid: continue operation after base bdev removal

Don't stop the raid bdev if the minimum number of base bdevs are
available.

When removing a base bdev, first suspend the raid bdev and then perform
the actual removal/cleanup. Finally, resume the raid bdev.

Change-Id: Ie010d3760c32b0dad455a5a2a0ab7adcc602edf9
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
This commit is contained in:
Artur Paszkiewicz 2022-09-28 12:06:31 +02:00 committed by David Ko
parent f71c255827
commit 65f2af1dcb

View File

@ -89,7 +89,6 @@ raid_bdev_create_cb(void *io_device, void *ctx_buf)
pthread_mutex_lock(&raid_bdev->mutex); pthread_mutex_lock(&raid_bdev->mutex);
raid_ch->is_suspended = (raid_bdev->suspend_cnt > 0); raid_ch->is_suspended = (raid_bdev->suspend_cnt > 0);
pthread_mutex_unlock(&raid_bdev->mutex);
for (i = 0; i < raid_ch->num_channels; i++) { for (i = 0; i < raid_ch->num_channels; i++) {
/* /*
@ -97,6 +96,9 @@ raid_bdev_create_cb(void *io_device, void *ctx_buf)
* split logic to send the respective child bdev ios to respective base * split logic to send the respective child bdev ios to respective base
* bdev io channel. * bdev io channel.
*/ */
if (raid_bdev->base_bdev_info[i].desc == NULL) {
continue;
}
raid_ch->base_channel[i] = spdk_bdev_get_io_channel( raid_ch->base_channel[i] = spdk_bdev_get_io_channel(
raid_bdev->base_bdev_info[i].desc); raid_bdev->base_bdev_info[i].desc);
if (!raid_ch->base_channel[i]) { if (!raid_ch->base_channel[i]) {
@ -105,6 +107,7 @@ raid_bdev_create_cb(void *io_device, void *ctx_buf)
break; break;
} }
} }
pthread_mutex_unlock(&raid_bdev->mutex);
if (!ret && raid_bdev->module->get_io_channel) { if (!ret && raid_bdev->module->get_io_channel) {
raid_ch->module_channel = raid_bdev->module->get_io_channel(raid_bdev); raid_ch->module_channel = raid_bdev->module->get_io_channel(raid_bdev);
@ -115,10 +118,10 @@ raid_bdev_create_cb(void *io_device, void *ctx_buf)
} }
if (ret) { if (ret) {
uint8_t j; for (i = 0; i < raid_ch->num_channels; i++) {
if (raid_ch->base_channel[i] != NULL) {
for (j = 0; j < i; j++) { spdk_put_io_channel(raid_ch->base_channel[i]);
spdk_put_io_channel(raid_ch->base_channel[j]); }
} }
free(raid_ch->base_channel); free(raid_ch->base_channel);
raid_ch->base_channel = NULL; raid_ch->base_channel = NULL;
@ -154,8 +157,9 @@ raid_bdev_destroy_cb(void *io_device, void *ctx_buf)
for (i = 0; i < raid_ch->num_channels; i++) { for (i = 0; i < raid_ch->num_channels; i++) {
/* Free base bdev channels */ /* Free base bdev channels */
assert(raid_ch->base_channel[i] != NULL); if (raid_ch->base_channel[i] != NULL) {
spdk_put_io_channel(raid_ch->base_channel[i]); spdk_put_io_channel(raid_ch->base_channel[i]);
}
} }
free(raid_ch->base_channel); free(raid_ch->base_channel);
raid_ch->base_channel = NULL; raid_ch->base_channel = NULL;
@ -420,10 +424,14 @@ raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io)
raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
} }
while (raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) { for (i = raid_io->base_bdev_io_submitted; i < raid_bdev->num_base_bdevs; i++) {
i = raid_io->base_bdev_io_submitted;
base_info = &raid_bdev->base_bdev_info[i]; base_info = &raid_bdev->base_bdev_info[i];
base_ch = raid_io->raid_ch->base_channel[i]; base_ch = raid_io->raid_ch->base_channel[i];
if (base_ch == NULL) {
raid_io->base_bdev_io_submitted++;
raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
continue;
}
ret = spdk_bdev_reset(base_info->desc, base_ch, ret = spdk_bdev_reset(base_info->desc, base_ch,
raid_base_bdev_reset_complete, raid_io); raid_base_bdev_reset_complete, raid_io);
if (ret == 0) { if (ret == 0) {
@ -547,7 +555,6 @@ _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type
RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
if (base_info->bdev == NULL) { if (base_info->bdev == NULL) {
assert(false);
continue; continue;
} }
@ -704,7 +711,9 @@ raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int
struct raid_bdev *raid_bdev = ctx; struct raid_bdev *raid_bdev = ctx;
struct spdk_bdev *base_bdev; struct spdk_bdev *base_bdev;
uint32_t i; uint32_t i;
int domains_count = 0, rc; int domains_count = 0, rc = 0;
pthread_mutex_lock(&raid_bdev->mutex);
if (raid_bdev->module->memory_domains_supported == false) { if (raid_bdev->module->memory_domains_supported == false) {
return 0; return 0;
@ -713,26 +722,38 @@ raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int
/* First loop to get the number of memory domains */ /* First loop to get the number of memory domains */
for (i = 0; i < raid_bdev->num_base_bdevs; i++) { for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
base_bdev = raid_bdev->base_bdev_info[i].bdev; base_bdev = raid_bdev->base_bdev_info[i].bdev;
if (base_bdev == NULL) {
continue;
}
rc = spdk_bdev_get_memory_domains(base_bdev, NULL, 0); rc = spdk_bdev_get_memory_domains(base_bdev, NULL, 0);
if (rc < 0) { if (rc < 0) {
return rc; goto out;
} }
domains_count += rc; domains_count += rc;
} }
if (!domains || array_size < domains_count) { if (!domains || array_size < domains_count) {
return domains_count; goto out;
} }
for (i = 0; i < raid_bdev->num_base_bdevs; i++) { for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
base_bdev = raid_bdev->base_bdev_info[i].bdev; base_bdev = raid_bdev->base_bdev_info[i].bdev;
if (base_bdev == NULL) {
continue;
}
rc = spdk_bdev_get_memory_domains(base_bdev, domains, array_size); rc = spdk_bdev_get_memory_domains(base_bdev, domains, array_size);
if (rc < 0) { if (rc < 0) {
return rc; goto out;
} }
domains += rc; domains += rc;
array_size -= rc; array_size -= rc;
} }
out:
pthread_mutex_unlock(&raid_bdev->mutex);
if (rc < 0) {
return rc;
}
return domains_count; return domains_count;
} }
@ -1229,7 +1250,6 @@ raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn,
return; return;
} }
assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered);
raid_bdev->state = RAID_BDEV_STATE_OFFLINE; raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
assert(raid_bdev->num_base_bdevs_discovered); assert(raid_bdev->num_base_bdevs_discovered);
SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n"); SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n");
@ -1336,9 +1356,6 @@ raid_bdev_suspend_continue(struct spdk_io_channel_iter *i, int status)
raid_bdev_dec_suspend_num_channels(raid_bdev); raid_bdev_dec_suspend_num_channels(raid_bdev);
} }
static int raid_bdev_suspend(struct raid_bdev *raid_bdev, raid_bdev_suspended_cb cb,
void *cb_ctx) __attribute__((unused));
static int static int
raid_bdev_suspend(struct raid_bdev *raid_bdev, raid_bdev_suspended_cb cb, void *cb_ctx) raid_bdev_suspend(struct raid_bdev *raid_bdev, raid_bdev_suspended_cb cb, void *cb_ctx)
{ {
@ -1396,8 +1413,6 @@ raid_bdev_channel_resume(struct spdk_io_channel_iter *i)
spdk_for_each_channel_continue(i, 0); spdk_for_each_channel_continue(i, 0);
} }
static void raid_bdev_resume(struct raid_bdev *raid_bdev) __attribute__((unused));
static void static void
raid_bdev_resume(struct raid_bdev *raid_bdev) raid_bdev_resume(struct raid_bdev *raid_bdev)
{ {
@ -1413,6 +1428,46 @@ raid_bdev_resume(struct raid_bdev *raid_bdev)
} }
} }
static void
raid_bdev_channel_remove_base_bdev(struct spdk_io_channel_iter *i)
{
struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i);
struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch);
uint8_t idx = base_info - base_info->raid_bdev->base_bdev_info;
SPDK_DEBUGLOG(bdev_raid, "slot: %u raid_ch: %p\n", idx, raid_ch);
if (raid_ch->base_channel[idx] != NULL) {
spdk_put_io_channel(raid_ch->base_channel[idx]);
raid_ch->base_channel[idx] = NULL;
}
spdk_for_each_channel_continue(i, 0);
}
static void
raid_bdev_remove_base_bdev_done(struct spdk_io_channel_iter *i, int status)
{
struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i);
raid_bdev_resume(base_info->raid_bdev);
}
static void
raid_bdev_remove_base_bdev_on_suspended(struct raid_bdev *raid_bdev, void *ctx)
{
struct raid_base_bdev_info *base_info = ctx;
pthread_mutex_lock(&raid_bdev->mutex);
base_info->remove_scheduled = false;
raid_bdev_free_base_bdev_resource(base_info);
pthread_mutex_unlock(&raid_bdev->mutex);
spdk_for_each_channel(raid_bdev, raid_bdev_channel_remove_base_bdev, base_info,
raid_bdev_remove_base_bdev_done);
}
/* /*
* brief: * brief:
* raid_bdev_remove_base_bdev function is called by below layers when base_bdev * raid_bdev_remove_base_bdev function is called by below layers when base_bdev
@ -1421,26 +1476,31 @@ raid_bdev_resume(struct raid_bdev *raid_bdev)
* params: * params:
* base_bdev - pointer to base bdev which got removed * base_bdev - pointer to base bdev which got removed
* returns: * returns:
* none * 0 - success
* non zero - failure
*/ */
static void static int
raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev) raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev)
{ {
struct raid_bdev *raid_bdev; struct raid_bdev *raid_bdev;
struct raid_base_bdev_info *base_info; struct raid_base_bdev_info *base_info;
SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_bdev\n"); SPDK_DEBUGLOG(bdev_raid, "%s\n", base_bdev->name);
/* Find the raid_bdev which has claimed this base_bdev */ /* Find the raid_bdev which has claimed this base_bdev */
base_info = raid_bdev_find_base_info_by_bdev(base_bdev); base_info = raid_bdev_find_base_info_by_bdev(base_bdev);
if (!base_info) { if (!base_info) {
SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name);
return; return -ENODEV;
} }
raid_bdev = base_info->raid_bdev; raid_bdev = base_info->raid_bdev;
assert(spdk_get_thread() == spdk_thread_get_app_thread()); assert(spdk_get_thread() == spdk_thread_get_app_thread());
if (base_info->remove_scheduled) {
return 0;
}
assert(base_info->desc); assert(base_info->desc);
base_info->remove_scheduled = true; base_info->remove_scheduled = true;
@ -1453,11 +1513,14 @@ raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev)
if (raid_bdev->num_base_bdevs_discovered == 0) { if (raid_bdev->num_base_bdevs_discovered == 0) {
/* There is no base bdev for this raid, so free the raid device. */ /* There is no base bdev for this raid, so free the raid device. */
raid_bdev_cleanup_and_free(raid_bdev); raid_bdev_cleanup_and_free(raid_bdev);
return;
} }
} else if (raid_bdev->num_base_bdevs_discovered == raid_bdev->min_base_bdevs_operational) {
raid_bdev_deconfigure(raid_bdev, NULL, NULL);
} else {
return raid_bdev_suspend(raid_bdev, raid_bdev_remove_base_bdev_on_suspended, base_info);
} }
raid_bdev_deconfigure(raid_bdev, NULL, NULL); return 0;
} }
/* /*
@ -1512,9 +1575,15 @@ static void
raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
void *event_ctx) void *event_ctx)
{ {
int rc;
switch (type) { switch (type) {
case SPDK_BDEV_EVENT_REMOVE: case SPDK_BDEV_EVENT_REMOVE:
raid_bdev_remove_base_bdev(bdev); rc = raid_bdev_remove_base_bdev(bdev);
if (rc != 0) {
SPDK_ERRLOG("Failed to remove base bdev %s: %s\n",
spdk_bdev_get_name(bdev), spdk_strerror(-rc));
}
break; break;
case SPDK_BDEV_EVENT_RESIZE: case SPDK_BDEV_EVENT_RESIZE:
raid_bdev_resize_base_bdev(bdev); raid_bdev_resize_base_bdev(bdev);
@ -1709,7 +1778,8 @@ raid_bdev_examine(struct spdk_bdev *bdev)
TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) { TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) {
RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
if (base_info->bdev == NULL && strcmp(bdev->name, base_info->name) == 0) { if (base_info->bdev == NULL && base_info->name != NULL &&
strcmp(bdev->name, base_info->name) == 0) {
raid_bdev_configure_base_bdev(base_info); raid_bdev_configure_base_bdev(base_info);
break; break;
} }