Implemented the functionality to add a new base bdev to an existing raid bdev, growing the size of the raid if needed. Only raid1 currently supports growing with a new base bdev.

Signed-off-by: Damiano Cipriani <damiano.cipriani@suse.com>
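Note: the raid1 module takes part in growing through the channel_grow_base_bdev callback registered in its raid_bdev_module descriptor (see channel_grow_base_bdev() near the end of the file), which resizes the per-channel read statistics when a base bdev is added; the generic raid bdev layer is assumed to invoke it on every open I/O channel.
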
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2022 Intel Corporation.
 * All rights reserved.
 */

#include "bdev_raid.h"

#include "spdk/likely.h"
#include "spdk/log.h"

struct raid1_info {
	/* The parent raid bdev */
	struct raid_bdev *raid_bdev;
};

struct raid1_io_channel {
	/* Index of last base bdev used for reads */
	uint8_t base_bdev_read_idx;

	/* Read bandwidths generated for base_bdevs */
	uint64_t *base_bdev_read_bw;

	/* Maximum read bandwidth from all base_bdevs */
	uint64_t base_bdev_max_read_bw;
};

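/* Completion callback for an I/O submitted to a single base bdev: the child
 * bdev_io is freed and its result is accounted as one part of the parent raid I/O.
 */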
static void
raid1_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	raid_bdev_io_complete_part(raid_io, 1, success ?
				   SPDK_BDEV_IO_STATUS_SUCCESS :
				   SPDK_BDEV_IO_STATUS_FAILED);
}

static void raid1_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_raid1_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	raid1_submit_rw_request(raid_io);
}

static void
raid1_init_ext_io_opts(struct spdk_bdev_io *bdev_io, struct spdk_bdev_ext_io_opts *opts)
{
	memset(opts, 0, sizeof(*opts));
	opts->size = sizeof(*opts);
	opts->memory_domain = bdev_io->u.bdev.memory_domain;
	opts->memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx;
	opts->metadata = bdev_io->u.bdev.md_buf;
}

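/* Select the base bdev to serve the next read: round-robin over the base bdevs
 * that have an open channel, preferring the first one whose accumulated read
 * bandwidth counter is still below the current per-channel maximum.
 */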
static uint8_t
raid1_channel_next_read_base_bdev(struct raid_bdev_io_channel *raid_ch)
{
	struct raid1_io_channel *raid1_ch = spdk_io_channel_get_ctx(raid_ch->module_channel);
	uint8_t idx = raid1_ch->base_bdev_read_idx;
	uint8_t i;

	for (i = 0; i < raid_ch->num_channels; i++) {
		if (++idx == raid_ch->num_channels) {
			idx = 0;
		}

		if (raid_ch->base_channel[idx]) {
			raid1_ch->base_bdev_read_idx = idx;

			if (raid1_ch->base_bdev_read_bw[idx] < raid1_ch->base_bdev_max_read_bw) {
				break;
			}
		}
	}

	return raid1_ch->base_bdev_read_idx;
}

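/* Charge pd_blocks of read bandwidth to the currently selected base bdev and
 * refresh the per-channel maximum; all counters are reset first if adding
 * pd_blocks would overflow the maximum.
 */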
static void
raid1_channel_update_read_bw_counters(struct raid_bdev_io_channel *raid_ch, uint64_t pd_blocks)
{
	struct raid1_io_channel *raid1_ch = spdk_io_channel_get_ctx(raid_ch->module_channel);
	uint8_t idx = raid1_ch->base_bdev_read_idx;
	uint8_t i;

	if (spdk_unlikely(raid1_ch->base_bdev_max_read_bw > UINT64_MAX - pd_blocks)) {
		for (i = 0; i < raid_ch->num_channels; i++) {
			raid1_ch->base_bdev_read_bw[i] = 0;
		}
		raid1_ch->base_bdev_max_read_bw = 0;
	}

	raid1_ch->base_bdev_read_bw[idx] += pd_blocks;
	raid1_ch->base_bdev_max_read_bw = spdk_max(raid1_ch->base_bdev_max_read_bw,
						   raid1_ch->base_bdev_read_bw[idx]);
}

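/* Reads are served by a single base bdev chosen by the bandwidth balancer.
 * If the chosen base bdev has no open channel the I/O fails; on -ENOMEM the
 * request is queued and resubmitted later.
 */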
static int
raid1_submit_read_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct spdk_bdev_ext_io_opts io_opts;
	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch = NULL;
	uint64_t pd_lba, pd_blocks;
	uint8_t idx;
	int ret;

	pd_lba = bdev_io->u.bdev.offset_blocks;
	pd_blocks = bdev_io->u.bdev.num_blocks;

	idx = raid1_channel_next_read_base_bdev(raid_ch);

	if (spdk_unlikely(raid_ch->base_channel[idx] == NULL)) {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
		return 0;
	}

	raid1_channel_update_read_bw_counters(raid_ch, pd_blocks);

	base_info = &raid_bdev->base_bdev_info[idx];
	base_ch = raid_io->raid_ch->base_channel[idx];

	raid_io->base_bdev_io_remaining = 1;

	raid1_init_ext_io_opts(bdev_io, &io_opts);
	ret = raid_bdev_readv_blocks_ext(base_info, base_ch,
					 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					 pd_lba, pd_blocks, raid1_bdev_io_completion,
					 raid_io, &io_opts);

	if (spdk_likely(ret == 0)) {
		raid_io->base_bdev_io_submitted++;
	} else if (spdk_unlikely(ret == -ENOMEM)) {
		raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
					_raid1_submit_rw_request);
		return 0;
	}

	return ret;
}

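/* Writes are mirrored to every base bdev. Base bdevs without an open channel
 * are counted as successfully completed parts, so writes still succeed on a
 * degraded array. base_bdev_io_submitted tracks progress so that a request
 * queued on -ENOMEM resumes from the base bdev that failed to submit.
 */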
static int
raid1_submit_write_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct spdk_bdev_ext_io_opts io_opts;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	uint64_t pd_lba, pd_blocks;
	uint8_t idx;
	uint64_t base_bdev_io_not_submitted;
	int ret = 0;

	pd_lba = bdev_io->u.bdev.offset_blocks;
	pd_blocks = bdev_io->u.bdev.num_blocks;

	if (raid_io->base_bdev_io_submitted == 0) {
		raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
	}

	raid1_init_ext_io_opts(bdev_io, &io_opts);
	for (idx = raid_io->base_bdev_io_submitted; idx < raid_bdev->num_base_bdevs; idx++) {
		base_info = &raid_bdev->base_bdev_info[idx];
		base_ch = raid_io->raid_ch->base_channel[idx];

		if (base_ch == NULL) {
			raid_io->base_bdev_io_submitted++;
			raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
			continue;
		}

		ret = raid_bdev_writev_blocks_ext(base_info, base_ch,
						  bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
						  pd_lba, pd_blocks, raid1_bdev_io_completion,
						  raid_io, &io_opts);
		if (spdk_unlikely(ret != 0)) {
			if (spdk_unlikely(ret == -ENOMEM)) {
				raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
							_raid1_submit_rw_request);
				return 0;
			}

			base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
						     raid_io->base_bdev_io_submitted;
			raid_bdev_io_complete_part(raid_io, base_bdev_io_not_submitted,
						   SPDK_BDEV_IO_STATUS_FAILED);
			return 0;
		}

		raid_io->base_bdev_io_submitted++;
	}

	if (raid_io->base_bdev_io_submitted == 0) {
		ret = -ENODEV;
	}

	return ret;
}

static void
raid1_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	int ret;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		ret = raid1_submit_read_request(raid_io);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		ret = raid1_submit_write_request(raid_io);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	if (spdk_unlikely(ret != 0)) {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void
raid1_ioch_destroy(void *io_device, void *ctx_buf)
{
	struct raid1_io_channel *r1ch = ctx_buf;

	free(r1ch->base_bdev_read_bw);
}

static int
raid1_ioch_create(void *io_device, void *ctx_buf)
{
	struct raid1_io_channel *r1ch = ctx_buf;
	struct raid1_info *r1info = io_device;
	struct raid_bdev *raid_bdev = r1info->raid_bdev;
	int status = 0;

	r1ch->base_bdev_read_idx = 0;
	r1ch->base_bdev_max_read_bw = 0;
	r1ch->base_bdev_read_bw = calloc(raid_bdev->num_base_bdevs,
					 sizeof(*r1ch->base_bdev_read_bw));
	if (!r1ch->base_bdev_read_bw) {
		SPDK_ERRLOG("Failed to initialize io channel\n");
		status = -ENOMEM;
	}

	return status;
}

static void
raid1_io_device_unregister_done(void *io_device)
{
	struct raid1_info *r1info = io_device;

	raid_bdev_module_stop_done(r1info->raid_bdev);

	free(r1info);
}

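/* The usable capacity of the array is the smallest data size among the base
 * bdevs; every base bdev is clamped to it so that all mirrors stay the same size.
 */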
static int
raid1_start(struct raid_bdev *raid_bdev)
{
	uint64_t min_blockcnt = UINT64_MAX;
	struct raid_base_bdev_info *base_info;
	struct raid1_info *r1info;

	r1info = calloc(1, sizeof(*r1info));
	if (!r1info) {
		SPDK_ERRLOG("Failed to allocate RAID1 info device structure\n");
		return -ENOMEM;
	}
	r1info->raid_bdev = raid_bdev;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		min_blockcnt = spdk_min(min_blockcnt, base_info->data_size);
	}

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		base_info->data_size = min_blockcnt;
	}

	raid_bdev->bdev.blockcnt = min_blockcnt;
	raid_bdev->module_private = r1info;

	spdk_io_device_register(r1info, raid1_ioch_create, raid1_ioch_destroy,
				sizeof(struct raid1_io_channel), NULL);

	return 0;
}

static bool
raid1_stop(struct raid_bdev *raid_bdev)
{
	struct raid1_info *r1info = raid_bdev->module_private;

	spdk_io_device_unregister(r1info, raid1_io_device_unregister_done);

	return false;
}

static struct spdk_io_channel *
raid1_get_io_channel(struct raid_bdev *raid_bdev)
{
	struct raid1_info *r1info = raid_bdev->module_private;

	return spdk_get_io_channel(r1info);
}

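/* Per-channel part of adding a base bdev to the array: grow the read bandwidth
 * counter array to the new number of base bdevs and zero the slot of the added
 * base bdev. Presumably invoked by the generic raid bdev layer on each open
 * I/O channel after the new base bdev has been inserted.
 */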
static bool
channel_grow_base_bdev(struct raid_bdev *raid_bdev, struct raid_bdev_io_channel *raid_ch)
{
	struct raid1_io_channel *raid1_ch = spdk_io_channel_get_ctx(raid_ch->module_channel);
	void *tmp;

	tmp = realloc(raid1_ch->base_bdev_read_bw,
		      raid_bdev->num_base_bdevs * sizeof(*raid1_ch->base_bdev_read_bw));
	if (!tmp) {
		SPDK_ERRLOG("Unable to reallocate raid1 channel base_bdev_read_bw\n");
		return false;
	}
	memset(tmp + raid_ch->num_channels * sizeof(*raid1_ch->base_bdev_read_bw), 0,
	       sizeof(*raid1_ch->base_bdev_read_bw));
	raid1_ch->base_bdev_read_bw = tmp;

	return true;
}

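/* RAID1 module descriptor: a single operational base bdev keeps the array
 * online, and channel_grow_base_bdev lets raid1 take part in growing the raid
 * with a new base bdev.
 */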
static struct raid_bdev_module g_raid1_module = {
	.level = RAID1,
	.base_bdevs_min = 1,
	.base_bdevs_constraint = {CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL, 1},
	.memory_domains_supported = true,
	.start = raid1_start,
	.stop = raid1_stop,
	.submit_rw_request = raid1_submit_rw_request,
	.get_io_channel = raid1_get_io_channel,
	.channel_grow_base_bdev = channel_grow_base_bdev,
};
RAID_MODULE_REGISTER(&g_raid1_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_raid1)