Spdk/module/bdev/raid/bdev_raid.h
Damiano Cipriani fec1a42b82 module/raid: raid grow adding a new base bdev
Implemented the functionality to add a new base bdev to an existing
raid bdev, growing the size of the raid if needed.
Actually only raid1 supports growing with a new base bdev.

Signed-off-by: Damiano Cipriani <damiano.cipriani@suse.com>
2023-09-19 09:57:48 +02:00

392 lines
12 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (C) 2018 Intel Corporation.
* All rights reserved.
*/
#ifndef SPDK_BDEV_RAID_INTERNAL_H
#define SPDK_BDEV_RAID_INTERNAL_H
#include "spdk/bdev_module.h"
#include "spdk/uuid.h"
#include "bdev_raid_sb.h"
#define RAID_BDEV_MIN_DATA_OFFSET_SIZE (1024*1024) /* 1 MiB */
SPDK_STATIC_ASSERT(RAID_BDEV_SB_MAX_LENGTH < RAID_BDEV_MIN_DATA_OFFSET_SIZE,
"Incorrect min data offset");
enum raid_level {
INVALID_RAID_LEVEL = -1,
RAID0 = 0,
RAID1 = 1,
RAID5F = 95, /* 0x5f */
CONCAT = 99,
};
/*
* Raid state describes the state of the raid. This raid bdev can be either in
* configured list or configuring list
*/
enum raid_bdev_state {
/* raid bdev is ready and is seen by upper layers */
RAID_BDEV_STATE_ONLINE,
/*
* raid bdev is configuring, not all underlying bdevs are present.
* And can't be seen by upper layers.
*/
RAID_BDEV_STATE_CONFIGURING,
/*
* In offline state, raid bdev layer will complete all incoming commands without
* submitting to underlying base nvme bdevs
*/
RAID_BDEV_STATE_OFFLINE,
/* raid bdev state max, new states should be added before this */
RAID_BDEV_STATE_MAX
};
/*
* raid_base_bdev_info contains information for the base bdevs which are part of some
* raid. This structure contains the per base bdev information. Whatever is
* required per base device for raid bdev will be kept here
*/
struct raid_base_bdev_info {
/* The raid bdev that this base bdev belongs to */
struct raid_bdev *raid_bdev;
/* name of the bdev */
char *name;
/* uuid of the bdev */
struct spdk_uuid uuid;
/* pointer to base spdk bdev */
struct spdk_bdev *bdev;
/* pointer to base bdev descriptor opened by raid bdev */
struct spdk_bdev_desc *desc;
/* data offset for raid bdev [blocks] */
uint64_t data_offset;
/* data size of for raid bdev [blocks] */
uint64_t data_size;
/*
* When underlying base device calls the hot plug function on drive removal,
* this flag will be set and later after doing some processing, base device
* descriptor will be closed
*/
bool remove_scheduled;
/* Hold the number of blocks to know how large the base bdev is resized. */
uint64_t blockcnt;
/* io channel for the app thread */
struct spdk_io_channel *app_thread_ch;
/* Set to true when base bdev has completed the configuration process */
bool is_configured;
};
/*
* raid_bdev_io is the context part of bdev_io. It contains the information
* related to bdev_io for a raid bdev
*/
struct raid_bdev_io {
/* The raid bdev associated with this IO */
struct raid_bdev *raid_bdev;
/* WaitQ entry, used only in waitq logic */
struct spdk_bdev_io_wait_entry waitq_entry;
/* Context of the original channel for this IO */
struct raid_bdev_io_channel *raid_ch;
/* Used for tracking progress on io requests sent to member disks. */
uint64_t base_bdev_io_remaining;
uint8_t base_bdev_io_submitted;
uint8_t base_bdev_io_status;
/* Private data for the raid module */
void *module_private;
TAILQ_ENTRY(raid_bdev_io) link;
};
/*
* raid_bdev is the single entity structure which contains SPDK block device
* and the information related to any raid bdev either configured or
* in configuring list. io device is created on this.
*/
struct raid_bdev {
/* raid bdev device, this will get registered in bdev layer */
struct spdk_bdev bdev;
/* link of raid bdev to link it to global raid bdev list */
TAILQ_ENTRY(raid_bdev) global_link;
/* array of base bdev info */
struct raid_base_bdev_info *base_bdev_info;
/* strip size of raid bdev in blocks */
uint32_t strip_size;
/* strip size of raid bdev in KB */
uint32_t strip_size_kb;
/* strip size bit shift for optimized calculation */
uint32_t strip_size_shift;
/* block length bit shift for optimized calculation */
uint32_t blocklen_shift;
/* state of raid bdev */
enum raid_bdev_state state;
/* number of base bdevs comprising raid bdev */
uint8_t num_base_bdevs;
/* number of base bdevs discovered */
uint8_t num_base_bdevs_discovered;
/* number of operational base bdevs */
uint8_t num_base_bdevs_operational;
/* minimum number of viable base bdevs that are required by array to operate */
uint8_t min_base_bdevs_operational;
/* Raid Level of this raid bdev */
enum raid_level level;
/* Set to true if destroy of this raid bdev is started. */
bool destroy_started;
/* Module for RAID-level specific operations */
struct raid_bdev_module *module;
/* Private data for the raid module */
void *module_private;
/* Counter of callers of raid_bdev_suspend() */
uint32_t suspend_cnt;
/* Number of channels remaining to suspend */
uint32_t suspend_num_channels;
/* List of suspend contexts */
TAILQ_HEAD(, raid_bdev_suspend_ctx) suspend_ctx;
/* Device mutex */
pthread_mutex_t mutex;
/* Superblock */
struct raid_bdev_superblock *sb;
/* Superblock write context */
void *sb_write_ctx;
/* A flag to indicate that an operation to add a base bdev is in progress */
bool base_bdev_updating;
};
#define RAID_FOR_EACH_BASE_BDEV(r, i) \
for (i = r->base_bdev_info; i < r->base_bdev_info + r->num_base_bdevs; i++)
/*
* raid_bdev_io_channel is the context of spdk_io_channel for raid bdev device. It
* contains the relationship of raid bdev io channel with base bdev io channels.
*/
struct raid_bdev_io_channel {
/* Array of IO channels of base bdevs */
struct spdk_io_channel **base_channel;
/* Number of IO channels */
uint8_t num_channels;
/* Private raid module IO channel */
struct spdk_io_channel *module_channel;
/* Number of raid IOs on this channel */
uint32_t num_ios;
/* Is the channel currently suspended */
bool is_suspended;
/* List of suspended IOs */
TAILQ_HEAD(, raid_bdev_io) suspended_ios;
};
/* TAIL head for raid bdev list */
TAILQ_HEAD(raid_all_tailq, raid_bdev);
extern struct raid_all_tailq g_raid_bdev_list;
typedef void (*raid_bdev_destruct_cb)(void *cb_ctx, int rc);
int raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs,
enum raid_level level, struct raid_bdev **raid_bdev_out,
const struct spdk_uuid *uuid, bool superblock);
void raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_ctx);
int raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t slot);
struct raid_bdev *raid_bdev_find_by_name(const char *name);
enum raid_level raid_bdev_str_to_level(const char *str);
const char *raid_bdev_level_to_str(enum raid_level level);
enum raid_bdev_state raid_bdev_str_to_state(const char *str);
const char *raid_bdev_state_to_str(enum raid_bdev_state state);
void raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w);
int raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev);
int raid_bdev_grow_base_bdev(struct raid_bdev *raid_bdev, char *base_bdev_name,
raid_bdev_destruct_cb cb_fn, void *cb_arg);
/*
* RAID module descriptor
*/
struct raid_bdev_module {
/* RAID level implemented by this module */
enum raid_level level;
/* Minimum required number of base bdevs. Must be > 0. */
uint8_t base_bdevs_min;
/*
* RAID constraint. Determines number of base bdevs that can be removed
* without failing the array.
*/
struct {
enum {
CONSTRAINT_UNSET = 0,
CONSTRAINT_MAX_BASE_BDEVS_REMOVED,
CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL,
} type;
uint8_t value;
} base_bdevs_constraint;
/* Set to true if this module supports memory domains. */
bool memory_domains_supported;
/*
* Called when the raid is starting, right before changing the state to
* online and registering the bdev. Parameters of the bdev like blockcnt
* should be set here.
*
* Non-zero return value will abort the startup process.
*/
int (*start)(struct raid_bdev *raid_bdev);
/*
* Called when the raid is stopping, right before changing the state to
* offline and unregistering the bdev. Optional.
*
* The function should return false if it is asynchronous. Then, after
* the async operation has completed and the module is fully stopped
* raid_bdev_module_stop_done() must be called.
*/
bool (*stop)(struct raid_bdev *raid_bdev);
/* Handler for R/W requests */
void (*submit_rw_request)(struct raid_bdev_io *raid_io);
/* Handler for requests without payload (flush, unmap). Optional. */
void (*submit_null_payload_request)(struct raid_bdev_io *raid_io);
/*
* Called when the bdev's IO channel is created to get the module's private IO channel.
* Optional.
*/
struct spdk_io_channel *(*get_io_channel)(struct raid_bdev *raid_bdev);
/*
* Called when a base_bdev is resized to resize the raid if the condition
* is satisfied.
*/
void (*resize)(struct raid_bdev *raid_bdev);
TAILQ_ENTRY(raid_bdev_module) link;
bool (*channel_grow_base_bdev)(struct raid_bdev *raid_bdev,
struct raid_bdev_io_channel *raid_ch);
};
void raid_bdev_module_list_add(struct raid_bdev_module *raid_module);
#define __RAID_MODULE_REGISTER(line) __RAID_MODULE_REGISTER_(line)
#define __RAID_MODULE_REGISTER_(line) raid_module_register_##line
#define RAID_MODULE_REGISTER(_module) \
__attribute__((constructor)) static void \
__RAID_MODULE_REGISTER(__LINE__)(void) \
{ \
raid_bdev_module_list_add(_module); \
}
bool raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed,
enum spdk_bdev_io_status status);
void raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,
struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn);
void raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status);
void raid_bdev_module_stop_done(struct raid_bdev *raid_bdev);
/**
* Raid bdev I/O read/write wrapper for spdk_bdev_readv_blocks_ext function.
*/
static inline int
raid_bdev_readv_blocks_ext(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, uint64_t offset_blocks,
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
struct spdk_bdev_ext_io_opts *opts)
{
struct spdk_bdev_desc *desc = base_info->desc;
uint64_t offset = base_info->data_offset + offset_blocks;
return spdk_bdev_readv_blocks_ext(desc, ch, iov, iovcnt, offset, num_blocks, cb, cb_arg, opts);
}
/**
* Raid bdev I/O read/write wrapper for spdk_bdev_writev_blocks_ext function.
*/
static inline int
raid_bdev_writev_blocks_ext(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, uint64_t offset_blocks,
uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
struct spdk_bdev_ext_io_opts *opts)
{
struct spdk_bdev_desc *desc = base_info->desc;
uint64_t offset = base_info->data_offset + offset_blocks;
return spdk_bdev_writev_blocks_ext(desc, ch, iov, iovcnt, offset, num_blocks, cb, cb_arg, opts);
}
/**
* Raid bdev I/O read/write wrapper for spdk_bdev_unmap_blocks function.
*/
static inline int
raid_bdev_unmap_blocks(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
struct spdk_bdev_desc *desc = base_info->desc;
uint64_t offset = base_info->data_offset + offset_blocks;
return spdk_bdev_unmap_blocks(desc, ch, offset, num_blocks, cb, cb_arg);
}
/**
* Raid bdev I/O read/write wrapper for spdk_bdev_flush_blocks function.
*/
static inline int
raid_bdev_flush_blocks(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
struct spdk_bdev_desc *desc = base_info->desc;
uint64_t offset = base_info->data_offset + offset_blocks;
return spdk_bdev_flush_blocks(desc, ch, offset, num_blocks, cb, cb_arg);
}
#endif /* SPDK_BDEV_RAID_INTERNAL_H */