blob: add IO channels for esnap clones

The channel passed to blob IO operations is useful for tracking
operations within the blobstore and the bs_dev that the blobstore
resides on. Esnap clone blobs perform reads from other bs_devs and
require per-thread, per-bs_dev channels.

This commit augments struct spdk_bs_channel with a tree containing
channels for the external snapshot bs_devs. The tree is indexed by blob
ID. These "esnap channels" are lazily created on the first read from an
external snapshot via each bs_channel. They are removed as bs_channels
are destroyed and blobs are closed.
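
In outline, the lazy lookup added to blobstore.c works as follows (condensed
from the diff below; no locking is needed because each tree is only touched
from its owning thread):

    struct spdk_io_channel *
    blob_esnap_get_io_channel(struct spdk_io_channel *ch, struct spdk_blob *blob)
    {
            struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(ch);
            struct blob_esnap_channel find = { .blob_id = blob->id };
            struct blob_esnap_channel *ec;

            /* Fast path: this thread already has a channel for this esnap. */
            ec = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
            if (ec != NULL) {
                    return ec->channel;
            }
            /* Slow path: create a bs_dev channel and cache it in the tree.
             * Error handling is omitted here; see the full version below. */
            ec = calloc(1, sizeof(*ec));
            ec->blob_id = blob->id;
            ec->channel = blob->back_bs_dev->create_channel(blob->back_bs_dev);
            RB_INSERT(blob_esnap_channel_tree, &bs_channel->esnap_channels, ec);
            return ec->channel;
    }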

Change-Id: I97aebe5a2f3584bfbf3a10ede8f3128448d30d6e
Signed-off-by: Mike Gerdts <mgerdts@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14974
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Mike Gerdts 2022-09-23 06:48:29 -05:00 committed by Jim Harris
parent d0516312ff
commit b47cee6c96
7 changed files with 886 additions and 28 deletions

View File

@ -129,6 +129,11 @@ Channels are an SPDK-wide abstraction and with Blobstore the best way to think a
required in order to do IO. The application will perform IO to the channel and channels are best thought of as being
associated 1:1 with a thread.
With external snapshots (see @ref blob_pg_esnap_and_esnap_clone), a read from a blob may lead to
reading from the device containing the blobstore or an external snapshot device. To support this,
each blobstore IO channel maintains a tree of channels to be used when reading from external
snapshot devices.
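For illustration, a minimal read path through such a channel (a sketch modeled on the unit tests
added by this commit; `bs`, `blob`, and the `read_done` completion callback are assumed to exist):

    struct spdk_io_channel *ch = spdk_bs_alloc_io_channel(bs);
    uint8_t buf[4096]; /* one io_unit, assuming a 4 KiB io_unit_size */

    /* The first read from an esnap clone lazily creates this thread's channel
     * for the external snapshot device; later reads on ch reuse it. */
    spdk_blob_io_read(blob, ch, buf, 0, 1, read_done, NULL);

    /* Freeing the blobstore channel also destroys any cached esnap channels. */
    spdk_bs_free_io_channel(ch);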
### Blob Identifiers
When an application creates a blob, it does not provide a name as is the case with many other similar
@ -465,6 +470,13 @@ of IO. They are an internal construct only and are pre-allocated on a per channe
earlier). They are removed from a channel associated linked list when the set (sequence or batch) is started and
then returned to the list when completed.
Each request set maintains a reference to a `channel` and a `back_channel`. The `channel` is used
for performing IO on the blobstore device. The `back_channel` is used for performing IO on the
blob's back device, `blob->back_bs_dev`. For blobs that are not esnap clones, `channel` and
`back_channel` reference an IO channel used with the device that contains the blobstore. For blobs
that are esnap clones, `channel` is the same as with any other blob and `back_channel` is an IO
channel for the external snapshot device.
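The `back_channel` is chosen when the request set is created. A condensed sketch of the logic this
commit adds to `bs_batch_open()` and `bs_sequence_start_blob()` (see the request.c changes below):

    struct spdk_io_channel *back_channel = _channel;

    if (spdk_blob_is_esnap_clone(blob)) {
            /* Per-thread, per-bs_dev channel, created lazily on first use. */
            back_channel = blob_esnap_get_io_channel(_channel, blob);
            if (back_channel == NULL) {
                    return NULL; /* channel allocation failed, likely ENOMEM */
            }
    }
    set->back_channel = back_channel;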
### Key Internal Structures
`blobstore.h` contains many of the key structures for the internal workings of Blobstore. Only a few notable ones

View File

@ -40,6 +40,24 @@ static int blob_remove_xattr(struct spdk_blob *blob, const char *name, bool inte
static void blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
/*
* External snapshots require a channel per thread per esnap bdev. The tree
* is populated lazily as blob IOs are handled by the back_bs_dev. When this
* channel is destroyed, all the channels in the tree are destroyed.
*/
struct blob_esnap_channel {
RB_ENTRY(blob_esnap_channel) node;
spdk_blob_id blob_id;
struct spdk_io_channel *channel;
};
static int blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2);
static void blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob,
spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
static void blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch);
RB_GENERATE_STATIC(blob_esnap_channel_tree, blob_esnap_channel, node, blob_esnap_channel_compare)
static inline bool
blob_is_esnap_clone(const struct spdk_blob *blob)
{
@ -339,10 +357,33 @@ blob_free(struct spdk_blob *blob)
free(blob);
}
static void
blob_back_bs_destroy_esnap_done(void *ctx, struct spdk_blob *blob, int bserrno)
{
struct spdk_bs_dev *bs_dev = ctx;
if (bserrno != 0) {
/*
* This is probably due to a memory allocation failure when creating the
* blob_esnap_destroy_ctx before iterating threads.
*/
SPDK_ERRLOG("blob 0x%" PRIx64 ": Unable to destroy bs dev channels: error %d\n",
blob->id, bserrno);
assert(false);
}
SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": calling destroy on back_bs_dev\n", blob->id);
bs_dev->destroy(bs_dev);
}
static void
blob_back_bs_destroy(struct spdk_blob *blob)
{
blob->back_bs_dev->destroy(blob->back_bs_dev);
SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": preparing to destroy back_bs_dev\n",
blob->id);
blob_esnap_destroy_bs_dev_channels(blob, blob_back_bs_destroy_esnap_done,
blob->back_bs_dev);
blob->back_bs_dev = NULL;
}
@ -2526,7 +2567,7 @@ blob_can_copy(struct spdk_blob *blob, uint32_t cluster_start_page, uint64_t *bas
{
uint64_t lba = bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page);
return (blob->bs->dev->copy != NULL) &&
return (!blob_is_esnap_clone(blob) && blob->bs->dev->copy != NULL) &&
blob->back_bs_dev->translate_lba(blob->back_bs_dev, lba, base_lba);
}
@ -2862,7 +2903,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
case SPDK_BLOB_READ: {
spdk_bs_batch_t *batch;
batch = bs_batch_open(_ch, &cpl);
batch = bs_batch_open(_ch, &cpl, blob);
if (!batch) {
cb_fn(cb_arg, -ENOMEM);
return;
@ -2890,7 +2931,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
return;
}
batch = bs_batch_open(_ch, &cpl);
batch = bs_batch_open(_ch, &cpl, blob);
if (!batch) {
cb_fn(cb_arg, -ENOMEM);
return;
@ -2920,7 +2961,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
case SPDK_BLOB_UNMAP: {
spdk_bs_batch_t *batch;
batch = bs_batch_open(_ch, &cpl);
batch = bs_batch_open(_ch, &cpl, blob);
if (!batch) {
cb_fn(cb_arg, -ENOMEM);
return;
@ -3287,6 +3328,7 @@ bs_channel_create(void *io_device, void *ctx_buf)
TAILQ_INIT(&channel->need_cluster_alloc);
TAILQ_INIT(&channel->queued_io);
RB_INIT(&channel->esnap_channels);
return 0;
}
@ -3309,6 +3351,8 @@ bs_channel_destroy(void *io_device, void *ctx_buf)
bs_user_op_abort(op, -EIO);
}
blob_esnap_destroy_bs_channel(channel);
free(channel->req_mem);
spdk_free(channel->new_cluster_page);
channel->dev->destroy_channel(channel->dev, channel->dev_channel);
@ -7788,6 +7832,24 @@ blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
bs_sequence_finish(seq, bserrno);
}
static void
blob_close_esnap_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
{
spdk_bs_sequence_t *seq = cb_arg;
if (bserrno != 0) {
SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": close failed with error %d\n",
blob->id, bserrno);
bs_sequence_finish(seq, bserrno);
return;
}
SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": closed, syncing metadata\n", blob->id);
/* Sync metadata */
blob_persist(seq, blob, blob_close_cpl, blob);
}
void
spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
{
@ -7813,6 +7875,11 @@ spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_ar
return;
}
if (blob->open_ref == 1 && blob_is_esnap_clone(blob)) {
blob_esnap_destroy_bs_dev_channels(blob, blob_close_esnap_done, seq);
return;
}
/* Sync metadata */
blob_persist(seq, blob, blob_close_cpl, blob);
}
@ -7827,6 +7894,7 @@ struct spdk_io_channel *spdk_bs_alloc_io_channel(struct spdk_blob_store *bs)
void
spdk_bs_free_io_channel(struct spdk_io_channel *channel)
{
blob_esnap_destroy_bs_channel(spdk_io_channel_get_ctx(channel));
spdk_put_io_channel(channel);
}
@ -8574,5 +8642,164 @@ spdk_blob_get_esnap_id(struct spdk_blob *blob, const void **id, size_t *len)
return blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, id, len, true);
}
struct spdk_io_channel *
blob_esnap_get_io_channel(struct spdk_io_channel *ch, struct spdk_blob *blob)
{
struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(ch);
struct spdk_bs_dev *bs_dev = blob->back_bs_dev;
struct blob_esnap_channel find = {};
struct blob_esnap_channel *esnap_channel, *existing;
find.blob_id = blob->id;
esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
if (spdk_likely(esnap_channel != NULL)) {
SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": using cached channel on thread %s\n",
blob->id, spdk_thread_get_name(spdk_get_thread()));
return esnap_channel->channel;
}
SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": allocating channel on thread %s\n",
blob->id, spdk_thread_get_name(spdk_get_thread()));
esnap_channel = calloc(1, sizeof(*esnap_channel));
if (esnap_channel == NULL) {
SPDK_NOTICELOG("blob 0x%" PRIx64 " channel allocation failed: no memory\n",
find.blob_id);
return NULL;
}
esnap_channel->channel = bs_dev->create_channel(bs_dev);
if (esnap_channel->channel == NULL) {
SPDK_NOTICELOG("blob 0x%" PRIx64 " back channel allocation failed\n", blob->id);
free(esnap_channel);
return NULL;
}
esnap_channel->blob_id = find.blob_id;
existing = RB_INSERT(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
if (spdk_unlikely(existing != NULL)) {
/*
* This should be unreachable: all modifications to this tree happen on this thread.
*/
SPDK_ERRLOG("blob 0x%" PRIx64 ": lost race to allocate a channel\n", find.blob_id);
assert(false);
bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
free(esnap_channel);
return existing->channel;
}
return esnap_channel->channel;
}
static int
blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2)
{
return (c1->blob_id < c2->blob_id ? -1 : c1->blob_id > c2->blob_id);
}
struct blob_esnap_destroy_ctx {
spdk_blob_op_with_handle_complete cb_fn;
void *cb_arg;
struct spdk_blob *blob;
struct spdk_bs_dev *back_bs_dev;
};
static void
blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
{
struct blob_esnap_destroy_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
struct spdk_blob *blob = ctx->blob;
SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
blob->id);
ctx->cb_fn(ctx->cb_arg, blob, status);
free(ctx);
}
static void
blob_esnap_destroy_one_channel(struct spdk_io_channel_iter *i)
{
struct blob_esnap_destroy_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
struct spdk_blob *blob = ctx->blob;
struct spdk_bs_dev *bs_dev = ctx->back_bs_dev;
struct spdk_io_channel *channel = spdk_io_channel_iter_get_channel(i);
struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(channel);
struct blob_esnap_channel *esnap_channel;
struct blob_esnap_channel find = {};
assert(spdk_get_thread() == spdk_io_channel_get_thread(channel));
find.blob_id = blob->id;
esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
if (esnap_channel != NULL) {
SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channel on thread %s\n",
blob->id, spdk_thread_get_name(spdk_get_thread()));
RB_REMOVE(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
free(esnap_channel);
}
spdk_for_each_channel_continue(i, 0);
}
/*
* Destroy the channels for a specific blob on each thread with a blobstore channel. This should be
* used when closing an esnap clone blob and after decoupling from the parent.
*/
static void
blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, spdk_blob_op_with_handle_complete cb_fn,
void *cb_arg)
{
struct blob_esnap_destroy_ctx *ctx;
if (!blob_is_esnap_clone(blob)) {
cb_fn(cb_arg, blob, 0);
return;
}
ctx = calloc(1, sizeof(*ctx));
if (ctx == NULL) {
cb_fn(cb_arg, blob, -ENOMEM);
return;
}
ctx->cb_fn = cb_fn;
ctx->cb_arg = cb_arg;
ctx->blob = blob;
ctx->back_bs_dev = blob->back_bs_dev;
SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channels for this blob\n",
blob->id);
spdk_for_each_channel(blob->bs, blob_esnap_destroy_one_channel, ctx,
blob_esnap_destroy_channels_done);
}
/*
* Destroy all bs_dev channels on a specific blobstore channel. This should be used when a
* bs_channel is destroyed.
*/
static void
blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch)
{
struct blob_esnap_channel *esnap_channel, *esnap_channel_tmp;
assert(spdk_get_thread() == spdk_io_channel_get_thread(spdk_io_channel_from_ctx(ch)));
SPDK_DEBUGLOG(blob_esnap, "destroying channels on thread %s\n",
spdk_thread_get_name(spdk_get_thread()));
RB_FOREACH_SAFE(esnap_channel, blob_esnap_channel_tree, &ch->esnap_channels,
esnap_channel_tmp) {
SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64
": destroying one channel in thread %s\n",
esnap_channel->blob_id, spdk_thread_get_name(spdk_get_thread()));
RB_REMOVE(blob_esnap_channel_tree, &ch->esnap_channels, esnap_channel);
spdk_put_io_channel(esnap_channel->channel);
free(esnap_channel);
}
SPDK_DEBUGLOG(blob_esnap, "done destroying channels on thread %s\n",
spdk_thread_get_name(spdk_get_thread()));
}
SPDK_LOG_REGISTER_COMPONENT(blob)
SPDK_LOG_REGISTER_COMPONENT(blob_esnap)
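Note: with the `blob_esnap` component registered above, the debug messages added by this commit can
be enabled in debug builds with `spdk_log_set_flag("blob_esnap")` or the `-L blob_esnap` option of
the SPDK apps and unit tests.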

View File

@ -205,6 +205,8 @@ struct spdk_bs_channel {
TAILQ_HEAD(, spdk_bs_request_set) need_cluster_alloc;
TAILQ_HEAD(, spdk_bs_request_set) queued_io;
RB_HEAD(blob_esnap_channel_tree, blob_esnap_channel) esnap_channels;
};
/** operation type */
@ -420,6 +422,8 @@ SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_super_block) == 0x1000, "Invalid super
struct spdk_bs_dev *bs_create_zeroes_dev(void);
struct spdk_bs_dev *bs_create_blob_bs_dev(struct spdk_blob *blob);
struct spdk_io_channel *blob_esnap_get_io_channel(struct spdk_io_channel *ch,
struct spdk_blob *blob);
/* Unit Conversions
*

View File

@ -72,9 +72,9 @@ bs_sequence_completion(struct spdk_io_channel *channel, void *cb_arg, int bserrn
set->u.sequence.cb_fn((spdk_bs_sequence_t *)set, set->u.sequence.cb_arg, bserrno);
}
static spdk_bs_sequence_t *
bs_sequence_start(struct spdk_io_channel *_channel,
struct spdk_bs_cpl *cpl)
static inline spdk_bs_sequence_t *
bs_sequence_start(struct spdk_io_channel *_channel, struct spdk_bs_cpl *cpl,
struct spdk_io_channel *back_channel)
{
struct spdk_bs_channel *channel;
struct spdk_bs_request_set *set;
@ -90,7 +90,7 @@ bs_sequence_start(struct spdk_io_channel *_channel,
set->cpl = *cpl;
set->bserrno = 0;
set->channel = channel;
set->back_channel = _channel;
set->back_channel = back_channel;
set->cb_args.cb_fn = bs_sequence_completion;
set->cb_args.cb_arg = set;
@ -104,7 +104,7 @@ bs_sequence_start(struct spdk_io_channel *_channel,
spdk_bs_sequence_t *
bs_sequence_start_bs(struct spdk_io_channel *_channel, struct spdk_bs_cpl *cpl)
{
return bs_sequence_start(_channel, cpl);
return bs_sequence_start(_channel, cpl, _channel);
}
/* Use when performing IO on a blob. */
@ -112,7 +112,24 @@ spdk_bs_sequence_t *
bs_sequence_start_blob(struct spdk_io_channel *_channel, struct spdk_bs_cpl *cpl,
struct spdk_blob *blob)
{
return bs_sequence_start(_channel, cpl);
struct spdk_io_channel *esnap_ch = _channel;
if (spdk_blob_is_esnap_clone(blob)) {
esnap_ch = blob_esnap_get_io_channel(_channel, blob);
if (esnap_ch == NULL) {
/*
* The most likely reason we are here is because of some logic error
* elsewhere that caused channel allocations to fail. We could get here due
* to being out of memory as well. If we are out of memory, this will be just
* one of many problems that this process will be having. Killing it off in
* debug builds now due to logic errors is the right thing to do, and killing
* it off due to ENOMEM is no big loss.
*/
assert(false);
return NULL;
}
}
return bs_sequence_start(_channel, cpl, esnap_ch);
}
void
@ -308,11 +325,18 @@ bs_batch_completion(struct spdk_io_channel *_channel,
}
spdk_bs_batch_t *
bs_batch_open(struct spdk_io_channel *_channel,
struct spdk_bs_cpl *cpl)
bs_batch_open(struct spdk_io_channel *_channel, struct spdk_bs_cpl *cpl, struct spdk_blob *blob)
{
struct spdk_bs_channel *channel;
struct spdk_bs_request_set *set;
struct spdk_io_channel *back_channel = _channel;
if (spdk_blob_is_esnap_clone(blob)) {
back_channel = blob_esnap_get_io_channel(_channel, blob);
if (back_channel == NULL) {
return NULL;
}
}
channel = spdk_io_channel_get_ctx(_channel);
assert(channel != NULL);
@ -325,7 +349,7 @@ bs_batch_open(struct spdk_io_channel *_channel,
set->cpl = *cpl;
set->bserrno = 0;
set->channel = channel;
set->back_channel = _channel;
set->back_channel = back_channel;
set->u.batch.cb_fn = NULL;
set->u.batch.cb_arg = NULL;

View File

@ -91,8 +91,8 @@ struct spdk_bs_request_set {
*/
struct spdk_bs_channel *channel;
/*
* The channel used by the blobstore to perform IO on back_bs_dev.
* For now, back_channel == spdk_io_channel_get_ctx(set->channel).
* The channel used by the blobstore to perform IO on back_bs_dev. Unless the blob
* is an esnap clone, back_channel == spdk_io_channel_from_ctx(set->channel).
*/
struct spdk_io_channel *back_channel;
@ -135,6 +135,9 @@ spdk_bs_sequence_t *bs_sequence_start_bs(struct spdk_io_channel *channel,
spdk_bs_sequence_t *bs_sequence_start_blob(struct spdk_io_channel *channel,
struct spdk_bs_cpl *cpl, struct spdk_blob *blob);
spdk_bs_sequence_t *bs_sequence_start_esnap(struct spdk_io_channel *channel,
struct spdk_bs_cpl *cpl, struct spdk_blob *blob);
void bs_sequence_read_bs_dev(spdk_bs_sequence_t *seq, struct spdk_bs_dev *bs_dev,
void *payload, uint64_t lba, uint32_t lba_count,
spdk_bs_sequence_cpl cb_fn, void *cb_arg);
@ -172,7 +175,7 @@ void bs_sequence_finish(spdk_bs_sequence_t *seq, int bserrno);
void bs_user_op_sequence_finish(void *cb_arg, int bserrno);
spdk_bs_batch_t *bs_batch_open(struct spdk_io_channel *channel,
struct spdk_bs_cpl *cpl);
struct spdk_bs_cpl *cpl, struct spdk_blob *blob);
void bs_batch_read_bs_dev(spdk_bs_batch_t *batch, struct spdk_bs_dev *bs_dev,
void *payload, uint64_t lba, uint32_t lba_count);

View File

@ -12,6 +12,7 @@
#include "common/lib/ut_multithread.c"
#include "../bs_dev_common.c"
#include "thread/thread.c"
#include "blob/blobstore.c"
#include "blob/request.c"
#include "blob/zeroes.c"
@ -7436,19 +7437,21 @@ blob_esnap_create(void)
/* Create an esnap clone blob then verify it is an esnap clone and has the right size */
ut_spdk_blob_opts_init(&opts);
ut_esnap_opts_init(block_sz, esnap_num_blocks, __func__, &esnap_opts);
ut_esnap_opts_init(block_sz, esnap_num_blocks, __func__, NULL, &esnap_opts);
opts.esnap_id = &esnap_opts;
opts.esnap_id_len = sizeof(esnap_opts);
opts.num_clusters = esnap_num_clusters;
blob = ut_blob_create_and_open(bs, &opts);
SPDK_CU_ASSERT_FATAL(blob != NULL);
SPDK_CU_ASSERT_FATAL(spdk_blob_is_esnap_clone(blob));
SPDK_CU_ASSERT_FATAL(blob_is_esnap_clone(blob));
sz = spdk_blob_get_num_clusters(blob);
CU_ASSERT(sz == esnap_num_clusters);
ut_blob_close_and_delete(bs, blob);
/* Create an esnap clone without the size and verify it can be grown */
ut_spdk_blob_opts_init(&opts);
ut_esnap_opts_init(block_sz, esnap_num_blocks, __func__, &esnap_opts);
ut_esnap_opts_init(block_sz, esnap_num_blocks, __func__, NULL, &esnap_opts);
opts.esnap_id = &esnap_opts;
opts.esnap_id_len = sizeof(esnap_opts);
blob = ut_blob_create_and_open(bs, &opts);
@ -7538,6 +7541,346 @@ blob_esnap_create(void)
g_blob = NULL;
}
static bool
blob_esnap_verify_contents(struct spdk_blob *blob, struct spdk_io_channel *ch,
uint64_t offset, uint64_t size, uint32_t readsize, const char *how)
{
const uint32_t bs_blksz = blob->bs->io_unit_size;
const uint32_t esnap_blksz = blob->back_bs_dev->blocklen;
const uint32_t start_blk = offset / bs_blksz;
const uint32_t num_blocks = spdk_max(size, readsize) / bs_blksz;
const uint32_t blocks_per_read = spdk_min(size, readsize) / bs_blksz;
uint32_t blob_block;
struct iovec iov;
uint8_t buf[spdk_min(size, readsize)];
bool block_ok;
SPDK_CU_ASSERT_FATAL(offset % bs_blksz == 0);
SPDK_CU_ASSERT_FATAL(size % bs_blksz == 0);
SPDK_CU_ASSERT_FATAL(readsize % bs_blksz == 0);
memset(buf, 0, sizeof(buf));
iov.iov_base = buf;
iov.iov_len = readsize;
for (blob_block = start_blk; blob_block < num_blocks; blob_block += blocks_per_read) {
if (strcmp(how, "read") == 0) {
spdk_blob_io_read(blob, ch, buf, blob_block, blocks_per_read,
bs_op_complete, NULL);
} else if (strcmp(how, "readv") == 0) {
spdk_blob_io_readv(blob, ch, &iov, 1, blob_block, blocks_per_read,
bs_op_complete, NULL);
} else if (strcmp(how, "readv_ext") == 0) {
/*
* This is currently pointless. NULL ext_opts leads to dev->readv(), not
* dev->readv_ext().
*/
spdk_blob_io_readv_ext(blob, ch, &iov, 1, blob_block, blocks_per_read,
bs_op_complete, NULL, NULL);
} else {
abort();
}
poll_threads();
CU_ASSERT(g_bserrno == 0);
if (g_bserrno != 0) {
return false;
}
block_ok = ut_esnap_content_is_correct(buf, blocks_per_read * bs_blksz, blob->id,
blob_block * bs_blksz, esnap_blksz);
CU_ASSERT(block_ok);
if (!block_ok) {
return false;
}
}
return true;
}
static void
blob_esnap_io_size(uint32_t bs_blksz, uint32_t esnap_blksz)
{
struct spdk_bs_dev *dev;
struct spdk_blob_store *bs;
struct spdk_bs_opts bsopts;
struct spdk_blob_opts opts;
struct ut_esnap_opts esnap_opts;
struct spdk_blob *blob;
const uint32_t cluster_sz = 16 * 1024;
const uint64_t esnap_num_clusters = 4;
const uint32_t esnap_sz = cluster_sz * esnap_num_clusters;
const uint64_t esnap_num_blocks = esnap_sz / esnap_blksz;
const uint64_t blob_num_blocks = esnap_sz / bs_blksz;
uint32_t block;
struct spdk_io_channel *bs_ch;
spdk_bs_opts_init(&bsopts, sizeof(bsopts));
bsopts.cluster_sz = cluster_sz;
bsopts.esnap_bs_dev_create = ut_esnap_create;
/* Create device with desired block size */
dev = init_dev();
dev->blocklen = bs_blksz;
dev->blockcnt = DEV_BUFFER_SIZE / dev->blocklen;
/* Initialize a new blob store */
spdk_bs_init(dev, &bsopts, bs_op_with_handle_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
SPDK_CU_ASSERT_FATAL(g_bs != NULL);
SPDK_CU_ASSERT_FATAL(g_bs->io_unit_size == bs_blksz);
bs = g_bs;
bs_ch = spdk_bs_alloc_io_channel(bs);
SPDK_CU_ASSERT_FATAL(bs_ch != NULL);
/* Create and open the esnap clone */
ut_spdk_blob_opts_init(&opts);
ut_esnap_opts_init(esnap_blksz, esnap_num_blocks, __func__, NULL, &esnap_opts);
opts.esnap_id = &esnap_opts;
opts.esnap_id_len = sizeof(esnap_opts);
opts.num_clusters = esnap_num_clusters;
blob = ut_blob_create_and_open(bs, &opts);
SPDK_CU_ASSERT_FATAL(blob != NULL);
/* Verify that large reads return the content of the esnap device */
CU_ASSERT(blob_esnap_verify_contents(blob, bs_ch, 0, esnap_sz, esnap_sz, "read"));
CU_ASSERT(blob_esnap_verify_contents(blob, bs_ch, 0, esnap_sz, esnap_sz, "readv"));
CU_ASSERT(blob_esnap_verify_contents(blob, bs_ch, 0, esnap_sz, esnap_sz, "readv_ext"));
/* Verify that small reads return the content of the esnap device */
CU_ASSERT(blob_esnap_verify_contents(blob, bs_ch, 0, esnap_sz, bs_blksz, "read"));
CU_ASSERT(blob_esnap_verify_contents(blob, bs_ch, 0, esnap_sz, bs_blksz, "readv"));
CU_ASSERT(blob_esnap_verify_contents(blob, bs_ch, 0, esnap_sz, bs_blksz, "readv_ext"));
/* Write one blob block at a time; verify that the surrounding blocks are OK */
for (block = 0; block < blob_num_blocks; block++) {
char buf[bs_blksz];
union ut_word word;
word.f.blob_id = 0xfedcba90;
word.f.lba = block;
ut_memset8(buf, word.num, bs_blksz);
spdk_blob_io_write(blob, bs_ch, buf, block, 1, bs_op_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
if (g_bserrno != 0) {
break;
}
/* Read and verify the block before the current block */
if (block != 0) {
spdk_blob_io_read(blob, bs_ch, buf, block - 1, 1, bs_op_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
if (g_bserrno != 0) {
break;
}
CU_ASSERT(ut_esnap_content_is_correct(buf, bs_blksz, word.f.blob_id,
(block - 1) * bs_blksz, bs_blksz));
}
/* Read and verify the current block */
spdk_blob_io_read(blob, bs_ch, buf, block, 1, bs_op_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
if (g_bserrno != 0) {
break;
}
CU_ASSERT(ut_esnap_content_is_correct(buf, bs_blksz, word.f.blob_id,
block * bs_blksz, bs_blksz));
/* Check the block that follows */
if (block + 1 < blob_num_blocks) {
g_bserrno = 0xbad;
spdk_blob_io_read(blob, bs_ch, buf, block + 1, 1, bs_op_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
if (g_bserrno != 0) {
break;
}
CU_ASSERT(ut_esnap_content_is_correct(buf, bs_blksz, blob->id,
(block + 1) * bs_blksz,
esnap_blksz));
}
}
/* Clean up */
spdk_bs_free_io_channel(bs_ch);
g_bserrno = 0xbad;
spdk_blob_close(blob, blob_op_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
spdk_bs_unload(g_bs, bs_op_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
g_bs = NULL;
memset(g_dev_buffer, 0, DEV_BUFFER_SIZE);
}
static void
blob_esnap_io_4096_4096(void)
{
blob_esnap_io_size(4096, 4096);
}
static void
blob_esnap_io_512_512(void)
{
blob_esnap_io_size(512, 512);
}
static void
blob_esnap_io_4096_512(void)
{
blob_esnap_io_size(4096, 512);
}
static void
blob_esnap_io_512_4096(void)
{
struct spdk_bs_dev *dev;
struct spdk_blob_store *bs;
struct spdk_bs_opts bs_opts;
struct spdk_blob_opts blob_opts;
struct ut_esnap_opts esnap_opts;
uint64_t cluster_sz = 16 * 1024;
uint32_t bs_blksz = 512;
uint32_t esnap_blksz = 4096;
uint64_t esnap_num_blocks = 64;
spdk_blob_id blobid;
/* Create device with desired block size */
dev = init_dev();
dev->blocklen = bs_blksz;
dev->blockcnt = DEV_BUFFER_SIZE / dev->blocklen;
/* Initialize a new blob store */
spdk_bs_opts_init(&bs_opts, sizeof(bs_opts));
bs_opts.cluster_sz = cluster_sz;
bs_opts.esnap_bs_dev_create = ut_esnap_create;
spdk_bs_init(dev, &bs_opts, bs_op_with_handle_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
SPDK_CU_ASSERT_FATAL(g_bs != NULL);
SPDK_CU_ASSERT_FATAL(g_bs->io_unit_size == bs_blksz);
bs = g_bs;
/* Try to create and open the esnap clone. Create should succeed, open should fail. */
ut_spdk_blob_opts_init(&blob_opts);
ut_esnap_opts_init(esnap_blksz, esnap_num_blocks, __func__, NULL, &esnap_opts);
blob_opts.esnap_id = &esnap_opts;
blob_opts.esnap_id_len = sizeof(esnap_opts);
blob_opts.num_clusters = esnap_num_blocks * esnap_blksz / bs_blksz;
spdk_bs_create_blob_ext(bs, &blob_opts, blob_op_with_id_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
blobid = g_blobid;
spdk_bs_open_blob(bs, blobid, blob_op_with_handle_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == -EINVAL);
CU_ASSERT(g_blob == NULL);
/* Clean up */
spdk_bs_unload(bs, bs_op_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
g_bs = NULL;
memset(g_dev_buffer, 0, DEV_BUFFER_SIZE);
}
static void
blob_esnap_thread_add_remove(void)
{
struct spdk_blob_store *bs = g_bs;
struct spdk_blob_opts opts;
struct ut_esnap_opts ut_esnap_opts;
struct spdk_blob *blob;
struct ut_esnap_dev *ut_dev;
spdk_blob_id blobid;
uint64_t start_thread = g_ut_thread_id;
bool destroyed = false;
struct spdk_io_channel *ch0, *ch1;
struct ut_esnap_channel *ut_ch0, *ut_ch1;
const uint32_t blocklen = bs->io_unit_size;
char buf[blocklen * 4];
SPDK_CU_ASSERT_FATAL(g_ut_num_threads > 1);
set_thread(0);
/* Create the esnap clone */
ut_esnap_opts_init(blocklen, 2048, "add_remove_1", &destroyed, &ut_esnap_opts);
ut_spdk_blob_opts_init(&opts);
opts.esnap_id = &ut_esnap_opts;
opts.esnap_id_len = sizeof(ut_esnap_opts);
opts.num_clusters = 10;
spdk_bs_create_blob_ext(bs, &opts, blob_op_with_id_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
blobid = g_blobid;
/* Open the blob. No channels should be allocated yet. */
spdk_bs_open_blob(bs, blobid, blob_op_with_handle_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
CU_ASSERT(g_blob != NULL);
blob = g_blob;
ut_dev = (struct ut_esnap_dev *)blob->back_bs_dev;
CU_ASSERT(ut_dev != NULL);
CU_ASSERT(ut_dev->num_channels == 0);
/* Create a channel on thread 0. It is lazily created on the first read. */
ch0 = spdk_bs_alloc_io_channel(bs);
CU_ASSERT(ch0 != NULL);
ut_ch0 = ut_esnap_get_io_channel(ch0, blobid);
CU_ASSERT(ut_ch0 == NULL);
CU_ASSERT(ut_dev->num_channels == 0);
spdk_blob_io_read(blob, ch0, buf, 0, 1, bs_op_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
CU_ASSERT(ut_dev->num_channels == 1);
ut_ch0 = ut_esnap_get_io_channel(ch0, blobid);
CU_ASSERT(ut_ch0 != NULL);
CU_ASSERT(ut_ch0->blocks_read == 1);
/* Create a channel on thread 1 and verify its lazy creation too. */
set_thread(1);
ch1 = spdk_bs_alloc_io_channel(bs);
CU_ASSERT(ch1 != NULL);
ut_ch1 = ut_esnap_get_io_channel(ch1, blobid);
CU_ASSERT(ut_ch1 == NULL);
CU_ASSERT(ut_dev->num_channels == 1);
spdk_blob_io_read(blob, ch1, buf, 0, 4, bs_op_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
CU_ASSERT(ut_dev->num_channels == 2);
ut_ch1 = ut_esnap_get_io_channel(ch1, blobid);
CU_ASSERT(ut_ch1 != NULL);
CU_ASSERT(ut_ch1->blocks_read == 4);
/* Close the channel on thread 0 and verify the bs_dev channel is also gone. */
set_thread(0);
spdk_bs_free_io_channel(ch0);
poll_threads();
CU_ASSERT(ut_dev->num_channels == 1);
/* Close the blob. There is no outstanding IO so it should close right away. */
g_bserrno = 0xbad;
spdk_blob_close(blob, blob_op_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
CU_ASSERT(destroyed);
/* The esnap channel for the blob should be gone now too. */
ut_ch1 = ut_esnap_get_io_channel(ch1, blobid);
CU_ASSERT(ut_ch1 == NULL);
/* Clean up */
set_thread(1);
spdk_bs_free_io_channel(ch1);
set_thread(start_thread);
}
static void
freeze_done(void *cb_arg, int bserrno)
{
@ -7704,10 +8047,12 @@ suite_esnap_bs_setup(void)
static void
suite_bs_cleanup(void)
{
spdk_bs_unload(g_bs, bs_op_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
g_bs = NULL;
if (g_bs != NULL) {
spdk_bs_unload(g_bs, bs_op_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
g_bs = NULL;
}
memset(g_dev_buffer, 0, DEV_BUFFER_SIZE);
}
@ -7870,6 +8215,11 @@ main(int argc, char **argv)
CU_ADD_TEST(suite_esnap_bs, blob_esnap_create);
CU_ADD_TEST(suite_bs, blob_nested_freezes);
CU_ADD_TEST(suite, blob_ext_md_pages);
CU_ADD_TEST(suite, blob_esnap_io_4096_4096);
CU_ADD_TEST(suite, blob_esnap_io_512_512);
CU_ADD_TEST(suite, blob_esnap_io_4096_512);
CU_ADD_TEST(suite, blob_esnap_io_512_4096);
CU_ADD_TEST(suite_esnap_bs, blob_esnap_thread_add_remove);
allocate_threads(2);
set_thread(0);

View File

@ -13,6 +13,8 @@
* struct spdk_bs_opts bs_opts;
* struct spdk_blob_opts blob_opts;
* struct ut_esnap_opts esnap_opts;
* struct spdk_io_channel *bs_chan;
* bool destroyed = false;
*
* Create the blobstore with external snapshot support.
* dev = init_dev();
@ -21,16 +23,45 @@
* bs_opts.esnap_bs_dev_create = ut_esnap_create;
*
* Create an esnap clone blob.
* ut_spdk_blob_opts_init(&blob_opts);
* ut_esnap_opts_init(512, 2048, "name", &esnap_opts);
* ut_esnap_opts_init(512, 2048, "name", &destroyed, &esnap_opts);
* blob_opts.esnap_id = &esnap_opts;
* blob_opts.esnap_id_len = sizeof(esnap_opts);
* blob_opts.num_clusters = 4;
* blob = ut_blob_create_and_open(bs, &blob_opts);
*
* At this point the blob can be used like any other blob.
* Do stuff like you would with any other blob.
* bs_chan = spdk_bs_alloc_io_channel(bs);
* ...
*
* You can check the value of destroyed to verify that spdk_blob_close() led to the
* destruction of the bs_dev created during spdk_blob_open().
* spdk_blob_close(blob, blob_op_complete, NULL);
* poll_threads();
* CU_ASSERT(destroyed);
*/
static void
ut_memset4(void *dst, uint32_t pat, size_t len)
{
uint32_t *vals = dst;
assert((len % 4) == 0);
for (size_t i = 0; i < (len / 4); i++) {
vals[i] = pat;
}
}
static void
ut_memset8(void *dst, uint64_t pat, size_t len)
{
uint64_t *vals = dst;
assert((len % 8) == 0);
for (size_t i = 0; i < (len / 8); i++) {
vals[i] = pat;
}
}
#define UT_ESNAP_OPTS_MAGIC 0xbadf1ea5
struct ut_esnap_opts {
/*
@ -40,6 +71,11 @@ struct ut_esnap_opts {
uint32_t magic;
uint32_t block_size;
uint64_t num_blocks;
/*
* If non-NULL, referenced address will be set to true when the device is fully destroyed.
* This address must remain valid for the life of the blob, even across blobstore reload.
*/
bool *destroyed;
char name[32];
};
@ -50,21 +86,197 @@ struct ut_esnap_dev {
uint32_t num_channels;
};
struct ut_esnap_channel {
struct ut_esnap_dev *dev;
struct spdk_thread *thread;
uint64_t blocks_read;
};
static void
ut_esnap_opts_init(uint32_t block_size, uint32_t num_blocks, const char *name,
ut_esnap_opts_init(uint32_t block_size, uint32_t num_blocks, const char *name, bool *destroyed,
struct ut_esnap_opts *opts)
{
memset(opts, 0, sizeof(*opts));
opts->magic = UT_ESNAP_OPTS_MAGIC;
opts->block_size = block_size;
opts->num_blocks = num_blocks;
opts->destroyed = destroyed;
spdk_strcpy_pad(opts->name, name, sizeof(opts->name) - 1, '\0');
}
static struct spdk_io_channel *
ut_esnap_create_channel(struct spdk_bs_dev *dev)
{
struct spdk_io_channel *ch;
ch = spdk_get_io_channel(dev);
if (ch == NULL) {
return NULL;
}
return ch;
}
static void
ut_esnap_destroy_channel(struct spdk_bs_dev *dev, struct spdk_io_channel *channel)
{
spdk_put_io_channel(channel);
}
/*
* When reading, each block is filled with 64-bit values made up of the least significant 32 bits of
* the blob ID and the lba.
*/
union ut_word {
uint64_t num;
struct {
uint32_t blob_id;
uint32_t lba;
} f;
};
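/*
 * Example (a sketch, assuming the little-endian hosts these tests run on): for
 * blob_id 0xfedcba90 and lba 3, word.num is 0x00000003fedcba90, and ut_esnap_read()
 * fills every 8-byte word of that block with this value.
 */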
static bool
ut_esnap_content_is_correct(void *buf, uint32_t buf_sz, uint32_t id,
uint32_t start_byte, uint32_t esnap_blksz)
{
union ut_word *words = buf;
uint32_t off, i, j, lba;
j = 0;
for (off = start_byte; off < start_byte + buf_sz; off += esnap_blksz) {
lba = off / esnap_blksz;
for (i = 0; i < esnap_blksz / sizeof(*words); i++) {
if (words[j].f.blob_id != id || words[j].f.lba != lba) {
return false;
}
j++;
}
}
return true;
}
static void
ut_esnap_read(struct spdk_bs_dev *bs_dev, struct spdk_io_channel *channel, void *payload,
uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
{
struct ut_esnap_dev *ut_dev = (struct ut_esnap_dev *)bs_dev;
struct ut_esnap_channel *ut_ch = spdk_io_channel_get_ctx(channel);
const uint32_t block_size = ut_dev->ut_opts.block_size;
union ut_word word;
uint64_t cur;
/* The channel passed in must be associated with this bs_dev. */
CU_ASSERT(&ut_ch->dev->bs_dev == bs_dev);
CU_ASSERT(spdk_get_thread() == ut_ch->thread);
SPDK_CU_ASSERT_FATAL(sizeof(word) == 8);
SPDK_CU_ASSERT_FATAL(lba + lba_count <= UINT32_MAX);
word.f.blob_id = ut_dev->blob_id & 0xffffffff;
for (cur = 0; cur < lba_count; cur++) {
word.f.lba = lba + cur;
ut_memset8(payload + cur * block_size, word.num, block_size);
}
ut_ch->blocks_read += lba_count;
cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, 0);
}
static void
ut_esnap_readv(struct spdk_bs_dev *bs_dev, struct spdk_io_channel *channel,
struct iovec *iov, int iovcnt, uint64_t lba, uint32_t lba_count,
struct spdk_bs_dev_cb_args *cb_args)
{
struct ut_esnap_channel *ut_ch = spdk_io_channel_get_ctx(channel);
/* The channel passed in must be associated with this bs_dev. */
CU_ASSERT(&ut_ch->dev->bs_dev == bs_dev);
CU_ASSERT(spdk_get_thread() == ut_ch->thread);
if (iovcnt != 1) {
CU_ASSERT(false);
cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -ENOTSUP);
return;
}
ut_esnap_read(bs_dev, channel, iov->iov_base, lba, lba_count, cb_args);
}
static void
ut_esnap_readv_ext(struct spdk_bs_dev *bs_dev, struct spdk_io_channel *channel,
struct iovec *iov, int iovcnt, uint64_t lba, uint32_t lba_count,
struct spdk_bs_dev_cb_args *cb_args, struct spdk_blob_ext_io_opts *io_opts)
{
struct ut_esnap_channel *ut_ch = spdk_io_channel_get_ctx(channel);
/* The channel passed in must be associated with this bs_dev. */
CU_ASSERT(&ut_ch->dev->bs_dev == bs_dev);
CU_ASSERT(spdk_get_thread() == ut_ch->thread);
CU_ASSERT(false);
cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -ENOTSUP);
}
static bool
ut_esnap_is_zeroes(struct spdk_bs_dev *dev, uint64_t lba, uint64_t lba_count)
{
return false;
}
static int
ut_esnap_io_channel_create(void *io_device, void *ctx)
{
struct ut_esnap_dev *ut_dev = io_device;
struct ut_esnap_channel *ut_ch = ctx;
ut_ch->dev = ut_dev;
ut_ch->thread = spdk_get_thread();
ut_ch->blocks_read = 0;
ut_dev->num_channels++;
return 0;
}
static void
ut_esnap_io_channel_destroy(void *io_device, void *ctx)
{
struct ut_esnap_dev *ut_dev = io_device;
struct ut_esnap_channel *ut_ch = ctx;
CU_ASSERT(ut_ch->thread == spdk_get_thread());
CU_ASSERT(ut_dev->num_channels > 0);
ut_dev->num_channels--;
return;
}
static void
ut_esnap_dev_free(void *io_device)
{
struct ut_esnap_dev *ut_dev = io_device;
if (ut_dev->ut_opts.destroyed != NULL) {
*ut_dev->ut_opts.destroyed = true;
}
CU_ASSERT(ut_dev->num_channels == 0);
ut_memset4(ut_dev, 0xdeadf1ea, sizeof(*ut_dev));
free(ut_dev);
}
static void
ut_esnap_destroy(struct spdk_bs_dev *bs_dev)
{
free(bs_dev);
spdk_io_device_unregister(bs_dev, ut_esnap_dev_free);
}
static bool
ut_esnap_translate_lba(struct spdk_bs_dev *dev, uint64_t lba, uint64_t *base_lba)
{
*base_lba = lba;
return true;
}
static struct spdk_bs_dev *
@ -86,7 +298,17 @@ ut_esnap_dev_alloc(const struct ut_esnap_opts *opts)
bs_dev->blocklen = opts->block_size;
bs_dev->blockcnt = opts->num_blocks;
bs_dev->create_channel = ut_esnap_create_channel;
bs_dev->destroy_channel = ut_esnap_destroy_channel;
bs_dev->destroy = ut_esnap_destroy;
bs_dev->read = ut_esnap_read;
bs_dev->readv = ut_esnap_readv;
bs_dev->readv_ext = ut_esnap_readv_ext;
bs_dev->is_zeroes = ut_esnap_is_zeroes;
bs_dev->translate_lba = ut_esnap_translate_lba;
spdk_io_device_register(ut_dev, ut_esnap_io_channel_create, ut_esnap_io_channel_destroy,
sizeof(struct ut_esnap_channel), opts->name);
return bs_dev;
}
@ -134,3 +356,19 @@ ut_esnap_create_with_count(void *bs_ctx, void *blob_ctx, struct spdk_blob *blob,
return ut_esnap_create(NULL, NULL, blob, id, id_len, bs_devp);
}
static struct ut_esnap_channel *
ut_esnap_get_io_channel(struct spdk_io_channel *ch, spdk_blob_id blob_id)
{
struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(ch);
struct blob_esnap_channel find = {};
struct blob_esnap_channel *esnap_channel;
find.blob_id = blob_id;
esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
if (esnap_channel == NULL) {
return NULL;
}
return spdk_io_channel_get_ctx(esnap_channel->channel);
}