blob: snapshots of esnap clones

An esnap clone needs special handling as snapshots are created and
removed. In particular: the following must exist on the blob that
directly references the external snapshot and must be removed from
others:

- Ensure SPDK_BLOB_EXTERNAL_SNAPSHOT invalid flag exists only on the
  esnap clone.
- Ensure BLOB_EXTERNAL_SNAPSHOT_ID internal xattr exists only on the
  esnap clone.
- Clean up any esnap IO channels on a blob that is no longer an esnap
  clone due to snapshot creation or removal.

See the diagrams and description in blob_esnap_clone_snapshot() in
blob_ut.c for details.

Signed-off-by: Mike Gerdts <mgerdts@nvidia.com>
Change-Id: Ie4125d64d5bac9cfa7d6c7cc9a543d72a169f6ee
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11573
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
This commit is contained in:
Mike Gerdts 2022-01-25 09:26:39 -06:00 committed by Jim Harris
parent b47cee6c96
commit 94c43313ab
2 changed files with 320 additions and 9 deletions

View File

@ -6187,6 +6187,30 @@ bs_snapshot_swap_cluster_maps(struct spdk_blob *blob1, struct spdk_blob *blob2)
blob2->active.extent_pages = extent_page_temp;
}
/* Copies an internal xattr */
static int
bs_snapshot_copy_xattr(struct spdk_blob *toblob, struct spdk_blob *fromblob, const char *name)
{
const void *val = NULL;
size_t len;
int bserrno;
bserrno = blob_get_xattr_value(fromblob, name, &val, &len, true);
if (bserrno != 0) {
SPDK_ERRLOG("blob 0x%" PRIx64 " missing %s xattr"
BLOB_EXTERNAL_SNAPSHOT_ID " XATTR\n", fromblob->id, name);
return bserrno;
}
bserrno = blob_set_xattr(toblob, name, val, len, true);
if (bserrno != 0) {
SPDK_ERRLOG("could not set %s XATTR on blob 0x%" PRIx64 "\n",
name, toblob->id);
return bserrno;
}
return 0;
}
static void
bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno)
{
@ -6196,6 +6220,10 @@ bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno)
if (bserrno != 0) {
bs_snapshot_swap_cluster_maps(newblob, origblob);
if (blob_is_esnap_clone(newblob)) {
bs_snapshot_copy_xattr(origblob, newblob, BLOB_EXTERNAL_SNAPSHOT_ID);
origblob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
}
bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
return;
}
@ -6259,6 +6287,25 @@ bs_snapshot_newblob_sync_cpl(void *cb_arg, int bserrno)
return;
}
/* Remove the xattr that references an external snapshot */
if (blob_is_esnap_clone(origblob)) {
origblob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
bserrno = blob_remove_xattr(origblob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
if (bserrno != 0) {
if (bserrno == -ENOENT) {
SPDK_ERRLOG("blob 0x%" PRIx64 " has no " BLOB_EXTERNAL_SNAPSHOT_ID
" xattr to remove\n", origblob->id);
assert(false);
} else {
/* return cluster map back to original */
bs_snapshot_swap_cluster_maps(newblob, origblob);
blob_set_thin_provision(newblob);
bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
return;
}
}
}
bs_blob_list_remove(origblob);
origblob->parent_id = newblob->id;
/* set clone blob as thin provisioned */
@ -6285,6 +6332,12 @@ bs_snapshot_freeze_cpl(void *cb_arg, int rc)
ctx->frozen = true;
if (blob_is_esnap_clone(origblob)) {
/* Clean up any channels associated with the original blob id because future IO will
* perform IO using the snapshot blob_id.
*/
blob_esnap_destroy_bs_dev_channels(origblob, NULL, NULL);
}
if (newblob->back_bs_dev) {
blob_back_bs_destroy(newblob);
}
@ -6295,7 +6348,17 @@ bs_snapshot_freeze_cpl(void *cb_arg, int rc)
/* inherit parent from original blob if set */
newblob->parent_id = origblob->parent_id;
if (origblob->parent_id != SPDK_BLOBID_INVALID) {
switch (origblob->parent_id) {
case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
bserrno = bs_snapshot_copy_xattr(newblob, origblob, BLOB_EXTERNAL_SNAPSHOT_ID);
if (bserrno != 0) {
bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
return;
}
break;
case SPDK_BLOBID_INVALID:
break;
default:
/* Set internal xattr for snapshot id */
bserrno = blob_set_xattr(newblob, BLOB_SNAPSHOT,
&origblob->parent_id, sizeof(spdk_blob_id), true);
@ -7016,7 +7079,13 @@ delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno)
snapshot_entry->clone_count--;
assert(TAILQ_EMPTY(&snapshot_entry->clones));
if (ctx->snapshot->parent_id != SPDK_BLOBID_INVALID) {
switch (ctx->snapshot->parent_id) {
case SPDK_BLOBID_INVALID:
case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
/* No parent snapshot - just remove clone entry */
free(clone_entry);
break;
default:
/* This snapshot is at the same time a clone of another snapshot - we need to
* update parent snapshot (remove current clone, add new one inherited from
* the snapshot that is being removed) */
@ -7030,9 +7099,6 @@ delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno)
TAILQ_INSERT_TAIL(&parent_snapshot_entry->clones, clone_entry, link);
TAILQ_REMOVE(&parent_snapshot_entry->clones, snapshot_clone_entry, link);
free(snapshot_clone_entry);
} else {
/* No parent snapshot - just remove clone entry */
free(clone_entry);
}
/* Restore md_ro flags */
@ -7091,11 +7157,34 @@ delete_snapshot_sync_clone_cpl(void *cb_arg, int bserrno)
static void
delete_snapshot_update_extent_pages_cpl(struct delete_snapshot_ctx *ctx)
{
int bserrno;
/* Delete old backing bs_dev from clone (related to snapshot that will be removed) */
blob_back_bs_destroy(ctx->clone);
/* Set/remove snapshot xattr and switch parent ID and backing bs_dev on clone... */
if (ctx->parent_snapshot_entry != NULL) {
if (ctx->snapshot->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
bserrno = bs_snapshot_copy_xattr(ctx->clone, ctx->snapshot,
BLOB_EXTERNAL_SNAPSHOT_ID);
if (bserrno != 0) {
ctx->bserrno = bserrno;
/* Restore snapshot to previous state */
bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
if (bserrno != 0) {
delete_snapshot_cleanup_clone(ctx, bserrno);
return;
}
spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
return;
}
ctx->clone->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
/* Do not delete the external snapshot along with this snapshot */
ctx->snapshot->back_bs_dev = NULL;
ctx->clone->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
} else if (ctx->parent_snapshot_entry != NULL) {
/* ...to parent snapshot */
ctx->clone->parent_id = ctx->parent_snapshot_entry->id;
ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
@ -7172,6 +7261,20 @@ delete_snapshot_sync_snapshot_xattr_cpl(void *cb_arg, int bserrno)
delete_snapshot_update_extent_pages(ctx, 0);
}
static void
delete_snapshot_esnap_channels_destroyed_cb(void *cb_arg, struct spdk_blob *blob, int bserrno)
{
struct delete_snapshot_ctx *ctx = cb_arg;
if (bserrno != 0) {
SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to destroy esnap channels: %d\n",
blob->id, bserrno);
/* That error should not stop us from syncing metadata. */
}
spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
}
static void
delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno)
{
@ -7196,6 +7299,13 @@ delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno)
return;
}
if (blob_is_esnap_clone(ctx->snapshot)) {
blob_esnap_destroy_bs_dev_channels(ctx->snapshot,
delete_snapshot_esnap_channels_destroyed_cb,
ctx);
return;
}
spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
}
@ -8713,7 +8823,9 @@ blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
blob->id);
ctx->cb_fn(ctx->cb_arg, blob, status);
if (ctx->cb_fn != NULL) {
ctx->cb_fn(ctx->cb_arg, blob, status);
}
free(ctx);
}
@ -8754,13 +8866,17 @@ blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, spdk_blob_op_with_han
struct blob_esnap_destroy_ctx *ctx;
if (!blob_is_esnap_clone(blob)) {
cb_fn(cb_arg, blob, 0);
if (cb_fn != NULL) {
cb_fn(cb_arg, blob, 0);
}
return;
}
ctx = calloc(1, sizeof(*ctx));
if (ctx == NULL) {
cb_fn(cb_arg, blob, -ENOMEM);
if (cb_fn != NULL) {
cb_fn(cb_arg, blob, -ENOMEM);
}
return;
}
ctx->cb_fn = cb_fn;

View File

@ -63,6 +63,42 @@ DEFINE_STUB(spdk_memory_domain_memzero, int, (struct spdk_memory_domain *src_dom
void *src_domain_ctx, struct iovec *iov, uint32_t iovcnt, void (*cpl_cb)(void *, int),
void *cpl_cb_arg), 0);
static bool
is_esnap_clone(struct spdk_blob *_blob, const void *id, size_t id_len)
{
const void *val = NULL;
size_t len = 0;
bool c0, c1, c2, c3;
CU_ASSERT(blob_get_xattr_value(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, &val, &len,
true) == 0);
CU_ASSERT((c0 = (len == id_len)));
CU_ASSERT((c1 = (val != NULL && memcmp(val, id, len) == 0)));
CU_ASSERT((c2 = !!(_blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT)));
CU_ASSERT((c3 = (_blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT)));
return c0 && c1 && c2 && c3;
}
static bool
is_not_esnap_clone(struct spdk_blob *_blob)
{
const void *val = NULL;
size_t len = 0;
bool c1, c2, c3, c4;
CU_ASSERT((c1 = (blob_get_xattr_value(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, &val, &len,
true) == -ENOENT)));
CU_ASSERT((c2 = (val == NULL)));
CU_ASSERT((c3 = ((_blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT) == 0)));
CU_ASSERT((c4 = (_blob->parent_id != SPDK_BLOBID_EXTERNAL_SNAPSHOT)));
return c1 && c2 && c3 && c4;
}
#define UT_ASSERT_IS_ESNAP_CLONE(_blob, _id, _len) CU_ASSERT(is_esnap_clone(_blob, _id, _len))
#define UT_ASSERT_IS_NOT_ESNAP_CLONE(_blob) CU_ASSERT(is_not_esnap_clone(_blob))
static void
_get_xattr_value(void *arg, const char *name,
const void **value, size_t *value_len)
@ -8014,6 +8050,164 @@ blob_ext_md_pages(void)
g_bs = NULL;
}
static void
blob_esnap_clone_snapshot(void)
{
/*
* When a snapshot is created, the blob that is being snapped becomes
* the leaf node (a clone of the snapshot) and the newly created
* snapshot sits between the snapped blob and the external snapshot.
*
* Before creating snap1
*
* ,--------. ,----------.
* | blob | | vbdev |
* | blob1 |<----| nvme1n42 |
* | (rw) | | (ro) |
* `--------' `----------'
* Figure 1
*
* After creating snap1
*
* ,--------. ,--------. ,----------.
* | blob | | blob | | vbdev |
* | blob1 |<----| snap1 |<----| nvme1n42 |
* | (rw) | | (ro) | | (ro) |
* `--------' `--------' `----------'
* Figure 2
*
* Starting from Figure 2, if snap1 is removed, the chain reverts to
* what it looks like in Figure 1.
*
* Starting from Figure 2, if blob1 is removed, the chain becomes:
*
* ,--------. ,----------.
* | blob | | vbdev |
* | snap1 |<----| nvme1n42 |
* | (ro) | | (ro) |
* `--------' `----------'
* Figure 3
*
* In each case, the blob pointed to by the nvme vbdev is considered
* the "esnap clone". The esnap clone must have:
*
* - XATTR_INTERNAL for BLOB_EXTERNAL_SNAPSHOT_ID (e.g. name or UUID)
* - blob->invalid_flags must contain SPDK_BLOB_EXTERNAL_SNAPSHOT
* - blob->parent_id must be SPDK_BLOBID_EXTERNAL_SNAPSHOT.
*
* No other blob that descends from the esnap clone may have any of
* those set.
*/
struct spdk_blob_store *bs = g_bs;
const uint32_t blocklen = bs->io_unit_size;
struct spdk_blob_opts opts;
struct ut_esnap_opts esnap_opts;
struct spdk_blob *blob, *snap_blob;
spdk_blob_id blobid, snap_blobid;
bool destroyed = false;
/* Create the esnap clone */
ut_esnap_opts_init(blocklen, 2048, __func__, &destroyed, &esnap_opts);
ut_spdk_blob_opts_init(&opts);
opts.esnap_id = &esnap_opts;
opts.esnap_id_len = sizeof(esnap_opts);
opts.num_clusters = 10;
spdk_bs_create_blob_ext(bs, &opts, blob_op_with_id_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
blobid = g_blobid;
/* Open the blob. */
spdk_bs_open_blob(bs, blobid, blob_op_with_handle_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
CU_ASSERT(g_blob != NULL);
blob = g_blob;
UT_ASSERT_IS_ESNAP_CLONE(blob, &esnap_opts, sizeof(esnap_opts));
/*
* Create a snapshot of the blob. The snapshot becomes the esnap clone.
*/
spdk_bs_create_snapshot(bs, blobid, NULL, blob_op_with_id_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
snap_blobid = g_blobid;
spdk_bs_open_blob(bs, snap_blobid, blob_op_with_handle_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
SPDK_CU_ASSERT_FATAL(g_blob != NULL);
snap_blob = g_blob;
UT_ASSERT_IS_NOT_ESNAP_CLONE(blob);
UT_ASSERT_IS_ESNAP_CLONE(snap_blob, &esnap_opts, sizeof(esnap_opts));
/*
* Delete the snapshot. The original blob becomes the esnap clone.
*/
ut_blob_close_and_delete(bs, snap_blob);
snap_blob = NULL;
snap_blobid = SPDK_BLOBID_INVALID;
UT_ASSERT_IS_ESNAP_CLONE(blob, &esnap_opts, sizeof(esnap_opts));
/*
* Create the snapshot again, then delete the original blob. The
* snapshot should survive as the esnap clone.
*/
spdk_bs_create_snapshot(bs, blobid, NULL, blob_op_with_id_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
snap_blobid = g_blobid;
spdk_bs_open_blob(bs, snap_blobid, blob_op_with_handle_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
SPDK_CU_ASSERT_FATAL(g_blob != NULL);
snap_blob = g_blob;
UT_ASSERT_IS_NOT_ESNAP_CLONE(blob);
UT_ASSERT_IS_ESNAP_CLONE(snap_blob, &esnap_opts, sizeof(esnap_opts));
ut_blob_close_and_delete(bs, blob);
blob = NULL;
blobid = SPDK_BLOBID_INVALID;
UT_ASSERT_IS_ESNAP_CLONE(snap_blob, &esnap_opts, sizeof(esnap_opts));
/*
* Clone the snapshot. The snapshot continues to be the esnap clone.
*/
spdk_bs_create_clone(bs, snap_blobid, NULL, blob_op_with_id_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
blobid = g_blobid;
spdk_bs_open_blob(bs, blobid, blob_op_with_handle_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
SPDK_CU_ASSERT_FATAL(g_blob != NULL);
blob = g_blob;
UT_ASSERT_IS_NOT_ESNAP_CLONE(blob);
UT_ASSERT_IS_ESNAP_CLONE(snap_blob, &esnap_opts, sizeof(esnap_opts));
/*
* Delete the snapshot. The clone becomes the esnap clone.
*/
ut_blob_close_and_delete(bs, snap_blob);
snap_blob = NULL;
snap_blobid = SPDK_BLOBID_INVALID;
UT_ASSERT_IS_ESNAP_CLONE(blob, &esnap_opts, sizeof(esnap_opts));
/*
* Clean up
*/
ut_blob_close_and_delete(bs, blob);
}
static void
suite_bs_setup(void)
{
@ -8220,6 +8414,7 @@ main(int argc, char **argv)
CU_ADD_TEST(suite, blob_esnap_io_4096_512);
CU_ADD_TEST(suite, blob_esnap_io_512_4096);
CU_ADD_TEST(suite_esnap_bs, blob_esnap_thread_add_remove);
CU_ADD_TEST(suite_esnap_bs, blob_esnap_clone_snapshot);
allocate_threads(2);
set_thread(0);