From 94c43313ab2a544f0091e5caea23efc53b48a4e3 Mon Sep 17 00:00:00 2001 From: Mike Gerdts Date: Tue, 25 Jan 2022 09:26:39 -0600 Subject: [PATCH] blob: snapshots of esnap clones An esnap clone needs special handling as snapshots are created and removed. In particular: the following must exist on the blob that directly references the external snapshot and must be removed from others: - Ensure SPDK_BLOB_EXTERNAL_SNAPSHOT invalid flag exists only on the esnap clone. - Ensure BLOB_EXTERNAL_SNAPSHOT_ID internal xattr exists only on the esnap clone. - Clean up any esnap IO channels on a blob that is no longer an esnap clone due to snapshot creation or removal. See the diagrams and description in blob_esnap_clone_snapshot() in blob_ut.c for details. Signed-off-by: Mike Gerdts Change-Id: Ie4125d64d5bac9cfa7d6c7cc9a543d72a169f6ee Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11573 Reviewed-by: Ben Walker Reviewed-by: Jim Harris Tested-by: SPDK CI Jenkins Community-CI: Mellanox Build Bot --- lib/blob/blobstore.c | 134 +++++++++++++++++-- test/unit/lib/blob/blob.c/blob_ut.c | 195 ++++++++++++++++++++++++++++ 2 files changed, 320 insertions(+), 9 deletions(-) diff --git a/lib/blob/blobstore.c b/lib/blob/blobstore.c index 5787b0217..1cd7e96ec 100644 --- a/lib/blob/blobstore.c +++ b/lib/blob/blobstore.c @@ -6187,6 +6187,30 @@ bs_snapshot_swap_cluster_maps(struct spdk_blob *blob1, struct spdk_blob *blob2) blob2->active.extent_pages = extent_page_temp; } +/* Copies an internal xattr */ +static int +bs_snapshot_copy_xattr(struct spdk_blob *toblob, struct spdk_blob *fromblob, const char *name) +{ + const void *val = NULL; + size_t len; + int bserrno; + + bserrno = blob_get_xattr_value(fromblob, name, &val, &len, true); + if (bserrno != 0) { + SPDK_ERRLOG("blob 0x%" PRIx64 " missing %s xattr" + BLOB_EXTERNAL_SNAPSHOT_ID " XATTR\n", fromblob->id, name); + return bserrno; + } + + bserrno = blob_set_xattr(toblob, name, val, len, true); + if (bserrno != 0) { + SPDK_ERRLOG("could not set %s XATTR on blob 0x%" PRIx64 "\n", + name, toblob->id); + return bserrno; + } + return 0; +} + static void bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno) { @@ -6196,6 +6220,10 @@ bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno) if (bserrno != 0) { bs_snapshot_swap_cluster_maps(newblob, origblob); + if (blob_is_esnap_clone(newblob)) { + bs_snapshot_copy_xattr(origblob, newblob, BLOB_EXTERNAL_SNAPSHOT_ID); + origblob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT; + } bs_clone_snapshot_origblob_cleanup(ctx, bserrno); return; } @@ -6259,6 +6287,25 @@ bs_snapshot_newblob_sync_cpl(void *cb_arg, int bserrno) return; } + /* Remove the xattr that references an external snapshot */ + if (blob_is_esnap_clone(origblob)) { + origblob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT; + bserrno = blob_remove_xattr(origblob, BLOB_EXTERNAL_SNAPSHOT_ID, true); + if (bserrno != 0) { + if (bserrno == -ENOENT) { + SPDK_ERRLOG("blob 0x%" PRIx64 " has no " BLOB_EXTERNAL_SNAPSHOT_ID + " xattr to remove\n", origblob->id); + assert(false); + } else { + /* return cluster map back to original */ + bs_snapshot_swap_cluster_maps(newblob, origblob); + blob_set_thin_provision(newblob); + bs_clone_snapshot_newblob_cleanup(ctx, bserrno); + return; + } + } + } + bs_blob_list_remove(origblob); origblob->parent_id = newblob->id; /* set clone blob as thin provisioned */ @@ -6285,6 +6332,12 @@ bs_snapshot_freeze_cpl(void *cb_arg, int rc) ctx->frozen = true; + if (blob_is_esnap_clone(origblob)) { + /* Clean up any channels associated with the original blob id because future IO will + * perform IO using the snapshot blob_id. + */ + blob_esnap_destroy_bs_dev_channels(origblob, NULL, NULL); + } if (newblob->back_bs_dev) { blob_back_bs_destroy(newblob); } @@ -6295,7 +6348,17 @@ bs_snapshot_freeze_cpl(void *cb_arg, int rc) /* inherit parent from original blob if set */ newblob->parent_id = origblob->parent_id; - if (origblob->parent_id != SPDK_BLOBID_INVALID) { + switch (origblob->parent_id) { + case SPDK_BLOBID_EXTERNAL_SNAPSHOT: + bserrno = bs_snapshot_copy_xattr(newblob, origblob, BLOB_EXTERNAL_SNAPSHOT_ID); + if (bserrno != 0) { + bs_clone_snapshot_newblob_cleanup(ctx, bserrno); + return; + } + break; + case SPDK_BLOBID_INVALID: + break; + default: /* Set internal xattr for snapshot id */ bserrno = blob_set_xattr(newblob, BLOB_SNAPSHOT, &origblob->parent_id, sizeof(spdk_blob_id), true); @@ -7016,7 +7079,13 @@ delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno) snapshot_entry->clone_count--; assert(TAILQ_EMPTY(&snapshot_entry->clones)); - if (ctx->snapshot->parent_id != SPDK_BLOBID_INVALID) { + switch (ctx->snapshot->parent_id) { + case SPDK_BLOBID_INVALID: + case SPDK_BLOBID_EXTERNAL_SNAPSHOT: + /* No parent snapshot - just remove clone entry */ + free(clone_entry); + break; + default: /* This snapshot is at the same time a clone of another snapshot - we need to * update parent snapshot (remove current clone, add new one inherited from * the snapshot that is being removed) */ @@ -7030,9 +7099,6 @@ delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno) TAILQ_INSERT_TAIL(&parent_snapshot_entry->clones, clone_entry, link); TAILQ_REMOVE(&parent_snapshot_entry->clones, snapshot_clone_entry, link); free(snapshot_clone_entry); - } else { - /* No parent snapshot - just remove clone entry */ - free(clone_entry); } /* Restore md_ro flags */ @@ -7091,11 +7157,34 @@ delete_snapshot_sync_clone_cpl(void *cb_arg, int bserrno) static void delete_snapshot_update_extent_pages_cpl(struct delete_snapshot_ctx *ctx) { + int bserrno; + /* Delete old backing bs_dev from clone (related to snapshot that will be removed) */ blob_back_bs_destroy(ctx->clone); /* Set/remove snapshot xattr and switch parent ID and backing bs_dev on clone... */ - if (ctx->parent_snapshot_entry != NULL) { + if (ctx->snapshot->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) { + bserrno = bs_snapshot_copy_xattr(ctx->clone, ctx->snapshot, + BLOB_EXTERNAL_SNAPSHOT_ID); + if (bserrno != 0) { + ctx->bserrno = bserrno; + + /* Restore snapshot to previous state */ + bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true); + if (bserrno != 0) { + delete_snapshot_cleanup_clone(ctx, bserrno); + return; + } + + spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx); + return; + } + ctx->clone->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT; + ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev; + /* Do not delete the external snapshot along with this snapshot */ + ctx->snapshot->back_bs_dev = NULL; + ctx->clone->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT; + } else if (ctx->parent_snapshot_entry != NULL) { /* ...to parent snapshot */ ctx->clone->parent_id = ctx->parent_snapshot_entry->id; ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev; @@ -7172,6 +7261,20 @@ delete_snapshot_sync_snapshot_xattr_cpl(void *cb_arg, int bserrno) delete_snapshot_update_extent_pages(ctx, 0); } +static void +delete_snapshot_esnap_channels_destroyed_cb(void *cb_arg, struct spdk_blob *blob, int bserrno) +{ + struct delete_snapshot_ctx *ctx = cb_arg; + + if (bserrno != 0) { + SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to destroy esnap channels: %d\n", + blob->id, bserrno); + /* That error should not stop us from syncing metadata. */ + } + + spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx); +} + static void delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno) { @@ -7196,6 +7299,13 @@ delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno) return; } + if (blob_is_esnap_clone(ctx->snapshot)) { + blob_esnap_destroy_bs_dev_channels(ctx->snapshot, + delete_snapshot_esnap_channels_destroyed_cb, + ctx); + return; + } + spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx); } @@ -8713,7 +8823,9 @@ blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status) SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n", blob->id); - ctx->cb_fn(ctx->cb_arg, blob, status); + if (ctx->cb_fn != NULL) { + ctx->cb_fn(ctx->cb_arg, blob, status); + } free(ctx); } @@ -8754,13 +8866,17 @@ blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, spdk_blob_op_with_han struct blob_esnap_destroy_ctx *ctx; if (!blob_is_esnap_clone(blob)) { - cb_fn(cb_arg, blob, 0); + if (cb_fn != NULL) { + cb_fn(cb_arg, blob, 0); + } return; } ctx = calloc(1, sizeof(*ctx)); if (ctx == NULL) { - cb_fn(cb_arg, blob, -ENOMEM); + if (cb_fn != NULL) { + cb_fn(cb_arg, blob, -ENOMEM); + } return; } ctx->cb_fn = cb_fn; diff --git a/test/unit/lib/blob/blob.c/blob_ut.c b/test/unit/lib/blob/blob.c/blob_ut.c index eec61360c..5c0787e0d 100644 --- a/test/unit/lib/blob/blob.c/blob_ut.c +++ b/test/unit/lib/blob/blob.c/blob_ut.c @@ -63,6 +63,42 @@ DEFINE_STUB(spdk_memory_domain_memzero, int, (struct spdk_memory_domain *src_dom void *src_domain_ctx, struct iovec *iov, uint32_t iovcnt, void (*cpl_cb)(void *, int), void *cpl_cb_arg), 0); +static bool +is_esnap_clone(struct spdk_blob *_blob, const void *id, size_t id_len) +{ + const void *val = NULL; + size_t len = 0; + bool c0, c1, c2, c3; + + CU_ASSERT(blob_get_xattr_value(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, &val, &len, + true) == 0); + CU_ASSERT((c0 = (len == id_len))); + CU_ASSERT((c1 = (val != NULL && memcmp(val, id, len) == 0))); + CU_ASSERT((c2 = !!(_blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT))); + CU_ASSERT((c3 = (_blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT))); + + return c0 && c1 && c2 && c3; +} + +static bool +is_not_esnap_clone(struct spdk_blob *_blob) +{ + const void *val = NULL; + size_t len = 0; + bool c1, c2, c3, c4; + + CU_ASSERT((c1 = (blob_get_xattr_value(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, &val, &len, + true) == -ENOENT))); + CU_ASSERT((c2 = (val == NULL))); + CU_ASSERT((c3 = ((_blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT) == 0))); + CU_ASSERT((c4 = (_blob->parent_id != SPDK_BLOBID_EXTERNAL_SNAPSHOT))); + + return c1 && c2 && c3 && c4; +} + +#define UT_ASSERT_IS_ESNAP_CLONE(_blob, _id, _len) CU_ASSERT(is_esnap_clone(_blob, _id, _len)) +#define UT_ASSERT_IS_NOT_ESNAP_CLONE(_blob) CU_ASSERT(is_not_esnap_clone(_blob)) + static void _get_xattr_value(void *arg, const char *name, const void **value, size_t *value_len) @@ -8014,6 +8050,164 @@ blob_ext_md_pages(void) g_bs = NULL; } +static void +blob_esnap_clone_snapshot(void) +{ + /* + * When a snapshot is created, the blob that is being snapped becomes + * the leaf node (a clone of the snapshot) and the newly created + * snapshot sits between the snapped blob and the external snapshot. + * + * Before creating snap1 + * + * ,--------. ,----------. + * | blob | | vbdev | + * | blob1 |<----| nvme1n42 | + * | (rw) | | (ro) | + * `--------' `----------' + * Figure 1 + * + * After creating snap1 + * + * ,--------. ,--------. ,----------. + * | blob | | blob | | vbdev | + * | blob1 |<----| snap1 |<----| nvme1n42 | + * | (rw) | | (ro) | | (ro) | + * `--------' `--------' `----------' + * Figure 2 + * + * Starting from Figure 2, if snap1 is removed, the chain reverts to + * what it looks like in Figure 1. + * + * Starting from Figure 2, if blob1 is removed, the chain becomes: + * + * ,--------. ,----------. + * | blob | | vbdev | + * | snap1 |<----| nvme1n42 | + * | (ro) | | (ro) | + * `--------' `----------' + * Figure 3 + * + * In each case, the blob pointed to by the nvme vbdev is considered + * the "esnap clone". The esnap clone must have: + * + * - XATTR_INTERNAL for BLOB_EXTERNAL_SNAPSHOT_ID (e.g. name or UUID) + * - blob->invalid_flags must contain SPDK_BLOB_EXTERNAL_SNAPSHOT + * - blob->parent_id must be SPDK_BLOBID_EXTERNAL_SNAPSHOT. + * + * No other blob that descends from the esnap clone may have any of + * those set. + */ + struct spdk_blob_store *bs = g_bs; + const uint32_t blocklen = bs->io_unit_size; + struct spdk_blob_opts opts; + struct ut_esnap_opts esnap_opts; + struct spdk_blob *blob, *snap_blob; + spdk_blob_id blobid, snap_blobid; + bool destroyed = false; + + /* Create the esnap clone */ + ut_esnap_opts_init(blocklen, 2048, __func__, &destroyed, &esnap_opts); + ut_spdk_blob_opts_init(&opts); + opts.esnap_id = &esnap_opts; + opts.esnap_id_len = sizeof(esnap_opts); + opts.num_clusters = 10; + spdk_bs_create_blob_ext(bs, &opts, blob_op_with_id_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + blobid = g_blobid; + + /* Open the blob. */ + spdk_bs_open_blob(bs, blobid, blob_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blob != NULL); + blob = g_blob; + UT_ASSERT_IS_ESNAP_CLONE(blob, &esnap_opts, sizeof(esnap_opts)); + + /* + * Create a snapshot of the blob. The snapshot becomes the esnap clone. + */ + spdk_bs_create_snapshot(bs, blobid, NULL, blob_op_with_id_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + snap_blobid = g_blobid; + + spdk_bs_open_blob(bs, snap_blobid, blob_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_blob != NULL); + snap_blob = g_blob; + + UT_ASSERT_IS_NOT_ESNAP_CLONE(blob); + UT_ASSERT_IS_ESNAP_CLONE(snap_blob, &esnap_opts, sizeof(esnap_opts)); + + /* + * Delete the snapshot. The original blob becomes the esnap clone. + */ + ut_blob_close_and_delete(bs, snap_blob); + snap_blob = NULL; + snap_blobid = SPDK_BLOBID_INVALID; + UT_ASSERT_IS_ESNAP_CLONE(blob, &esnap_opts, sizeof(esnap_opts)); + + /* + * Create the snapshot again, then delete the original blob. The + * snapshot should survive as the esnap clone. + */ + spdk_bs_create_snapshot(bs, blobid, NULL, blob_op_with_id_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + snap_blobid = g_blobid; + + spdk_bs_open_blob(bs, snap_blobid, blob_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_blob != NULL); + snap_blob = g_blob; + + UT_ASSERT_IS_NOT_ESNAP_CLONE(blob); + UT_ASSERT_IS_ESNAP_CLONE(snap_blob, &esnap_opts, sizeof(esnap_opts)); + + ut_blob_close_and_delete(bs, blob); + blob = NULL; + blobid = SPDK_BLOBID_INVALID; + UT_ASSERT_IS_ESNAP_CLONE(snap_blob, &esnap_opts, sizeof(esnap_opts)); + + /* + * Clone the snapshot. The snapshot continues to be the esnap clone. + */ + spdk_bs_create_clone(bs, snap_blobid, NULL, blob_op_with_id_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + blobid = g_blobid; + + spdk_bs_open_blob(bs, blobid, blob_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_blob != NULL); + blob = g_blob; + + UT_ASSERT_IS_NOT_ESNAP_CLONE(blob); + UT_ASSERT_IS_ESNAP_CLONE(snap_blob, &esnap_opts, sizeof(esnap_opts)); + + /* + * Delete the snapshot. The clone becomes the esnap clone. + */ + ut_blob_close_and_delete(bs, snap_blob); + snap_blob = NULL; + snap_blobid = SPDK_BLOBID_INVALID; + UT_ASSERT_IS_ESNAP_CLONE(blob, &esnap_opts, sizeof(esnap_opts)); + + /* + * Clean up + */ + ut_blob_close_and_delete(bs, blob); +} + static void suite_bs_setup(void) { @@ -8220,6 +8414,7 @@ main(int argc, char **argv) CU_ADD_TEST(suite, blob_esnap_io_4096_512); CU_ADD_TEST(suite, blob_esnap_io_512_4096); CU_ADD_TEST(suite_esnap_bs, blob_esnap_thread_add_remove); + CU_ADD_TEST(suite_esnap_bs, blob_esnap_clone_snapshot); allocate_threads(2); set_thread(0);