diff --git a/lib/blob/blobstore.c b/lib/blob/blobstore.c index bbc924e1b..cb7dd96ba 100644 --- a/lib/blob/blobstore.c +++ b/lib/blob/blobstore.c @@ -2544,6 +2544,8 @@ struct spdk_bs_load_ctx { spdk_bs_sequence_t *seq; spdk_blob_op_with_handle_complete iter_cb_fn; void *iter_cb_arg; + struct spdk_blob *blob; + spdk_blob_id blobid; }; static void @@ -2700,21 +2702,147 @@ _spdk_blob_set_thin_provision(struct spdk_blob *blob) blob->state = SPDK_BLOB_STATE_DIRTY; } +static void _spdk_bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno); + +static void +_spdk_bs_delete_corrupted_blob_cpl(void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + spdk_blob_id id; + int64_t page_num; + + /* Iterate to next blob (we can't use spdk_bs_iter_next function as our + * last blob has been removed */ + page_num = _spdk_bs_blobid_to_page(ctx->blobid); + page_num++; + page_num = spdk_bit_array_find_first_set(ctx->bs->used_blobids, page_num); + if (page_num >= spdk_bit_array_capacity(ctx->bs->used_blobids)) { + _spdk_bs_load_iter(ctx, NULL, -ENOENT); + return; + } + + id = _spdk_bs_page_to_blobid(page_num); + + spdk_bs_open_blob(ctx->bs, id, _spdk_bs_load_iter, ctx); +} + +static void +_spdk_bs_delete_corrupted_close_cb(void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + + if (bserrno != 0) { + SPDK_ERRLOG("Failed to close corrupted blob\n"); + spdk_bs_iter_next(ctx->bs, ctx->blob, _spdk_bs_load_iter, ctx); + return; + } + + spdk_bs_delete_blob(ctx->bs, ctx->blobid, _spdk_bs_delete_corrupted_blob_cpl, ctx); +} + +static void +_spdk_bs_delete_corrupted_blob(void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + uint64_t i; + + if (bserrno != 0) { + SPDK_ERRLOG("Failed to close clone of a corrupted blob\n"); + spdk_bs_iter_next(ctx->bs, ctx->blob, _spdk_bs_load_iter, ctx); + return; + } + + /* Snapshot and clone have the same copy of cluster map at this point. + * Let's clear cluster map for snpashot now so that it won't be cleared + * for clone later when we remove snapshot. Also set thin provision to + * pass data corruption check */ + for (i = 0; i < ctx->blob->active.num_clusters; i++) { + ctx->blob->active.clusters[i] = 0; + } + + ctx->blob->md_ro = false; + + _spdk_blob_set_thin_provision(ctx->blob); + + ctx->blobid = ctx->blob->id; + + spdk_blob_close(ctx->blob, _spdk_bs_delete_corrupted_close_cb, ctx); +} + +static void +_spdk_bs_update_corrupted_blob(void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + + if (bserrno != 0) { + SPDK_ERRLOG("Failed to close clone of a corrupted blob\n"); + spdk_bs_iter_next(ctx->bs, ctx->blob, _spdk_bs_load_iter, ctx); + return; + } + + ctx->blob->md_ro = false; + _spdk_blob_remove_xattr(ctx->blob, SNAPSHOT_PENDING_REMOVAL, true); + spdk_blob_set_read_only(ctx->blob); + + if (ctx->iter_cb_fn) { + ctx->iter_cb_fn(ctx->iter_cb_arg, ctx->blob, 0); + } + _spdk_bs_blob_list_add(ctx->blob); + + spdk_bs_iter_next(ctx->bs, ctx->blob, _spdk_bs_load_iter, ctx); +} + +static void +_spdk_bs_examine_clone(void *cb_arg, struct spdk_blob *blob, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + + if (bserrno != 0) { + SPDK_ERRLOG("Failed to open clone of a corrupted blob\n"); + spdk_bs_iter_next(ctx->bs, ctx->blob, _spdk_bs_load_iter, ctx); + return; + } + + if (blob->parent_id == ctx->blob->id) { + /* Power failure occured before updating clone - keep snapshot */ + spdk_blob_close(blob, _spdk_bs_update_corrupted_blob, ctx); + } else { + /* Power failure occured after updating clone - remove snapshot */ + spdk_blob_close(blob, _spdk_bs_delete_corrupted_blob, ctx); + } +} + static void _spdk_bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno) { struct spdk_bs_load_ctx *ctx = arg; + const void *value; + size_t len; + int rc = 0; if (bserrno == 0) { - if (ctx->iter_cb_fn) { - ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0); + /* Examine blob if it is corrupted after power failure. Fix + * the ones that can be fixed and remove any other corrupted + * ones. If it is not corrupted just process it */ + rc = _spdk_blob_get_xattr_value(blob, SNAPSHOT_PENDING_REMOVAL, &value, &len, true); + if (rc != 0) { + /* Not corrupted - process it and continue with iterating through blobs */ + if (ctx->iter_cb_fn) { + ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0); + } + _spdk_bs_blob_list_add(blob); + spdk_bs_iter_next(ctx->bs, blob, _spdk_bs_load_iter, ctx); + return; } - _spdk_bs_blob_list_add(blob); - spdk_bs_iter_next(ctx->bs, blob, _spdk_bs_load_iter, ctx); - return; - } - if (bserrno == -ENOENT) { + assert(len == sizeof(spdk_blob_id)); + + ctx->blob = blob; + + /* Open clone to check if we are able to fix this blob or should we remove it */ + spdk_bs_open_blob(ctx->bs, *(spdk_blob_id *)value, _spdk_bs_examine_clone, ctx); + return; + } else if (bserrno == -ENOENT) { bserrno = 0; } else { /* diff --git a/lib/blob/blobstore.h b/lib/blob/blobstore.h index 6aa00ddd2..08f903931 100644 --- a/lib/blob/blobstore.h +++ b/lib/blob/blobstore.h @@ -215,6 +215,7 @@ enum spdk_blob_op_type { #define BLOB_SNAPSHOT "SNAP" #define SNAPSHOT_IN_PROGRESS "SNAPTMP" +#define SNAPSHOT_PENDING_REMOVAL "SNAPRM" struct spdk_blob_bs_dev { struct spdk_bs_dev bs_dev; diff --git a/test/unit/lib/blob/blob.c/blob_ut.c b/test/unit/lib/blob/blob.c/blob_ut.c index 34fb13dcf..b1599d1bb 100644 --- a/test/unit/lib/blob/blob.c/blob_ut.c +++ b/test/unit/lib/blob/blob.c/blob_ut.c @@ -2734,6 +2734,144 @@ bs_load(void) } +static void +bs_load_pending_removal(void) +{ + struct spdk_blob_store *bs; + struct spdk_bs_dev *dev; + struct spdk_blob_opts opts; + struct spdk_blob *blob, *snapshot; + spdk_blob_id blobid, snapshotid; + const void *value; + size_t value_len; + int rc; + + dev = init_dev(); + + spdk_bs_init(dev, NULL, bs_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + + /* Create blob */ + spdk_blob_opts_init(&opts); + opts.num_clusters = 10; + + spdk_bs_create_blob_ext(bs, &opts, blob_op_with_id_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + blobid = g_blobid; + + spdk_bs_open_blob(bs, blobid, blob_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_blob != NULL); + blob = g_blob; + + /* Create snapshot */ + spdk_bs_create_snapshot(bs, blobid, NULL, blob_op_with_id_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + snapshotid = g_blobid; + + spdk_bs_open_blob(bs, snapshotid, blob_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_blob != NULL); + snapshot = g_blob; + + /* Set SNAPSHOT_PENDING_REMOVAL xattr */ + snapshot->md_ro = false; + rc = _spdk_blob_set_xattr(snapshot, SNAPSHOT_PENDING_REMOVAL, &blobid, sizeof(spdk_blob_id), true); + CU_ASSERT(rc == 0); + snapshot->md_ro = true; + + spdk_blob_close(snapshot, blob_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + + spdk_blob_close(blob, blob_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + + /* Reload blobstore */ + spdk_bs_unload(g_bs, bs_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; + + dev = init_dev(); + spdk_bs_load(dev, NULL, bs_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + + /* Snapshot should not be removed as blob is still pointing to it */ + spdk_bs_open_blob(bs, snapshotid, blob_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_blob != NULL); + snapshot = g_blob; + + /* SNAPSHOT_PENDING_REMOVAL xattr should be removed during load */ + rc = spdk_blob_get_xattr_value(snapshot, SNAPSHOT_PENDING_REMOVAL, &value, &value_len); + CU_ASSERT(rc != 0); + + /* Set SNAPSHOT_PENDING_REMOVAL xattr again */ + snapshot->md_ro = false; + rc = _spdk_blob_set_xattr(snapshot, SNAPSHOT_PENDING_REMOVAL, &blobid, sizeof(spdk_blob_id), true); + CU_ASSERT(rc == 0); + snapshot->md_ro = true; + + spdk_bs_open_blob(bs, blobid, blob_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_blob != NULL); + blob = g_blob; + + /* Remove parent_id from blob by removing BLOB_SNAPSHOT xattr */ + _spdk_blob_remove_xattr(blob, BLOB_SNAPSHOT, true); + + spdk_blob_sync_md(blob, blob_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + + spdk_blob_close(snapshot, blob_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + + spdk_blob_close(blob, blob_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + + /* Reload blobstore */ + spdk_bs_unload(g_bs, bs_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; + + dev = init_dev(); + spdk_bs_load(dev, NULL, bs_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + + /* Snapshot should be removed as blob is not pointing to it anymore */ + spdk_bs_open_blob(bs, snapshotid, blob_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno != 0); + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + static void bs_load_custom_cluster_size(void) { @@ -6638,6 +6776,7 @@ int main(int argc, char **argv) CU_add_test(suite, "blob_iter", blob_iter) == NULL || CU_add_test(suite, "blob_xattr", blob_xattr) == NULL || CU_add_test(suite, "bs_load", bs_load) == NULL || + CU_add_test(suite, "bs_load_pending_removal", bs_load_pending_removal) == NULL || CU_add_test(suite, "bs_load_custom_cluster_size", bs_load_custom_cluster_size) == NULL || CU_add_test(suite, "bs_unload", bs_unload) == NULL || CU_add_test(suite, "bs_cluster_sz", bs_cluster_sz) == NULL ||