From e8356fd2333369c126e2d6529a5a43f87e85ef18 Mon Sep 17 00:00:00 2001 From: Maciej Szwed Date: Tue, 21 May 2019 09:50:30 +0200 Subject: [PATCH] blobstore: Cleanup after power failure while creating snapshot Currently we are missing a cleanup routine for the case when a power failure interrupts creating a snapshot. This patch adds such a routine. For the case where we find a blob with a parent snapshot ID matching the newly created snapshot, we can finish the whole process during recovery by proceeding forward with setting the snapshot as read only, removing the xattr and syncing. We should remove the snapshot only if there is no blob with a parent pointing at the snapshot. Fixes github issue #760 Signed-off-by: Maciej Szwed Change-Id: I2f0e298164e07a2b4dfa5367e8878facef640702 Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/455216 Tested-by: SPDK CI Jenkins Reviewed-by: Shuhei Matsumoto Reviewed-by: Darek Stojaczyk Reviewed-by: Ben Walker Reviewed-by: Tomasz Zawadzki --- lib/blob/blobstore.c | 23 ++++-- test/unit/lib/blob/blob.c/blob_ut.c | 116 ++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 8 deletions(-) diff --git a/lib/blob/blobstore.c b/lib/blob/blobstore.c index 3c94bcddb..d0f45569a 100644 --- a/lib/blob/blobstore.c +++ b/lib/blob/blobstore.c @@ -2782,6 +2782,7 @@ _spdk_bs_update_corrupted_blob(void *cb_arg, int bserrno) ctx->blob->md_ro = false; _spdk_blob_remove_xattr(ctx->blob, SNAPSHOT_PENDING_REMOVAL, true); + _spdk_blob_remove_xattr(ctx->blob, SNAPSHOT_IN_PROGRESS, true); spdk_blob_set_read_only(ctx->blob); if (ctx->iter_cb_fn) { @@ -2804,10 +2805,12 @@ _spdk_bs_examine_clone(void *cb_arg, struct spdk_blob *blob, int bserrno) } if (blob->parent_id == ctx->blob->id) { - /* Power failure occured before updating clone - keep snapshot */ + /* Power failure occured before updating clone (snapshot delete case) + * or after updating clone (creating snapshot case) - keep snapshot */ spdk_blob_close(blob, _spdk_bs_update_corrupted_blob, ctx); } else { - /* Power failure occured 
after updating clone - remove snapshot */ + /* Power failure occured after updating clone (snapshot delete case) + * or before updating clone (creating snapshot case) - remove snapshot */ spdk_blob_close(blob, _spdk_bs_delete_corrupted_blob, ctx); } } @@ -2826,13 +2829,17 @@ _spdk_bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno) * ones. If it is not corrupted just process it */ rc = _spdk_blob_get_xattr_value(blob, SNAPSHOT_PENDING_REMOVAL, &value, &len, true); if (rc != 0) { - /* Not corrupted - process it and continue with iterating through blobs */ - if (ctx->iter_cb_fn) { - ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0); + rc = _spdk_blob_get_xattr_value(blob, SNAPSHOT_IN_PROGRESS, &value, &len, true); + if (rc != 0) { + /* Not corrupted - process it and continue with iterating through blobs */ + if (ctx->iter_cb_fn) { + ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0); + } + _spdk_bs_blob_list_add(blob); + spdk_bs_iter_next(ctx->bs, blob, _spdk_bs_load_iter, ctx); + return; } - _spdk_bs_blob_list_add(blob); - spdk_bs_iter_next(ctx->bs, blob, _spdk_bs_load_iter, ctx); - return; + } assert(len == sizeof(spdk_blob_id)); diff --git a/test/unit/lib/blob/blob.c/blob_ut.c b/test/unit/lib/blob/blob.c/blob_ut.c index 2f79c789e..779ece785 100644 --- a/test/unit/lib/blob/blob.c/blob_ut.c +++ b/test/unit/lib/blob/blob.c/blob_ut.c @@ -6355,6 +6355,120 @@ blob_delete_snapshot_power_failure(void) g_bs = NULL; } +static void +blob_create_snapshot_power_failure(void) +{ + struct spdk_blob_store *bs; + struct spdk_bs_dev *dev; + struct spdk_blob_opts opts; + struct spdk_blob *blob, *snapshot; + struct spdk_power_failure_thresholds thresholds = {}; + spdk_blob_id blobid, snapshotid; + const void *value; + size_t value_len; + size_t count; + spdk_blob_id ids[3] = {}; + int rc; + bool created = false; + + dev = init_dev(); + + spdk_bs_init(dev, NULL, bs_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = 
g_bs; + + /* Create blob */ + spdk_blob_opts_init(&opts); + opts.num_clusters = 10; + + spdk_bs_create_blob_ext(bs, &opts, blob_op_with_id_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + blobid = g_blobid; + + thresholds.general_threshold = 1; + while (!created) { + dev_set_power_failure_thresholds(thresholds); + + /* Create snapshot */ + spdk_bs_create_snapshot(bs, blobid, NULL, blob_op_with_id_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno != 0); + snapshotid = g_blobid; + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + poll_threads(); + + dev_reset_power_failure_event(); + + dev = init_dev(); + spdk_bs_load(dev, NULL, bs_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + + spdk_bs_open_blob(bs, blobid, blob_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_blob != NULL); + blob = g_blob; + + if (snapshotid != SPDK_BLOBID_INVALID) { + spdk_bs_open_blob(bs, snapshotid, blob_op_with_handle_complete, NULL); + poll_threads(); + } + + if ((snapshotid != SPDK_BLOBID_INVALID) && (g_bserrno == 0)) { + SPDK_CU_ASSERT_FATAL(g_blob != NULL); + snapshot = g_blob; + CU_ASSERT(spdk_blob_get_parent_snapshot(bs, blobid) == snapshotid); + count = SPDK_COUNTOF(ids); + rc = spdk_blob_get_clones(bs, snapshotid, ids, &count); + CU_ASSERT(rc == 0); + CU_ASSERT(count == 1); + CU_ASSERT(ids[0] == blobid); + rc = spdk_blob_get_xattr_value(snapshot, SNAPSHOT_IN_PROGRESS, &value, &value_len); + CU_ASSERT(rc != 0); + + spdk_blob_close(snapshot, blob_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + created = true; + } else { + CU_ASSERT(spdk_blob_get_parent_snapshot(bs, blobid) == SPDK_BLOBID_INVALID); + CU_ASSERT(!(blob->invalid_flags & SPDK_BLOB_THIN_PROV)); + } + + spdk_blob_close(blob, blob_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + + /* 
Reload blobstore to have the same starting conditions (as the previous blobstore load + * may trigger cleanup after power failure or may not) */ + spdk_bs_unload(g_bs, bs_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + + dev = init_dev(); + spdk_bs_load(dev, NULL, bs_op_with_handle_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + + thresholds.general_threshold++; + } + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + poll_threads(); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + static void test_io_write(struct spdk_bs_dev *dev, struct spdk_blob *blob, struct spdk_io_channel *channel) { @@ -7317,6 +7431,8 @@ int main(int argc, char **argv) CU_add_test(suite, "blob_relations2", blob_relations2) == NULL || CU_add_test(suite, "blob_delete_snapshot_power_failure", blob_delete_snapshot_power_failure) == NULL || + CU_add_test(suite, "blob_create_snapshot_power_failure", + blob_create_snapshot_power_failure) == NULL || CU_add_test(suite, "blob_inflate_rw", blob_inflate_rw) == NULL || CU_add_test(suite, "blob_snapshot_freeze_io", blob_snapshot_freeze_io) == NULL || CU_add_test(suite, "blob_operation_split_rw", blob_operation_split_rw) == NULL ||