blobstore: Cleanup after power failure while creating snapshot

Currently we are missing a cleanup routine for the case when
a power failure interrupts snapshot creation. This patch
adds such a routine.

For the case where we find a blob with a parent snapshot ID
matching the newly created snapshot, we can finish the whole
process during recovery by proceeding forward: setting the snapshot
as read only, removing the xattr and syncing. We should remove
the snapshot only if there is no blob with its parent pointing at
the snapshot.

Fixes github issue #760

Signed-off-by: Maciej Szwed <maciej.szwed@intel.com>
Change-Id: I2f0e298164e07a2b4dfa5367e8878facef640702
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/455216
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
This commit is contained in:
Maciej Szwed 2019-05-21 09:50:30 +02:00 committed by Darek Stojaczyk
parent a6b199224c
commit e8356fd233
2 changed files with 131 additions and 8 deletions

View File

@ -2782,6 +2782,7 @@ _spdk_bs_update_corrupted_blob(void *cb_arg, int bserrno)
ctx->blob->md_ro = false;
_spdk_blob_remove_xattr(ctx->blob, SNAPSHOT_PENDING_REMOVAL, true);
_spdk_blob_remove_xattr(ctx->blob, SNAPSHOT_IN_PROGRESS, true);
spdk_blob_set_read_only(ctx->blob);
if (ctx->iter_cb_fn) {
@ -2804,10 +2805,12 @@ _spdk_bs_examine_clone(void *cb_arg, struct spdk_blob *blob, int bserrno)
}
if (blob->parent_id == ctx->blob->id) {
/* Power failure occured before updating clone - keep snapshot */
/* Power failure occured before updating clone (snapshot delete case)
* or after updating clone (creating snapshot case) - keep snapshot */
spdk_blob_close(blob, _spdk_bs_update_corrupted_blob, ctx);
} else {
/* Power failure occured after updating clone - remove snapshot */
/* Power failure occured after updating clone (snapshot delete case)
* or before updating clone (creating snapshot case) - remove snapshot */
spdk_blob_close(blob, _spdk_bs_delete_corrupted_blob, ctx);
}
}
@ -2826,13 +2829,17 @@ _spdk_bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno)
* ones. If it is not corrupted just process it */
rc = _spdk_blob_get_xattr_value(blob, SNAPSHOT_PENDING_REMOVAL, &value, &len, true);
if (rc != 0) {
/* Not corrupted - process it and continue with iterating through blobs */
if (ctx->iter_cb_fn) {
ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0);
rc = _spdk_blob_get_xattr_value(blob, SNAPSHOT_IN_PROGRESS, &value, &len, true);
if (rc != 0) {
/* Not corrupted - process it and continue with iterating through blobs */
if (ctx->iter_cb_fn) {
ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0);
}
_spdk_bs_blob_list_add(blob);
spdk_bs_iter_next(ctx->bs, blob, _spdk_bs_load_iter, ctx);
return;
}
_spdk_bs_blob_list_add(blob);
spdk_bs_iter_next(ctx->bs, blob, _spdk_bs_load_iter, ctx);
return;
}
assert(len == sizeof(spdk_blob_id));

View File

@ -6355,6 +6355,120 @@ blob_delete_snapshot_power_failure(void)
g_bs = NULL;
}
/*
 * Unit test: interrupt spdk_bs_create_snapshot() with a simulated power
 * failure and verify the state of the blobstore after it is loaded again.
 *
 * Each loop iteration arms the device with the current
 * thresholds.general_threshold, attempts to create a snapshot of one blob,
 * unloads and reloads the blobstore, and then checks one of two outcomes:
 *   - the snapshot can be opened: the blob's parent must be that snapshot,
 *     the snapshot must have exactly one clone (the blob), and the
 *     SNAPSHOT_IN_PROGRESS xattr must no longer be present on the snapshot;
 *   - otherwise: the blob must have no parent snapshot and must not carry
 *     the SPDK_BLOB_THIN_PROV invalid flag.
 * The threshold is incremented every iteration and the loop stops the first
 * time the snapshot is found after reload.
 */
static void
blob_create_snapshot_power_failure(void)
{
struct spdk_blob_store *bs;
struct spdk_bs_dev *dev;
struct spdk_blob_opts opts;
struct spdk_blob *blob, *snapshot;
struct spdk_power_failure_thresholds thresholds = {};
spdk_blob_id blobid, snapshotid;
const void *value;
size_t value_len;
size_t count;
spdk_blob_id ids[3] = {};
int rc;
/* Loop exit flag: set once a snapshot is found and verified after reload. */
bool created = false;
/* Initialize a fresh blobstore on a new test device. */
dev = init_dev();
spdk_bs_init(dev, NULL, bs_op_with_handle_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
SPDK_CU_ASSERT_FATAL(g_bs != NULL);
bs = g_bs;
/* Create blob */
spdk_blob_opts_init(&opts);
opts.num_clusters = 10;
spdk_bs_create_blob_ext(bs, &opts, blob_op_with_id_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
blobid = g_blobid;
/* Start with the smallest threshold; it grows by one per iteration.
 * NOTE(review): presumably the threshold is the number of device operations
 * allowed before the simulated failure - confirm against the test device code. */
thresholds.general_threshold = 1;
while (!created) {
dev_set_power_failure_thresholds(thresholds);
/* Create snapshot */
spdk_bs_create_snapshot(bs, blobid, NULL, blob_op_with_id_complete, NULL);
poll_threads();
/* The create call is expected to report an error in every iteration,
 * including the one in which the snapshot is later found after reload. */
CU_ASSERT(g_bserrno != 0);
/* Remember whatever ID the completion callback reported; it is compared
 * against SPDK_BLOBID_INVALID below before being used. */
snapshotid = g_blobid;
/* The result of this unload is not asserted. */
spdk_bs_unload(g_bs, bs_op_complete, NULL);
poll_threads();
/* NOTE(review): presumably clears the simulated power failure so that the
 * following load can access the device - confirm. */
dev_reset_power_failure_event();
/* Load the blobstore again; the load itself must succeed. */
dev = init_dev();
spdk_bs_load(dev, NULL, bs_op_with_handle_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
SPDK_CU_ASSERT_FATAL(g_bs != NULL);
bs = g_bs;
/* The original blob must always be openable after reload. */
spdk_bs_open_blob(bs, blobid, blob_op_with_handle_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
SPDK_CU_ASSERT_FATAL(g_blob != NULL);
blob = g_blob;
/* Try to open the snapshot only if an ID was reported for it. */
if (snapshotid != SPDK_BLOBID_INVALID) {
spdk_bs_open_blob(bs, snapshotid, blob_op_with_handle_complete, NULL);
poll_threads();
}
/* g_bserrno here holds the result of the most recent open call. */
if ((snapshotid != SPDK_BLOBID_INVALID) && (g_bserrno == 0)) {
/* Snapshot exists after reload: it must be fully linked to the blob. */
SPDK_CU_ASSERT_FATAL(g_blob != NULL);
snapshot = g_blob;
CU_ASSERT(spdk_blob_get_parent_snapshot(bs, blobid) == snapshotid);
count = SPDK_COUNTOF(ids);
rc = spdk_blob_get_clones(bs, snapshotid, ids, &count);
CU_ASSERT(rc == 0);
CU_ASSERT(count == 1);
CU_ASSERT(ids[0] == blobid);
/* The in-progress marker must not be present on the snapshot. */
rc = spdk_blob_get_xattr_value(snapshot, SNAPSHOT_IN_PROGRESS, &value, &value_len);
CU_ASSERT(rc != 0);
spdk_blob_close(snapshot, blob_op_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
created = true;
} else {
/* No usable snapshot after reload: the blob must have no parent snapshot
 * and must not be flagged as thin provisioned. */
CU_ASSERT(spdk_blob_get_parent_snapshot(bs, blobid) == SPDK_BLOBID_INVALID);
CU_ASSERT(!(blob->invalid_flags & SPDK_BLOB_THIN_PROV));
}
spdk_blob_close(blob, blob_op_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
/* Reload blobstore to have the same starting conditions (as the previous blobstore load
 * may trigger cleanup after power failure or may not) */
spdk_bs_unload(g_bs, bs_op_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
dev = init_dev();
spdk_bs_load(dev, NULL, bs_op_with_handle_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
SPDK_CU_ASSERT_FATAL(g_bs != NULL);
bs = g_bs;
/* Raise the threshold for the next attempt. */
thresholds.general_threshold++;
}
/* Final teardown: unload the blobstore and clear the global handle. */
spdk_bs_unload(g_bs, bs_op_complete, NULL);
poll_threads();
CU_ASSERT(g_bserrno == 0);
g_bs = NULL;
}
static void
test_io_write(struct spdk_bs_dev *dev, struct spdk_blob *blob, struct spdk_io_channel *channel)
{
@ -7317,6 +7431,8 @@ int main(int argc, char **argv)
CU_add_test(suite, "blob_relations2", blob_relations2) == NULL ||
CU_add_test(suite, "blob_delete_snapshot_power_failure",
blob_delete_snapshot_power_failure) == NULL ||
CU_add_test(suite, "blob_create_snapshot_power_failure",
blob_create_snapshot_power_failure) == NULL ||
CU_add_test(suite, "blob_inflate_rw", blob_inflate_rw) == NULL ||
CU_add_test(suite, "blob_snapshot_freeze_io", blob_snapshot_freeze_io) == NULL ||
CU_add_test(suite, "blob_operation_split_rw", blob_operation_split_rw) == NULL ||