blobstore: Cleanup after power failure while creating snapshot
Currently we are missing a cleanup routine for the case when a power failure interrupts creating a snapshot. This patch adds such a routine. For the case where we find a blob with a parent snapshot ID matching the newly created snapshot, we can finish the whole process during recovery by proceeding forward: setting the snapshot as read only, removing the xattr and syncing. We should remove the snapshot only if there is no blob with its parent pointing at the snapshot. Fixes github issue #760 Signed-off-by: Maciej Szwed <maciej.szwed@intel.com> Change-Id: I2f0e298164e07a2b4dfa5367e8878facef640702 Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/455216 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com> Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
This commit is contained in:
parent
a6b199224c
commit
e8356fd233
@ -2782,6 +2782,7 @@ _spdk_bs_update_corrupted_blob(void *cb_arg, int bserrno)
|
||||
|
||||
ctx->blob->md_ro = false;
|
||||
_spdk_blob_remove_xattr(ctx->blob, SNAPSHOT_PENDING_REMOVAL, true);
|
||||
_spdk_blob_remove_xattr(ctx->blob, SNAPSHOT_IN_PROGRESS, true);
|
||||
spdk_blob_set_read_only(ctx->blob);
|
||||
|
||||
if (ctx->iter_cb_fn) {
|
||||
@ -2804,10 +2805,12 @@ _spdk_bs_examine_clone(void *cb_arg, struct spdk_blob *blob, int bserrno)
|
||||
}
|
||||
|
||||
if (blob->parent_id == ctx->blob->id) {
|
||||
/* Power failure occured before updating clone - keep snapshot */
|
||||
/* Power failure occured before updating clone (snapshot delete case)
|
||||
* or after updating clone (creating snapshot case) - keep snapshot */
|
||||
spdk_blob_close(blob, _spdk_bs_update_corrupted_blob, ctx);
|
||||
} else {
|
||||
/* Power failure occured after updating clone - remove snapshot */
|
||||
/* Power failure occured after updating clone (snapshot delete case)
|
||||
* or before updating clone (creating snapshot case) - remove snapshot */
|
||||
spdk_blob_close(blob, _spdk_bs_delete_corrupted_blob, ctx);
|
||||
}
|
||||
}
|
||||
@ -2826,13 +2829,17 @@ _spdk_bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno)
|
||||
* ones. If it is not corrupted just process it */
|
||||
rc = _spdk_blob_get_xattr_value(blob, SNAPSHOT_PENDING_REMOVAL, &value, &len, true);
|
||||
if (rc != 0) {
|
||||
/* Not corrupted - process it and continue with iterating through blobs */
|
||||
if (ctx->iter_cb_fn) {
|
||||
ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0);
|
||||
rc = _spdk_blob_get_xattr_value(blob, SNAPSHOT_IN_PROGRESS, &value, &len, true);
|
||||
if (rc != 0) {
|
||||
/* Not corrupted - process it and continue with iterating through blobs */
|
||||
if (ctx->iter_cb_fn) {
|
||||
ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0);
|
||||
}
|
||||
_spdk_bs_blob_list_add(blob);
|
||||
spdk_bs_iter_next(ctx->bs, blob, _spdk_bs_load_iter, ctx);
|
||||
return;
|
||||
}
|
||||
_spdk_bs_blob_list_add(blob);
|
||||
spdk_bs_iter_next(ctx->bs, blob, _spdk_bs_load_iter, ctx);
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
assert(len == sizeof(spdk_blob_id));
|
||||
|
@ -6355,6 +6355,120 @@ blob_delete_snapshot_power_failure(void)
|
||||
g_bs = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
blob_create_snapshot_power_failure(void)
|
||||
{
|
||||
struct spdk_blob_store *bs;
|
||||
struct spdk_bs_dev *dev;
|
||||
struct spdk_blob_opts opts;
|
||||
struct spdk_blob *blob, *snapshot;
|
||||
struct spdk_power_failure_thresholds thresholds = {};
|
||||
spdk_blob_id blobid, snapshotid;
|
||||
const void *value;
|
||||
size_t value_len;
|
||||
size_t count;
|
||||
spdk_blob_id ids[3] = {};
|
||||
int rc;
|
||||
bool created = false;
|
||||
|
||||
dev = init_dev();
|
||||
|
||||
spdk_bs_init(dev, NULL, bs_op_with_handle_complete, NULL);
|
||||
poll_threads();
|
||||
CU_ASSERT(g_bserrno == 0);
|
||||
SPDK_CU_ASSERT_FATAL(g_bs != NULL);
|
||||
bs = g_bs;
|
||||
|
||||
/* Create blob */
|
||||
spdk_blob_opts_init(&opts);
|
||||
opts.num_clusters = 10;
|
||||
|
||||
spdk_bs_create_blob_ext(bs, &opts, blob_op_with_id_complete, NULL);
|
||||
poll_threads();
|
||||
CU_ASSERT(g_bserrno == 0);
|
||||
CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
|
||||
blobid = g_blobid;
|
||||
|
||||
thresholds.general_threshold = 1;
|
||||
while (!created) {
|
||||
dev_set_power_failure_thresholds(thresholds);
|
||||
|
||||
/* Create snapshot */
|
||||
spdk_bs_create_snapshot(bs, blobid, NULL, blob_op_with_id_complete, NULL);
|
||||
poll_threads();
|
||||
CU_ASSERT(g_bserrno != 0);
|
||||
snapshotid = g_blobid;
|
||||
|
||||
spdk_bs_unload(g_bs, bs_op_complete, NULL);
|
||||
poll_threads();
|
||||
|
||||
dev_reset_power_failure_event();
|
||||
|
||||
dev = init_dev();
|
||||
spdk_bs_load(dev, NULL, bs_op_with_handle_complete, NULL);
|
||||
poll_threads();
|
||||
CU_ASSERT(g_bserrno == 0);
|
||||
SPDK_CU_ASSERT_FATAL(g_bs != NULL);
|
||||
bs = g_bs;
|
||||
|
||||
spdk_bs_open_blob(bs, blobid, blob_op_with_handle_complete, NULL);
|
||||
poll_threads();
|
||||
CU_ASSERT(g_bserrno == 0);
|
||||
SPDK_CU_ASSERT_FATAL(g_blob != NULL);
|
||||
blob = g_blob;
|
||||
|
||||
if (snapshotid != SPDK_BLOBID_INVALID) {
|
||||
spdk_bs_open_blob(bs, snapshotid, blob_op_with_handle_complete, NULL);
|
||||
poll_threads();
|
||||
}
|
||||
|
||||
if ((snapshotid != SPDK_BLOBID_INVALID) && (g_bserrno == 0)) {
|
||||
SPDK_CU_ASSERT_FATAL(g_blob != NULL);
|
||||
snapshot = g_blob;
|
||||
CU_ASSERT(spdk_blob_get_parent_snapshot(bs, blobid) == snapshotid);
|
||||
count = SPDK_COUNTOF(ids);
|
||||
rc = spdk_blob_get_clones(bs, snapshotid, ids, &count);
|
||||
CU_ASSERT(rc == 0);
|
||||
CU_ASSERT(count == 1);
|
||||
CU_ASSERT(ids[0] == blobid);
|
||||
rc = spdk_blob_get_xattr_value(snapshot, SNAPSHOT_IN_PROGRESS, &value, &value_len);
|
||||
CU_ASSERT(rc != 0);
|
||||
|
||||
spdk_blob_close(snapshot, blob_op_complete, NULL);
|
||||
poll_threads();
|
||||
CU_ASSERT(g_bserrno == 0);
|
||||
created = true;
|
||||
} else {
|
||||
CU_ASSERT(spdk_blob_get_parent_snapshot(bs, blobid) == SPDK_BLOBID_INVALID);
|
||||
CU_ASSERT(!(blob->invalid_flags & SPDK_BLOB_THIN_PROV));
|
||||
}
|
||||
|
||||
spdk_blob_close(blob, blob_op_complete, NULL);
|
||||
poll_threads();
|
||||
CU_ASSERT(g_bserrno == 0);
|
||||
|
||||
/* Reload blobstore to have the same starting conditions (as the previous blobstore load
|
||||
* may trigger cleanup after power failure or may not) */
|
||||
spdk_bs_unload(g_bs, bs_op_complete, NULL);
|
||||
poll_threads();
|
||||
CU_ASSERT(g_bserrno == 0);
|
||||
|
||||
dev = init_dev();
|
||||
spdk_bs_load(dev, NULL, bs_op_with_handle_complete, NULL);
|
||||
poll_threads();
|
||||
CU_ASSERT(g_bserrno == 0);
|
||||
SPDK_CU_ASSERT_FATAL(g_bs != NULL);
|
||||
bs = g_bs;
|
||||
|
||||
thresholds.general_threshold++;
|
||||
}
|
||||
|
||||
spdk_bs_unload(g_bs, bs_op_complete, NULL);
|
||||
poll_threads();
|
||||
CU_ASSERT(g_bserrno == 0);
|
||||
g_bs = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
test_io_write(struct spdk_bs_dev *dev, struct spdk_blob *blob, struct spdk_io_channel *channel)
|
||||
{
|
||||
@ -7317,6 +7431,8 @@ int main(int argc, char **argv)
|
||||
CU_add_test(suite, "blob_relations2", blob_relations2) == NULL ||
|
||||
CU_add_test(suite, "blob_delete_snapshot_power_failure",
|
||||
blob_delete_snapshot_power_failure) == NULL ||
|
||||
CU_add_test(suite, "blob_create_snapshot_power_failure",
|
||||
blob_create_snapshot_power_failure) == NULL ||
|
||||
CU_add_test(suite, "blob_inflate_rw", blob_inflate_rw) == NULL ||
|
||||
CU_add_test(suite, "blob_snapshot_freeze_io", blob_snapshot_freeze_io) == NULL ||
|
||||
CU_add_test(suite, "blob_operation_split_rw", blob_operation_split_rw) == NULL ||
|
||||
|
Loading…
Reference in New Issue
Block a user