blob: Make the files persist across power failure.

We only sync the metadata and data during the runtime of the blobstore, which
means we only update the used md bitmap and used clusters bitmap in memory.
If the system crashes, we have no chance to sync the used md bitmap and
used clusters bitmap to disk, so the next time we try to load the
blobstore, all the data will be lost. This patch adds the logic to recover the
valid data after a dirty shutdown. We go through all the metadata pages
to find all valid data and rebuild them.

Change-Id: Ieb7c5f932206b1b68fdde0cee35f2d2cb3a4f309
Signed-off-by: Cunyin Chang <cunyin.chang@intel.com>
Reviewed-on: https://review.gerrithub.io/376470
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Cunyin Chang 2017-08-31 10:33:39 +08:00 committed by Jim Harris
parent e739a50f48
commit 9f891d14ce
2 changed files with 509 additions and 25 deletions

View File

@ -1455,6 +1455,10 @@ struct spdk_bs_load_ctx {
struct spdk_bs_super_block *super;
struct spdk_bs_md_mask *mask;
bool in_page_chain;
uint32_t page_index;
uint32_t cur_page;
struct spdk_blob_md_page *page;
};
static void
@ -1640,15 +1644,6 @@ _spdk_bs_load_write_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno
struct spdk_bs_load_ctx *ctx = cb_arg;
uint64_t lba, lba_count, mask_size;
/* Parse the super block */
ctx->bs->cluster_sz = ctx->super->cluster_size;
ctx->bs->total_clusters = ctx->bs->dev->blockcnt / (ctx->bs->cluster_sz / ctx->bs->dev->blocklen);
ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
ctx->bs->md_start = ctx->super->md_start;
ctx->bs->md_len = ctx->super->md_len;
ctx->bs->super_blob = ctx->super->super_blob;
memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
/* Read the used pages mask */
mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
ctx->mask = spdk_dma_zmalloc(mask_size, 0x1000, NULL);
@ -1666,6 +1661,210 @@ _spdk_bs_load_write_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno
_spdk_bs_load_used_pages_cpl, ctx);
}
/*
 * Replay one metadata page while recovering from a dirty shutdown.
 *
 * Walks the descriptors stored in page->descriptors. For every cluster
 * referenced by an extent descriptor, the matching bit in bs->used_clusters
 * is set and bs->num_free_clusters is decremented. XATTR descriptors are
 * skipped. A padding descriptor with length 0 terminates the page.
 *
 * Returns 0 on success and -1 when the page cannot be trusted:
 *  - an unknown descriptor type is found,
 *  - an extent descriptor's payload does not fit inside the page,
 *  - an extent references a cluster that cannot be marked in used_clusters,
 *  - more clusters are referenced than are free,
 *  - an extent descriptor references no clusters at all.
 */
static int
_spdk_bs_load_replay_md_parse_page(const struct spdk_blob_md_page *page, struct spdk_blob_store *bs)
{
	struct spdk_blob_md_descriptor *desc;
	size_t	cur_desc = 0;

	desc = (struct spdk_blob_md_descriptor *)page->descriptors;
	while (cur_desc < sizeof(page->descriptors)) {
		if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
			if (desc->length == 0) {
				/* If padding and length are 0, this terminates the page */
				break;
			}
		} else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT) {
			struct spdk_blob_md_descriptor_extent	*desc_extent;
			unsigned int				i, j;
			unsigned int				cluster_count = 0;

			/*
			 * The descriptor header is known to be inside the page (see the
			 * check at the bottom of the loop), but the payload length comes
			 * from disk. Refuse to walk an extent array that would run past
			 * the end of the descriptor area.
			 */
			if (desc->length > sizeof(page->descriptors) - cur_desc - sizeof(*desc)) {
				return -1;
			}

			desc_extent = (struct spdk_blob_md_descriptor_extent *)desc;

			for (i = 0; i < desc_extent->length / sizeof(desc_extent->extents[0]); i++) {
				for (j = 0; j < desc_extent->extents[i].length; j++) {
					/*
					 * A failure here means the cluster index is outside the
					 * bit array; keep the free-cluster count honest by
					 * rejecting the page instead of ignoring the error.
					 */
					if (spdk_bit_array_set(bs->used_clusters,
							       desc_extent->extents[i].cluster_idx + j) < 0) {
						return -1;
					}
					if (bs->num_free_clusters == 0) {
						return -1;
					}
					bs->num_free_clusters--;
					cluster_count++;
				}
			}

			if (cluster_count == 0) {
				return -1;
			}
		} else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
			/* Skip this item */
		} else {
			/* Error */
			return -1;
		}

		/* Advance to the next descriptor */
		cur_desc += sizeof(*desc) + desc->length;
		if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
			/* Not enough room left for another descriptor header */
			break;
		}
		desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
	}

	return 0;
}
static bool _spdk_bs_load_cur_md_page_valid(struct spdk_bs_load_ctx *ctx)
{
uint32_t crc;
crc = _spdk_blob_md_page_calc_crc(ctx->page);
if (crc != ctx->page->crc) {
return false;
}
if (_spdk_bs_page_to_blobid(ctx->cur_page) != ctx->page->id) {
return false;
}
return true;
}
/*
 * Forward declaration: the read-completion callback defined further down
 * calls this function to fetch the next metadata page, and this function in
 * turn registers that callback, so one of the two must be declared early.
 */
static void
_spdk_bs_load_replay_cur_md_page(spdk_bs_sequence_t *seq, void *cb_arg);
/*
 * Completion callback for writing the used-clusters mask at the end of
 * recovery. Releases the mask and super block buffers, finishes the sequence
 * with the status of the write, and frees the load context.
 */
static void
_spdk_bs_load_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
{
	struct spdk_bs_load_ctx *load_ctx = cb_arg;

	/* DMA buffers are no longer needed once the mask has been written */
	spdk_dma_free(load_ctx->mask);
	spdk_dma_free(load_ctx->super);

	spdk_bs_sequence_finish(seq, bserrno);

	free(load_ctx);
}
/*
 * Completion callback for writing the used-md-pages mask during recovery.
 * Frees the mask buffer and continues with writing the used-clusters mask.
 * NOTE(review): bserrno is not inspected here - confirm that is intended.
 */
static void
_spdk_bs_load_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
{
	struct spdk_bs_load_ctx *load_ctx = cb_arg;

	spdk_dma_free(load_ctx->mask);

	/* Next step of the recovery chain: persist the cluster bitmap */
	_spdk_bs_write_used_clusters(seq, load_ctx, _spdk_bs_load_write_used_clusters_cpl);
}
/*
 * Start persisting the rebuilt bitmaps: write the used-md-pages mask first;
 * its completion callback then writes the used-clusters mask.
 * The bserrno argument is accepted for signature compatibility and unused.
 */
static void
_spdk_bs_load_write_used_md(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
{
	void *load_ctx = cb_arg;

	_spdk_bs_write_used_md(seq, load_ctx, _spdk_bs_load_write_used_pages_cpl);
}
/*
 * Completion callback for reading one metadata page during recovery.
 *
 * If the page is valid and is either the first page of a blob
 * (sequence_num == 0) or reached by following a chain, it is marked in
 * bs->used_md_pages and its descriptors are replayed. When the page links to
 * another page, that page is read next. Otherwise the scan resumes at the
 * next page index that has not been marked used yet. Once every page up to
 * super->md_len has been visited, the page buffer is released and the
 * rebuilt bitmaps are written out.
 *
 * On a read error or an unparsable page, all resources owned by the load
 * context - including the page buffer - are released and the sequence is
 * finished with an error.
 */
static void
_spdk_bs_load_replay_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
{
	struct spdk_bs_load_ctx *ctx = cb_arg;
	uint32_t page_num;

	if (bserrno != 0) {
		/* The page buffer was allocated before the first read; free it too */
		spdk_dma_free(ctx->page);
		spdk_dma_free(ctx->super);
		_spdk_bs_free(ctx->bs);
		free(ctx);
		spdk_bs_sequence_finish(seq, bserrno);
		return;
	}

	page_num = ctx->cur_page;
	if (_spdk_bs_load_cur_md_page_valid(ctx) == true) {
		if (ctx->page->sequence_num == 0 || ctx->in_page_chain == true) {
			spdk_bit_array_set(ctx->bs->used_md_pages, page_num);
			if (_spdk_bs_load_replay_md_parse_page(ctx->page, ctx->bs)) {
				/* Same cleanup as above, including the page buffer */
				spdk_dma_free(ctx->page);
				spdk_dma_free(ctx->super);
				_spdk_bs_free(ctx->bs);
				free(ctx);
				spdk_bs_sequence_finish(seq, -EILSEQ);
				return;
			}
			if (ctx->page->next != SPDK_INVALID_MD_PAGE) {
				/* Follow the chain before going back to the linear scan */
				ctx->in_page_chain = true;
				ctx->cur_page = ctx->page->next;
				_spdk_bs_load_replay_cur_md_page(seq, cb_arg);
				return;
			}
		}
	}

	ctx->in_page_chain = false;

	/* Skip pages already claimed while following a chain */
	do {
		ctx->page_index++;
	} while (spdk_bit_array_get(ctx->bs->used_md_pages, ctx->page_index) == true);

	if (ctx->page_index < ctx->super->md_len) {
		ctx->cur_page = ctx->page_index;
		_spdk_bs_load_replay_cur_md_page(seq, cb_arg);
	} else {
		/* Every metadata page has been visited */
		spdk_dma_free(ctx->page);
		_spdk_bs_load_write_used_md(seq, ctx, bserrno);
	}
}
/*
 * Issue a read of the metadata page selected by ctx->cur_page into
 * ctx->page. The page is located relative to super->md_start, and
 * _spdk_bs_load_replay_md_cpl is invoked when the read completes.
 */
static void
_spdk_bs_load_replay_cur_md_page(spdk_bs_sequence_t *seq, void *cb_arg)
{
	struct spdk_bs_load_ctx *load_ctx = cb_arg;
	uint64_t page_lba;

	/* The page to read must lie inside the metadata region */
	assert(load_ctx->cur_page < load_ctx->super->md_len);

	page_lba = _spdk_bs_page_to_lba(load_ctx->bs,
					load_ctx->super->md_start + load_ctx->cur_page);

	spdk_bs_sequence_read(seq, load_ctx->page, page_lba,
			      _spdk_bs_byte_to_lba(load_ctx->bs, SPDK_BS_PAGE_SIZE),
			      _spdk_bs_load_replay_md_cpl, load_ctx);
}
/*
 * Begin replaying the metadata region: allocate a one-page DMA buffer for
 * reading metadata pages and start the scan at page 0. If the buffer cannot
 * be allocated, the load is aborted with -ENOMEM.
 */
static void
_spdk_bs_load_replay_md(spdk_bs_sequence_t *seq, void *cb_arg)
{
	struct spdk_bs_load_ctx *load_ctx = cb_arg;

	load_ctx->page = spdk_dma_zmalloc(SPDK_BS_PAGE_SIZE, SPDK_BS_PAGE_SIZE, NULL);
	if (load_ctx->page == NULL) {
		spdk_dma_free(load_ctx->super);
		_spdk_bs_free(load_ctx->bs);
		free(load_ctx);
		spdk_bs_sequence_finish(seq, -ENOMEM);
		return;
	}

	/* Both the linear scan position and the page being read start at 0 */
	load_ctx->page_index = 0;
	load_ctx->cur_page = 0;

	_spdk_bs_load_replay_cur_md_page(seq, cb_arg);
}
/*
 * Entry point for recovery after a dirty shutdown.
 *
 * Sizes the in-memory used_md_pages bitmap to super->md_len and the
 * used_clusters bitmap to bs->total_clusters, marks every cluster as free,
 * and then starts replaying the metadata pages. If either bitmap cannot be
 * resized, the load is aborted with -ENOMEM.
 */
static void
_spdk_bs_recover(spdk_bs_sequence_t *seq, void *cb_arg)
{
	struct spdk_bs_load_ctx *load_ctx = cb_arg;

	/* The second resize is attempted only when the first one succeeded */
	if (spdk_bit_array_resize(&load_ctx->bs->used_md_pages, load_ctx->super->md_len) < 0 ||
	    spdk_bit_array_resize(&load_ctx->bs->used_clusters, load_ctx->bs->total_clusters) < 0) {
		spdk_dma_free(load_ctx->super);
		_spdk_bs_free(load_ctx->bs);
		free(load_ctx);
		spdk_bs_sequence_finish(seq, -ENOMEM);
		return;
	}

	/* Start from "everything free"; the replay claims clusters as it finds them */
	load_ctx->bs->num_free_clusters = load_ctx->bs->total_clusters;
	_spdk_bs_load_replay_md(seq, cb_arg);
}
static void
_spdk_bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
{
@ -1715,23 +1914,21 @@ _spdk_bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
return;
}
if (ctx->super->clean != 1) {
/* TODO: ONLY CLEAN SHUTDOWN IS CURRENTLY SUPPORTED.
* All of the necessary data to recover is available
* on disk - the code just has not been written yet.
*/
assert(false);
spdk_dma_free(ctx->super);
_spdk_bs_free(ctx->bs);
free(ctx);
spdk_bs_sequence_finish(seq, -EILSEQ);
return;
}
/* Parse the super block */
ctx->bs->cluster_sz = ctx->super->cluster_size;
ctx->bs->total_clusters = ctx->bs->dev->blockcnt / (ctx->bs->cluster_sz / ctx->bs->dev->blocklen);
ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
ctx->bs->md_start = ctx->super->md_start;
ctx->bs->md_len = ctx->super->md_len;
ctx->bs->super_blob = ctx->super->super_blob;
memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
ctx->super->clean = 0;
_spdk_bs_write_super(seq, ctx->bs, ctx->super, _spdk_bs_load_write_super_cpl, ctx);
if (ctx->super->clean == 1) {
ctx->super->clean = 0;
_spdk_bs_write_super(seq, ctx->bs, ctx->super, _spdk_bs_load_write_super_cpl, ctx);
} else {
_spdk_bs_recover(seq, ctx);
}
}
void

View File

@ -1449,6 +1449,292 @@ super_block_crc(void)
g_scheduler_delay = false;
}
/* For blob dirty shutdown test case we do the following sub-test cases:
 * 1 Initialize new blob store and create 1 blob with some xattrs, then we
 *   dirty shutdown and reload the blob store and verify the xattrs.
 * 2 Resize the blob from 10 clusters to 20 clusters and then dirty shutdown,
 *   reload the blob store and verify the clusters number.
 * 3 Create the second blob and then dirty shutdown, reload the blob store
 *   and verify the second blob.
 * 4 Delete the second blob and then dirty shutdown, reload the blob store
 *   and verify the second blob is invalid.
 * 5 Create the second blob again and also create the third blob, modify the
 *   md of second blob which makes the md invalid, and then dirty shutdown,
 *   reload the blob store and verify the second blob, it should be invalid,
 *   and also verify the third blob, it should be correct.
 *
 * A "dirty shutdown" is simulated by calling _spdk_bs_free() on the blob
 * store instead of spdk_bs_unload().
 *
 * Handles that are dereferenced afterwards are checked with
 * SPDK_CU_ASSERT_FATAL so that a failed load/open fails the test instead of
 * crashing it, and the result of every blob close is verified.
 */
static void
blob_dirty_shutdown(void)
{
	int rc;
	int index;
	struct spdk_bs_dev *dev;
	spdk_blob_id blobid1, blobid2, blobid3;
	struct spdk_blob *blob;
	uint64_t length;
	const void *value;
	size_t value_len;
	uint32_t page_num;
	struct spdk_blob_md_page *page;
	struct spdk_bs_opts opts;

	dev = init_dev();
	spdk_bs_opts_init(&opts);
	/* Initialize a new blob store */
	spdk_bs_init(dev, NULL, bs_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_bs != NULL);

	/* Create first blob */
	spdk_bs_md_create_blob(g_bs, blob_op_with_id_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
	blobid1 = g_blobid;

	spdk_bs_md_open_blob(g_bs, blobid1, blob_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_blob != NULL);
	blob = g_blob;

	/* Set some xattrs */
	rc = spdk_blob_md_set_xattr(blob, "name", "log.txt", strlen("log.txt") + 1);
	CU_ASSERT(rc == 0);

	length = 2345;
	rc = spdk_blob_md_set_xattr(blob, "length", &length, sizeof(length));
	CU_ASSERT(rc == 0);

	/* Resize the blob */
	rc = spdk_bs_md_resize_blob(blob, 10);
	CU_ASSERT(rc == 0);

	spdk_bs_md_close_blob(&blob, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	blob = NULL;
	g_blob = NULL;
	g_blobid = SPDK_BLOBID_INVALID;

	/* Dirty shutdown */
	_spdk_bs_free(g_bs);

	/* reload blobstore */
	dev = init_dev();
	spdk_bs_opts_init(&opts);
	spdk_bs_load(dev, &opts, bs_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_bs != NULL);

	spdk_bs_md_open_blob(g_bs, blobid1, blob_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_blob != NULL);
	blob = g_blob;

	/* Get the xattrs */
	value = NULL;
	rc = spdk_bs_md_get_xattr_value(blob, "length", &value, &value_len);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(value != NULL);
	CU_ASSERT(*(uint64_t *)value == length);
	CU_ASSERT(value_len == 8);
	CU_ASSERT(spdk_blob_get_num_clusters(blob) == 10);

	/* Resize the blob */
	rc = spdk_bs_md_resize_blob(blob, 20);
	CU_ASSERT(rc == 0);

	spdk_bs_md_close_blob(&blob, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	blob = NULL;
	g_blob = NULL;
	g_blobid = SPDK_BLOBID_INVALID;

	/* Dirty shutdown */
	_spdk_bs_free(g_bs);

	/* reload the blobstore */
	dev = init_dev();
	spdk_bs_opts_init(&opts);
	/* Load an existing blob store */
	spdk_bs_load(dev, &opts, bs_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_bs != NULL);

	spdk_bs_md_open_blob(g_bs, blobid1, blob_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_blob != NULL);
	blob = g_blob;
	CU_ASSERT(spdk_blob_get_num_clusters(blob) == 20);

	spdk_bs_md_close_blob(&blob, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	blob = NULL;
	g_blob = NULL;
	g_blobid = SPDK_BLOBID_INVALID;

	/* Create second blob */
	spdk_bs_md_create_blob(g_bs, blob_op_with_id_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
	blobid2 = g_blobid;

	spdk_bs_md_open_blob(g_bs, blobid2, blob_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_blob != NULL);
	blob = g_blob;

	/* Set some xattrs */
	rc = spdk_blob_md_set_xattr(blob, "name", "log1.txt", strlen("log1.txt") + 1);
	CU_ASSERT(rc == 0);

	length = 5432;
	rc = spdk_blob_md_set_xattr(blob, "length", &length, sizeof(length));
	CU_ASSERT(rc == 0);

	/* Resize the blob */
	rc = spdk_bs_md_resize_blob(blob, 10);
	CU_ASSERT(rc == 0);

	spdk_bs_md_close_blob(&blob, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	blob = NULL;
	g_blob = NULL;
	g_blobid = SPDK_BLOBID_INVALID;

	/* Dirty shutdown */
	_spdk_bs_free(g_bs);

	/* reload the blobstore */
	dev = init_dev();
	spdk_bs_opts_init(&opts);
	spdk_bs_load(dev, &opts, bs_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_bs != NULL);

	spdk_bs_md_open_blob(g_bs, blobid2, blob_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_blob != NULL);
	blob = g_blob;

	/* Get the xattrs */
	value = NULL;
	rc = spdk_bs_md_get_xattr_value(blob, "length", &value, &value_len);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(value != NULL);
	CU_ASSERT(*(uint64_t *)value == length);
	CU_ASSERT(value_len == 8);
	CU_ASSERT(spdk_blob_get_num_clusters(blob) == 10);

	spdk_bs_md_close_blob(&blob, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	spdk_bs_md_delete_blob(g_bs, blobid2, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);

	/* Dirty shutdown */
	_spdk_bs_free(g_bs);

	/* reload the blobstore */
	dev = init_dev();
	spdk_bs_opts_init(&opts);
	spdk_bs_load(dev, &opts, bs_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_bs != NULL);

	/* The deleted blob must not come back after recovery */
	spdk_bs_md_open_blob(g_bs, blobid2, blob_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno != 0);
	CU_ASSERT(g_blob == NULL);

	spdk_bs_md_open_blob(g_bs, blobid1, blob_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_blob != NULL);
	spdk_bs_md_close_blob(&g_blob, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);

	/* Clean shutdown this time */
	spdk_bs_unload(g_bs, bs_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	g_bs = NULL;

	/* reload the blobstore */
	dev = init_dev();
	spdk_bs_opts_init(&opts);
	spdk_bs_load(dev, &opts, bs_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_bs != NULL);

	/* Create second blob */
	spdk_bs_md_create_blob(g_bs, blob_op_with_id_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
	blobid2 = g_blobid;

	/* Create third blob */
	spdk_bs_md_create_blob(g_bs, blob_op_with_id_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
	blobid3 = g_blobid;

	spdk_bs_md_open_blob(g_bs, blobid2, blob_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_blob != NULL);
	blob = g_blob;

	/* Set some xattrs for second blob */
	rc = spdk_blob_md_set_xattr(blob, "name", "log1.txt", strlen("log1.txt") + 1);
	CU_ASSERT(rc == 0);

	length = 5432;
	rc = spdk_blob_md_set_xattr(blob, "length", &length, sizeof(length));
	CU_ASSERT(rc == 0);

	spdk_bs_md_close_blob(&blob, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	blob = NULL;
	g_blob = NULL;
	g_blobid = SPDK_BLOBID_INVALID;

	spdk_bs_md_open_blob(g_bs, blobid3, blob_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_blob != NULL);
	blob = g_blob;

	/* Set some xattrs for third blob */
	rc = spdk_blob_md_set_xattr(blob, "name", "log2.txt", strlen("log2.txt") + 1);
	CU_ASSERT(rc == 0);

	length = 5432;
	rc = spdk_blob_md_set_xattr(blob, "length", &length, sizeof(length));
	CU_ASSERT(rc == 0);

	spdk_bs_md_close_blob(&blob, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	blob = NULL;
	g_blob = NULL;
	g_blobid = SPDK_BLOBID_INVALID;

	/* Mark second blob as invalid */
	page_num = _spdk_bs_blobid_to_page(blobid2);

	/*
	 * Patch the on-"disk" copy of the blob's first md page directly in the
	 * device buffer: a non-zero sequence_num with a matching CRC.
	 * NOTE(review): the offset multiplies a page number by
	 * DEV_BUFFER_BLOCKLEN, which presumably equals the md page size in this
	 * test setup - confirm.
	 */
	index = DEV_BUFFER_BLOCKLEN * (g_bs->md_start + page_num);
	page = (struct spdk_blob_md_page *)&g_dev_buffer[index];
	page->sequence_num = 1;
	page->crc = _spdk_blob_md_page_calc_crc(page);

	/* Dirty shutdown */
	_spdk_bs_free(g_bs);

	/* reload the blobstore */
	dev = init_dev();
	spdk_bs_opts_init(&opts);
	spdk_bs_load(dev, &opts, bs_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_bs != NULL);

	/* The tampered blob must be rejected... */
	spdk_bs_md_open_blob(g_bs, blobid2, blob_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno != 0);
	CU_ASSERT(g_blob == NULL);

	/* ...while the untouched third blob is still usable */
	spdk_bs_md_open_blob(g_bs, blobid3, blob_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_blob != NULL);
	blob = g_blob;

	spdk_bs_md_close_blob(&blob, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	blob = NULL;
	g_blob = NULL;
	g_blobid = SPDK_BLOBID_INVALID;

	spdk_bs_unload(g_bs, bs_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	g_bs = NULL;
}
int main(int argc, char **argv)
{
CU_pSuite suite = NULL;
@ -1486,7 +1772,8 @@ int main(int argc, char **argv)
CU_add_test(suite, "bs_super_block", bs_super_block) == NULL ||
CU_add_test(suite, "blob_serialize", blob_serialize) == NULL ||
CU_add_test(suite, "blob_crc", blob_crc) == NULL ||
CU_add_test(suite, "super_block_crc", super_block_crc) == NULL
CU_add_test(suite, "super_block_crc", super_block_crc) == NULL ||
CU_add_test(suite, "blob_dirty_shutdown", blob_dirty_shutdown) == NULL
) {
CU_cleanup_registry();
return CU_get_error();