From 40c911b9572fe5cda7eccb1408bae9e9efef9cad Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Tue, 12 Dec 2017 15:19:46 -0700 Subject: [PATCH] blob: add used blobid bit array for valid blobids This can be used for two purposes: 1) more quickly iterate the blob list, avoiding metadata pages that are valid but not the first page in the blob's metadata list 2) close races between delete and open operations - now we can clear the bit in the blobid bit array when the delete operation is in progress, ensuring no one else can try to open the blob Signed-off-by: Jim Harris Change-Id: I3904648fd6fa656cb98c9e17ea763ed5a84ef537 Reviewed-on: https://review.gerrithub.io/391695 Tested-by: SPDK Automated Test System Reviewed-by: Ben Walker Reviewed-by: Paul Luse Reviewed-by: Daniel Verkamp --- doc/blob.md | 5 +- lib/blob/blobstore.c | 182 +++++++++++++++++++++++++--- lib/blob/blobstore.h | 7 +- test/unit/lib/blob/blob.c/blob_ut.c | 36 ++++++ 4 files changed, 211 insertions(+), 19 deletions(-) diff --git a/doc/blob.md b/doc/blob.md index 1bb016929..531079a8d 100644 --- a/doc/blob.md +++ b/doc/blob.md @@ -176,7 +176,8 @@ It contains basic information about the blobstore. The metadata region is the remainder of cluster 0 and may extend to additional clusters. ::= - + + ::= u32 ::= u32 # Length of this super block, in bytes. Starts from the # beginning of this structure. @@ -185,6 +186,8 @@ is the remainder of cluster 0 and may extend to additional clusters. ::= u64 # Metadata start location, in pages ::= u64 # Metadata length, in pages + ::= u32 # Start of bitmask of valid blobids (in pages) + ::= u32 # Lenget of bitmask of valid blobids (in pages) ::= u32 # Crc for super block The `` data contains parameters specified by the user when the blob diff --git a/lib/blob/blobstore.c b/lib/blob/blobstore.c index f7ba8773f..dd36a992f 100644 --- a/lib/blob/blobstore.c +++ b/lib/blob/blobstore.c @@ -1455,6 +1455,7 @@ _spdk_bs_dev_destroy(void *io_device) _spdk_blob_free(blob); } + spdk_bit_array_free(&bs->used_blobids); spdk_bit_array_free(&bs->used_md_pages); spdk_bit_array_free(&bs->used_clusters); /* @@ -1543,12 +1544,14 @@ _spdk_bs_alloc(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts) /* The metadata is assumed to be at least 1 page */ bs->used_md_pages = spdk_bit_array_create(1); + bs->used_blobids = spdk_bit_array_create(0); spdk_io_device_register(bs, _spdk_bs_channel_create, _spdk_bs_channel_destroy, sizeof(struct spdk_bs_channel)); rc = spdk_bs_register_md_thread(bs); if (rc == -1) { spdk_io_device_unregister(bs, NULL); + spdk_bit_array_free(&bs->used_blobids); spdk_bit_array_free(&bs->used_md_pages); spdk_bit_array_free(&bs->used_clusters); free(bs); @@ -1664,10 +1667,85 @@ _spdk_bs_write_used_md(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl spdk_bs_sequence_write(seq, ctx->mask, lba, lba_count, cb_fn, arg); } +static void +_spdk_bs_write_used_blobids(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn) +{ + struct spdk_bs_load_ctx *ctx = arg; + uint64_t mask_size, lba, lba_count; + + if (ctx->super->used_blobid_mask_len == 0) { + /* + * This is a pre-v3 on-disk format where the blobid mask does not get + * written to disk. + */ + cb_fn(seq, arg, 0); + return; + } + + mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE; + ctx->mask = spdk_dma_zmalloc(mask_size, 0x1000, NULL); + if (!ctx->mask) { + _spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM); + return; + } + + ctx->mask->type = SPDK_MD_MASK_TYPE_USED_BLOBIDS; + ctx->mask->length = ctx->super->md_len; + assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_blobids)); + + _spdk_bs_set_mask(ctx->bs->used_blobids, ctx->mask); + lba = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start); + lba_count = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len); + spdk_bs_sequence_write(seq, ctx->mask, lba, lba_count, cb_fn, arg); +} + +static void +_spdk_bs_load_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + uint32_t i, j; + int rc; + + /* The type must be correct */ + assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_BLOBIDS); + + /* The length of the mask (in bits) must not be greater than + * the length of the buffer (converted to bits) */ + assert(ctx->mask->length <= (ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE * 8)); + + /* The length of the mask must be exactly equal to the size + * (in pages) of the metadata region */ + assert(ctx->mask->length == ctx->super->md_len); + + rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->mask->length); + if (rc < 0) { + spdk_dma_free(ctx->mask); + _spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM); + return; + } + + for (i = 0; i < ctx->mask->length / 8; i++) { + uint8_t segment = ctx->mask->mask[i]; + for (j = 0; segment; j++) { + if (segment & 1U) { + spdk_bit_array_set(ctx->bs->used_blobids, (i * 8) + j); + } + segment >>= 1U; + } + } + + spdk_dma_free(ctx->super); + spdk_dma_free(ctx->mask); + free(ctx); + + spdk_bs_sequence_finish(seq, bserrno); +} + static void _spdk_bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) { struct spdk_bs_load_ctx *ctx = cb_arg; + uint64_t lba, lba_count, mask_size; uint32_t i, j; int rc; @@ -1699,11 +1777,19 @@ _spdk_bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserr } } - spdk_dma_free(ctx->super); spdk_dma_free(ctx->mask); - free(ctx); - spdk_bs_sequence_finish(seq, bserrno); + /* Read the used blobids mask */ + mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE; + ctx->mask = spdk_dma_zmalloc(mask_size, 0x1000, NULL); + if (!ctx->mask) { + _spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM); + return; + } + lba = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start); + lba_count = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len); + spdk_bs_sequence_read(seq, ctx->mask, lba, lba_count, + _spdk_bs_load_used_blobids_cpl, ctx); } static void @@ -1853,14 +1939,26 @@ _spdk_bs_load_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int free(ctx); } +static void +_spdk_bs_load_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + + spdk_dma_free(ctx->mask); + ctx->mask = NULL; + + _spdk_bs_write_used_clusters(seq, cb_arg, _spdk_bs_load_write_used_clusters_cpl); +} + static void _spdk_bs_load_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) { struct spdk_bs_load_ctx *ctx = cb_arg; spdk_dma_free(ctx->mask); + ctx->mask = NULL; - _spdk_bs_write_used_clusters(seq, cb_arg, _spdk_bs_load_write_used_clusters_cpl); + _spdk_bs_write_used_blobids(seq, cb_arg, _spdk_bs_load_write_used_blobids_cpl); } static void @@ -1884,6 +1982,9 @@ _spdk_bs_load_replay_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) if (_spdk_bs_load_cur_md_page_valid(ctx) == true) { if (ctx->page->sequence_num == 0 || ctx->in_page_chain == true) { spdk_bit_array_set(ctx->bs->used_md_pages, page_num); + if (ctx->page->sequence_num == 0) { + spdk_bit_array_set(ctx->bs->used_blobids, page_num); + } if (_spdk_bs_load_replay_md_parse_page(ctx->page, ctx->bs)) { _spdk_bs_load_ctx_fail(seq, ctx, -EILSEQ); return; @@ -1959,6 +2060,12 @@ _spdk_bs_recover(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) return; } + rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->super->md_len); + if (rc < 0) { + _spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM); + return; + } + rc = spdk_bit_array_resize(&ctx->bs->used_clusters, ctx->bs->total_clusters); if (rc < 0) { _spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM); @@ -2017,11 +2124,19 @@ _spdk_bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) ctx->bs->super_blob = ctx->super->super_blob; memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype)); - if (ctx->super->clean == 1) { + if (ctx->super->clean == 0) { + _spdk_bs_recover(seq, ctx, 0); + } else if (ctx->super->used_blobid_mask_len == 0) { + /* + * Metadata is clean, but this is an old metadata format without + * a blobid mask. Clear the clean bit and then build the masks + * using _spdk_bs_recover. + */ + ctx->super->clean = 0; + _spdk_bs_write_super(seq, ctx->bs, ctx->super, _spdk_bs_recover, ctx); + } else { ctx->super->clean = 0; _spdk_bs_write_super(seq, ctx->bs, ctx->super, _spdk_bs_load_write_super_cpl, ctx); - } else { - _spdk_bs_recover(seq, ctx, 0); } } @@ -2187,6 +2302,13 @@ spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o, return; } + rc = spdk_bit_array_resize(&bs->used_blobids, bs->md_len); + if (rc < 0) { + _spdk_bs_free(bs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + ctx = calloc(1, sizeof(*ctx)); if (!ctx) { _spdk_bs_free(bs); @@ -2237,10 +2359,20 @@ spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o, SPDK_BS_PAGE_SIZE); num_md_pages += ctx->super->used_cluster_mask_len; + /* The used_blobids mask requires 1 bit per metadata page, rounded + * up to the nearest page, plus a header. + */ + ctx->super->used_blobid_mask_start = num_md_pages; + ctx->super->used_blobid_mask_len = divide_round_up(sizeof(struct spdk_bs_md_mask) + + divide_round_up(bs->md_len, 8), + SPDK_BS_PAGE_SIZE); + num_md_pages += ctx->super->used_blobid_mask_len; + /* The metadata region size was chosen above */ ctx->super->md_start = bs->md_start = num_md_pages; ctx->super->md_len = bs->md_len; num_md_pages += bs->md_len; + num_md_lba = _spdk_bs_page_to_lba(bs, num_md_pages); ctx->super->crc = _spdk_blob_md_page_calc_crc(ctx->super); @@ -2389,14 +2521,26 @@ _spdk_bs_unload_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, i _spdk_bs_write_super(seq, ctx->bs, ctx->super, _spdk_bs_unload_write_super_cpl, ctx); } +static void +_spdk_bs_unload_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + + spdk_dma_free(ctx->mask); + ctx->mask = NULL; + + _spdk_bs_write_used_clusters(seq, cb_arg, _spdk_bs_unload_write_used_clusters_cpl); +} + static void _spdk_bs_unload_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) { struct spdk_bs_load_ctx *ctx = cb_arg; spdk_dma_free(ctx->mask); + ctx->mask = NULL; - _spdk_bs_write_used_clusters(seq, cb_arg, _spdk_bs_unload_write_used_clusters_cpl); + _spdk_bs_write_used_blobids(seq, cb_arg, _spdk_bs_unload_write_used_blobids_cpl); } static void @@ -2573,6 +2717,7 @@ void spdk_bs_create_blob_ext(struct spdk_blob_store *bs, const struct spdk_blob_ cb_fn(cb_arg, 0, -ENOMEM); return; } + spdk_bit_array_set(bs->used_blobids, page_idx); spdk_bit_array_set(bs->used_md_pages, page_idx); id = _spdk_bs_page_to_blobid(page_idx); @@ -2686,6 +2831,7 @@ _spdk_bs_delete_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno) { spdk_bs_sequence_t *seq = cb_arg; struct spdk_blob_data *blob = __blob_to_data(_blob); + uint32_t page_num; if (bserrno != 0) { spdk_bs_sequence_finish(seq, bserrno); @@ -2707,6 +2853,8 @@ _spdk_bs_delete_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno) * get returned after this point by _spdk_blob_lookup(). */ TAILQ_REMOVE(&blob->bs->blobs, blob, link); + page_num = _spdk_bs_blobid_to_page(blob->id); + spdk_bit_array_clear(blob->bs->used_blobids, page_num); blob->state = SPDK_BLOB_STATE_DIRTY; blob->active.num_pages = 0; _spdk_resize_blob(blob, 0); @@ -2769,6 +2917,13 @@ void spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid, SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Opening blob %lu\n", blobid); + page_num = _spdk_bs_blobid_to_page(blobid); + if (spdk_bit_array_get(bs->used_blobids, page_num) == false) { + /* Invalid blobid */ + cb_fn(cb_arg, NULL, -ENOENT); + return; + } + blob = _spdk_blob_lookup(bs, blobid); if (blob) { blob->open_ref++; @@ -2776,13 +2931,6 @@ void spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid, return; } - page_num = _spdk_bs_blobid_to_page(blobid); - if (spdk_bit_array_get(bs->used_md_pages, page_num) == false) { - /* Invalid blobid */ - cb_fn(cb_arg, NULL, -ENOENT); - return; - } - blob = _spdk_blob_alloc(bs, blobid); if (!blob) { cb_fn(cb_arg, NULL, -ENOMEM); @@ -2996,8 +3144,8 @@ _spdk_bs_iter_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno) } ctx->page_num++; - ctx->page_num = spdk_bit_array_find_first_set(bs->used_md_pages, ctx->page_num); - if (ctx->page_num >= spdk_bit_array_capacity(bs->used_md_pages)) { + ctx->page_num = spdk_bit_array_find_first_set(bs->used_blobids, ctx->page_num); + if (ctx->page_num >= spdk_bit_array_capacity(bs->used_blobids)) { ctx->cb_fn(ctx->cb_arg, NULL, -ENOENT); free(ctx); return; diff --git a/lib/blob/blobstore.h b/lib/blob/blobstore.h index 0aae2df3a..2a47ae27c 100644 --- a/lib/blob/blobstore.h +++ b/lib/blob/blobstore.h @@ -155,6 +155,7 @@ struct spdk_blob_store { struct spdk_bit_array *used_md_pages; struct spdk_bit_array *used_clusters; + struct spdk_bit_array *used_blobids; uint32_t cluster_sz; uint64_t total_clusters; @@ -200,6 +201,7 @@ enum spdk_blob_op_type { #define SPDK_MD_MASK_TYPE_USED_PAGES 0 #define SPDK_MD_MASK_TYPE_USED_CLUSTERS 1 +#define SPDK_MD_MASK_TYPE_USED_BLOBIDS 2 struct spdk_bs_md_mask { uint8_t type; @@ -308,7 +310,10 @@ struct spdk_bs_super_block { struct spdk_bs_type bstype; /* blobstore type */ - uint8_t reserved[4020]; + uint32_t used_blobid_mask_start; /* Offset from beginning of disk, in pages */ + uint32_t used_blobid_mask_len; /* Count, in pages */ + + uint8_t reserved[4012]; uint32_t crc; }; SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_super_block) == 0x1000, "Invalid super block size"); diff --git a/test/unit/lib/blob/blob.c/blob_ut.c b/test/unit/lib/blob/blob.c/blob_ut.c index 573117a2d..a7fa2b45b 100644 --- a/test/unit/lib/blob/blob.c/blob_ut.c +++ b/test/unit/lib/blob/blob.c/blob_ut.c @@ -2183,6 +2183,7 @@ bs_version(void) struct spdk_bs_super_block *super; struct spdk_bs_dev *dev; struct spdk_bs_opts opts; + spdk_blob_id blobid; dev = init_dev(); spdk_bs_opts_init(&opts); @@ -2204,7 +2205,18 @@ bs_version(void) */ super = (struct spdk_bs_super_block *)&g_dev_buffer[0]; CU_ASSERT(super->version == SPDK_BS_VERSION); + CU_ASSERT(super->clean == 1); super->version = 2; + /* + * Version 2 metadata does not have a used blobid mask, so clear + * those fields in the super block and zero the corresponding + * region on "disk". We will use this to ensure blob IDs are + * correctly reconstructed. + */ + memset(&g_dev_buffer[super->used_blobid_mask_start * PAGE_SIZE], 0, + super->used_blobid_mask_len * PAGE_SIZE); + super->used_blobid_mask_start = 0; + super->used_blobid_mask_len = 0; super->crc = _spdk_blob_md_page_calc_crc(super); /* Load an existing blob store */ @@ -2212,6 +2224,7 @@ bs_version(void) spdk_bs_load(dev, &opts, bs_op_with_handle_complete, NULL); CU_ASSERT(g_bserrno == 0); SPDK_CU_ASSERT_FATAL(g_bs != NULL); + CU_ASSERT(super->clean == 0); /* * Create a blob - just to make sure that when we unload it @@ -2221,12 +2234,35 @@ bs_version(void) spdk_bs_create_blob(g_bs, blob_op_with_id_complete, NULL); CU_ASSERT(g_bserrno == 0); CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + blobid = g_blobid; /* Unload the blob store */ spdk_bs_unload(g_bs, bs_op_complete, NULL); CU_ASSERT(g_bserrno == 0); g_bs = NULL; CU_ASSERT(super->version == 2); + CU_ASSERT(super->used_blobid_mask_start == 0); + CU_ASSERT(super->used_blobid_mask_len == 0); + + dev = init_dev(); + spdk_bs_load(dev, &opts, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + + g_blob = NULL; + spdk_bs_open_blob(g_bs, blobid, blob_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blob != NULL); + + spdk_blob_close(g_blob, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; + CU_ASSERT(super->version == 2); + CU_ASSERT(super->used_blobid_mask_start == 0); + CU_ASSERT(super->used_blobid_mask_len == 0); } int main(int argc, char **argv)