blob: add used blobid bit array for valid blobids

This can be used for two purposes:

1) more quickly iterate the blob list, avoiding
   metadata pages that are valid but not the first
   page in the blob's metadata list
2) close races between delete and open operations -
   now we can clear the bit in the blobid bit array
   when the delete operation is in progress, ensuring
   no one else can try to open the blob

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I3904648fd6fa656cb98c9e17ea763ed5a84ef537

Reviewed-on: https://review.gerrithub.io/391695
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Paul Luse <paul.e.luse@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
This commit is contained in:
Jim Harris 2017-12-12 15:19:46 -07:00 committed by Daniel Verkamp
parent d6bf976af4
commit 40c911b957
4 changed files with 211 additions and 19 deletions

View File

@ -176,7 +176,8 @@ It contains basic information about the blobstore. The metadata region
is the remainder of cluster 0 and may extend to additional clusters.
<super-block> ::= <sb-version> <sb-len> <sb-super-blob> <sb-params>
<sb-metadata-start> <sb-metadata-len> <crc>
<sb-metadata-start> <sb-metadata-len>
<sb-blobid-start> <sb-blobid-len> <crc>
<sb-version> ::= u32
<sb-len> ::= u32 # Length of this super block, in bytes. Starts from the
# beginning of this structure.
@ -185,6 +186,8 @@ is the remainder of cluster 0 and may extend to additional clusters.
<sb-md-start> ::= u64 # Metadata start location, in pages
<sb-md-len> ::= u64 # Metadata length, in pages
<sb-blobid-start> ::= u32 # Start of bitmask of valid blobids (in pages)
<sb-blobid-len> ::= u32 # Lenget of bitmask of valid blobids (in pages)
<crc> ::= u32 # Crc for super block
The `<sb-params>` data contains parameters specified by the user when the blob

View File

@ -1455,6 +1455,7 @@ _spdk_bs_dev_destroy(void *io_device)
_spdk_blob_free(blob);
}
spdk_bit_array_free(&bs->used_blobids);
spdk_bit_array_free(&bs->used_md_pages);
spdk_bit_array_free(&bs->used_clusters);
/*
@ -1543,12 +1544,14 @@ _spdk_bs_alloc(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts)
/* The metadata is assumed to be at least 1 page */
bs->used_md_pages = spdk_bit_array_create(1);
bs->used_blobids = spdk_bit_array_create(0);
spdk_io_device_register(bs, _spdk_bs_channel_create, _spdk_bs_channel_destroy,
sizeof(struct spdk_bs_channel));
rc = spdk_bs_register_md_thread(bs);
if (rc == -1) {
spdk_io_device_unregister(bs, NULL);
spdk_bit_array_free(&bs->used_blobids);
spdk_bit_array_free(&bs->used_md_pages);
spdk_bit_array_free(&bs->used_clusters);
free(bs);
@ -1664,10 +1667,85 @@ _spdk_bs_write_used_md(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl
spdk_bs_sequence_write(seq, ctx->mask, lba, lba_count, cb_fn, arg);
}
static void
_spdk_bs_write_used_blobids(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
{
struct spdk_bs_load_ctx *ctx = arg;
uint64_t mask_size, lba, lba_count;
if (ctx->super->used_blobid_mask_len == 0) {
/*
* This is a pre-v3 on-disk format where the blobid mask does not get
* written to disk.
*/
cb_fn(seq, arg, 0);
return;
}
mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
ctx->mask = spdk_dma_zmalloc(mask_size, 0x1000, NULL);
if (!ctx->mask) {
_spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM);
return;
}
ctx->mask->type = SPDK_MD_MASK_TYPE_USED_BLOBIDS;
ctx->mask->length = ctx->super->md_len;
assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_blobids));
_spdk_bs_set_mask(ctx->bs->used_blobids, ctx->mask);
lba = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
lba_count = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
spdk_bs_sequence_write(seq, ctx->mask, lba, lba_count, cb_fn, arg);
}
static void
_spdk_bs_load_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
{
struct spdk_bs_load_ctx *ctx = cb_arg;
uint32_t i, j;
int rc;
/* The type must be correct */
assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_BLOBIDS);
/* The length of the mask (in bits) must not be greater than
* the length of the buffer (converted to bits) */
assert(ctx->mask->length <= (ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE * 8));
/* The length of the mask must be exactly equal to the size
* (in pages) of the metadata region */
assert(ctx->mask->length == ctx->super->md_len);
rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->mask->length);
if (rc < 0) {
spdk_dma_free(ctx->mask);
_spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM);
return;
}
for (i = 0; i < ctx->mask->length / 8; i++) {
uint8_t segment = ctx->mask->mask[i];
for (j = 0; segment; j++) {
if (segment & 1U) {
spdk_bit_array_set(ctx->bs->used_blobids, (i * 8) + j);
}
segment >>= 1U;
}
}
spdk_dma_free(ctx->super);
spdk_dma_free(ctx->mask);
free(ctx);
spdk_bs_sequence_finish(seq, bserrno);
}
static void
_spdk_bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
{
struct spdk_bs_load_ctx *ctx = cb_arg;
uint64_t lba, lba_count, mask_size;
uint32_t i, j;
int rc;
@ -1699,11 +1777,19 @@ _spdk_bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserr
}
}
spdk_dma_free(ctx->super);
spdk_dma_free(ctx->mask);
free(ctx);
spdk_bs_sequence_finish(seq, bserrno);
/* Read the used blobids mask */
mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
ctx->mask = spdk_dma_zmalloc(mask_size, 0x1000, NULL);
if (!ctx->mask) {
_spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM);
return;
}
lba = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
lba_count = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
spdk_bs_sequence_read(seq, ctx->mask, lba, lba_count,
_spdk_bs_load_used_blobids_cpl, ctx);
}
static void
@ -1853,14 +1939,26 @@ _spdk_bs_load_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int
free(ctx);
}
static void
_spdk_bs_load_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
{
struct spdk_bs_load_ctx *ctx = cb_arg;
spdk_dma_free(ctx->mask);
ctx->mask = NULL;
_spdk_bs_write_used_clusters(seq, cb_arg, _spdk_bs_load_write_used_clusters_cpl);
}
static void
_spdk_bs_load_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
{
struct spdk_bs_load_ctx *ctx = cb_arg;
spdk_dma_free(ctx->mask);
ctx->mask = NULL;
_spdk_bs_write_used_clusters(seq, cb_arg, _spdk_bs_load_write_used_clusters_cpl);
_spdk_bs_write_used_blobids(seq, cb_arg, _spdk_bs_load_write_used_blobids_cpl);
}
static void
@ -1884,6 +1982,9 @@ _spdk_bs_load_replay_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
if (_spdk_bs_load_cur_md_page_valid(ctx) == true) {
if (ctx->page->sequence_num == 0 || ctx->in_page_chain == true) {
spdk_bit_array_set(ctx->bs->used_md_pages, page_num);
if (ctx->page->sequence_num == 0) {
spdk_bit_array_set(ctx->bs->used_blobids, page_num);
}
if (_spdk_bs_load_replay_md_parse_page(ctx->page, ctx->bs)) {
_spdk_bs_load_ctx_fail(seq, ctx, -EILSEQ);
return;
@ -1959,6 +2060,12 @@ _spdk_bs_recover(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
return;
}
rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->super->md_len);
if (rc < 0) {
_spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM);
return;
}
rc = spdk_bit_array_resize(&ctx->bs->used_clusters, ctx->bs->total_clusters);
if (rc < 0) {
_spdk_bs_load_ctx_fail(seq, ctx, -ENOMEM);
@ -2017,11 +2124,19 @@ _spdk_bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
ctx->bs->super_blob = ctx->super->super_blob;
memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
if (ctx->super->clean == 1) {
if (ctx->super->clean == 0) {
_spdk_bs_recover(seq, ctx, 0);
} else if (ctx->super->used_blobid_mask_len == 0) {
/*
* Metadata is clean, but this is an old metadata format without
* a blobid mask. Clear the clean bit and then build the masks
* using _spdk_bs_recover.
*/
ctx->super->clean = 0;
_spdk_bs_write_super(seq, ctx->bs, ctx->super, _spdk_bs_recover, ctx);
} else {
ctx->super->clean = 0;
_spdk_bs_write_super(seq, ctx->bs, ctx->super, _spdk_bs_load_write_super_cpl, ctx);
} else {
_spdk_bs_recover(seq, ctx, 0);
}
}
@ -2187,6 +2302,13 @@ spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
return;
}
rc = spdk_bit_array_resize(&bs->used_blobids, bs->md_len);
if (rc < 0) {
_spdk_bs_free(bs);
cb_fn(cb_arg, NULL, -ENOMEM);
return;
}
ctx = calloc(1, sizeof(*ctx));
if (!ctx) {
_spdk_bs_free(bs);
@ -2237,10 +2359,20 @@ spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
SPDK_BS_PAGE_SIZE);
num_md_pages += ctx->super->used_cluster_mask_len;
/* The used_blobids mask requires 1 bit per metadata page, rounded
* up to the nearest page, plus a header.
*/
ctx->super->used_blobid_mask_start = num_md_pages;
ctx->super->used_blobid_mask_len = divide_round_up(sizeof(struct spdk_bs_md_mask) +
divide_round_up(bs->md_len, 8),
SPDK_BS_PAGE_SIZE);
num_md_pages += ctx->super->used_blobid_mask_len;
/* The metadata region size was chosen above */
ctx->super->md_start = bs->md_start = num_md_pages;
ctx->super->md_len = bs->md_len;
num_md_pages += bs->md_len;
num_md_lba = _spdk_bs_page_to_lba(bs, num_md_pages);
ctx->super->crc = _spdk_blob_md_page_calc_crc(ctx->super);
@ -2389,14 +2521,26 @@ _spdk_bs_unload_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, i
_spdk_bs_write_super(seq, ctx->bs, ctx->super, _spdk_bs_unload_write_super_cpl, ctx);
}
static void
_spdk_bs_unload_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
{
struct spdk_bs_load_ctx *ctx = cb_arg;
spdk_dma_free(ctx->mask);
ctx->mask = NULL;
_spdk_bs_write_used_clusters(seq, cb_arg, _spdk_bs_unload_write_used_clusters_cpl);
}
static void
_spdk_bs_unload_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
{
struct spdk_bs_load_ctx *ctx = cb_arg;
spdk_dma_free(ctx->mask);
ctx->mask = NULL;
_spdk_bs_write_used_clusters(seq, cb_arg, _spdk_bs_unload_write_used_clusters_cpl);
_spdk_bs_write_used_blobids(seq, cb_arg, _spdk_bs_unload_write_used_blobids_cpl);
}
static void
@ -2573,6 +2717,7 @@ void spdk_bs_create_blob_ext(struct spdk_blob_store *bs, const struct spdk_blob_
cb_fn(cb_arg, 0, -ENOMEM);
return;
}
spdk_bit_array_set(bs->used_blobids, page_idx);
spdk_bit_array_set(bs->used_md_pages, page_idx);
id = _spdk_bs_page_to_blobid(page_idx);
@ -2686,6 +2831,7 @@ _spdk_bs_delete_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
{
spdk_bs_sequence_t *seq = cb_arg;
struct spdk_blob_data *blob = __blob_to_data(_blob);
uint32_t page_num;
if (bserrno != 0) {
spdk_bs_sequence_finish(seq, bserrno);
@ -2707,6 +2853,8 @@ _spdk_bs_delete_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
* get returned after this point by _spdk_blob_lookup().
*/
TAILQ_REMOVE(&blob->bs->blobs, blob, link);
page_num = _spdk_bs_blobid_to_page(blob->id);
spdk_bit_array_clear(blob->bs->used_blobids, page_num);
blob->state = SPDK_BLOB_STATE_DIRTY;
blob->active.num_pages = 0;
_spdk_resize_blob(blob, 0);
@ -2769,6 +2917,13 @@ void spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Opening blob %lu\n", blobid);
page_num = _spdk_bs_blobid_to_page(blobid);
if (spdk_bit_array_get(bs->used_blobids, page_num) == false) {
/* Invalid blobid */
cb_fn(cb_arg, NULL, -ENOENT);
return;
}
blob = _spdk_blob_lookup(bs, blobid);
if (blob) {
blob->open_ref++;
@ -2776,13 +2931,6 @@ void spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
return;
}
page_num = _spdk_bs_blobid_to_page(blobid);
if (spdk_bit_array_get(bs->used_md_pages, page_num) == false) {
/* Invalid blobid */
cb_fn(cb_arg, NULL, -ENOENT);
return;
}
blob = _spdk_blob_alloc(bs, blobid);
if (!blob) {
cb_fn(cb_arg, NULL, -ENOMEM);
@ -2996,8 +3144,8 @@ _spdk_bs_iter_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
}
ctx->page_num++;
ctx->page_num = spdk_bit_array_find_first_set(bs->used_md_pages, ctx->page_num);
if (ctx->page_num >= spdk_bit_array_capacity(bs->used_md_pages)) {
ctx->page_num = spdk_bit_array_find_first_set(bs->used_blobids, ctx->page_num);
if (ctx->page_num >= spdk_bit_array_capacity(bs->used_blobids)) {
ctx->cb_fn(ctx->cb_arg, NULL, -ENOENT);
free(ctx);
return;

View File

@ -155,6 +155,7 @@ struct spdk_blob_store {
struct spdk_bit_array *used_md_pages;
struct spdk_bit_array *used_clusters;
struct spdk_bit_array *used_blobids;
uint32_t cluster_sz;
uint64_t total_clusters;
@ -200,6 +201,7 @@ enum spdk_blob_op_type {
#define SPDK_MD_MASK_TYPE_USED_PAGES 0
#define SPDK_MD_MASK_TYPE_USED_CLUSTERS 1
#define SPDK_MD_MASK_TYPE_USED_BLOBIDS 2
struct spdk_bs_md_mask {
uint8_t type;
@ -308,7 +310,10 @@ struct spdk_bs_super_block {
struct spdk_bs_type bstype; /* blobstore type */
uint8_t reserved[4020];
uint32_t used_blobid_mask_start; /* Offset from beginning of disk, in pages */
uint32_t used_blobid_mask_len; /* Count, in pages */
uint8_t reserved[4012];
uint32_t crc;
};
SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_super_block) == 0x1000, "Invalid super block size");

View File

@ -2183,6 +2183,7 @@ bs_version(void)
struct spdk_bs_super_block *super;
struct spdk_bs_dev *dev;
struct spdk_bs_opts opts;
spdk_blob_id blobid;
dev = init_dev();
spdk_bs_opts_init(&opts);
@ -2204,7 +2205,18 @@ bs_version(void)
*/
super = (struct spdk_bs_super_block *)&g_dev_buffer[0];
CU_ASSERT(super->version == SPDK_BS_VERSION);
CU_ASSERT(super->clean == 1);
super->version = 2;
/*
* Version 2 metadata does not have a used blobid mask, so clear
* those fields in the super block and zero the corresponding
* region on "disk". We will use this to ensure blob IDs are
* correctly reconstructed.
*/
memset(&g_dev_buffer[super->used_blobid_mask_start * PAGE_SIZE], 0,
super->used_blobid_mask_len * PAGE_SIZE);
super->used_blobid_mask_start = 0;
super->used_blobid_mask_len = 0;
super->crc = _spdk_blob_md_page_calc_crc(super);
/* Load an existing blob store */
@ -2212,6 +2224,7 @@ bs_version(void)
spdk_bs_load(dev, &opts, bs_op_with_handle_complete, NULL);
CU_ASSERT(g_bserrno == 0);
SPDK_CU_ASSERT_FATAL(g_bs != NULL);
CU_ASSERT(super->clean == 0);
/*
* Create a blob - just to make sure that when we unload it
@ -2221,12 +2234,35 @@ bs_version(void)
spdk_bs_create_blob(g_bs, blob_op_with_id_complete, NULL);
CU_ASSERT(g_bserrno == 0);
CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
blobid = g_blobid;
/* Unload the blob store */
spdk_bs_unload(g_bs, bs_op_complete, NULL);
CU_ASSERT(g_bserrno == 0);
g_bs = NULL;
CU_ASSERT(super->version == 2);
CU_ASSERT(super->used_blobid_mask_start == 0);
CU_ASSERT(super->used_blobid_mask_len == 0);
dev = init_dev();
spdk_bs_load(dev, &opts, bs_op_with_handle_complete, NULL);
CU_ASSERT(g_bserrno == 0);
SPDK_CU_ASSERT_FATAL(g_bs != NULL);
g_blob = NULL;
spdk_bs_open_blob(g_bs, blobid, blob_op_with_handle_complete, NULL);
CU_ASSERT(g_bserrno == 0);
CU_ASSERT(g_blob != NULL);
spdk_blob_close(g_blob, blob_op_complete, NULL);
CU_ASSERT(g_bserrno == 0);
spdk_bs_unload(g_bs, bs_op_complete, NULL);
CU_ASSERT(g_bserrno == 0);
g_bs = NULL;
CU_ASSERT(super->version == 2);
CU_ASSERT(super->used_blobid_mask_start == 0);
CU_ASSERT(super->used_blobid_mask_len == 0);
}
int main(int argc, char **argv)