lib/blob: add EXTENT_TABLE descriptor to blobs

Added new descriptor SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE.

The Extent Table will hold md page offsets for the new Extent Page
descriptors. Entries in the Extent Table are run-length encoded:
0's represent unallocated Extent Pages.
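
As an illustration (the md page offsets below are made up), an in-memory
extent page array such as:

    extent_pages[] = { 37, 0, 0, 0, 42 }

serializes to three Extent Table entries:

    { page_idx = 37, num_pages = 1 }
    { page_idx = 0,  num_pages = 3 }   (run of three unallocated Extent Pages)
    { page_idx = 42, num_pages = 1 }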

Additionally, the total number of clusters is persisted in each Extent
Table descriptor, because there is no guarantee that the last Extent
Page of a blob will be allocated. Even though the number of Extents per
Extent Page is always the same, the last Extent Page can hold fewer
Extents than that.
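
For example, assuming an Extent Page holds 64 Extents (this patch does
not fix the actual count), a blob with 70 clusters needs two Extent
Pages, the second describing only 6 Extents. If that second page is not
yet allocated, num_clusters in the Extent Table is the only record of
the blob's full size.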

This patch does not yet add any new metadata on disk. It only adds the
descriptor parsing/serialization and the fields needed to store the
Extent Table at run time.

Following patches will implement the TODOs added in this patch.

Signed-off-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Change-Id: Iac5d8f00ddfc655c507bc26d69d7adf8495074e9
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/466920
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Paul Luse <paul.e.luse@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Tomasz Zawadzki 2020-01-22 09:48:29 -05:00
parent 2f8bdb3c82
commit f60b4a7e28
2 changed files with 221 additions and 5 deletions


@@ -131,6 +131,7 @@ _spdk_bs_allocate_cluster(struct spdk_blob *blob, uint32_t cluster_num,
if (update_map) {
_spdk_blob_insert_cluster(blob, cluster_num, *lowest_free_cluster);
/* TODO: Claim used_md_pages for extent pages */
}
return 0;
@@ -227,6 +228,8 @@ _spdk_blob_free(struct spdk_blob *blob)
{
assert(blob != NULL);
free(blob->active.extent_pages);
free(blob->clean.extent_pages);
free(blob->active.clusters);
free(blob->clean.clusters);
free(blob->active.pages);
@@ -344,15 +347,27 @@ _spdk_blob_unfreeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void
static int
_spdk_blob_mark_clean(struct spdk_blob *blob)
{
uint32_t *extent_pages = NULL;
uint64_t *clusters = NULL;
uint32_t *pages = NULL;
assert(blob != NULL);
if (blob->active.num_extent_pages) {
assert(blob->active.extent_pages);
extent_pages = calloc(blob->active.num_extent_pages, sizeof(*blob->active.extent_pages));
if (!extent_pages) {
return -ENOMEM;
}
memcpy(extent_pages, blob->active.extent_pages,
blob->active.num_extent_pages * sizeof(*extent_pages));
}
if (blob->active.num_clusters) {
assert(blob->active.clusters);
clusters = calloc(blob->active.num_clusters, sizeof(*blob->active.clusters));
if (!clusters) {
free(extent_pages);
return -ENOMEM;
}
memcpy(clusters, blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
@@ -362,20 +377,25 @@ _spdk_blob_mark_clean(struct spdk_blob *blob)
assert(blob->active.pages);
pages = calloc(blob->active.num_pages, sizeof(*blob->active.pages));
if (!pages) {
free(extent_pages);
free(clusters);
return -ENOMEM;
}
memcpy(pages, blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
}
free(blob->clean.extent_pages);
free(blob->clean.clusters);
free(blob->clean.pages);
blob->clean.num_extent_pages = blob->active.num_extent_pages;
blob->clean.extent_pages = blob->active.extent_pages;
blob->clean.num_clusters = blob->active.num_clusters;
blob->clean.clusters = blob->active.clusters;
blob->clean.num_pages = blob->active.num_pages;
blob->clean.pages = blob->active.pages;
blob->active.extent_pages = extent_pages;
blob->active.clusters = clusters;
blob->active.pages = pages;
@@ -532,7 +552,56 @@ _spdk_blob_parse_page(const struct spdk_blob_md_page *page, struct spdk_blob *bl
}
}
}
} else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
uint32_t num_extent_pages = blob->active.num_extent_pages;
uint32_t i, j;
size_t extent_pages_length;
uint32_t *tmp;
desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
if (blob->extent_rle_found) {
/* This means that Extent RLE is present in MD;
* both descriptors should never be present at the same time. */
return -EINVAL;
}
blob->extent_table_found = true;
if (desc_extent_table->length == 0 ||
(extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
return -EINVAL;
}
for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
num_extent_pages += desc_extent_table->extent_page[i].num_pages;
}
tmp = realloc(blob->active.extent_pages, num_extent_pages * sizeof(uint32_t));
if (tmp == NULL) {
return -ENOMEM;
}
blob->active.extent_pages = tmp;
blob->active.extent_pages_array_size = num_extent_pages;
blob->num_clusters_in_et = desc_extent_table->num_clusters;
/* Extent table entries contain md page numbers for extent pages.
* Zeroes represent unallocated extent pages, those are run-length-encoded.
*/
for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
if (desc_extent_table->extent_page[i].page_idx != 0) {
assert(desc_extent_table->extent_page[i].num_pages == 1);
blob->active.extent_pages[blob->active.num_extent_pages++] =
desc_extent_table->extent_page[i].page_idx;
} else if (spdk_blob_is_thin_provisioned(blob)) {
for (j = 0; j < desc_extent_table->extent_page[i].num_pages; j++) {
blob->active.extent_pages[blob->active.num_extent_pages++] = 0;
}
} else {
return -EINVAL;
}
}
} else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
int rc;
@@ -684,6 +753,99 @@ _spdk_blob_serialize_xattr(const struct spdk_xattr *xattr,
return 0;
}
static void
_spdk_blob_serialize_extent_table_entry(const struct spdk_blob *blob,
uint64_t start_ep, uint64_t *next_ep,
uint8_t **buf, size_t *remaining_sz)
{
struct spdk_blob_md_descriptor_extent_table *desc;
size_t cur_sz;
uint64_t i, et_idx;
uint32_t extent_page, ep_len;
/* The buffer must have room for num_clusters and at least one extent page entry */
cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc->num_clusters) + sizeof(
desc->extent_page[0]);
if (*remaining_sz < cur_sz) {
*next_ep = start_ep;
return;
}
desc = (struct spdk_blob_md_descriptor_extent_table *)*buf;
desc->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE;
desc->num_clusters = blob->active.num_clusters;
extent_page = blob->active.extent_pages[start_ep];
ep_len = 1;
et_idx = 0;
for (i = start_ep + 1; i < blob->active.num_extent_pages; i++) {
/* Extent table entries contain md page offsets for extent pages.
* Zeroes represent unallocated extent pages, which are run-length-encoded.
*/
if (extent_page == 0 && blob->active.extent_pages[i] == 0) {
ep_len++;
continue;
}
desc->extent_page[et_idx].page_idx = extent_page;
desc->extent_page[et_idx].num_pages = ep_len;
et_idx++;
cur_sz += sizeof(desc->extent_page[et_idx]);
if (*remaining_sz < cur_sz) {
/* If we ran out of buffer space, return */
*next_ep = i;
break;
}
extent_page = blob->active.extent_pages[i];
ep_len = 1;
}
if (*remaining_sz >= cur_sz) {
desc->extent_page[et_idx].page_idx = extent_page;
desc->extent_page[et_idx].num_pages = ep_len;
et_idx++;
*next_ep = blob->active.num_extent_pages;
}
desc->length = sizeof(desc->num_clusters) + sizeof(desc->extent_page[0]) * et_idx;
*remaining_sz -= sizeof(struct spdk_blob_md_descriptor) + desc->length;
*buf += sizeof(struct spdk_blob_md_descriptor) + desc->length;
}
static int
_spdk_blob_serialize_extent_table(const struct spdk_blob *blob,
struct spdk_blob_md_page **pages,
struct spdk_blob_md_page *cur_page,
uint32_t *page_count, uint8_t **buf,
size_t *remaining_sz)
{
uint64_t last_extent_page;
int rc;
last_extent_page = 0;
while (last_extent_page < blob->active.num_extent_pages) {
_spdk_blob_serialize_extent_table_entry(blob, last_extent_page, &last_extent_page, buf,
remaining_sz);
if (last_extent_page == blob->active.num_extent_pages) {
break;
}
rc = _spdk_blob_serialize_add_page(blob, pages, page_count, &cur_page);
if (rc < 0) {
return rc;
}
*buf = (uint8_t *)cur_page->descriptors;
*remaining_sz = sizeof(cur_page->descriptors);
}
return 0;
}
static void
_spdk_blob_serialize_extent_rle(const struct spdk_blob *blob,
uint64_t start_cluster, uint64_t *next_cluster,
@@ -897,9 +1059,8 @@ _spdk_blob_serialize(const struct spdk_blob *blob, struct spdk_blob_md_page **pa
}
if (blob->use_extent_table) {
/* Serialize extent table */
rc = _spdk_blob_serialize_extent_table(blob, pages, cur_page, page_count, &buf, &remaining_sz);
} else {
/* Serialize extents */
rc = _spdk_blob_serialize_extents_rle(blob, pages, cur_page, page_count, &buf, &remaining_sz);
@@ -1194,10 +1355,16 @@ _spdk_blob_persist_clear_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int
tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * blob->active.num_clusters);
assert(tmp != NULL);
blob->active.clusters = tmp;
tmp = realloc(blob->active.extent_pages, sizeof(uint32_t) * blob->active.num_extent_pages);
assert(tmp != NULL);
blob->active.extent_pages = tmp;
#endif
blob->active.extent_pages_array_size = blob->active.num_extent_pages;
blob->active.cluster_array_size = blob->active.num_clusters;
}
/* TODO: Add path to persist clear extent pages. */
_spdk_blob_persist_complete(seq, ctx, bserrno);
}
@@ -1411,7 +1578,7 @@ _spdk_blob_resize(struct spdk_blob *blob, uint64_t sz)
}
/* Do two passes - one to verify that we can obtain enough clusters
* and md pages, another to actually claim them.
*/
if (spdk_blob_is_thin_provisioned(blob) == false) {
@@ -1424,6 +1591,7 @@ _spdk_blob_resize(struct spdk_blob *blob, uint64_t sz)
}
lfc++;
}
/* TODO: Check if enough used_md_pages are available. */
}
if (sz > num_clusters) {
@@ -1438,6 +1606,8 @@ _spdk_blob_resize(struct spdk_blob *blob, uint64_t sz)
sizeof(*blob->active.clusters) * (sz - blob->active.cluster_array_size));
blob->active.clusters = tmp;
blob->active.cluster_array_size = sz;
/* TODO: Expand the extents table, only if enough clusters were added */
}
blob->state = SPDK_BLOB_STATE_DIRTY;
@@ -3118,6 +3288,9 @@ _spdk_bs_load_replay_md_parse_page(const struct spdk_blob_md_page *page, struct
/* Skip this item */
} else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
/* Skip this item */
} else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
/* TODO: Read the extent pages when replaying the md,
* but only after the particular blob's md chain was read */
} else {
/* Error */
return -EINVAL;
@@ -4617,10 +4790,15 @@ static void
_spdk_bs_snapshot_swap_cluster_maps(struct spdk_blob *blob1, struct spdk_blob *blob2)
{
uint64_t *cluster_temp;
uint32_t *extent_page_temp;
cluster_temp = blob1->active.clusters;
blob1->active.clusters = blob2->active.clusters;
blob2->active.clusters = cluster_temp;
extent_page_temp = blob1->active.extent_pages;
blob1->active.extent_pages = blob2->active.extent_pages;
blob2->active.extent_pages = extent_page_temp;
}
static void
@@ -4753,6 +4931,8 @@ _spdk_bs_snapshot_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bs
assert(spdk_blob_is_thin_provisioned(newblob));
assert(spdk_mem_all_zero(newblob->active.clusters,
newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
assert(spdk_mem_all_zero(newblob->active.extent_pages,
newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
_spdk_blob_freeze_io(origblob, _spdk_bs_snapshot_freeze_cpl, ctx);
}


@@ -77,6 +77,18 @@ struct spdk_blob_mut_data {
*/
size_t cluster_array_size;
/* Number of extent pages */
uint64_t num_extent_pages;
/* Array of page offsets into the metadata region,
* containing extents. May contain entries for pages that
* are not yet allocated. */
uint32_t *extent_pages;
/* The size of the extent page array. This is greater than or
* equal to 'num_extent_pages'. */
size_t extent_pages_array_size;
/* Number of metadata pages */
uint32_t num_pages;
@@ -153,6 +165,10 @@ struct spdk_blob {
bool extent_rle_found;
bool extent_table_found;
bool use_extent_table;
/* Number of data clusters retrieved from the extent table;
* that many clusters have to be read from extent pages. */
uint64_t num_clusters_in_et;
};
struct spdk_blob_store {
@@ -249,13 +265,20 @@ struct spdk_bs_md_mask {
#define SPDK_MD_DESCRIPTOR_TYPE_FLAGS 3
#define SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL 4
/* Following descriptors define cluster layout in a blob.
* EXTENT_RLE cannot be present in a blob's metadata at the same
* time as the EXTENT_TABLE and EXTENT_PAGE descriptors. */
/* EXTENT_RLE descriptor holds an array of LBAs that point to the
* beginning of allocated clusters. The array is run-length encoded,
* with 0's representing unallocated clusters. It is part of the
* serialized metadata chain for a blob. */
#define SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE 1
/* EXTENT_TABLE descriptor holds an array of md page offsets that
* point to pages with EXTENT_PAGE descriptors. The 0's in the array
* are run-length encoded and represent unallocated extent pages;
* non-zero values are md page offsets of allocated extent pages.
* It is part of the serialized metadata chain for a blob. */
#define SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE 5
struct spdk_blob_md_descriptor_xattr {
uint8_t type;
@@ -278,6 +301,19 @@ struct spdk_blob_md_descriptor_extent_rle {
} extents[0];
};
struct spdk_blob_md_descriptor_extent_table {
uint8_t type;
uint32_t length;
/* Number of data clusters in the blob */
uint64_t num_clusters;
struct {
uint32_t page_idx;
uint32_t num_pages; /* In units of pages */
} extent_page[0];
};
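
A minimal sketch (not part of this patch; count_extent_table_entries is a
hypothetical helper) showing how the entry array of a single EXTENT_TABLE
descriptor can be walked, mirroring the validation done in
_spdk_blob_parse_page() above:

static void
count_extent_table_entries(const struct spdk_blob_md_descriptor_extent_table *desc,
			   uint32_t *allocated, uint32_t *unallocated)
{
	/* The entry array follows num_clusters within desc->length bytes. */
	size_t entries_len = desc->length - sizeof(desc->num_clusters);
	size_t num_entries = entries_len / sizeof(desc->extent_page[0]);
	size_t i;

	*allocated = 0;
	*unallocated = 0;
	for (i = 0; i < num_entries; i++) {
		if (desc->extent_page[i].page_idx != 0) {
			/* Allocated extent pages are serialized one entry each. */
			*allocated += desc->extent_page[i].num_pages;
		} else {
			/* page_idx == 0 marks a run of unallocated extent pages. */
			*unallocated += desc->extent_page[i].num_pages;
		}
	}
}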
#define SPDK_BLOB_THIN_PROV (1ULL << 0)
#define SPDK_BLOB_INTERNAL_XATTR (1ULL << 1)
#define SPDK_BLOB_INVALID_FLAGS_MASK (SPDK_BLOB_THIN_PROV | SPDK_BLOB_INTERNAL_XATTR)