diff --git a/include/spdk/blob.h b/include/spdk/blob.h new file mode 100644 index 000000000..3a6f41f9a --- /dev/null +++ b/include/spdk/blob.h @@ -0,0 +1,267 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * Blob Storage System + * + * The blob storage system, or the blobstore for short, is a low level + * library for placing opaque blobs of data onto a storage device such + * that scattered physical blocks on the storage device appear as a + * single, contiguous storage region. These blobs are also persistent, + * which means they are rediscoverable after reboot or power loss. + * + * The blobstore is designed to be very high performance, and thus has + * a few general rules regarding thread safety to avoid taking locks + * in the I/O path. Functions starting with the prefix "spdk_bs_md" must only + * be called from the metadata thread, of which there is only one at a time. + * The user application can declare which thread is the metadata thread by + * calling \ref spdk_bs_register_md_thread, but by default it is the thread + * that was used to create the blobstore initially. The metadata thread can + * be changed at run time by first unregistering + * (\ref spdk_bs_unregister_md_thread) and then re-registering. Registering + * a thread as the metadata thread is expensive and should be avoided. + * + * Functions starting with the prefix "spdk_bs_io" are passed a channel + * as an argument, and channels may only be used from the thread they were + * created on. See \ref spdk_bs_alloc_io_channel. + * + * Functions not starting with one of those two prefixes are thread safe + * and may be called from any thread at any time. + * + * The blob store returns errors using negated POSIX errno values, either + * returned in the callback or as a return value. An errno value of 0 means + * success. 
/*
 * Table of operations and geometry that a backing block device exposes to
 * the blobstore. Every I/O entry point receives the channel to submit on
 * and a spdk_bs_dev_cb_args describing the completion to invoke.
 */
struct spdk_bs_dev {
	/* Create a new channel which is a software construct that is used
	 * to submit I/O. */
	struct spdk_io_channel *(*create_channel)(struct spdk_bs_dev *dev);

	/* Destroy a previously created channel */
	void (*destroy_channel)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel);

	/* Destroy this blobstore device.  Applications must not destroy the
	 * blobstore device themselves; the blobstore destroys it through this
	 * function pointer once all references to it held during the unload
	 * callback context have completed.
	 */
	void (*destroy)(struct spdk_bs_dev *dev);

	/* Read lba_count blocks starting at lba into payload.
	 * NOTE(review): lba/lba_count are presumably in units of blocklen bytes - confirm.
	 */
	void (*read)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload,
		     uint64_t lba, uint32_t lba_count,
		     struct spdk_bs_dev_cb_args *cb_args);

	/* Write lba_count blocks starting at lba from payload. */
	void (*write)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload,
		      uint64_t lba, uint32_t lba_count,
		      struct spdk_bs_dev_cb_args *cb_args);

	/* Flush the device; takes no LBA range. */
	void (*flush)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
		      struct spdk_bs_dev_cb_args *cb_args);

	/* Unmap lba_count blocks starting at lba. */
	void (*unmap)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
		      uint64_t lba, uint32_t lba_count,
		      struct spdk_bs_dev_cb_args *cb_args);

	/* NOTE(review): presumably the total number of blocks on the device - confirm. */
	uint64_t	blockcnt;
	uint32_t	blocklen; /* In bytes */
};
/* Create a new blob. This call takes no size argument; use
 * spdk_bs_md_resize_blob() to set the size in clusters. The id of the
 * new blob and the status are delivered to cb_fn.
 */
void spdk_bs_md_create_blob(struct spdk_blob_store *bs,
			    spdk_blob_op_with_id_complete cb_fn, void *cb_arg);

/* Delete an existing blob, identified by blobid. */
void spdk_bs_md_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
			    spdk_blob_op_complete cb_fn, void *cb_arg);

/* Open the blob identified by blobid. The blob handle and the status are
 * delivered to cb_fn.
 */
void spdk_bs_md_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
			  spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);

/* Resize a blob to 'sz' clusters.
 *
 * These changes are not persisted to disk until
 * spdk_bs_md_sync_blob() is called.
 *
 * Returns an int status directly rather than through a callback.
 */
int spdk_bs_md_resize_blob(struct spdk_blob *blob, size_t sz);
This applies to open, resize, set xattr, + * and remove xattr. These operations will not be persistent until + * the blob has been synced. + * + * I/O operations (read/write) are synced independently. See + * spdk_bs_io_flush_channel(). + */ +void spdk_bs_md_sync_blob(struct spdk_blob *blob, + spdk_blob_op_complete cb_fn, void *cb_arg); + +/* Close a blob. This will automatically sync. */ +void spdk_bs_md_close_blob(struct spdk_blob **blob, spdk_blob_op_complete cb_fn, void *cb_arg); + +struct spdk_io_channel *spdk_bs_alloc_io_channel(struct spdk_blob_store *bs, + uint32_t priority, uint32_t max_ops); + +void spdk_bs_free_io_channel(struct spdk_io_channel *channel); + +/* Force all previously completed operations on this channel to be persistent. */ +void spdk_bs_io_flush_channel(struct spdk_io_channel *channel, + spdk_blob_op_complete cb_fn, void *cb_arg); + +/* Write data to a blob. Offset is in pages from the beginning of the blob. */ +void spdk_bs_io_write_blob(struct spdk_blob *blob, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg); + + +/* Read data from a blob. Offset is in pages from the beginning of the blob. 
*/ +void spdk_bs_io_read_blob(struct spdk_blob *blob, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg); + +/* Iterate through all blobs */ +void spdk_bs_md_iter_first(struct spdk_blob_store *bs, + spdk_blob_op_with_handle_complete cb_fn, void *cb_arg); +void spdk_bs_md_iter_next(struct spdk_blob_store *bs, struct spdk_blob **blob, + spdk_blob_op_with_handle_complete cb_fn, void *cb_arg); + +int spdk_blob_md_set_xattr(struct spdk_blob *blob, const char *name, const void *value, + uint16_t value_len); +int spdk_blob_md_remove_xattr(struct spdk_blob *blob, const char *name); +int spdk_bs_md_get_xattr_value(struct spdk_blob *blob, const char *name, + const void **value, size_t *value_len); +int spdk_bs_md_get_xattr_names(struct spdk_blob *blob, + struct spdk_xattr_names **names); + +uint32_t spdk_xattr_names_get_count(struct spdk_xattr_names *names); +const char *spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index); +void spdk_xattr_names_free(struct spdk_xattr_names *names); + +#endif /* SPDK_BLOB_H_ */ diff --git a/lib/blob/Makefile b/lib/blob/Makefile new file mode 100644 index 000000000..dd77b7f8d --- /dev/null +++ b/lib/blob/Makefile @@ -0,0 +1,41 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +CFLAGS += $(ENV_CFLAGS) +C_SRCS = blobstore.c request.c +LIBNAME = blob + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/blob/blobstore.c b/lib/blob/blobstore.c new file mode 100644 index 000000000..bc43883a2 --- /dev/null +++ b/lib/blob/blobstore.c @@ -0,0 +1,2335 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*
 * Return num / divisor rounded up to the next whole number.
 *
 * Computed as quotient plus "is there a remainder" so that the result is
 * correct for every num, including values close to SIZE_MAX where the
 * usual (num + divisor - 1) form would wrap around.
 *
 * divisor must be non-zero.
 */
static inline size_t
divide_round_up(size_t num, size_t divisor)
{
	assert(divisor != 0);

	return num / divisor + (num % divisor != 0);
}
free(blob->active.clusters); + free(blob->clean.clusters); + free(blob->active.pages); + free(blob->clean.pages); + + TAILQ_FOREACH_SAFE(xattr, &blob->xattrs, link, xattr_tmp) { + TAILQ_REMOVE(&blob->xattrs, xattr, link); + free(xattr->name); + free(xattr->value); + free(xattr); + } + + free(blob); +} + +static int +_spdk_blob_mark_clean(struct spdk_blob *blob) +{ + uint64_t *clusters = NULL; + uint32_t *pages = NULL; + + assert(blob != NULL); + assert(blob->state == SPDK_BLOB_STATE_LOADING || + blob->state == SPDK_BLOB_STATE_SYNCING); + + if (blob->active.num_clusters) { + assert(blob->active.clusters); + clusters = calloc(blob->active.num_clusters, sizeof(*blob->active.clusters)); + if (!clusters) { + return -1; + } + memcpy(clusters, blob->active.clusters, blob->active.num_clusters * sizeof(*clusters)); + } + + if (blob->active.num_pages) { + assert(blob->active.pages); + pages = calloc(blob->active.num_pages, sizeof(*blob->active.pages)); + if (!pages) { + free(clusters); + return -1; + } + memcpy(pages, blob->active.pages, blob->active.num_pages * sizeof(*pages)); + } + + free(blob->clean.clusters); + free(blob->clean.pages); + + blob->clean.num_clusters = blob->active.num_clusters; + blob->clean.clusters = blob->active.clusters; + blob->clean.num_pages = blob->active.num_pages; + blob->clean.pages = blob->active.pages; + + blob->active.clusters = clusters; + blob->active.pages = pages; + + blob->state = SPDK_BLOB_STATE_CLEAN; + + return 0; +} + +static void +_spdk_blob_parse_page(const struct spdk_blob_md_page *page, struct spdk_blob *blob) +{ + struct spdk_blob_md_descriptor *desc; + size_t cur_desc = 0; + void *tmp; + + desc = (struct spdk_blob_md_descriptor *)page->descriptors; + while (cur_desc < sizeof(page->descriptors)) { + if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) { + if (desc->length == 0) { + /* If padding and length are 0, this terminates the page */ + break; + } + } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT) { + struct 
spdk_blob_md_descriptor_extent *desc_extent; + unsigned int i, j; + unsigned int cluster_count = blob->active.num_clusters; + + desc_extent = (struct spdk_blob_md_descriptor_extent *)desc; + + assert(desc_extent->length > 0); + assert(desc_extent->length % sizeof(desc_extent->extents[0]) == 0); + + for (i = 0; i < desc_extent->length / sizeof(desc_extent->extents[0]); i++) { + for (j = 0; j < desc_extent->extents[i].length; j++) { + assert(spdk_bit_array_get(blob->bs->used_clusters, desc_extent->extents[i].cluster_idx + j)); + cluster_count++; + } + } + + assert(cluster_count > 0); + tmp = realloc(blob->active.clusters, cluster_count * sizeof(uint64_t)); + assert(tmp != NULL); + blob->active.clusters = tmp; + blob->active.cluster_array_size = cluster_count; + + for (i = 0; i < desc_extent->length / sizeof(desc_extent->extents[0]); i++) { + for (j = 0; j < desc_extent->extents[i].length; j++) { + blob->active.clusters[blob->active.num_clusters++] = _spdk_bs_cluster_to_lba(blob->bs, + desc_extent->extents[i].cluster_idx + j); + } + } + + } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) { + struct spdk_blob_md_descriptor_xattr *desc_xattr; + struct spdk_xattr *xattr; + + desc_xattr = (struct spdk_blob_md_descriptor_xattr *)desc; + + xattr = calloc(1, sizeof(*xattr)); + assert(xattr != NULL); + + xattr->name = malloc(desc_xattr->name_length + 1); + strncpy(xattr->name, desc_xattr->name, desc_xattr->name_length); + xattr->name[desc_xattr->name_length] = '\0'; + + xattr->value = malloc(desc_xattr->value_length); + assert(xattr->value != NULL); + xattr->value_len = desc_xattr->value_length; + memcpy(xattr->value, + (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length), + desc_xattr->value_length); + + TAILQ_INSERT_TAIL(&blob->xattrs, xattr, link); + } else { + /* Error */ + break; + } + + /* Advance to the next descriptor */ + desc = (struct spdk_blob_md_descriptor *)((uintptr_t)desc + sizeof(*desc) + desc->length); + cur_desc += sizeof(*desc) + 
desc->length; + } +} + +static int +_spdk_blob_parse(const struct spdk_blob_md_page *pages, uint32_t page_count, + struct spdk_blob *blob) +{ + const struct spdk_blob_md_page *page; + uint32_t i; + + assert(page_count > 0); + assert(pages[0].sequence_num == 0); + assert(blob != NULL); + assert(blob->state == SPDK_BLOB_STATE_LOADING); + assert(blob->active.clusters == NULL); + assert(blob->id == pages[0].id); + assert(blob->state == SPDK_BLOB_STATE_LOADING); + + for (i = 0; i < page_count; i++) { + page = &pages[i]; + + assert(page->id == blob->id); + assert(page->sequence_num == i); + + _spdk_blob_parse_page(page, blob); + } + + return 0; +} + +static int +_spdk_blob_serialize_add_page(const struct spdk_blob *blob, + struct spdk_blob_md_page **pages, + uint32_t *page_count, + struct spdk_blob_md_page **last_page) +{ + struct spdk_blob_md_page *page; + + assert(pages != NULL); + assert(page_count != NULL); + + if (*page_count == 0) { + assert(*pages == NULL); + *page_count = 1; + *pages = spdk_zmalloc(sizeof(struct spdk_blob_md_page), + sizeof(struct spdk_blob_md_page), + NULL); + } else { + assert(*pages != NULL); + (*page_count)++; + *pages = spdk_realloc(*pages, + sizeof(struct spdk_blob_md_page) * (*page_count), + sizeof(struct spdk_blob_md_page), + NULL); + } + + if (*pages == NULL) { + *page_count = 0; + *last_page = NULL; + return -ENOMEM; + } + + page = &(*pages)[*page_count - 1]; + page->id = blob->id; + page->sequence_num = *page_count - 1; + page->next = SPDK_INVALID_MD_PAGE; + *last_page = page; + + return 0; +} + +/* Transform the in-memory representation 'xattr' into an on-disk xattr descriptor. + * Update required_sz on both success and failure. 
+ * + */ +static int +_spdk_blob_serialize_xattr(const struct spdk_xattr *xattr, + uint8_t *buf, size_t buf_sz, + size_t *required_sz) +{ + struct spdk_blob_md_descriptor_xattr *desc; + + *required_sz = sizeof(struct spdk_blob_md_descriptor_xattr) + + strlen(xattr->name) + + xattr->value_len; + + if (buf_sz < *required_sz) { + return -1; + } + + desc = (struct spdk_blob_md_descriptor_xattr *)buf; + + desc->type = SPDK_MD_DESCRIPTOR_TYPE_XATTR; + desc->length = sizeof(desc->name_length) + + sizeof(desc->value_length) + + strlen(xattr->name) + + xattr->value_len; + desc->name_length = strlen(xattr->name); + desc->value_length = xattr->value_len; + + memcpy(desc->name, xattr->name, desc->name_length); + memcpy((void *)((uintptr_t)desc->name + desc->name_length), + xattr->value, + desc->value_length); + + return 0; +} + +static void +_spdk_blob_serialize_extent(const struct spdk_blob *blob, + uint64_t start_cluster, uint64_t *next_cluster, + uint8_t *buf, size_t buf_sz) +{ + struct spdk_blob_md_descriptor_extent *desc; + size_t cur_sz; + uint64_t i, extent_idx; + uint32_t lba, lba_per_cluster, lba_count; + + /* The buffer must have room for at least one extent */ + cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc->extents[0]); + if (buf_sz < cur_sz) { + *next_cluster = start_cluster; + return; + } + + desc = (struct spdk_blob_md_descriptor_extent *)buf; + desc->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT; + + lba_per_cluster = _spdk_bs_cluster_to_lba(blob->bs, 1); + + lba = blob->active.clusters[start_cluster]; + lba_count = lba_per_cluster; + extent_idx = 0; + for (i = start_cluster + 1; i < blob->active.num_clusters; i++) { + if ((lba + lba_count) == blob->active.clusters[i]) { + lba_count += lba_per_cluster; + continue; + } + desc->extents[extent_idx].cluster_idx = lba / lba_per_cluster; + desc->extents[extent_idx].length = lba_count / lba_per_cluster; + extent_idx++; + + cur_sz += sizeof(desc->extents[extent_idx]); + + if (buf_sz < cur_sz) { + /* If we ran 
out of buffer space, return */ + desc->length = sizeof(desc->extents[0]) * extent_idx; + *next_cluster = i; + return; + } + + lba = blob->active.clusters[i]; + lba_count = lba_per_cluster; + } + + desc->extents[extent_idx].cluster_idx = lba / lba_per_cluster; + desc->extents[extent_idx].length = lba_count / lba_per_cluster; + extent_idx++; + + desc->length = sizeof(desc->extents[0]) * extent_idx; + *next_cluster = blob->active.num_clusters; + + return; +} + +static int +_spdk_blob_serialize(const struct spdk_blob *blob, struct spdk_blob_md_page **pages, + uint32_t *page_count) +{ + struct spdk_blob_md_page *cur_page; + const struct spdk_xattr *xattr; + int rc; + uint8_t *buf; + size_t remaining_sz; + + assert(pages != NULL); + assert(page_count != NULL); + assert(blob != NULL); + assert(blob->state == SPDK_BLOB_STATE_SYNCING); + + *pages = NULL; + *page_count = 0; + + /* A blob always has at least 1 page, even if it has no descriptors */ + rc = _spdk_blob_serialize_add_page(blob, pages, page_count, &cur_page); + if (rc < 0) { + return rc; + } + + buf = (uint8_t *)cur_page->descriptors; + remaining_sz = sizeof(cur_page->descriptors); + + /* Serialize xattrs */ + TAILQ_FOREACH(xattr, &blob->xattrs, link) { + size_t required_sz = 0; + rc = _spdk_blob_serialize_xattr(xattr, + buf, remaining_sz, + &required_sz); + if (rc < 0) { + /* Need to add a new page to the chain */ + rc = _spdk_blob_serialize_add_page(blob, pages, page_count, + &cur_page); + if (rc < 0) { + spdk_free(*pages); + *pages = NULL; + *page_count = 0; + return rc; + } + + buf = (uint8_t *)cur_page->descriptors; + remaining_sz = sizeof(cur_page->descriptors); + + /* Try again */ + required_sz = 0; + rc = _spdk_blob_serialize_xattr(xattr, + buf, remaining_sz, + &required_sz); + + if (rc < 0) { + spdk_free(*pages); + *pages = NULL; + *page_count = 0; + return -1; + } + } + + remaining_sz -= required_sz; + buf += required_sz; + } + + /* Serialize extents */ + uint64_t last_cluster = 0; + while (last_cluster 
< blob->active.num_clusters) { + _spdk_blob_serialize_extent(blob, last_cluster, &last_cluster, + buf, remaining_sz); + + if (last_cluster == blob->active.num_clusters) { + break; + } + + rc = _spdk_blob_serialize_add_page(blob, pages, page_count, + &cur_page); + if (rc < 0) { + return rc; + } + + buf = (uint8_t *)cur_page->descriptors; + remaining_sz = sizeof(cur_page->descriptors); + } + + return 0; +} + +struct spdk_blob_load_ctx { + struct spdk_blob *blob; + + struct spdk_blob_md_page *pages; + uint32_t num_pages; + + spdk_bs_sequence_cpl cb_fn; + void *cb_arg; +}; + +static void +_spdk_blob_load_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_load_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + struct spdk_blob_md_page *page; + int rc; + + page = &ctx->pages[ctx->num_pages - 1]; + + if (page->next != SPDK_INVALID_MD_PAGE) { + uint32_t next_page = page->next; + uint64_t next_lba = _spdk_bs_page_to_lba(blob->bs, blob->bs->md_start + next_page); + + + assert(next_lba < (blob->bs->md_start + blob->bs->md_len)); + + /* Read the next page */ + ctx->num_pages++; + ctx->pages = spdk_realloc(ctx->pages, (sizeof(*page) * ctx->num_pages), + sizeof(*page), NULL); + if (ctx->pages == NULL) { + ctx->cb_fn(seq, ctx->cb_arg, -ENOMEM); + free(ctx); + return; + } + + spdk_bs_sequence_read(seq, &ctx->pages[ctx->num_pages - 1], + next_lba, + _spdk_bs_byte_to_lba(blob->bs, sizeof(*page)), + _spdk_blob_load_cpl, ctx); + return; + } + + /* Parse the pages */ + rc = _spdk_blob_parse(ctx->pages, ctx->num_pages, blob); + + _spdk_blob_mark_clean(blob); + + ctx->cb_fn(seq, ctx->cb_arg, rc); + + /* Free the memory */ + spdk_free(ctx->pages); + free(ctx); +} + +/* Load a blob from disk given a blobid */ +static void +_spdk_blob_load(spdk_bs_sequence_t *seq, struct spdk_blob *blob, + spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_blob_load_ctx *ctx; + struct spdk_blob_store *bs; + uint32_t page_num; + uint64_t lba; + + assert(blob != 
NULL); + assert(blob->state == SPDK_BLOB_STATE_CLEAN || + blob->state == SPDK_BLOB_STATE_DIRTY); + + bs = blob->bs; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(seq, cb_arg, -ENOMEM); + return; + } + + ctx->blob = blob; + ctx->pages = spdk_realloc(ctx->pages, sizeof(struct spdk_blob_md_page), + sizeof(struct spdk_blob_md_page), NULL); + if (!ctx->pages) { + free(ctx); + cb_fn(seq, cb_arg, -ENOMEM); + return; + } + ctx->num_pages = 1; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + page_num = _spdk_bs_blobid_to_page(blob->id); + lba = _spdk_bs_page_to_lba(blob->bs, bs->md_start + page_num); + + blob->state = SPDK_BLOB_STATE_LOADING; + + spdk_bs_sequence_read(seq, &ctx->pages[0], lba, + _spdk_bs_byte_to_lba(bs, sizeof(struct spdk_blob_md_page)), + _spdk_blob_load_cpl, ctx); +} + +struct spdk_blob_persist_ctx { + struct spdk_blob *blob; + + struct spdk_blob_md_page *pages; + + uint64_t idx; + + spdk_bs_sequence_cpl cb_fn; + void *cb_arg; +}; + +static void +_spdk_blob_persist_complete(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_persist_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + + if (bserrno == 0) { + _spdk_blob_mark_clean(blob); + } + + /* Call user callback */ + ctx->cb_fn(seq, ctx->cb_arg, bserrno); + + /* Free the memory */ + spdk_free(ctx->pages); + free(ctx); +} + +static void +_spdk_blob_persist_unmap_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_persist_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + struct spdk_blob_store *bs = blob->bs; + void *tmp; + size_t i; + + /* Release all clusters that were truncated */ + for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) { + uint32_t cluster_num = _spdk_bs_lba_to_cluster(bs, blob->active.clusters[i]); + + _spdk_bs_release_cluster(bs, cluster_num); + } + + if (blob->active.num_clusters == 0) { + free(blob->active.clusters); + blob->active.clusters = NULL; + 
blob->active.cluster_array_size = 0; + } else { + tmp = realloc(blob->active.clusters, sizeof(uint64_t) * blob->active.num_clusters); + assert(tmp != NULL); + blob->active.clusters = tmp; + blob->active.cluster_array_size = blob->active.num_clusters; + } + + _spdk_blob_persist_complete(seq, ctx, bserrno); +} + +static void +_spdk_blob_persist_unmap_clusters(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_persist_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + struct spdk_blob_store *bs = blob->bs; + spdk_bs_batch_t *batch; + size_t i; + + /* Clusters don't move around in blobs. The list shrinks or grows + * at the end, but no changes ever occur in the middle of the list. + */ + + batch = spdk_bs_sequence_to_batch(seq, _spdk_blob_persist_unmap_clusters_cpl, ctx); + + /* Unmap all clusters that were truncated */ + for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) { + uint64_t lba = blob->active.clusters[i]; + uint32_t lba_count = _spdk_bs_cluster_to_lba(bs, 1); + + spdk_bs_batch_unmap(batch, lba, lba_count); + } + + spdk_bs_batch_close(batch); +} + +static void +_spdk_blob_persist_unmap_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_persist_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + struct spdk_blob_store *bs = blob->bs; + size_t i; + + /* This loop starts at 1 because the first page is special and handled + * below. The pages (except the first) are never written in place, + * so any pages in the clean list must be unmapped. 
+ */ + for (i = 1; i < blob->clean.num_pages; i++) { + spdk_bit_array_clear(bs->used_md_pages, blob->clean.pages[i]); + } + + if (blob->active.num_pages == 0) { + uint32_t page_num; + + page_num = _spdk_bs_blobid_to_page(blob->id); + spdk_bit_array_clear(bs->used_md_pages, page_num); + } + + /* Move on to unmapping clusters */ + _spdk_blob_persist_unmap_clusters(seq, ctx, 0); +} + +static void +_spdk_blob_persist_unmap_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_persist_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + struct spdk_blob_store *bs = blob->bs; + uint64_t lba; + uint32_t lba_count; + spdk_bs_batch_t *batch; + size_t i; + + batch = spdk_bs_sequence_to_batch(seq, _spdk_blob_persist_unmap_pages_cpl, ctx); + + lba_count = _spdk_bs_byte_to_lba(bs, sizeof(struct spdk_blob_md_page)); + + /* This loop starts at 1 because the first page is special and handled + * below. The pages (except the first) are never written in place, + * so any pages in the clean list must be unmapped. + */ + for (i = 1; i < blob->clean.num_pages; i++) { + lba = _spdk_bs_page_to_lba(bs, bs->md_start + blob->clean.pages[i]); + + spdk_bs_batch_unmap(batch, lba, lba_count); + } + + /* The first page will only be unmapped if this is a delete. 
*/ + if (blob->active.num_pages == 0) { + uint32_t page_num; + + /* The first page in the metadata goes where the blobid indicates */ + page_num = _spdk_bs_blobid_to_page(blob->id); + lba = _spdk_bs_page_to_lba(bs, bs->md_start + page_num); + + spdk_bs_batch_unmap(batch, lba, lba_count); + } + + spdk_bs_batch_close(batch); +} + +static void +_spdk_blob_persist_write_page_root(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_persist_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + struct spdk_blob_store *bs = blob->bs; + uint64_t lba; + uint32_t lba_count; + struct spdk_blob_md_page *page; + + if (blob->active.num_pages == 0) { + /* Move on to the next step */ + _spdk_blob_persist_unmap_pages(seq, ctx, 0); + return; + } + + lba_count = _spdk_bs_byte_to_lba(bs, sizeof(*page)); + + page = &ctx->pages[0]; + /* The first page in the metadata goes where the blobid indicates */ + lba = _spdk_bs_page_to_lba(bs, bs->md_start + _spdk_bs_blobid_to_page(blob->id)); + + spdk_bs_sequence_write(seq, page, lba, lba_count, + _spdk_blob_persist_unmap_pages, ctx); +} + +static void +_spdk_blob_persist_write_page_chain(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob_persist_ctx *ctx = cb_arg; + struct spdk_blob *blob = ctx->blob; + struct spdk_blob_store *bs = blob->bs; + uint64_t lba; + uint32_t lba_count; + struct spdk_blob_md_page *page; + spdk_bs_batch_t *batch; + size_t i; + + /* Clusters don't move around in blobs. The list shrinks or grows + * at the end, but no changes ever occur in the middle of the list. + */ + + lba_count = _spdk_bs_byte_to_lba(bs, sizeof(*page)); + + batch = spdk_bs_sequence_to_batch(seq, _spdk_blob_persist_write_page_root, ctx); + + /* This starts at 1. 
The root page is not written until + * all of the others are finished + */ + for (i = 1; i < blob->active.num_pages; i++) { + page = &ctx->pages[i]; + assert(page->sequence_num == i); + + lba = _spdk_bs_page_to_lba(bs, bs->md_start + blob->active.pages[i]); + + spdk_bs_batch_write(batch, page, lba, lba_count); + } + + spdk_bs_batch_close(batch); +} + +static int +_spdk_resize_blob(struct spdk_blob *blob, uint64_t sz) +{ + uint64_t i; + uint64_t *tmp; + uint64_t lfc; /* lowest free cluster */ + struct spdk_blob_store *bs; + + bs = blob->bs; + + assert(blob->state != SPDK_BLOB_STATE_LOADING && + blob->state != SPDK_BLOB_STATE_SYNCING); + + if (blob->active.num_clusters == sz) { + return 0; + } + + if (blob->active.num_clusters < blob->active.cluster_array_size) { + /* If this blob was resized to be larger, then smaller, then + * larger without syncing, then the cluster array already + * contains spare assigned clusters we can use. + */ + blob->active.num_clusters = spdk_min(blob->active.cluster_array_size, + sz); + } + + blob->state = SPDK_BLOB_STATE_DIRTY; + + /* Do two passes - one to verify that we can obtain enough clusters + * and another to actually claim them. + */ + + lfc = 0; + for (i = blob->active.num_clusters; i < sz; i++) { + lfc = spdk_bit_array_find_first_clear(bs->used_clusters, lfc); + if (lfc >= bs->total_clusters) { + /* No more free clusters. Cannot satisfy the request */ + assert(false); + return -1; + } + lfc++; + } + + if (sz > blob->active.num_clusters) { + /* Expand the cluster array if necessary. + * We only shrink the array when persisting. 
+ */ + tmp = realloc(blob->active.clusters, sizeof(uint64_t) * sz); + if (sz > 0 && tmp == NULL) { + assert(false); + return -1; + } + blob->active.clusters = tmp; + blob->active.cluster_array_size = sz; + } + + lfc = 0; + for (i = blob->active.num_clusters; i < sz; i++) { + lfc = spdk_bit_array_find_first_clear(bs->used_clusters, lfc); + SPDK_TRACELOG(SPDK_TRACE_BLOB, "Claiming cluster %lu for blob %lu\n", lfc, blob->id); + _spdk_bs_claim_cluster(bs, lfc); + blob->active.clusters[i] = _spdk_bs_cluster_to_lba(bs, lfc); + lfc++; + } + + blob->active.num_clusters = sz; + + return 0; +} + +/* Write a blob to disk */ +static void +_spdk_blob_persist(spdk_bs_sequence_t *seq, struct spdk_blob *blob, + spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_blob_persist_ctx *ctx; + int rc; + uint64_t i; + uint32_t page_num; + struct spdk_blob_store *bs; + + assert(blob != NULL); + assert(blob->state == SPDK_BLOB_STATE_CLEAN || + blob->state == SPDK_BLOB_STATE_DIRTY); + + if (blob->state == SPDK_BLOB_STATE_CLEAN) { + cb_fn(seq, cb_arg, 0); + return; + } + + bs = blob->bs; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(seq, cb_arg, -ENOMEM); + return; + } + ctx->blob = blob; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + blob->state = SPDK_BLOB_STATE_SYNCING; + + if (blob->active.num_pages == 0) { + /* This is the signal that the blob should be deleted. + * Immediately jump to the clean up routine. 
*/ + assert(blob->clean.num_pages > 0); + ctx->idx = blob->clean.num_pages - 1; + _spdk_blob_persist_unmap_pages(seq, ctx, 0); + return; + + } + + /* Generate the new metadata */ + rc = _spdk_blob_serialize(blob, &ctx->pages, &blob->active.num_pages); + if (rc < 0) { + free(ctx); + cb_fn(seq, cb_arg, rc); + return; + } + + assert(blob->active.num_pages >= 1); + + /* Resize the cache of page indices */ + blob->active.pages = realloc(blob->active.pages, + blob->active.num_pages * sizeof(*blob->active.pages)); + if (!blob->active.pages) { + free(ctx); + cb_fn(seq, cb_arg, -ENOMEM); + return; + } + + /* Assign this metadata to pages. This requires two passes - + * one to verify that there are enough pages and a second + * to actually claim them. */ + page_num = 0; + /* Note that this loop starts at one. The first page location is fixed by the blobid. */ + for (i = 1; i < blob->active.num_pages; i++) { + page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num); + if (page_num >= spdk_bit_array_capacity(bs->used_md_pages)) { + spdk_free(ctx->pages); + free(ctx); + blob->state = SPDK_BLOB_STATE_DIRTY; + cb_fn(seq, cb_arg, -ENOMEM); + return; + } + page_num++; + } + + page_num = 0; + blob->active.pages[0] = _spdk_bs_blobid_to_page(blob->id); + for (i = 1; i < blob->active.num_pages; i++) { + page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num); + ctx->pages[i - 1].next = page_num; + blob->active.pages[i] = page_num; + spdk_bit_array_set(bs->used_md_pages, page_num); + SPDK_TRACELOG(SPDK_TRACE_BLOB, "Claiming page %u for blob %lu\n", page_num, blob->id); + page_num++; + } + + /* Start writing the metadata from last page to first */ + ctx->idx = blob->active.num_pages - 1; + _spdk_blob_persist_write_page_chain(seq, ctx, 0); +} + +static void +_spdk_blob_request_submit_rw(struct spdk_blob *blob, struct spdk_io_channel *_channel, + void *payload, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg, bool read) +{ + 
spdk_bs_batch_t *batch; + struct spdk_bs_cpl cpl; + uint64_t lba; + uint32_t lba_count; + uint8_t *buf; + uint64_t page; + + assert(blob != NULL); + + if (offset + length > blob->active.num_clusters * blob->bs->pages_per_cluster) { + cb_fn(cb_arg, -EINVAL); + return; + } + + cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC; + cpl.u.blob_basic.cb_fn = cb_fn; + cpl.u.blob_basic.cb_arg = cb_arg; + + batch = spdk_bs_batch_open(_channel, &cpl); + if (!batch) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + length = _spdk_bs_page_to_lba(blob->bs, length); + page = offset; + buf = payload; + while (length > 0) { + lba = _spdk_bs_blob_page_to_lba(blob, page); + lba_count = spdk_min(length, + _spdk_bs_page_to_lba(blob->bs, + _spdk_bs_num_pages_to_cluster_boundary(blob, page))); + + if (read) { + spdk_bs_batch_read(batch, buf, lba, lba_count); + } else { + spdk_bs_batch_write(batch, buf, lba, lba_count); + } + + length -= lba_count; + buf += _spdk_bs_lba_to_byte(blob->bs, lba_count); + page += _spdk_bs_lba_to_page(blob->bs, lba_count); + } + + spdk_bs_batch_close(batch); +} + +static struct spdk_blob * +_spdk_blob_lookup(struct spdk_blob_store *bs, spdk_blob_id blobid) +{ + struct spdk_blob *blob; + + TAILQ_FOREACH(blob, &bs->blobs, link) { + if (blob->id == blobid) { + return blob; + } + } + + return NULL; +} + +static int +_spdk_bs_channel_create(void *io_device, uint32_t priority, void *ctx_buf, void *unique_ctx) +{ + struct spdk_blob_store *bs = io_device; + struct spdk_bs_dev *dev = bs->dev; + struct spdk_bs_channel *channel = ctx_buf; + uint32_t max_ops = *(uint32_t *)unique_ctx; + uint32_t i; + + channel->req_mem = calloc(max_ops, sizeof(struct spdk_bs_request_set)); + if (!channel->req_mem) { + free(channel); + return -1; + } + + TAILQ_INIT(&channel->reqs); + + for (i = 0; i < max_ops; i++) { + TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link); + } + + channel->bs = bs; + channel->dev = dev; + channel->dev_channel = dev->create_channel(dev); + + return 0; +} + +static 
void +_spdk_bs_channel_destroy(void *io_device, void *ctx_buf) +{ + struct spdk_bs_channel *channel = ctx_buf; + + free(channel->req_mem); + channel->dev->destroy_channel(channel->dev, channel->dev_channel); +} + +static void +_spdk_bs_free(struct spdk_blob_store *bs) +{ + struct spdk_blob *blob, *blob_tmp; + + spdk_bs_unregister_md_thread(bs); + spdk_io_device_unregister(bs); + + TAILQ_FOREACH_SAFE(blob, &bs->blobs, link, blob_tmp) { + TAILQ_REMOVE(&bs->blobs, blob, link); + _spdk_blob_free(blob); + } + + spdk_bit_array_free(&bs->used_md_pages); + spdk_bit_array_free(&bs->used_clusters); + + bs->dev->destroy(bs->dev); + free(bs); +} + +void +spdk_bs_opts_init(struct spdk_bs_opts *opts) +{ + opts->cluster_sz = SPDK_BLOB_OPTS_CLUSTER_SZ; + opts->num_md_pages = SPDK_BLOB_OPTS_NUM_MD_PAGES; + opts->max_md_ops = SPDK_BLOB_OPTS_MAX_MD_OPS; +} + +static struct spdk_blob_store * +_spdk_bs_alloc(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts) +{ + struct spdk_blob_store *bs; + + bs = calloc(1, sizeof(struct spdk_blob_store)); + if (!bs) { + return NULL; + } + + TAILQ_INIT(&bs->blobs); + bs->dev = dev; + + /* + * Do not use _spdk_bs_lba_to_cluster() here since blockcnt may not be an + * even multiple of the cluster size. 
+ */ + bs->cluster_sz = opts->cluster_sz; + bs->total_clusters = dev->blockcnt / (bs->cluster_sz / dev->blocklen); + bs->pages_per_cluster = bs->cluster_sz / sizeof(struct spdk_blob_md_page); + bs->num_free_clusters = bs->total_clusters; + bs->used_clusters = spdk_bit_array_create(bs->total_clusters); + if (bs->used_clusters == NULL) { + _spdk_bs_free(bs); + return NULL; + } + + bs->max_md_ops = opts->max_md_ops; + bs->super_blob = SPDK_BLOBID_INVALID; + + /* The metadata is assumed to be at least 1 page */ + bs->used_md_pages = spdk_bit_array_create(1); + + spdk_io_device_register(bs, _spdk_bs_channel_create, _spdk_bs_channel_destroy, + sizeof(struct spdk_bs_channel)); + spdk_bs_register_md_thread(bs); + + return bs; +} + +/* START spdk_bs_load */ + +struct spdk_bs_load_ctx { + struct spdk_blob_store *bs; + struct spdk_bs_super_block *super; + + struct spdk_bs_md_mask *mask; +}; + +static void +_spdk_bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + uint32_t i, j; + int rc; + + /* The type must be correct */ + assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS); + /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */ + assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof( + struct spdk_blob_md_page) * 8)); + /* The length of the mask must be exactly equal to the total number of clusters*/ + assert(ctx->mask->length == ctx->bs->total_clusters); + + rc = spdk_bit_array_resize(&ctx->bs->used_clusters, ctx->bs->total_clusters); + if (rc < 0) { + spdk_free(ctx->super); + spdk_free(ctx->mask); + _spdk_bs_free(ctx->bs); + free(ctx); + spdk_bs_sequence_finish(seq, -ENOMEM); + return; + } + + ctx->bs->num_free_clusters = ctx->bs->total_clusters; + for (i = 0; i < ctx->mask->length / 8; i++) { + uint8_t segment = ctx->mask->mask[i]; + for (j = 0; segment && (j < 8); j++) { + if (segment & 1U) { + 
spdk_bit_array_set(ctx->bs->used_clusters, (i * 8) + j); + assert(ctx->bs->num_free_clusters > 0); + ctx->bs->num_free_clusters--; + } + segment >>= 1U; + } + } + + spdk_free(ctx->super); + spdk_free(ctx->mask); + free(ctx); + + spdk_bs_sequence_finish(seq, bserrno); +} + +static void +_spdk_bs_load_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + uint64_t lba, lba_count; + uint32_t i, j; + int rc; + + /* The type must be correct */ + assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_PAGES); + /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */ + assert(ctx->mask->length <= (ctx->super->used_page_mask_len * sizeof(struct spdk_blob_md_page) * + 8)); + /* The length of the mask must be exactly equal to the size (in pages) of the metadata region */ + assert(ctx->mask->length == ctx->super->md_len); + + rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->mask->length); + if (rc < 0) { + spdk_free(ctx->super); + spdk_free(ctx->mask); + _spdk_bs_free(ctx->bs); + free(ctx); + spdk_bs_sequence_finish(seq, -ENOMEM); + return; + } + + for (i = 0; i < ctx->mask->length / 8; i++) { + uint8_t segment = ctx->mask->mask[i]; + for (j = 0; segment && (j < 8); j++) { + if (segment & 1U) { + spdk_bit_array_set(ctx->bs->used_md_pages, (i * 8) + j); + } + segment >>= 1U; + } + } + spdk_free(ctx->mask); + + /* Read the used clusters mask */ + ctx->mask = spdk_zmalloc(ctx->super->used_cluster_mask_len * sizeof(struct spdk_blob_md_page), + 0x1000, NULL); + if (!ctx->mask) { + spdk_free(ctx->super); + _spdk_bs_free(ctx->bs); + free(ctx); + spdk_bs_sequence_finish(seq, -ENOMEM); + return; + } + lba = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start); + lba_count = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len); + spdk_bs_sequence_read(seq, ctx->mask, lba, lba_count, + _spdk_bs_load_used_clusters_cpl, ctx); +} + +static void 
+_spdk_bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_load_ctx *ctx = cb_arg; + uint64_t lba, lba_count; + + if (ctx->super->version != SPDK_BS_VERSION) { + spdk_free(ctx->super); + _spdk_bs_free(ctx->bs); + free(ctx); + spdk_bs_sequence_finish(seq, -EILSEQ); + return; + } + + if (memcmp(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG, + sizeof(ctx->super->signature)) != 0) { + spdk_free(ctx->super); + _spdk_bs_free(ctx->bs); + free(ctx); + spdk_bs_sequence_finish(seq, -EILSEQ); + return; + } + + if (ctx->super->clean != 1) { + /* TODO: ONLY CLEAN SHUTDOWN IS CURRENTLY SUPPORTED. + * All of the necessary data to recover is available + * on disk - the code just has not been written yet. + */ + assert(false); + spdk_free(ctx->super); + _spdk_bs_free(ctx->bs); + free(ctx); + spdk_bs_sequence_finish(seq, -EILSEQ); + return; + } + ctx->super->clean = 0; + + /* Parse the super block */ + ctx->bs->cluster_sz = ctx->super->cluster_size; + ctx->bs->total_clusters = ctx->bs->dev->blockcnt / (ctx->bs->cluster_sz / ctx->bs->dev->blocklen); + ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / sizeof(struct spdk_blob_md_page); + ctx->bs->md_start = ctx->super->md_start; + ctx->bs->md_len = ctx->super->md_len; + + /* Read the used pages mask */ + ctx->mask = spdk_zmalloc(ctx->super->used_page_mask_len * sizeof(struct spdk_blob_md_page), 0x1000, + NULL); + if (!ctx->mask) { + spdk_free(ctx->super); + _spdk_bs_free(ctx->bs); + free(ctx); + spdk_bs_sequence_finish(seq, -ENOMEM); + return; + } + lba = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start); + lba_count = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len); + spdk_bs_sequence_read(seq, ctx->mask, lba, lba_count, + _spdk_bs_load_used_pages_cpl, ctx); +} + +void +spdk_bs_load(struct spdk_bs_dev *dev, + spdk_bs_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_blob_store *bs; + struct spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + struct 
spdk_bs_load_ctx *ctx; + struct spdk_bs_opts opts = {}; + + SPDK_TRACELOG(SPDK_TRACE_BLOB, "Loading blobstore from dev %p\n", dev); + + spdk_bs_opts_init(&opts); + + bs = _spdk_bs_alloc(dev, &opts); + if (!bs) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + _spdk_bs_free(bs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + ctx->bs = bs; + + /* Allocate memory for the super block */ + ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL); + if (!ctx->super) { + free(ctx); + _spdk_bs_free(bs); + return; + } + + cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE; + cpl.u.bs_handle.cb_fn = cb_fn; + cpl.u.bs_handle.cb_arg = cb_arg; + cpl.u.bs_handle.bs = bs; + + seq = spdk_bs_sequence_start(bs->md_channel, &cpl); + if (!seq) { + spdk_free(ctx->super); + free(ctx); + _spdk_bs_free(bs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + /* Read the super block */ + spdk_bs_sequence_read(seq, ctx->super, _spdk_bs_page_to_lba(bs, 0), + _spdk_bs_byte_to_lba(bs, sizeof(*ctx->super)), + _spdk_bs_load_super_cpl, ctx); +} + +/* END spdk_bs_load */ + +/* START spdk_bs_init */ + +struct spdk_bs_init_ctx { + struct spdk_blob_store *bs; + struct spdk_bs_super_block *super; +}; + +static void +_spdk_bs_init_persist_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_init_ctx *ctx = cb_arg; + + spdk_free(ctx->super); + free(ctx); + + spdk_bs_sequence_finish(seq, bserrno); +} + +static void +_spdk_bs_init_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_init_ctx *ctx = cb_arg; + + /* Write super block */ + spdk_bs_sequence_write(seq, ctx->super, _spdk_bs_page_to_lba(ctx->bs, 0), + _spdk_bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)), + _spdk_bs_init_persist_super_cpl, ctx); +} + +void +spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o, + spdk_bs_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_bs_init_ctx *ctx; + struct spdk_blob_store *bs; + struct 
spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + uint64_t num_md_pages; + uint32_t i; + struct spdk_bs_opts opts = {}; + int rc; + + SPDK_TRACELOG(SPDK_TRACE_BLOB, "Initializing blobstore on dev %p\n", dev); + + if (o) { + opts = *o; + } else { + spdk_bs_opts_init(&opts); + } + + bs = _spdk_bs_alloc(dev, &opts); + if (!bs) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + if (opts.num_md_pages == UINT32_MAX) { + /* By default, allocate 1 page per cluster. + * Technically, this over-allocates metadata + * because more metadata will reduce the number + * of usable clusters. This can be addressed with + * more complex math in the future. + */ + bs->md_len = bs->total_clusters; + } else { + bs->md_len = opts.num_md_pages; + } + + rc = spdk_bit_array_resize(&bs->used_md_pages, bs->md_len); + if (rc < 0) { + _spdk_bs_free(bs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + _spdk_bs_free(bs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + ctx->bs = bs; + + /* Allocate memory for the super block */ + ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL); + if (!ctx->super) { + free(ctx); + _spdk_bs_free(bs); + return; + } + memcpy(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG, + sizeof(ctx->super->signature)); + ctx->super->version = SPDK_BS_VERSION; + ctx->super->length = sizeof(*ctx->super); + ctx->super->super_blob = bs->super_blob; + ctx->super->clean = 0; + ctx->super->cluster_size = bs->cluster_sz; + + /* Calculate how many pages the metadata consumes at the front + * of the disk. + */ + + /* The super block uses 1 page */ + num_md_pages = 1; + + /* The used_md_pages mask requires 1 bit per metadata page, rounded + * up to the nearest page, plus a header. 
+ */ + ctx->super->used_page_mask_start = num_md_pages; + ctx->super->used_page_mask_len = divide_round_up(sizeof(struct spdk_bs_md_mask) + + divide_round_up(bs->md_len, 8), + sizeof(struct spdk_blob_md_page)); + num_md_pages += ctx->super->used_page_mask_len; + + /* The used_clusters mask requires 1 bit per cluster, rounded + * up to the nearest page, plus a header. + */ + ctx->super->used_cluster_mask_start = num_md_pages; + ctx->super->used_cluster_mask_len = divide_round_up(sizeof(struct spdk_bs_md_mask) + + divide_round_up(bs->total_clusters, 8), + sizeof(struct spdk_blob_md_page)); + num_md_pages += ctx->super->used_cluster_mask_len; + + /* The metadata region size was chosen above */ + ctx->super->md_start = bs->md_start = num_md_pages; + ctx->super->md_len = bs->md_len; + num_md_pages += bs->md_len; + + /* Claim all of the clusters used by the metadata */ + for (i = 0; i < divide_round_up(num_md_pages, bs->pages_per_cluster); i++) { + _spdk_bs_claim_cluster(bs, i); + } + + cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE; + cpl.u.bs_handle.cb_fn = cb_fn; + cpl.u.bs_handle.cb_arg = cb_arg; + cpl.u.bs_handle.bs = bs; + + seq = spdk_bs_sequence_start(bs->md_channel, &cpl); + if (!seq) { + spdk_free(ctx->super); + free(ctx); + _spdk_bs_free(bs); + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + /* TRIM the entire device */ + spdk_bs_sequence_unmap(seq, 0, bs->dev->blockcnt, _spdk_bs_init_trim_cpl, ctx); +} + +/* END spdk_bs_init */ + +/* START spdk_bs_unload */ + +struct spdk_bs_unload_ctx { + struct spdk_blob_store *bs; + struct spdk_bs_super_block *super; + + struct spdk_bs_md_mask *mask; +}; + +static void +_spdk_bs_unload_write_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_unload_ctx *ctx = cb_arg; + + spdk_free(ctx->super); + + spdk_bs_sequence_finish(seq, bserrno); + + _spdk_bs_free(ctx->bs); + free(ctx); +} + +static void +_spdk_bs_unload_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct 
spdk_bs_unload_ctx *ctx = cb_arg; + + spdk_free(ctx->mask); + + /* Update the values in the super block */ + ctx->super->super_blob = ctx->bs->super_blob; + ctx->super->clean = 1; + + spdk_bs_sequence_write(seq, ctx->super, _spdk_bs_page_to_lba(ctx->bs, 0), + _spdk_bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)), + _spdk_bs_unload_write_super_cpl, ctx); +} + +static void +_spdk_bs_unload_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_unload_ctx *ctx = cb_arg; + uint32_t i; + uint64_t lba, lba_count; + + spdk_free(ctx->mask); + + /* Write out the used clusters mask */ + ctx->mask = spdk_zmalloc(ctx->super->used_cluster_mask_len * sizeof(struct spdk_blob_md_page), + 0x1000, NULL); + if (!ctx->mask) { + spdk_free(ctx->super); + free(ctx); + spdk_bs_sequence_finish(seq, -ENOMEM); + return; + } + + ctx->mask->type = SPDK_MD_MASK_TYPE_USED_CLUSTERS; + ctx->mask->length = ctx->bs->total_clusters; + assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_clusters)); + + i = 0; + while (true) { + i = spdk_bit_array_find_first_set(ctx->bs->used_clusters, i); + if (i > ctx->mask->length) { + break; + } + ctx->mask->mask[i / 8] |= 1U << (i % 8); + i++; + } + + lba = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start); + lba_count = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len); + spdk_bs_sequence_write(seq, ctx->mask, lba, lba_count, + _spdk_bs_unload_write_used_clusters_cpl, ctx); +} + +static void +_spdk_bs_unload_read_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_bs_unload_ctx *ctx = cb_arg; + uint32_t i; + uint64_t lba, lba_count; + + /* Write out the used page mask */ + ctx->mask = spdk_zmalloc(ctx->super->used_page_mask_len * sizeof(struct spdk_blob_md_page), + 0x1000, NULL); + if (!ctx->mask) { + spdk_free(ctx->super); + free(ctx); + spdk_bs_sequence_finish(seq, -ENOMEM); + return; + } + + ctx->mask->type = SPDK_MD_MASK_TYPE_USED_PAGES; + 
ctx->mask->length = ctx->super->md_len; + assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_md_pages)); + + i = 0; + while (true) { + i = spdk_bit_array_find_first_set(ctx->bs->used_md_pages, i); + if (i > ctx->mask->length) { + break; + } + ctx->mask->mask[i / 8] |= 1U << (i % 8); + i++; + } + + lba = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start); + lba_count = _spdk_bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len); + spdk_bs_sequence_write(seq, ctx->mask, lba, lba_count, + _spdk_bs_unload_write_used_pages_cpl, ctx); +} + +void +spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_arg) +{ + struct spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + struct spdk_bs_unload_ctx *ctx; + + SPDK_TRACELOG(SPDK_TRACE_BLOB, "Syncing blobstore\n"); + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + ctx->bs = bs; + + ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL); + if (!ctx->super) { + free(ctx); + cb_fn(cb_arg, -ENOMEM); + return; + } + + cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC; + cpl.u.bs_basic.cb_fn = cb_fn; + cpl.u.bs_basic.cb_arg = cb_arg; + + seq = spdk_bs_sequence_start(bs->md_channel, &cpl); + if (!seq) { + spdk_free(ctx->super); + free(ctx); + cb_fn(cb_arg, -ENOMEM); + return; + } + + assert(TAILQ_EMPTY(&bs->blobs)); + + /* Read super block */ + spdk_bs_sequence_read(seq, ctx->super, _spdk_bs_page_to_lba(bs, 0), + _spdk_bs_byte_to_lba(bs, sizeof(*ctx->super)), + _spdk_bs_unload_read_super_cpl, ctx); +} + +/* END spdk_bs_unload */ + +void +spdk_bs_set_super(struct spdk_blob_store *bs, spdk_blob_id blobid, + spdk_bs_op_complete cb_fn, void *cb_arg) +{ + bs->super_blob = blobid; + cb_fn(cb_arg, 0); +} + +void +spdk_bs_get_super(struct spdk_blob_store *bs, + spdk_blob_op_with_id_complete cb_fn, void *cb_arg) +{ + if (bs->super_blob == SPDK_BLOBID_INVALID) { + cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOENT); + } else { + cb_fn(cb_arg, bs->super_blob, 0); + } +} + 
+uint64_t +spdk_bs_get_cluster_size(struct spdk_blob_store *bs) +{ + return bs->cluster_sz; +} + +uint64_t +spdk_bs_get_page_size(struct spdk_blob_store *bs) +{ + return sizeof(struct spdk_blob_md_page); +} + +uint64_t +spdk_bs_free_cluster_count(struct spdk_blob_store *bs) +{ + return bs->num_free_clusters; +} + +int spdk_bs_register_md_thread(struct spdk_blob_store *bs) +{ + bs->md_channel = spdk_get_io_channel(bs, SPDK_IO_PRIORITY_DEFAULT, true, + (void *)&bs->max_md_ops); + + return 0; +} + +int spdk_bs_unregister_md_thread(struct spdk_blob_store *bs) +{ + spdk_put_io_channel(bs->md_channel); + + return 0; +} + +spdk_blob_id spdk_blob_get_id(struct spdk_blob *blob) +{ + assert(blob != NULL); + + return blob->id; +} + +uint64_t spdk_blob_get_num_pages(struct spdk_blob *blob) +{ + assert(blob != NULL); + + return _spdk_bs_cluster_to_page(blob->bs, blob->active.num_clusters); +} + +uint64_t spdk_blob_get_num_clusters(struct spdk_blob *blob) +{ + assert(blob != NULL); + + return blob->active.num_clusters; +} + +/* START spdk_bs_md_create_blob */ + +static void +_spdk_bs_md_create_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob *blob = cb_arg; + + _spdk_blob_free(blob); + + spdk_bs_sequence_finish(seq, bserrno); +} + +void spdk_bs_md_create_blob(struct spdk_blob_store *bs, + spdk_blob_op_with_id_complete cb_fn, void *cb_arg) +{ + struct spdk_blob *blob; + uint32_t page_idx; + struct spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + spdk_blob_id id; + + page_idx = spdk_bit_array_find_first_clear(bs->used_md_pages, 0); + if (page_idx >= spdk_bit_array_capacity(bs->used_md_pages)) { + cb_fn(cb_arg, 0, -ENOMEM); + return; + } + spdk_bit_array_set(bs->used_md_pages, page_idx); + + /* The blob id is a 64 bit number. The lower 32 bits are the page_idx. The upper + * 32 bits are not currently used. Stick a 1 there just to catch bugs where the + * code assumes blob id == page_idx. 
+ */ + id = (1ULL << 32) | page_idx; + + SPDK_TRACELOG(SPDK_TRACE_BLOB, "Creating blob with id %lu at page %u\n", id, page_idx); + + blob = _spdk_blob_alloc(bs, id); + if (!blob) { + cb_fn(cb_arg, 0, -ENOMEM); + return; + } + + cpl.type = SPDK_BS_CPL_TYPE_BLOBID; + cpl.u.blobid.cb_fn = cb_fn; + cpl.u.blobid.cb_arg = cb_arg; + cpl.u.blobid.blobid = blob->id; + + seq = spdk_bs_sequence_start(bs->md_channel, &cpl); + if (!seq) { + free(blob); + cb_fn(cb_arg, 0, -ENOMEM); + return; + } + + _spdk_blob_persist(seq, blob, _spdk_bs_md_create_blob_cpl, blob); +} + +/* END spdk_bs_md_create_blob */ + +/* START spdk_bs_md_resize_blob */ +int +spdk_bs_md_resize_blob(struct spdk_blob *blob, uint64_t sz) +{ + int rc; + + assert(blob != NULL); + + SPDK_TRACELOG(SPDK_TRACE_BLOB, "Resizing blob %lu to %lu clusters\n", blob->id, sz); + + if (sz == blob->active.num_clusters) { + return 0; + } + + rc = _spdk_resize_blob(blob, sz); + if (rc < 0) { + return rc; + } + + return 0; +} + +/* END spdk_bs_md_resize_blob */ + + +/* START spdk_bs_md_delete_blob */ + +static void +_spdk_bs_md_delete_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob *blob = cb_arg; + + _spdk_blob_free(blob); + + spdk_bs_sequence_finish(seq, bserrno); +} + +static void +_spdk_bs_md_delete_open_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob *blob = cb_arg; + + blob->state = SPDK_BLOB_STATE_DIRTY; + blob->active.num_pages = 0; + _spdk_resize_blob(blob, 0); + + _spdk_blob_persist(seq, blob, _spdk_bs_md_delete_blob_cpl, blob); +} + +void +spdk_bs_md_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid, + spdk_blob_op_complete cb_fn, void *cb_arg) +{ + struct spdk_blob *blob; + struct spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + + SPDK_TRACELOG(SPDK_TRACE_BLOB, "Deleting blob %lu\n", blobid); + + blob = _spdk_blob_lookup(bs, blobid); + if (blob) { + assert(blob->open_ref > 0); + cb_fn(cb_arg, -EINVAL); + return; + } + + blob = _spdk_blob_alloc(bs, 
blobid); + if (!blob) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC; + cpl.u.blob_basic.cb_fn = cb_fn; + cpl.u.blob_basic.cb_arg = cb_arg; + + seq = spdk_bs_sequence_start(bs->md_channel, &cpl); + if (!seq) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + _spdk_blob_load(seq, blob, _spdk_bs_md_delete_open_cpl, blob); +} + +/* END spdk_bs_md_delete_blob */ + +/* START spdk_bs_md_open_blob */ + +static void +_spdk_bs_md_open_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob *blob = cb_arg; + + blob->open_ref++; + + TAILQ_INSERT_HEAD(&blob->bs->blobs, blob, link); + + spdk_bs_sequence_finish(seq, bserrno); +} + +void spdk_bs_md_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid, + spdk_blob_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_blob *blob; + struct spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + uint32_t page_num; + + SPDK_TRACELOG(SPDK_TRACE_BLOB, "Opening blob %lu\n", blobid); + + blob = _spdk_blob_lookup(bs, blobid); + if (blob) { + blob->open_ref++; + cb_fn(cb_arg, blob, 0); + return; + } + + page_num = _spdk_bs_blobid_to_page(blobid); + if (spdk_bit_array_get(bs->used_md_pages, page_num) == false) { + /* Invalid blobid */ + cb_fn(cb_arg, NULL, -ENOENT); + return; + } + + blob = _spdk_blob_alloc(bs, blobid); + if (!blob) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + cpl.type = SPDK_BS_CPL_TYPE_BLOB_HANDLE; + cpl.u.blob_handle.cb_fn = cb_fn; + cpl.u.blob_handle.cb_arg = cb_arg; + cpl.u.blob_handle.blob = blob; + + seq = spdk_bs_sequence_start(bs->md_channel, &cpl); + if (!seq) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + _spdk_blob_load(seq, blob, _spdk_bs_md_open_blob_cpl, blob); +} + +/* START spdk_bs_md_sync_blob */ +static void +_spdk_blob_sync_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + spdk_bs_sequence_finish(seq, bserrno); +} + +void spdk_bs_md_sync_blob(struct spdk_blob *blob, + spdk_blob_op_complete cb_fn, void *cb_arg) +{ + 
struct spdk_bs_cpl cpl; + spdk_bs_sequence_t *seq; + + assert(blob != NULL); + + SPDK_TRACELOG(SPDK_TRACE_BLOB, "Syncing blob %lu\n", blob->id); + + assert(blob->state != SPDK_BLOB_STATE_LOADING && + blob->state != SPDK_BLOB_STATE_SYNCING); + + if (blob->state == SPDK_BLOB_STATE_CLEAN) { + cb_fn(cb_arg, 0); + return; + } + + cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC; + cpl.u.blob_basic.cb_fn = cb_fn; + cpl.u.blob_basic.cb_arg = cb_arg; + + seq = spdk_bs_sequence_start(blob->bs->md_channel, &cpl); + if (!seq) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + _spdk_blob_persist(seq, blob, _spdk_blob_sync_cpl, blob); +} + +/* END spdk_bs_md_sync_blob */ + +/* START spdk_bs_md_close_blob */ + +static void +_spdk_blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) +{ + struct spdk_blob **blob = cb_arg; + + if ((*blob)->open_ref == 0) { + TAILQ_REMOVE(&(*blob)->bs->blobs, (*blob), link); + _spdk_blob_free((*blob)); + } + + *blob = NULL; + + spdk_bs_sequence_finish(seq, bserrno); +} + +void spdk_bs_md_close_blob(struct spdk_blob **b, + spdk_blob_op_complete cb_fn, void *cb_arg) +{ + struct spdk_bs_cpl cpl; + struct spdk_blob *blob; + spdk_bs_sequence_t *seq; + + assert(b != NULL); + blob = *b; + assert(blob != NULL); + + SPDK_TRACELOG(SPDK_TRACE_BLOB, "Closing blob %lu\n", blob->id); + + assert(blob->state != SPDK_BLOB_STATE_LOADING && + blob->state != SPDK_BLOB_STATE_SYNCING); + + if (blob->open_ref == 0) { + cb_fn(cb_arg, -EBADF); + return; + } + + blob->open_ref--; + + cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC; + cpl.u.blob_basic.cb_fn = cb_fn; + cpl.u.blob_basic.cb_arg = cb_arg; + + seq = spdk_bs_sequence_start(blob->bs->md_channel, &cpl); + if (!seq) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + if (blob->state == SPDK_BLOB_STATE_CLEAN) { + _spdk_blob_close_cpl(seq, b, 0); + return; + } + + /* Sync metadata */ + _spdk_blob_persist(seq, blob, _spdk_blob_close_cpl, b); +} + +/* END spdk_bs_md_close_blob */ + +struct spdk_io_channel *spdk_bs_alloc_io_channel(struct 
spdk_blob_store *bs, + uint32_t priority, uint32_t max_ops) +{ + return spdk_get_io_channel(bs, priority, true, (void *)&max_ops); +} + +void spdk_bs_free_io_channel(struct spdk_io_channel *channel) +{ + spdk_put_io_channel(channel); +} + +void spdk_bs_io_flush_channel(struct spdk_io_channel *channel, + spdk_blob_op_complete cb_fn, void *cb_arg) +{ + /* Flush is synchronous right now */ + cb_fn(cb_arg, 0); +} + +void spdk_bs_io_write_blob(struct spdk_blob *blob, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg) +{ + _spdk_blob_request_submit_rw(blob, channel, payload, offset, length, cb_fn, cb_arg, false); +} + +void spdk_bs_io_read_blob(struct spdk_blob *blob, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_blob_op_complete cb_fn, void *cb_arg) +{ + _spdk_blob_request_submit_rw(blob, channel, payload, offset, length, cb_fn, cb_arg, true); +} + +struct spdk_bs_iter_ctx { + int64_t page_num; + struct spdk_blob_store *bs; + + spdk_blob_op_with_handle_complete cb_fn; + void *cb_arg; +}; + +static void +_spdk_bs_iter_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno) +{ + struct spdk_bs_iter_ctx *ctx = cb_arg; + struct spdk_blob_store *bs = ctx->bs; + spdk_blob_id id; + + if (bserrno == 0) { + ctx->cb_fn(ctx->cb_arg, blob, bserrno); + free(ctx); + return; + } + + ctx->page_num++; + ctx->page_num = spdk_bit_array_find_first_set(bs->used_md_pages, ctx->page_num); + if (ctx->page_num >= spdk_bit_array_capacity(bs->used_md_pages)) { + ctx->cb_fn(ctx->cb_arg, NULL, -ENOENT); + free(ctx); + return; + } + + id = (1ULL << 32) | ctx->page_num; + + blob = _spdk_blob_lookup(bs, id); + if (blob) { + blob->open_ref++; + ctx->cb_fn(ctx->cb_arg, blob, 0); + free(ctx); + return; + } + + spdk_bs_md_open_blob(bs, id, _spdk_bs_iter_cpl, ctx); +} + +void +spdk_bs_md_iter_first(struct spdk_blob_store *bs, + spdk_blob_op_with_handle_complete cb_fn, void *cb_arg) +{ + 
struct spdk_bs_iter_ctx *ctx; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + ctx->page_num = -1; + ctx->bs = bs; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + _spdk_bs_iter_cpl(ctx, NULL, -1); +} + +static void +_spdk_bs_iter_close_cpl(void *cb_arg, int bserrno) +{ + struct spdk_bs_iter_ctx *ctx = cb_arg; + + _spdk_bs_iter_cpl(ctx, NULL, -1); +} + +void +spdk_bs_md_iter_next(struct spdk_blob_store *bs, struct spdk_blob **b, + spdk_blob_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_bs_iter_ctx *ctx; + struct spdk_blob *blob; + + assert(b != NULL); + blob = *b; + assert(blob != NULL); + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + ctx->page_num = _spdk_bs_blobid_to_page(blob->id); + ctx->bs = bs; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + /* Close the existing blob */ + spdk_bs_md_close_blob(b, _spdk_bs_iter_close_cpl, ctx); +} + +int +spdk_blob_md_set_xattr(struct spdk_blob *blob, const char *name, const void *value, + uint16_t value_len) +{ + struct spdk_xattr *xattr; + + assert(blob != NULL); + + assert(blob->state != SPDK_BLOB_STATE_LOADING && + blob->state != SPDK_BLOB_STATE_SYNCING); + + TAILQ_FOREACH(xattr, &blob->xattrs, link) { + if (!strcmp(name, xattr->name)) { + free(xattr->value); + xattr->value_len = value_len; + xattr->value = malloc(value_len); + memcpy(xattr->value, value, value_len); + + blob->state = SPDK_BLOB_STATE_DIRTY; + + return 0; + } + } + + /* + * This is probably all going to rewritten, so do not bother checking for failed + * allocations for now. 
+ */ + xattr = calloc(1, sizeof(*xattr)); + xattr->name = strdup(name); + xattr->value_len = value_len; + xattr->value = malloc(value_len); + memcpy(xattr->value, value, value_len); + TAILQ_INSERT_TAIL(&blob->xattrs, xattr, link); + + blob->state = SPDK_BLOB_STATE_DIRTY; + + return 0; +} + +int +spdk_blob_md_remove_xattr(struct spdk_blob *blob, const char *name) +{ + struct spdk_xattr *xattr; + + assert(blob != NULL); + + assert(blob->state != SPDK_BLOB_STATE_LOADING && + blob->state != SPDK_BLOB_STATE_SYNCING); + + TAILQ_FOREACH(xattr, &blob->xattrs, link) { + if (!strcmp(name, xattr->name)) { + TAILQ_REMOVE(&blob->xattrs, xattr, link); + free(xattr->value); + free(xattr->name); + free(xattr); + + blob->state = SPDK_BLOB_STATE_DIRTY; + + return 0; + } + } + + return -ENOENT; +} + +int +spdk_bs_md_get_xattr_value(struct spdk_blob *blob, const char *name, + const void **value, size_t *value_len) +{ + struct spdk_xattr *xattr; + + TAILQ_FOREACH(xattr, &blob->xattrs, link) { + if (!strcmp(name, xattr->name)) { + *value = xattr->value; + *value_len = xattr->value_len; + return 0; + } + } + + return -ENOENT; +} + +struct spdk_xattr_names { + uint32_t count; + const char *names[0]; +}; + +int +spdk_bs_md_get_xattr_names(struct spdk_blob *blob, + struct spdk_xattr_names **names) +{ + struct spdk_xattr *xattr; + int count = 0; + + TAILQ_FOREACH(xattr, &blob->xattrs, link) { + count++; + } + + *names = calloc(1, sizeof(struct spdk_xattr_names) + count * sizeof(char *)); + if (*names == NULL) { + return -ENOMEM; + } + + TAILQ_FOREACH(xattr, &blob->xattrs, link) { + (*names)->names[(*names)->count++] = xattr->name; + } + + return 0; +} + +uint32_t +spdk_xattr_names_get_count(struct spdk_xattr_names *names) +{ + assert(names != NULL); + + return names->count; +} + +const char * +spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index) +{ + if (index >= names->count) { + return NULL; + } + + return names->names[index]; +} + +void +spdk_xattr_names_free(struct 
spdk_xattr_names *names) +{ + free(names); +} + +SPDK_LOG_REGISTER_TRACE_FLAG("blob", SPDK_TRACE_BLOB); diff --git a/lib/blob/blobstore.h b/lib/blob/blobstore.h new file mode 100644 index 000000000..fdc2baffd --- /dev/null +++ b/lib/blob/blobstore.h @@ -0,0 +1,370 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_BLOBSTORE_H +#define SPDK_BLOBSTORE_H + +#include "spdk/assert.h" +#include "spdk/blob.h" +#include "spdk/queue.h" +#include "spdk/util.h" + +/* In Memory Data Structures + * + * The following data structures exist only in memory. + */ + +#define SPDK_BLOB_OPTS_CLUSTER_SZ (1024 * 1024) +#define SPDK_BLOB_OPTS_NUM_MD_PAGES UINT32_MAX +#define SPDK_BLOB_OPTS_MAX_MD_OPS 32 + +struct spdk_xattr { + /* TODO: reorder for best packing */ + uint32_t index; + char *name; + void *value; + uint16_t value_len; + TAILQ_ENTRY(spdk_xattr) link; +}; + +/* The mutable part of the blob data that is sync'd to + * disk. The data in here is both mutable and persistent. + */ +struct spdk_blob_mut_data { + /* Number of data clusters in the blob */ + uint64_t num_clusters; + + /* Array LBAs that are the beginning of a cluster, in + * the order they appear in the blob. + */ + uint64_t *clusters; + + /* The size of the clusters array. This is greater than or + * equal to 'num_clusters'. + */ + size_t cluster_array_size; + + /* Number of metadata pages */ + uint32_t num_pages; + + /* Array of page offsets into the metadata region, in + * the order of the metadata page sequence. + */ + uint32_t *pages; +}; + +enum spdk_blob_state { + /* The blob in-memory version does not match the on-disk + * version. + */ + SPDK_BLOB_STATE_DIRTY, + + /* The blob in memory version of the blob matches the on disk + * version. + */ + SPDK_BLOB_STATE_CLEAN, + + /* The in-memory state being synchronized with the on-disk + * blob state. */ + SPDK_BLOB_STATE_LOADING, + + /* The disk state is being synchronized with the current + * blob state. + */ + SPDK_BLOB_STATE_SYNCING, +}; + +struct spdk_blob { + struct spdk_blob_store *bs; + + uint32_t open_ref; + + spdk_blob_id id; + + enum spdk_blob_state state; + + /* Two copies of the mutable data. One is a version + * that matches the last known data on disk (clean). + * The other (active) is the current data. 
Syncing + * a blob makes the clean match the active. + */ + struct spdk_blob_mut_data clean; + struct spdk_blob_mut_data active; + + /* TODO: The xattrs are mutable, but we don't want to be + * copying them unecessarily. Figure this out. + */ + TAILQ_HEAD(, spdk_xattr) xattrs; + + TAILQ_ENTRY(spdk_blob) link; +}; + +struct spdk_blob_store { + uint64_t md_start; /* Offset from beginning of disk, in pages */ + uint32_t md_len; /* Count, in pages */ + struct spdk_io_channel *md_channel; + + struct spdk_bs_dev *dev; + + struct spdk_bit_array *used_md_pages; + struct spdk_bit_array *used_clusters; + + uint32_t cluster_sz; + uint64_t total_clusters; + uint64_t num_free_clusters; + uint32_t pages_per_cluster; + + uint32_t max_md_ops; + + spdk_blob_id super_blob; + + TAILQ_HEAD(, spdk_blob) blobs; +}; + +struct spdk_bs_channel { + struct spdk_bs_request_set *req_mem; + TAILQ_HEAD(, spdk_bs_request_set) reqs; + + struct spdk_blob_store *bs; + + struct spdk_bs_dev *dev; + struct spdk_io_channel *dev_channel; +}; + +/* On-Disk Data Structures + * + * The following data structures exist on disk. + */ +#define SPDK_BS_VERSION 1 + +#pragma pack(push, 1) + +#define SPDK_MD_MASK_TYPE_USED_PAGES 0 +#define SPDK_MD_MASK_TYPE_USED_CLUSTERS 1 + +struct spdk_bs_md_mask { + uint8_t type; + uint32_t length; /* In bits */ + uint8_t mask[0]; +}; + +#define SPDK_MD_DESCRIPTOR_TYPE_PADDING 0 +#define SPDK_MD_DESCRIPTOR_TYPE_EXTENT 1 +#define SPDK_MD_DESCRIPTOR_TYPE_XATTR 2 + +struct spdk_blob_md_descriptor_xattr { + uint8_t type; + uint32_t length; + + uint16_t name_length; + uint16_t value_length; + + char name[0]; + /* String name immediately followed by string value. 
*/ +}; + +struct spdk_blob_md_descriptor_extent { + uint8_t type; + uint32_t length; + + struct { + uint32_t cluster_idx; + uint32_t length; /* In units of clusters */ + } extents[0]; +}; + +struct spdk_blob_md_descriptor { + uint8_t type; + uint32_t length; +}; + +#define SPDK_INVALID_MD_PAGE UINT32_MAX + +struct spdk_blob_md_page { + spdk_blob_id id; + + uint32_t sequence_num; + uint32_t reserved0; + + /* Descriptors here */ + uint64_t descriptors[509]; + + uint32_t next; + uint32_t crc; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_md_page) == 0x1000, "Invalid md page size"); + +#define SPDK_BS_SUPER_BLOCK_SIG "SPDKBLOB" + +struct spdk_bs_super_block { + uint8_t signature[8]; + uint32_t version; + uint32_t length; + uint32_t clean; /* If there was a clean shutdown, this is 1. */ + spdk_blob_id super_blob; + + uint32_t cluster_size; /* In bytes */ + + uint32_t used_page_mask_start; /* Offset from beginning of disk, in pages */ + uint32_t used_page_mask_len; /* Count, in pages */ + + uint32_t used_cluster_mask_start; /* Offset from beginning of disk, in pages */ + uint32_t used_cluster_mask_len; /* Count, in pages */ + + uint32_t md_start; /* Offset from beginning of disk, in pages */ + uint32_t md_len; /* Count, in pages */ + + uint8_t reserved[4040]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_super_block) == 0x1000, "Invalid super block size"); + +#pragma pack(pop) + +/* Unit Conversions + * + * The blobstore works with several different units: + * - Byte: Self explanatory + * - LBA: The logical blocks on the backing storage device. + * - Page: The read/write units of blobs and metadata. This is + * an offset into a blob in units of 4KiB. + * - Cluster Index: The disk is broken into a sequential list of + * clusters. This is the offset from the beginning. + * + * NOTE: These conversions all act on simple magnitudes, not with any sort + * of knowledge about the blobs themselves. 
For instance, converting + * a page to an lba with the conversion function below simply converts + * a number of pages to an equivalent number of lbas, but that + * lba certainly isn't the right lba that corresponds to a page offset + * for a particular blob. + */ +static inline uint64_t +_spdk_bs_byte_to_lba(struct spdk_blob_store *bs, uint64_t length) +{ + assert(length % bs->dev->blocklen == 0); + + return length / bs->dev->blocklen; +} + +static inline uint64_t +_spdk_bs_lba_to_byte(struct spdk_blob_store *bs, uint64_t lba) +{ + return lba * bs->dev->blocklen; +} + +static inline uint64_t +_spdk_bs_page_to_lba(struct spdk_blob_store *bs, uint64_t page) +{ + return page * sizeof(struct spdk_blob_md_page) / bs->dev->blocklen; +} + +static inline uint32_t +_spdk_bs_lba_to_page(struct spdk_blob_store *bs, uint64_t lba) +{ + uint64_t lbas_per_page; + + lbas_per_page = sizeof(struct spdk_blob_md_page) / bs->dev->blocklen; + + assert(lba % lbas_per_page == 0); + + return lba / lbas_per_page; +} + +static inline uint64_t +_spdk_bs_cluster_to_page(struct spdk_blob_store *bs, uint32_t cluster) +{ + return cluster * bs->pages_per_cluster; +} + +static inline uint32_t +_spdk_bs_page_to_cluster(struct spdk_blob_store *bs, uint64_t page) +{ + assert(page % bs->pages_per_cluster == 0); + + return page / bs->pages_per_cluster; +} + +static inline uint64_t +_spdk_bs_cluster_to_lba(struct spdk_blob_store *bs, uint32_t cluster) +{ + return cluster * (bs->cluster_sz / bs->dev->blocklen); +} + +static inline uint32_t +_spdk_bs_lba_to_cluster(struct spdk_blob_store *bs, uint64_t lba) +{ + assert(lba % (bs->cluster_sz / bs->dev->blocklen) == 0); + + return lba / (bs->cluster_sz / bs->dev->blocklen); +} + +/* End basic conversions */ + +static inline uint32_t +_spdk_bs_blobid_to_page(spdk_blob_id id) +{ + return id & 0xFFFFFFFF; +} + +/* Given a page offset into a blob, look up the LBA for the + * start of that page. 
+ */ +static inline uint64_t +_spdk_bs_blob_page_to_lba(struct spdk_blob *blob, uint32_t page) +{ + uint64_t lba; + uint32_t pages_per_cluster; + + pages_per_cluster = blob->bs->pages_per_cluster; + + assert(page < blob->active.num_clusters * pages_per_cluster); + + lba = blob->active.clusters[page / pages_per_cluster]; + lba += _spdk_bs_page_to_lba(blob->bs, page % pages_per_cluster); + + return lba; +} + +/* Given a page offset into a blob, look up the number of pages until the + * next cluster boundary. + */ +static inline uint32_t +_spdk_bs_num_pages_to_cluster_boundary(struct spdk_blob *blob, uint32_t page) +{ + uint32_t pages_per_cluster; + + pages_per_cluster = blob->bs->pages_per_cluster; + + return pages_per_cluster - (page % pages_per_cluster); +} + +#endif diff --git a/lib/blob/request.c b/lib/blob/request.c new file mode 100644 index 000000000..0301e0640 --- /dev/null +++ b/lib/blob/request.c @@ -0,0 +1,342 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include "blobstore.h" +#include "request.h" + +#include "spdk/io_channel.h" +#include "spdk/queue.h" + +#include "spdk_internal/log.h" + +void +spdk_bs_call_cpl(struct spdk_bs_cpl *cpl, int bserrno) +{ + switch (cpl->type) { + case SPDK_BS_CPL_TYPE_BS_BASIC: + cpl->u.bs_basic.cb_fn(cpl->u.bs_basic.cb_arg, + bserrno); + break; + case SPDK_BS_CPL_TYPE_BS_HANDLE: + cpl->u.bs_handle.cb_fn(cpl->u.bs_handle.cb_arg, + cpl->u.bs_handle.bs, + bserrno); + break; + case SPDK_BS_CPL_TYPE_BLOB_BASIC: + cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, + bserrno); + break; + case SPDK_BS_CPL_TYPE_BLOBID: + cpl->u.blobid.cb_fn(cpl->u.blobid.cb_arg, + cpl->u.blobid.blobid, + bserrno); + break; + case SPDK_BS_CPL_TYPE_BLOB_HANDLE: + cpl->u.blob_handle.cb_fn(cpl->u.blob_handle.cb_arg, + cpl->u.blob_handle.blob, + bserrno); + break; + case SPDK_BS_CPL_TYPE_NESTED_SEQUENCE: + cpl->u.nested_seq.cb_fn(cpl->u.nested_seq.cb_arg, + cpl->u.nested_seq.parent, + bserrno); + break; + } +} + +static void +spdk_bs_request_set_complete(struct spdk_bs_request_set *set) +{ + struct spdk_bs_cpl cpl = set->cpl; + int bserrno = set->bserrno; + + 
TAILQ_INSERT_TAIL(&set->channel->reqs, set, link); + + spdk_bs_call_cpl(&cpl, bserrno); +} + +static void +spdk_bs_sequence_completion(struct spdk_io_channel *channel, void *cb_arg, int bserrno) +{ + struct spdk_bs_request_set *set = cb_arg; + + set->bserrno = bserrno; + set->u.sequence.cb_fn((spdk_bs_sequence_t *)set, set->u.sequence.cb_arg, bserrno); +} + +spdk_bs_sequence_t * +spdk_bs_sequence_start(struct spdk_io_channel *_channel, + struct spdk_bs_cpl *cpl) +{ + struct spdk_bs_channel *channel; + struct spdk_bs_request_set *set; + + channel = spdk_io_channel_get_ctx(_channel); + + set = TAILQ_FIRST(&channel->reqs); + if (!set) { + return NULL; + } + TAILQ_REMOVE(&channel->reqs, set, link); + + set->cpl = *cpl; + set->bserrno = 0; + set->channel = channel; + + set->cb_args.cb_fn = spdk_bs_sequence_completion; + set->cb_args.cb_arg = set; + set->cb_args.channel = channel->dev_channel; + + return (spdk_bs_sequence_t *)set; +} + +void +spdk_bs_sequence_read(spdk_bs_sequence_t *seq, void *payload, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)seq; + struct spdk_bs_channel *channel = set->channel; + + SPDK_TRACELOG(SPDK_TRACE_BLOB_RW, "Reading %u blocks from LBA %lu\n", lba_count, lba); + + set->u.sequence.cb_fn = cb_fn; + set->u.sequence.cb_arg = cb_arg; + + channel->dev->read(channel->dev, channel->dev_channel, payload, lba, lba_count, + &set->cb_args); +} + +void +spdk_bs_sequence_write(spdk_bs_sequence_t *seq, void *payload, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)seq; + struct spdk_bs_channel *channel = set->channel; + + SPDK_TRACELOG(SPDK_TRACE_BLOB_RW, "Writing %u blocks to LBA %lu\n", lba_count, lba); + + set->u.sequence.cb_fn = cb_fn; + set->u.sequence.cb_arg = cb_arg; + + channel->dev->write(channel->dev, channel->dev_channel, payload, lba, 
lba_count, + &set->cb_args); +} + +void +spdk_bs_sequence_flush(spdk_bs_sequence_t *seq, + spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)seq; + struct spdk_bs_channel *channel = set->channel; + + SPDK_TRACELOG(SPDK_TRACE_BLOB_RW, "Flushing\n"); + + set->u.sequence.cb_fn = cb_fn; + set->u.sequence.cb_arg = cb_arg; + + channel->dev->flush(channel->dev, channel->dev_channel, + &set->cb_args); +} + +void +spdk_bs_sequence_unmap(spdk_bs_sequence_t *seq, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)seq; + struct spdk_bs_channel *channel = set->channel; + + SPDK_TRACELOG(SPDK_TRACE_BLOB_RW, "Unmapping %u blocks at LBA %lu\n", lba_count, lba); + + set->u.sequence.cb_fn = cb_fn; + set->u.sequence.cb_arg = cb_arg; + + channel->dev->unmap(channel->dev, channel->dev_channel, lba, lba_count, + &set->cb_args); +} + +void +spdk_bs_sequence_finish(spdk_bs_sequence_t *seq, int bserrno) +{ + if (bserrno != 0) { + seq->bserrno = bserrno; + } + spdk_bs_request_set_complete((struct spdk_bs_request_set *)seq); +} + +static void +spdk_bs_batch_completion(struct spdk_io_channel *_channel, + void *cb_arg, int bserrno) +{ + struct spdk_bs_request_set *set = cb_arg; + + set->u.batch.outstanding_ops--; + if (bserrno != 0) { + set->bserrno = bserrno; + } + + if (set->u.batch.outstanding_ops == 0 && set->u.batch.batch_closed) { + if (set->u.batch.cb_fn) { + set->cb_args.cb_fn = spdk_bs_sequence_completion; + set->u.batch.cb_fn((spdk_bs_sequence_t *)set, set->u.batch.cb_arg, bserrno); + } else { + spdk_bs_request_set_complete(set); + } + } +} + +spdk_bs_batch_t * +spdk_bs_batch_open(struct spdk_io_channel *_channel, + struct spdk_bs_cpl *cpl) +{ + struct spdk_bs_channel *channel; + struct spdk_bs_request_set *set; + + channel = spdk_io_channel_get_ctx(_channel); + + set = TAILQ_FIRST(&channel->reqs); + if (!set) { + return NULL; 
+ } + TAILQ_REMOVE(&channel->reqs, set, link); + + set->cpl = *cpl; + set->bserrno = 0; + set->channel = channel; + + set->u.batch.cb_fn = NULL; + set->u.batch.cb_arg = NULL; + set->u.batch.outstanding_ops = 0; + set->u.batch.batch_closed = 0; + + set->cb_args.cb_fn = spdk_bs_batch_completion; + set->cb_args.cb_arg = set; + set->cb_args.channel = channel->dev_channel; + + return (spdk_bs_batch_t *)set; +} + +void +spdk_bs_batch_read(spdk_bs_batch_t *batch, void *payload, + uint64_t lba, uint32_t lba_count) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)batch; + struct spdk_bs_channel *channel = set->channel; + + SPDK_TRACELOG(SPDK_TRACE_BLOB_RW, "Reading %u blocks from LBA %lu\n", lba_count, lba); + + set->u.batch.outstanding_ops++; + channel->dev->read(channel->dev, channel->dev_channel, payload, lba, lba_count, + &set->cb_args); +} + +void +spdk_bs_batch_write(spdk_bs_batch_t *batch, void *payload, + uint64_t lba, uint32_t lba_count) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)batch; + struct spdk_bs_channel *channel = set->channel; + + SPDK_TRACELOG(SPDK_TRACE_BLOB_RW, "Writing %u blocks to LBA %lu\n", lba_count, lba); + + set->u.batch.outstanding_ops++; + channel->dev->write(channel->dev, channel->dev_channel, payload, lba, lba_count, + &set->cb_args); +} + +void +spdk_bs_batch_flush(spdk_bs_batch_t *batch) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)batch; + struct spdk_bs_channel *channel = set->channel; + + SPDK_TRACELOG(SPDK_TRACE_BLOB_RW, "Flushing\n"); + + set->u.batch.outstanding_ops++; + channel->dev->flush(channel->dev, channel->dev_channel, + &set->cb_args); +} + +void +spdk_bs_batch_unmap(spdk_bs_batch_t *batch, + uint64_t lba, uint32_t lba_count) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)batch; + struct spdk_bs_channel *channel = set->channel; + + SPDK_TRACELOG(SPDK_TRACE_BLOB_RW, "Unmapping %u blocks at LBA %lu\n", lba_count, lba); + + 
set->u.batch.outstanding_ops++; + channel->dev->unmap(channel->dev, channel->dev_channel, lba, lba_count, + &set->cb_args); +} + +void +spdk_bs_batch_close(spdk_bs_batch_t *batch) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)batch; + + set->u.batch.batch_closed = 1; + + if (set->u.batch.outstanding_ops == 0) { + if (set->u.batch.cb_fn) { + set->cb_args.cb_fn = spdk_bs_sequence_completion; + set->u.batch.cb_fn((spdk_bs_sequence_t *)set, set->u.batch.cb_arg, set->bserrno); + } else { + spdk_bs_request_set_complete(set); + } + } +} + +spdk_bs_batch_t * +spdk_bs_sequence_to_batch(spdk_bs_sequence_t *seq, spdk_bs_sequence_cpl cb_fn, void *cb_arg) +{ + struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)seq; + + set->u.batch.cb_fn = cb_fn; + set->u.batch.cb_arg = cb_arg; + set->u.batch.outstanding_ops = 0; + set->u.batch.batch_closed = 0; + + set->cb_args.cb_fn = spdk_bs_batch_completion; + + return set; +} + +SPDK_LOG_REGISTER_TRACE_FLAG("blob_rw", SPDK_TRACE_BLOB_RW); diff --git a/lib/blob/request.h b/lib/blob/request.h new file mode 100644 index 000000000..2bf9820d9 --- /dev/null +++ b/lib/blob/request.h @@ -0,0 +1,171 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SPDK_BS_REQUEST_H +#define SPDK_BS_REQUEST_H + +#include + +#include "spdk/blob.h" + +enum spdk_bs_cpl_type { + SPDK_BS_CPL_TYPE_BS_BASIC, + SPDK_BS_CPL_TYPE_BS_HANDLE, + SPDK_BS_CPL_TYPE_BLOB_BASIC, + SPDK_BS_CPL_TYPE_BLOBID, + SPDK_BS_CPL_TYPE_BLOB_HANDLE, + SPDK_BS_CPL_TYPE_NESTED_SEQUENCE, +}; + +struct spdk_bs_request_set; + +/* Use a sequence to submit a set of requests serially */ +typedef struct spdk_bs_request_set spdk_bs_sequence_t; + +/* Use a batch to submit a set of requests in parallel */ +typedef struct spdk_bs_request_set spdk_bs_batch_t; + +typedef void (*spdk_bs_nested_seq_complete)(void *cb_arg, spdk_bs_sequence_t *parent, int bserrno); + +struct spdk_bs_cpl { + enum spdk_bs_cpl_type type; + union { + struct { + spdk_bs_op_complete cb_fn; + void *cb_arg; + } bs_basic; + + struct { + spdk_bs_op_with_handle_complete cb_fn; + void *cb_arg; + struct spdk_blob_store *bs; + } bs_handle; + + struct { + spdk_blob_op_complete cb_fn; + void *cb_arg; + } blob_basic; + + struct { + spdk_blob_op_with_id_complete cb_fn; + void *cb_arg; + spdk_blob_id blobid; + } blobid; + + struct { + spdk_blob_op_with_handle_complete cb_fn; + void *cb_arg; + struct 
spdk_blob *blob; + } blob_handle; + + struct { + spdk_bs_nested_seq_complete cb_fn; + void *cb_arg; + spdk_bs_sequence_t *parent; + } nested_seq; + } u; +}; + +typedef void (*spdk_bs_sequence_cpl)(spdk_bs_sequence_t *sequence, + void *cb_arg, int bserrno); + +/* A generic request set. Can be a sequence or a batch. */ +struct spdk_bs_request_set { + struct spdk_bs_cpl cpl; + + int bserrno; + + struct spdk_bs_channel *channel; + + struct spdk_bs_dev_cb_args cb_args; + + union { + struct { + spdk_bs_sequence_cpl cb_fn; + void *cb_arg; + } sequence; + + struct { + uint32_t outstanding_ops; + uint32_t batch_closed; + spdk_bs_sequence_cpl cb_fn; + void *cb_arg; + } batch; + } u; + + TAILQ_ENTRY(spdk_bs_request_set) link; +}; + +void spdk_bs_call_cpl(struct spdk_bs_cpl *cpl, int bserrno); + +spdk_bs_sequence_t *spdk_bs_sequence_start(struct spdk_io_channel *channel, + struct spdk_bs_cpl *cpl); + +void spdk_bs_sequence_read(spdk_bs_sequence_t *seq, void *payload, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg); + +void spdk_bs_sequence_write(spdk_bs_sequence_t *seq, void *payload, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg); + +void spdk_bs_sequence_flush(spdk_bs_sequence_t *seq, + spdk_bs_sequence_cpl cb_fn, void *cb_arg); + +void spdk_bs_sequence_unmap(spdk_bs_sequence_t *seq, + uint64_t lba, uint32_t lba_count, + spdk_bs_sequence_cpl cb_fn, void *cb_arg); + +void spdk_bs_sequence_finish(spdk_bs_sequence_t *seq, int bserrno); + +spdk_bs_batch_t *spdk_bs_batch_open(struct spdk_io_channel *channel, + struct spdk_bs_cpl *cpl); + +void spdk_bs_batch_read(spdk_bs_batch_t *batch, void *payload, + uint64_t lba, uint32_t lba_count); + +void spdk_bs_batch_write(spdk_bs_batch_t *batch, void *payload, + uint64_t lba, uint32_t lba_count); + +void spdk_bs_batch_flush(spdk_bs_batch_t *batch); + +void spdk_bs_batch_unmap(spdk_bs_batch_t *batch, + uint64_t lba, uint32_t lba_count); + +void 
spdk_bs_batch_close(spdk_bs_batch_t *batch); + +spdk_bs_batch_t *spdk_bs_sequence_to_batch(spdk_bs_sequence_t *seq, + spdk_bs_sequence_cpl cb_fn, + void *cb_arg); + +#endif diff --git a/test/lib/Makefile b/test/lib/Makefile index 738e9f20c..43ddd25d9 100644 --- a/test/lib/Makefile +++ b/test/lib/Makefile @@ -34,7 +34,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -DIRS-y = bdev env event log json jsonrpc nvme nvmf scsi ioat util +DIRS-y = bdev blob env event log json jsonrpc nvme nvmf scsi ioat util ifeq ($(OS),Linux) DIRS-y += iscsi endif diff --git a/test/lib/blob/Makefile b/test/lib/blob/Makefile new file mode 100644 index 000000000..04d80e531 --- /dev/null +++ b/test/lib/blob/Makefile @@ -0,0 +1,43 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +DIRS-y = blob_ut +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk diff --git a/test/lib/blob/blob_ut/.gitignore b/test/lib/blob/blob_ut/.gitignore new file mode 100644 index 000000000..553f54655 --- /dev/null +++ b/test/lib/blob/blob_ut/.gitignore @@ -0,0 +1 @@ +blob_ut diff --git a/test/lib/blob/blob_ut/Makefile b/test/lib/blob/blob_ut/Makefile new file mode 100644 index 000000000..26529ad88 --- /dev/null +++ b/test/lib/blob/blob_ut/Makefile @@ -0,0 +1,55 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk +include $(SPDK_ROOT_DIR)/mk/spdk.app.mk + +APP = blob_ut + +C_SRCS := blob_ut.c +CFLAGS += -I$(SPDK_ROOT_DIR)/lib/blob -I$(SPDK_ROOT_DIR)/test + +SPDK_LIB_LIST = util log + +LIBS += $(SPDK_LIB_LINKER_ARGS) -lcunit + +all : $(APP) + +$(APP) : $(OBJS) $(SPDK_LIB_FILES) + $(LINK_C) + +clean : + $(CLEAN_C) $(APP) + +include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk diff --git a/test/lib/blob/blob_ut/blob_ut.c b/test/lib/blob/blob_ut/blob_ut.c new file mode 100644 index 000000000..6c47bae3f --- /dev/null +++ b/test/lib/blob/blob_ut/blob_ut.c @@ -0,0 +1,973 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include + +#include "spdk_cunit.h" +#include "spdk/blob.h" + +#include "lib/test_env.c" +#include "../bs_dev_common.c" +#include "blobstore.c" +#include "request.c" + +struct spdk_blob_store *g_bs; +spdk_blob_id g_blobid; +struct spdk_blob *g_blob; +int g_bserrno; +struct spdk_xattr_names *g_names; +int g_done; + +static void +bs_op_complete(void *cb_arg, int bserrno) +{ + g_bserrno = bserrno; +} + +static void +bs_op_with_handle_complete(void *cb_arg, struct spdk_blob_store *bs, + int bserrno) +{ + g_bs = bs; + g_bserrno = bserrno; +} + +static void +blob_op_complete(void *cb_arg, int bserrno) +{ + g_bserrno = bserrno; +} + +static void +blob_op_with_id_complete(void *cb_arg, spdk_blob_id blobid, int bserrno) +{ + g_blobid = blobid; + g_bserrno = bserrno; +} + +static void +blob_op_with_handle_complete(void *cb_arg, struct spdk_blob *blb, int bserrno) +{ + g_blob = blb; + g_bserrno = bserrno; +} + +static void +blob_init(void) +{ + struct spdk_bs_dev dev; + + init_dev(&dev); + + spdk_bs_init(&dev, NULL, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + +static void +blob_super(void) +{ + struct spdk_blob_store *bs; + struct spdk_bs_dev dev; + spdk_blob_id blobid; + + init_dev(&dev); + + spdk_bs_init(&dev, NULL, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + + /* Get the super blob without having set one */ + spdk_bs_get_super(bs, blob_op_with_id_complete, NULL); + CU_ASSERT(g_bserrno == -ENOENT); + CU_ASSERT(g_blobid == SPDK_BLOBID_INVALID); + + /* Create a blob */ + spdk_bs_md_create_blob(bs, + blob_op_with_id_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + blobid = g_blobid; + + /* Set the blob as the super blob */ + spdk_bs_set_super(bs, blobid, blob_op_complete, 
NULL); + CU_ASSERT(g_bserrno == 0); + + /* Get the super blob */ + spdk_bs_get_super(bs, blob_op_with_id_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(blobid == g_blobid); + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + +static void +blob_open(void) +{ + struct spdk_blob_store *bs; + struct spdk_bs_dev dev; + struct spdk_blob *blob; + spdk_blob_id blobid, blobid2; + + init_dev(&dev); + + spdk_bs_init(&dev, NULL, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + + spdk_bs_md_create_blob(bs, blob_op_with_id_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + blobid = g_blobid; + + spdk_bs_md_open_blob(bs, blobid, blob_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blob != NULL); + blob = g_blob; + + blobid2 = spdk_blob_get_id(blob); + CU_ASSERT(blobid == blobid2); + + /* Try to open file again. It should return success. */ + spdk_bs_md_open_blob(bs, blobid, blob_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(blob == g_blob); + + spdk_bs_md_close_blob(&blob, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(blob == NULL); + + /* + * Close the file a second time, releasing the second reference. This + * should succeed. + */ + blob = g_blob; + spdk_bs_md_close_blob(&blob, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + + /* + * Try to open file again. It should succeed. This tests the case + * where the file is opened, closed, then re-opened again. 
+ */ + spdk_bs_md_open_blob(bs, blobid, blob_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blob != NULL); + blob = g_blob; + + spdk_bs_md_close_blob(&blob, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + +static void +blob_delete(void) +{ + struct spdk_blob_store *bs; + struct spdk_bs_dev dev; + spdk_blob_id blobid; + + init_dev(&dev); + + spdk_bs_init(&dev, NULL, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + + /* Create a blob and then delete it. */ + spdk_bs_md_create_blob(bs, blob_op_with_id_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); /* same validity check as the other tests; "> 0" would accept the invalid sentinel */ + blobid = g_blobid; + + spdk_bs_md_delete_blob(bs, blobid, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + + /* Try to open the deleted blob; the open must fail with -ENOENT */ + spdk_bs_md_open_blob(bs, blobid, blob_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == -ENOENT); + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + +static void +blob_resize(void) +{ + struct spdk_blob_store *bs; + struct spdk_bs_dev dev; + struct spdk_blob *blob; + spdk_blob_id blobid; + uint64_t free_clusters; + int rc; + + init_dev(&dev); + + spdk_bs_init(&dev, NULL, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + free_clusters = spdk_bs_free_cluster_count(bs); + + spdk_bs_md_create_blob(bs, blob_op_with_id_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + CU_ASSERT(free_clusters == spdk_bs_free_cluster_count(bs)); + blobid = g_blobid; + + spdk_bs_md_open_blob(bs, blobid, blob_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blob != NULL); + blob = g_blob; + + /* The blob started at 0 clusters. Resize it to be 5.
*/ + rc = spdk_bs_md_resize_blob(blob, 5); + CU_ASSERT(rc == 0); + CU_ASSERT((free_clusters - 5) == spdk_bs_free_cluster_count(bs)); + + /* Shrink the blob to 3 clusters. This will not actually release + * the old clusters until the blob is synced. + */ + rc = spdk_bs_md_resize_blob(blob, 3); + CU_ASSERT(rc == 0); + /* Verify there are still 5 clusters in use */ + CU_ASSERT((free_clusters - 5) == spdk_bs_free_cluster_count(bs)); + + spdk_bs_md_sync_blob(blob, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + /* Now there are only 3 clusters in use */ + CU_ASSERT((free_clusters - 3) == spdk_bs_free_cluster_count(bs)); + + /* Resize the blob to be 10 clusters. Growth takes effect immediately. */ + rc = spdk_bs_md_resize_blob(blob, 10); + CU_ASSERT(rc == 0); + CU_ASSERT((free_clusters - 10) == spdk_bs_free_cluster_count(bs)); + + spdk_bs_md_close_blob(&blob, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + + spdk_bs_md_delete_blob(bs, blobid, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + +static void +channel_ops(void) +{ + struct spdk_blob_store *bs; + struct spdk_bs_dev dev; + struct spdk_io_channel *channel; + + init_dev(&dev); + + spdk_bs_init(&dev, NULL, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + + channel = spdk_bs_alloc_io_channel(bs, SPDK_IO_PRIORITY_DEFAULT, 32); + CU_ASSERT(channel != NULL); + + spdk_bs_free_io_channel(channel); + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + +static void +blob_write(void) +{ + struct spdk_blob_store *bs; + struct spdk_bs_dev dev; + struct spdk_blob *blob; + struct spdk_io_channel *channel; + spdk_blob_id blobid; + uint64_t pages_per_cluster; + uint8_t payload[10 * 4096]; + int rc; + + init_dev(&dev); + + spdk_bs_init(&dev, NULL, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 
0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + + pages_per_cluster = spdk_bs_get_cluster_size(bs) / spdk_bs_get_page_size(bs); + + channel = spdk_bs_alloc_io_channel(bs, SPDK_IO_PRIORITY_DEFAULT, 32); + CU_ASSERT(channel != NULL); + + spdk_bs_md_create_blob(bs, blob_op_with_id_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + blobid = g_blobid; + + spdk_bs_md_open_blob(bs, blobid, blob_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blob != NULL); + blob = g_blob; + + /* Write to a blob with 0 size */ + spdk_bs_io_write_blob(blob, channel, payload, 0, 1, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == -EINVAL); + + /* Resize the blob */ + rc = spdk_bs_md_resize_blob(blob, 5); + CU_ASSERT(rc == 0); + + /* Write to the blob */ + spdk_bs_io_write_blob(blob, channel, payload, 0, 1, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + + /* Write starting beyond the end */ + spdk_bs_io_write_blob(blob, channel, payload, 5 * pages_per_cluster, 1, blob_op_complete, + NULL); + CU_ASSERT(g_bserrno == -EINVAL); + + /* Write starting at a valid location but going off the end */ + spdk_bs_io_write_blob(blob, channel, payload, 4 * pages_per_cluster, pages_per_cluster + 1, + blob_op_complete, NULL); + CU_ASSERT(g_bserrno == -EINVAL); + + spdk_bs_md_close_blob(&blob, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + + spdk_bs_free_io_channel(channel); + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + +static void +blob_read(void) +{ + struct spdk_blob_store *bs; + struct spdk_bs_dev dev; + struct spdk_blob *blob; + struct spdk_io_channel *channel; + spdk_blob_id blobid; + uint64_t pages_per_cluster; + uint8_t payload[10 * 4096]; + int rc; + + init_dev(&dev); + + spdk_bs_init(&dev, NULL, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + + pages_per_cluster = 
spdk_bs_get_cluster_size(bs) / spdk_bs_get_page_size(bs); + + channel = spdk_bs_alloc_io_channel(bs, SPDK_IO_PRIORITY_DEFAULT, 32); + CU_ASSERT(channel != NULL); + + spdk_bs_md_create_blob(bs, blob_op_with_id_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + blobid = g_blobid; + + spdk_bs_md_open_blob(bs, blobid, blob_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blob != NULL); + blob = g_blob; + + /* Read from a blob with 0 size */ + spdk_bs_io_read_blob(blob, channel, payload, 0, 1, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == -EINVAL); + + /* Resize the blob */ + rc = spdk_bs_md_resize_blob(blob, 5); + CU_ASSERT(rc == 0); + + /* Read from the blob */ + spdk_bs_io_read_blob(blob, channel, payload, 0, 1, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + + /* Read starting beyond the end */ + spdk_bs_io_read_blob(blob, channel, payload, 5 * pages_per_cluster, 1, blob_op_complete, + NULL); + CU_ASSERT(g_bserrno == -EINVAL); + + /* Read starting at a valid location but going off the end */ + spdk_bs_io_read_blob(blob, channel, payload, 4 * pages_per_cluster, pages_per_cluster + 1, + blob_op_complete, NULL); + CU_ASSERT(g_bserrno == -EINVAL); + + spdk_bs_md_close_blob(&blob, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + + spdk_bs_free_io_channel(channel); + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + +static void +blob_rw_verify(void) +{ + struct spdk_blob_store *bs; + struct spdk_bs_dev dev; + struct spdk_blob *blob; + struct spdk_io_channel *channel; + spdk_blob_id blobid; + uint8_t payload_read[10 * 4096]; + uint8_t payload_write[10 * 4096]; + int rc; + + init_dev(&dev); + + spdk_bs_init(&dev, NULL, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + + channel = spdk_bs_alloc_io_channel(bs, SPDK_IO_PRIORITY_DEFAULT, 32); + CU_ASSERT(channel != NULL); + +
spdk_bs_md_create_blob(bs, blob_op_with_id_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + blobid = g_blobid; + + spdk_bs_md_open_blob(bs, blobid, blob_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blob != NULL); + blob = g_blob; + + rc = spdk_bs_md_resize_blob(blob, 32); + CU_ASSERT(rc == 0); + + memset(payload_write, 0xE5, sizeof(payload_write)); + spdk_bs_io_write_blob(blob, channel, payload_write, 4, 10, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + + memset(payload_read, 0x00, sizeof(payload_read)); + spdk_bs_io_read_blob(blob, channel, payload_read, 4, 10, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(memcmp(payload_write, payload_read, 10 * 4096) == 0); /* compare all 10 pages that were written and read back, not just the first 4 */ + + spdk_bs_md_close_blob(&blob, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + + spdk_bs_free_io_channel(channel); + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + +static void +blob_iter(void) +{ + struct spdk_blob_store *bs; + struct spdk_bs_dev dev; + struct spdk_blob *blob; + spdk_blob_id blobid; + + init_dev(&dev); + + spdk_bs_init(&dev, NULL, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + + spdk_bs_md_iter_first(bs, blob_op_with_handle_complete, NULL); + CU_ASSERT(g_blob == NULL); + CU_ASSERT(g_bserrno == -ENOENT); + + spdk_bs_md_create_blob(bs, blob_op_with_id_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + blobid = g_blobid; + + spdk_bs_md_iter_first(bs, blob_op_with_handle_complete, NULL); + CU_ASSERT(g_blob != NULL); + CU_ASSERT(g_bserrno == 0); + blob = g_blob; + CU_ASSERT(spdk_blob_get_id(blob) == blobid); + + spdk_bs_md_iter_next(bs, &blob, blob_op_with_handle_complete, NULL); + CU_ASSERT(g_blob == NULL); + CU_ASSERT(g_bserrno == -ENOENT); + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs =
NULL; +} + +static void +blob_xattr(void) +{ + struct spdk_blob_store *bs; + struct spdk_bs_dev dev; + struct spdk_blob *blob; + spdk_blob_id blobid; + uint64_t length; + int rc; + const void *value; + size_t value_len; + struct spdk_xattr_names *names; + + init_dev(&dev); + + spdk_bs_init(&dev, NULL, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + + spdk_bs_md_create_blob(bs, blob_op_with_id_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + blobid = g_blobid; + + spdk_bs_md_open_blob(bs, blobid, blob_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blob != NULL); + blob = g_blob; + + rc = spdk_blob_md_set_xattr(blob, "name", "log.txt", strlen("log.txt") + 1); + CU_ASSERT(rc == 0); + + length = 2345; + rc = spdk_blob_md_set_xattr(blob, "length", &length, sizeof(length)); + CU_ASSERT(rc == 0); + + /* Overwrite "length" xattr. */ + length = 3456; + rc = spdk_blob_md_set_xattr(blob, "length", &length, sizeof(length)); + CU_ASSERT(rc == 0); + + value = NULL; + rc = spdk_bs_md_get_xattr_value(blob, "length", &value, &value_len); + CU_ASSERT(rc == 0); + SPDK_CU_ASSERT_FATAL(value != NULL); + CU_ASSERT(*(uint64_t *)value == length); + CU_ASSERT(value_len == 8); + + rc = spdk_bs_md_get_xattr_value(blob, "foobar", &value, &value_len); + CU_ASSERT(rc == -ENOENT); + + names = NULL; + rc = spdk_bs_md_get_xattr_names(blob, &names); + CU_ASSERT(rc == 0); + CU_ASSERT(names != NULL); + CU_ASSERT(spdk_xattr_names_get_count(names) == 2); + CU_ASSERT(!strcmp(spdk_xattr_names_get_name(names, 0), "name") || + !strcmp(spdk_xattr_names_get_name(names, 1), "name")); + CU_ASSERT(!strcmp(spdk_xattr_names_get_name(names, 0), "length") || + !strcmp(spdk_xattr_names_get_name(names, 1), "length")); + spdk_xattr_names_free(names); + + rc = spdk_blob_md_remove_xattr(blob, "name"); + CU_ASSERT(rc == 0); + + rc = spdk_blob_md_remove_xattr(blob, "foobar"); + 
CU_ASSERT(rc == -ENOENT); + + spdk_bs_md_close_blob(&blob, blob_op_complete, NULL); + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + +static void +bs_load(void) +{ + struct spdk_bs_dev dev; + spdk_blob_id blobid; + struct spdk_blob *blob; + uint64_t length; + int rc; + const void *value; + size_t value_len; + + init_dev(&dev); + + /* Initialize a new blob store */ + spdk_bs_init(&dev, NULL, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + + /* Create a blob */ + spdk_bs_md_create_blob(g_bs, blob_op_with_id_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + blobid = g_blobid; + + spdk_bs_md_open_blob(g_bs, blobid, blob_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blob != NULL); + blob = g_blob; + + /* Set some xattrs */ + rc = spdk_blob_md_set_xattr(blob, "name", "log.txt", strlen("log.txt") + 1); + CU_ASSERT(rc == 0); + + length = 2345; + rc = spdk_blob_md_set_xattr(blob, "length", &length, sizeof(length)); + CU_ASSERT(rc == 0); + + /* Resize the blob */ + rc = spdk_bs_md_resize_blob(blob, 10); + CU_ASSERT(rc == 0); + + spdk_bs_md_close_blob(&blob, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + blob = NULL; + g_blob = NULL; + g_blobid = SPDK_BLOBID_INVALID; + + /* Unload the blob store */ + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; + g_blob = NULL; + g_blobid = 0; + + /* Load an existing blob store */ + spdk_bs_load(&dev, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + + spdk_bs_md_open_blob(g_bs, blobid, blob_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blob != NULL); + blob = g_blob; + + /* Get the xattrs */ + value = NULL; + rc = spdk_bs_md_get_xattr_value(blob, "length", &value, &value_len); + CU_ASSERT(rc == 0); + SPDK_CU_ASSERT_FATAL(value != 
NULL); + CU_ASSERT(*(uint64_t *)value == length); + CU_ASSERT(value_len == 8); + + rc = spdk_bs_md_get_xattr_value(blob, "foobar", &value, &value_len); + CU_ASSERT(rc == -ENOENT); + + CU_ASSERT(spdk_blob_get_num_clusters(blob) == 10); + + spdk_bs_md_close_blob(&blob, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + blob = NULL; + g_blob = NULL; + g_blobid = SPDK_BLOBID_INVALID; + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + +/* + * Create a blobstore with a cluster size different than the default, and ensure it is + * persisted. + */ +static void +bs_cluster_sz(void) +{ + struct spdk_bs_dev dev; + struct spdk_bs_opts opts; + uint32_t cluster_sz; + + init_dev(&dev); + spdk_bs_opts_init(&opts); + opts.cluster_sz *= 2; + cluster_sz = opts.cluster_sz; + + /* Initialize a new blob store */ + spdk_bs_init(&dev, &opts, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + + CU_ASSERT(spdk_bs_get_cluster_size(g_bs) == cluster_sz); + + /* Unload the blob store */ + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; + g_blob = NULL; + g_blobid = 0; + + /* Load an existing blob store */ + spdk_bs_load(&dev, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + + CU_ASSERT(spdk_bs_get_cluster_size(g_bs) == cluster_sz); + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + +/* + * Test resizing of the metadata blob. This requires creating enough blobs + * so that one cluster is not enough to fit the metadata for those blobs. + * To induce this condition to happen more quickly, we reduce the cluster + * size to 16KB, which means only 4 4KB blob metadata pages can fit. 
+ */ +static void +bs_resize_md(void) +{ + const int CLUSTER_PAGE_COUNT = 4; + const int NUM_BLOBS = CLUSTER_PAGE_COUNT * 4; + struct spdk_bs_dev dev; + struct spdk_bs_opts opts; + uint32_t cluster_sz; + spdk_blob_id blobids[NUM_BLOBS]; + int i; + + + init_dev(&dev); + spdk_bs_opts_init(&opts); + opts.cluster_sz = CLUSTER_PAGE_COUNT * 4096; + cluster_sz = opts.cluster_sz; + + /* Initialize a new blob store */ + spdk_bs_init(&dev, &opts, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + + CU_ASSERT(spdk_bs_get_cluster_size(g_bs) == cluster_sz); + + for (i = 0; i < NUM_BLOBS; i++) { + g_bserrno = -1; + g_blobid = SPDK_BLOBID_INVALID; + spdk_bs_md_create_blob(g_bs, + blob_op_with_id_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + blobids[i] = g_blobid; + } + + /* Unload the blob store */ + g_bserrno = -1; + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + + /* Load an existing blob store */ + g_bserrno = -1; + g_bs = NULL; + spdk_bs_load(&dev, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + + CU_ASSERT(spdk_bs_get_cluster_size(g_bs) == cluster_sz); + + for (i = 0; i < NUM_BLOBS; i++) { + g_bserrno = -1; + g_blob = NULL; + spdk_bs_md_open_blob(g_bs, blobids[i], blob_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blob != NULL); + g_bserrno = -1; + spdk_bs_md_close_blob(&g_blob, blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + } + + spdk_bs_unload(g_bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + +/* Try to hit all of the corner cases associated with serializing + * a blob to disk + */ +static void +blob_serialize(void) +{ + struct spdk_bs_dev dev; + struct spdk_bs_opts opts; + struct spdk_blob_store *bs; + spdk_blob_id blobid[2]; + struct spdk_blob *blob[2]; + uint64_t i; + char *value; + int rc; + + init_dev(&dev); + + /* 
Initialize a new blobstore with very small clusters */ + spdk_bs_opts_init(&opts); + opts.cluster_sz = dev.blocklen * 8; + spdk_bs_init(&dev, &opts, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + + /* Create and open two blobs */ + for (i = 0; i < 2; i++) { + spdk_bs_md_create_blob(bs, blob_op_with_id_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID); + blobid[i] = g_blobid; + + /* Open a blob */ + spdk_bs_md_open_blob(bs, blobid[i], blob_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blob != NULL); + blob[i] = g_blob; + + /* Set a fairly large xattr on both blobs to eat up + * metadata space + */ + value = calloc(dev.blocklen - 64, sizeof(char)); + SPDK_CU_ASSERT_FATAL(value != NULL); + memset(value, i, dev.blocklen / 2); + rc = spdk_blob_md_set_xattr(blob[i], "name", value, dev.blocklen - 64); + CU_ASSERT(rc == 0); + free(value); + } + + /* Resize the blobs, alternating 1 cluster at a time. + * This thwarts run length encoding and will cause spill + * over of the extents. 
+ */ + for (i = 0; i < 6; i++) { + rc = spdk_bs_md_resize_blob(blob[i % 2], (i / 2) + 1); + CU_ASSERT(rc == 0); + } + + for (i = 0; i < 2; i++) { + spdk_bs_md_sync_blob(blob[i], blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + } + + /* Close the blobs */ + for (i = 0; i < 2; i++) { + spdk_bs_md_close_blob(&blob[i], blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + } + + /* Unload the blobstore */ + spdk_bs_unload(bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; + g_blob = NULL; + g_blobid = 0; + bs = NULL; + + /* Load an existing blob store */ + spdk_bs_load(&dev, bs_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + SPDK_CU_ASSERT_FATAL(g_bs != NULL); + bs = g_bs; + + for (i = 0; i < 2; i++) { + blob[i] = NULL; + + spdk_bs_md_open_blob(bs, blobid[i], blob_op_with_handle_complete, NULL); + CU_ASSERT(g_bserrno == 0); + CU_ASSERT(g_blob != NULL); + blob[i] = g_blob; + + CU_ASSERT(spdk_blob_get_num_clusters(blob[i]) == 3); + + spdk_bs_md_close_blob(&blob[i], blob_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + } + + spdk_bs_unload(bs, bs_op_complete, NULL); + CU_ASSERT(g_bserrno == 0); + g_bs = NULL; +} + +int main(int argc, char **argv) +{ + CU_pSuite suite = NULL; + unsigned int num_failures; + + if (CU_initialize_registry() != CUE_SUCCESS) { + return CU_get_error(); + } + + suite = CU_add_suite("blob", NULL, NULL); + if (suite == NULL) { + CU_cleanup_registry(); + return CU_get_error(); + } + + if ( + CU_add_test(suite, "blob_init", blob_init) == NULL || + CU_add_test(suite, "blob_open", blob_open) == NULL || + CU_add_test(suite, "blob_delete", blob_delete) == NULL || + CU_add_test(suite, "blob_resize", blob_resize) == NULL || + CU_add_test(suite, "channel_ops", channel_ops) == NULL || + CU_add_test(suite, "blob_super", blob_super) == NULL || + CU_add_test(suite, "blob_write", blob_write) == NULL || + CU_add_test(suite, "blob_read", blob_read) == NULL || + CU_add_test(suite, "blob_rw_verify", blob_rw_verify) == NULL 
|| + CU_add_test(suite, "blob_iter", blob_iter) == NULL || + CU_add_test(suite, "blob_xattr", blob_xattr) == NULL || + CU_add_test(suite, "bs_load", bs_load) == NULL || + CU_add_test(suite, "bs_cluster_sz", bs_cluster_sz) == NULL || + CU_add_test(suite, "bs_resize_md", bs_resize_md) == NULL || + CU_add_test(suite, "blob_serialize", blob_serialize) == NULL + ) { + CU_cleanup_registry(); + return CU_get_error(); + } + + g_dev_buffer = calloc(1, DEV_BUFFER_SIZE); + spdk_allocate_thread(); + CU_basic_set_mode(CU_BRM_VERBOSE); + CU_basic_run_tests(); + num_failures = CU_get_number_of_failures(); + CU_cleanup_registry(); + spdk_free_thread(); + free(g_dev_buffer); + return num_failures; +} diff --git a/test/lib/blob/bs_dev_common.c b/test/lib/blob/bs_dev_common.c new file mode 100644 index 000000000..4c44b3207 --- /dev/null +++ b/test/lib/blob/bs_dev_common.c @@ -0,0 +1,116 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define DEV_BUFFER_SIZE (64 * 1024 * 1024) +#define DEV_BUFFER_BLOCKLEN (4096) +#define DEV_BUFFER_BLOCKCNT (DEV_BUFFER_SIZE / DEV_BUFFER_BLOCKLEN) +uint8_t *g_dev_buffer; + +static struct spdk_io_channel * +dev_create_channel(struct spdk_bs_dev *dev) +{ + return NULL; +} + +static void +dev_destroy_channel(struct spdk_bs_dev *dev, struct spdk_io_channel *channel) +{ +} + +static void +dev_destroy(struct spdk_bs_dev *dev) +{ +} + +static void +dev_read(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args) +{ + uint64_t offset, length; + + offset = lba * DEV_BUFFER_BLOCKLEN; + length = lba_count * DEV_BUFFER_BLOCKLEN; + SPDK_CU_ASSERT_FATAL(offset + length <= DEV_BUFFER_SIZE); + memcpy(payload, &g_dev_buffer[offset], length); + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, 0); +} + +static void +dev_write(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args) +{ + uint64_t offset, length; + + offset = lba * DEV_BUFFER_BLOCKLEN; + length = lba_count * DEV_BUFFER_BLOCKLEN; + SPDK_CU_ASSERT_FATAL(offset + length <= DEV_BUFFER_SIZE); + memcpy(&g_dev_buffer[offset], payload, length); + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, 0); +} + +static void +dev_flush(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + struct 
spdk_bs_dev_cb_args *cb_args) +{ + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, 0); +} + +static void +dev_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args) +{ + uint64_t offset, length; + + offset = lba * DEV_BUFFER_BLOCKLEN; + length = lba_count * DEV_BUFFER_BLOCKLEN; + SPDK_CU_ASSERT_FATAL(offset + length <= DEV_BUFFER_SIZE); + memset(&g_dev_buffer[offset], 0, length); + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, 0); +} + +static void +init_dev(struct spdk_bs_dev *dev) +{ + dev->create_channel = dev_create_channel; + dev->destroy_channel = dev_destroy_channel; + dev->destroy = dev_destroy; + dev->read = dev_read; + dev->write = dev_write; + dev->flush = dev_flush; + dev->unmap = dev_unmap; + dev->blockcnt = DEV_BUFFER_BLOCKCNT; + dev->blocklen = DEV_BUFFER_BLOCKLEN; +} diff --git a/unittest.sh b/unittest.sh index 0fcd1683b..6c01d08ed 100755 --- a/unittest.sh +++ b/unittest.sh @@ -5,6 +5,8 @@ set -xe +$valgrind test/lib/blob/blob_ut/blob_ut + $valgrind test/lib/nvme/unit/nvme_c/nvme_ut $valgrind test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut $valgrind test/lib/nvme/unit/nvme_ctrlr_cmd_c/nvme_ctrlr_cmd_ut