diff --git a/Dockerfile.dapper b/Dockerfile.dapper new file mode 100644 index 000000000..6dcd3c012 --- /dev/null +++ b/Dockerfile.dapper @@ -0,0 +1,17 @@ +FROM ubuntu:20.04 + +ARG http_proxy +ARG https_proxy + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && \ + apt-get dist-upgrade -y && \ + apt-get install -y make linux-libc-dev pkg-config \ + devscripts libaio-dev libc6-dev gcc meson \ + python3-pyelftools uuid-dev libssl-dev \ + libibverbs-dev libfuse-dev libiscsi-dev \ + zlib1g-dev libfdt-dev libpcap0.8-dev \ + libncurses-dev libcunit1-dev \ + build-essential nasm autoconf libtool automake + diff --git a/app/spdk_tgt/Makefile b/app/spdk_tgt/Makefile index 627cfe269..31f872866 100644 --- a/app/spdk_tgt/Makefile +++ b/app/spdk_tgt/Makefile @@ -41,6 +41,8 @@ C_SRCS := spdk_tgt.c SPDK_LIB_LIST = $(ALL_MODULES_LIST) +SPDK_LIB_LIST += bdev_longhorn + SPDK_LIB_LIST += event event_iscsi event_nvmf ifeq ($(SPDK_ROOT_DIR)/lib/env_dpdk,$(CONFIG_ENV)) diff --git a/examples/bdev/hello_world/Makefile b/examples/bdev/hello_world/Makefile index ae413fdf3..ed6b0953a 100644 --- a/examples/bdev/hello_world/Makefile +++ b/examples/bdev/hello_world/Makefile @@ -37,6 +37,7 @@ APP = hello_bdev C_SRCS := hello_bdev.c -SPDK_LIB_LIST = $(ALL_MODULES_LIST) event event_bdev +SPDK_LIB_LIST = $(filter-out bdev_longhorn,$(ALL_MODULES_LIST)) event event_bdev + include $(SPDK_ROOT_DIR)/mk/spdk.app.mk diff --git a/examples/blob/cli/Makefile b/examples/blob/cli/Makefile index e072ca6d7..c10c0d57e 100644 --- a/examples/blob/cli/Makefile +++ b/examples/blob/cli/Makefile @@ -38,6 +38,7 @@ APP = blobcli C_SRCS := blobcli.c # Don't link bdev_lvol in blobcli - otherwise this utility cannot operate on an lvolstore -SPDK_LIB_LIST = $(filter-out bdev_lvol,$(ALL_MODULES_LIST)) event event_bdev +SPDK_LIB_LIST1 = $(filter-out bdev_lvol,$(ALL_MODULES_LIST)) event event_bdev +SPDK_LIB_LIST = $(filter-out bdev_longhorn,$(SPDK_LIB_LIST1)) event event_bdev include $(SPDK_ROOT_DIR)/mk/spdk.app.mk 
diff --git a/examples/blob/hello_world/Makefile b/examples/blob/hello_world/Makefile index a93d47755..1e16de48a 100644 --- a/examples/blob/hello_world/Makefile +++ b/examples/blob/hello_world/Makefile @@ -37,6 +37,6 @@ APP = hello_blob C_SRCS := hello_blob.c -SPDK_LIB_LIST = $(ALL_MODULES_LIST) event event_bdev +SPDK_LIB_LIST = $(filter-out bdev_longhorn,$(ALL_MODULES_LIST)) event event_bdev include $(SPDK_ROOT_DIR)/mk/spdk.app.mk diff --git a/include/spdk/rpc.h b/include/spdk/rpc.h index b85606e43..a4b055e2a 100644 --- a/include/spdk/rpc.h +++ b/include/spdk/rpc.h @@ -148,6 +148,19 @@ void spdk_rpc_set_state(uint32_t state_mask); */ uint32_t spdk_rpc_get_state(void); +/** + * Handle an RPC message. This allows the creation of a different + * \c spdk_jsonrpc_server that uses the same internal registry of JSON + * methods. + * + * \param request RPC request to handle. + * \param method Name for the registered method. + * \param params Parameters passed to the method. + */ +void spdk_rpc_handler(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *method, + const struct spdk_json_val *params); + #ifdef __cplusplus } #endif diff --git a/lib/blob/blobstore.c b/lib/blob/blobstore.c index 172fc8ac9..e3b4d7020 100644 --- a/lib/blob/blobstore.c +++ b/lib/blob/blobstore.c @@ -7535,6 +7535,13 @@ spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value, return blob_set_xattr(blob, name, value, value_len, false); } +int +spdk_blob_set_internal_xattr(struct spdk_blob *blob, const char *name, const void *value, + uint16_t value_len) +{ + return blob_set_xattr(blob, name, value, value_len, true); +} + static int blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal) { diff --git a/lib/rpc/rpc.c b/lib/rpc/rpc.c index 9662b887d..98a6a304e 100644 --- a/lib/rpc/rpc.c +++ b/lib/rpc/rpc.c @@ -101,10 +101,10 @@ _get_rpc_method_raw(const char *method) return _get_rpc_method(&method_val); } -static void -jsonrpc_handler(struct spdk_jsonrpc_request *request, 
const struct spdk_json_val *method, - const struct spdk_json_val *params) +void +spdk_rpc_handler(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *method, + const struct spdk_json_val *params) { struct spdk_rpc_method *m; @@ -198,7 +198,7 @@ spdk_rpc_listen(const char *listen_addr) g_jsonrpc_server = spdk_jsonrpc_server_listen(AF_UNIX, 0, (struct sockaddr *)&g_rpc_listen_addr_unix, sizeof(g_rpc_listen_addr_unix), - jsonrpc_handler); + spdk_rpc_handler); if (g_jsonrpc_server == NULL) { SPDK_ERRLOG("spdk_jsonrpc_server_listen() failed\n"); close(g_rpc_lock_fd); diff --git a/mk/spdk.modules.mk b/mk/spdk.modules.mk index b944ea62b..3ef34f2c3 100644 --- a/mk/spdk.modules.mk +++ b/mk/spdk.modules.mk @@ -34,7 +34,7 @@ BLOCKDEV_MODULES_LIST = bdev_malloc bdev_null bdev_nvme bdev_passthru bdev_lvol BLOCKDEV_MODULES_LIST += bdev_raid bdev_error bdev_gpt bdev_split bdev_delay -BLOCKDEV_MODULES_LIST += bdev_zone_block +BLOCKDEV_MODULES_LIST += bdev_zone_block BLOCKDEV_MODULES_LIST += blobfs blobfs_bdev blob_bdev blob lvol vmd nvme # Some bdev modules don't have pollers, so they can directly run in interrupt mode diff --git a/module/bdev/Makefile b/module/bdev/Makefile index bbf33fdfe..84646eb6c 100644 --- a/module/bdev/Makefile +++ b/module/bdev/Makefile @@ -34,7 +34,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -DIRS-y += delay error gpt lvol malloc null nvme passthru raid split zone_block +DIRS-y += delay error gpt lvol malloc null nvme passthru raid split zone_block longhorn DIRS-$(CONFIG_CRYPTO) += crypto diff --git a/module/bdev/longhorn/Makefile b/module/bdev/longhorn/Makefile new file mode 100644 index 000000000..c9e369b68 --- /dev/null +++ b/module/bdev/longhorn/Makefile @@ -0,0 +1,45 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +SO_VER := 3 +SO_MINOR := 0 + +C_SRCS = bdev_longhorn.c bdev_longhorn_rpc.c bdev_longhorn_rebuild.c bdev_longhorn_rebuild_rpc.c bdev_longhorn_remote.c bdev_longhorn_remote_sync.c bdev_longhorn_sync_client.c bdev_longhorn_lvol.c bdev_longhorn_impl.c bdev_longhorn_nvmf.c bdev_longhorn_sync.c bdev_longhorn_replica_rpc.c bdev_longhorn_replica.c bdev_longhorn_snapshot.c bdev_longhorn_snapshot_rpc.c +LIBNAME = bdev_longhorn + +SPDK_MAP_FILE = $(SPDK_ROOT_DIR)/mk/spdk_blank.map + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/module/bdev/longhorn/bdev_longhorn.c b/module/bdev/longhorn/bdev_longhorn.c new file mode 100644 index 000000000..430d51493 --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn.c @@ -0,0 +1,1908 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "bdev_longhorn.h" +#include "bdev_longhorn_impl.h" +#include "bdev_longhorn_nvmf.h" +#include "spdk/env.h" +#include "spdk/thread.h" +#include "spdk/log.h" +#include "spdk/string.h" +#include "spdk/util.h" +#include "spdk/json.h" +#include "spdk/string.h" + +static bool g_shutdown_started = false; + +/* longhorn bdev config as read from config file */ +struct longhorn_config g_longhorn_config = { + .longhorn_bdev_config_head = TAILQ_HEAD_INITIALIZER(g_longhorn_config.longhorn_bdev_config_head), +}; + +/* + * List of longhorn bdev in configured list, these longhorn bdevs are registered with + * bdev layer + */ +struct longhorn_configured_tailq g_longhorn_bdev_configured_list = TAILQ_HEAD_INITIALIZER( + g_longhorn_bdev_configured_list); + +/* List of longhorn bdev in configuring list */ +struct longhorn_configuring_tailq g_longhorn_bdev_configuring_list = TAILQ_HEAD_INITIALIZER( + g_longhorn_bdev_configuring_list); + +/* List of all longhorn bdevs */ +struct longhorn_all_tailq g_longhorn_bdev_list = TAILQ_HEAD_INITIALIZER(g_longhorn_bdev_list); + +/* List of all longhorn bdevs that are offline */ +struct longhorn_offline_tailq g_longhorn_bdev_offline_list = TAILQ_HEAD_INITIALIZER( + g_longhorn_bdev_offline_list); + +/* Function declarations */ +static void longhorn_bdev_examine(struct spdk_bdev *bdev); +static int longhorn_bdev_init(void); +static void longhorn_bdev_deconfigure(struct longhorn_bdev *longhorn_bdev, + 
longhorn_bdev_destruct_cb cb_fn, void *cb_arg); +static void longhorn_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, + void *event_ctx); + +/* + * brief: + * longhorn_bdev_create_cb function is a cb function for longhorn bdev which creates the + * hierarchy from longhorn bdev to base bdev io channels. It will be called per core + * params: + * io_device - pointer to longhorn bdev io device represented by longhorn_bdev + * ctx_buf - pointer to context buffer for longhorn bdev io channel + * returns: + * 0 - success + * non zero - failure + */ +static int +longhorn_bdev_create_cb(void *io_device, void *ctx_buf) +{ + struct longhorn_bdev *longhorn_bdev = io_device; + struct longhorn_bdev_io_channel *longhorn_ch = ctx_buf; + struct longhorn_base_bdev_info *base_info; + struct spdk_thread *thread; + struct longhorn_base_io_channel *base_channel; + uint8_t i = 0; + + TAILQ_INIT(&longhorn_ch->base_channels); + thread = spdk_get_thread(); + + longhorn_ch->thread = thread; + SPDK_DEBUGLOG(bdev_longhorn, "onghorn_bdev_create_cb, %p\n", longhorn_ch); + SPDK_ERRLOG("longhorn_bdev_create_cb, %p, %p (%s)\n", longhorn_ch, thread, spdk_thread_get_name(thread)); + + assert(longhorn_bdev != NULL); + assert(longhorn_bdev->state == RAID_BDEV_STATE_ONLINE); + + longhorn_ch->num_channels = longhorn_bdev->num_base_bdevs; + longhorn_ch->longhorn_bdev = longhorn_bdev; + + +#if 0 + // TODO linked list + longhorn_ch->base_channel = calloc(longhorn_ch->num_channels, + sizeof(struct spdk_io_channel *)); + if (!longhorn_ch->base_channel) { + SPDK_ERRLOG("Unable to allocate base bdevs io channel\n"); + return -ENOMEM; + } +#endif + + TAILQ_FOREACH(base_info, &longhorn_bdev->base_bdevs_head, infos) { + base_channel = calloc(1, sizeof(*base_channel)); + /* + * Get the spdk_io_channel for all the base bdevs. This is used during + * split logic to send the respective child bdev ios to respective base + * bdev io channel. 
+ */ + base_channel->base_channel = spdk_bdev_get_io_channel(base_info->desc); + //longhorn_ch->base_channel[i] = base_info->base_channel; + + //base_channel->base_channel = base_info->base_channel; + SPDK_ERRLOG("base_info when creating io_channel %p\n", base_info); + base_channel->base_info = base_info; + + if (!base_channel->base_channel) { + SPDK_ERRLOG("Unable to create io channel for base bdev\n"); + } +#if 0 + if (!base_channel->base_channel[i]) { + uint8_t j; + + for (j = 0; j < i; j++) { + spdk_put_io_channel(longhorn_ch->base_channel[j]); + } + free(base_channel->base_channel); + longhorn_ch->base_channel = NULL; + SPDK_ERRLOG("Unable to create io channel for base bdev\n"); + return -ENOMEM; + } +#endif + + TAILQ_INSERT_TAIL(&longhorn_ch->base_channels, + base_channel, channels); + + + ++i; + + } + + TAILQ_INSERT_TAIL(&longhorn_bdev->io_channel_head, longhorn_ch, channels); + longhorn_bdev->num_io_channels++; + SPDK_ERRLOG("adding num io channels %u\n", longhorn_bdev->num_io_channels); + + return 0; +} + +static void longhorn_check_pause_complete(struct longhorn_bdev *longhorn_bdev) +{ + struct longhorn_bdev_io_channel *io_channel; + struct longhorn_pause_cb_entry *entry; + struct longhorn_pause_cb_entry *next; + + TAILQ_FOREACH(io_channel, &longhorn_bdev->io_channel_head, channels) { + if (!io_channel->pause_complete) { + return; + } + } + + SPDK_ERRLOG("PAUSE COMPLETE \n"); + + // Call pause callback(s). 
+ entry = TAILQ_FIRST(&longhorn_bdev->pause_cbs); + + while (entry != NULL) { + + if (entry->cb_fn != NULL) { + entry->cb_fn(longhorn_bdev, entry->cb_arg); + } else { + SPDK_ERRLOG("PAUSE CB NULL \n"); + } + next = TAILQ_NEXT(entry, link); + + free(entry); + + entry = next; + } + + TAILQ_INIT(&longhorn_bdev->pause_cbs); +} + +void bdev_longhorn_pause_io(void *cb_arg) { + struct longhorn_bdev_io_channel *longhorn_ch = cb_arg; + + longhorn_ch->paused = true; + + SPDK_ERRLOG("PAUSE CB : %d \n", longhorn_ch->io_ops); + + + if (longhorn_ch->io_ops == 0) { + longhorn_ch->pause_complete = true; + + longhorn_check_pause_complete(longhorn_ch->longhorn_bdev); + } + +} + +void bdev_longhorn_unpause_io(void *cb_arg) { + struct longhorn_bdev_io_channel *longhorn_ch = cb_arg; + + longhorn_ch->paused = false; + longhorn_ch->pause_complete = false; +} + +void longhorn_volume_add_pause_cb(struct longhorn_bdev *longhorn_bdev, + longhorn_pause_cb cb_fn, + void *cb_arg) +{ + struct longhorn_pause_cb_entry *entry; + + entry = calloc(1, sizeof(*entry)); + entry->cb_fn = cb_fn; + entry->cb_arg = cb_arg; + SPDK_ERRLOG("adding PAUSE CB \n"); + + TAILQ_INSERT_TAIL(&longhorn_bdev->pause_cbs, entry, link); +} + +/* + * brief: + * longhorn_bdev_destroy_cb function is a cb function for longhorn bdev which deletes the + * hierarchy from longhorn bdev to base bdev io channels. 
It will be called per core + * params: + * io_device - pointer to longhorn bdev io device represented by longhorn_bdev + * ctx_buf - pointer to context buffer for longhorn bdev io channel + * returns: + * none + */ +static void +longhorn_bdev_destroy_cb(void *io_device, void *ctx_buf) +{ + struct longhorn_bdev *longhorn_bdev = io_device; + struct longhorn_bdev_io_channel *longhorn_ch = ctx_buf; + struct longhorn_base_io_channel *base_channel; + struct longhorn_base_io_channel *next; + uint8_t i; + + SPDK_DEBUGLOG(bdev_longhorn, "longhorn_bdev_destroy_cb\n"); + SPDK_ERRLOG("longhorn_bdev_destroy_cb, %p\n", longhorn_ch); + + assert(longhorn_ch != NULL); + + + base_channel = TAILQ_FIRST(&longhorn_ch->base_channels); + + while (base_channel != NULL) { + next = TAILQ_NEXT(base_channel, channels); + + + SPDK_ERRLOG("longhorn_bdev_destroy_cb, removing bdev %s\n", base_channel->base_info->bdev->name); + spdk_put_io_channel(base_channel->base_channel); + + free(base_channel); + + base_channel = next; + } + + + longhorn_ch->deleted = true; + + longhorn_bdev->num_io_channels--; + SPDK_ERRLOG("removing num io channels %u\n", longhorn_bdev->num_io_channels); + TAILQ_REMOVE(&longhorn_bdev->io_channel_head, longhorn_ch, channels); + +#if 0 + + for (i = 0; i < longhorn_ch->num_channels; i++) { + /* Free base bdev channels */ + assert(longhorn_ch->base_channel[i] != NULL); + spdk_put_io_channel(longhorn_ch->base_channel[i]); + } + free(longhorn_ch->base_channel); +#endif + //onghorn_ch->base_channel = NULL; +} + +/* + * brief: + * longhorn_bdev_cleanup is used to cleanup and free longhorn_bdev related data + * structures. 
+ * params: + * longhorn_bdev - pointer to longhorn_bdev + * returns: + * none + */ +static void +longhorn_bdev_cleanup(struct longhorn_bdev *longhorn_bdev) +{ + SPDK_DEBUGLOG(bdev_longhorn, "longhorn_bdev_cleanup, %p name %s, state %u, config %p\n", + longhorn_bdev, + longhorn_bdev->bdev.name, longhorn_bdev->state, longhorn_bdev->config); + if (longhorn_bdev->state == RAID_BDEV_STATE_CONFIGURING) { + TAILQ_REMOVE(&g_longhorn_bdev_configuring_list, longhorn_bdev, state_link); + } else if (longhorn_bdev->state == RAID_BDEV_STATE_OFFLINE) { + TAILQ_REMOVE(&g_longhorn_bdev_offline_list, longhorn_bdev, state_link); + } else { + assert(0); + } + TAILQ_REMOVE(&g_longhorn_bdev_list, longhorn_bdev, global_link); + free(longhorn_bdev->bdev.name); + free(longhorn_bdev->base_bdev_info); + if (longhorn_bdev->config) { + longhorn_bdev->config->longhorn_bdev = NULL; + } + free(longhorn_bdev); +} + +/* + * brief: + * wrapper for the bdev close operation + * params: + * base_info - longhorn base bdev info + * returns: + */ +static void +_longhorn_bdev_free_base_bdev_resource(void *ctx) +{ + struct spdk_bdev_desc *desc = ctx; + + spdk_bdev_close(desc); +} + + +/* + * brief: + * free resource of base bdev for longhorn bdev + * params: + * longhorn_bdev - pointer to longhorn bdev + * base_info - longhorn base bdev info + * returns: + * 0 - success + * non zero - failure + */ +static void +longhorn_bdev_free_base_bdev_resource(struct longhorn_bdev *longhorn_bdev, + struct longhorn_base_bdev_info *base_info) +{ + spdk_bdev_module_release_bdev(base_info->bdev); + if (base_info->thread && base_info->thread != spdk_get_thread()) { + spdk_thread_send_msg(base_info->thread, _longhorn_bdev_free_base_bdev_resource, base_info->desc); + } else { + spdk_bdev_close(base_info->desc); + } + base_info->desc = NULL; + base_info->bdev = NULL; + + assert(longhorn_bdev->num_base_bdevs_discovered); + longhorn_bdev->num_base_bdevs_discovered--; +} + +/* + * brief: + * longhorn_bdev_destruct is the 
destruct function table pointer for longhorn bdev + * params: + * ctxt - pointer to longhorn_bdev + * returns: + * 0 - success + * non zero - failure + */ +static int +longhorn_bdev_destruct(void *ctxt) +{ + struct longhorn_bdev *longhorn_bdev = ctxt; + struct longhorn_base_bdev_info *base_info; + + SPDK_DEBUGLOG(bdev_longhorn, "longhorn_bdev_destruct\n"); + + longhorn_bdev->destruct_called = true; + //LONGHORN_FOR_EACH_BASE_BDEV(longhorn_bdev, base_info) { + TAILQ_FOREACH(base_info, &longhorn_bdev->base_bdevs_head, infos) { + + /* + * Close all base bdev descriptors for which call has come from below + * layers. Also close the descriptors if we have started shutdown. + */ + if (g_shutdown_started || + ((base_info->remove_scheduled == true) && + (base_info->bdev != NULL))) { + longhorn_bdev_free_base_bdev_resource(longhorn_bdev, base_info); + } + } + + if (g_shutdown_started) { + TAILQ_REMOVE(&g_longhorn_bdev_configured_list, longhorn_bdev, state_link); + longhorn_bdev->state = RAID_BDEV_STATE_OFFLINE; + TAILQ_INSERT_TAIL(&g_longhorn_bdev_offline_list, longhorn_bdev, state_link); + } + + spdk_io_device_unregister(longhorn_bdev, NULL); + + if (longhorn_bdev->num_base_bdevs_discovered == 0) { + /* Free longhorn_bdev when there are no base bdevs left */ + SPDK_DEBUGLOG(bdev_longhorn, "longhorn bdev base bdevs is 0, going to free all in destruct\n"); + longhorn_bdev_cleanup(longhorn_bdev); + } + + return 0; +} + +void +longhorn_bdev_io_complete(struct longhorn_bdev_io *longhorn_io, enum spdk_bdev_io_status status) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(longhorn_io); + + if (longhorn_io->submitted) { + if (atomic_load(&longhorn_io->longhorn_ch->io_ops) == 0) { + if (longhorn_io->longhorn_ch->paused) { + longhorn_io->longhorn_ch->pause_complete = true; + + longhorn_check_pause_complete(longhorn_io->longhorn_bdev); + + } + + } + } + + + spdk_bdev_io_complete(bdev_io, status); +} + +/* + * brief: + * longhorn_bdev_io_complete_part - signal the completion 
of a part of the expected + * base bdev IOs and complete the longhorn_io if this is the final expected IO. + * The caller should first set longhorn_io->base_bdev_io_remaining. This function + * will decrement this counter by the value of the 'completed' parameter and + * complete the longhorn_io if the counter reaches 0. The caller is free to + * interpret the 'base_bdev_io_remaining' and 'completed' values as needed, + * it can represent e.g. blocks or IOs. + * params: + * longhorn_io - pointer to longhorn_bdev_io + * completed - the part of the longhorn_io that has been completed + * status - status of the base IO + * returns: + * true - if the longhorn_io is completed + * false - otherwise + */ +bool +longhorn_bdev_io_complete_part(struct longhorn_bdev_io *longhorn_io, uint64_t completed, + enum spdk_bdev_io_status status) +{ + assert(longhorn_io->base_bdev_io_remaining >= completed); + longhorn_io->base_bdev_io_remaining -= completed; + + atomic_fetch_sub(&longhorn_io->longhorn_bdev->io_ops, 1); + atomic_fetch_sub(&longhorn_io->longhorn_ch->io_ops, 1); + + + if (status != SPDK_BDEV_IO_STATUS_SUCCESS) { + longhorn_io->base_bdev_io_status = status; + } + + if (longhorn_io->base_bdev_io_remaining == 0) { + longhorn_bdev_io_complete(longhorn_io, longhorn_io->base_bdev_io_status); + return true; + } else { + return false; + } +} + +/* + * brief: + * longhorn_bdev_queue_io_wait function processes the IO which failed to submit. + * It will try to queue the IOs after storing the context to bdev wait queue logic. 
+ * params: + * longhorn_io - pointer to longhorn_bdev_io + * bdev - the block device that the IO is submitted to + * ch - io channel + * cb_fn - callback when the spdk_bdev_io for bdev becomes available + * returns: + * none + */ +void +longhorn_bdev_queue_io_wait(struct longhorn_bdev_io *longhorn_io, struct spdk_bdev *bdev, + struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn) +{ + longhorn_io->waitq_entry.bdev = bdev; + longhorn_io->waitq_entry.cb_fn = cb_fn; + longhorn_io->waitq_entry.cb_arg = longhorn_io; + spdk_bdev_queue_io_wait(bdev, ch, &longhorn_io->waitq_entry); +} + +static void +longhorn_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct longhorn_bdev_io *longhorn_io = cb_arg; + + spdk_bdev_free_io(bdev_io); + + longhorn_bdev_io_complete_part(longhorn_io, 1, success ? + SPDK_BDEV_IO_STATUS_SUCCESS : + SPDK_BDEV_IO_STATUS_FAILED); +} + +static void +longhorn_bdev_submit_reset_request(struct longhorn_bdev_io *longhorn_io); + +static void +_longhorn_bdev_submit_reset_request(void *_longhorn_io) +{ + struct longhorn_bdev_io *longhorn_io = _longhorn_io; + + longhorn_bdev_submit_reset_request(longhorn_io); +} + +/* + * brief: + * longhorn_bdev_submit_reset_request function submits reset requests + * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in + * which case it will queue it for later submission + * params: + * longhorn_io + * returns: + * none + */ +static void +longhorn_bdev_submit_reset_request(struct longhorn_bdev_io *longhorn_io) +{ + struct longhorn_bdev_io_channel *longhorn_ch = longhorn_io->longhorn_ch; + struct longhorn_bdev *longhorn_bdev = longhorn_io->longhorn_bdev; + int ret; + struct longhorn_base_bdev_info *base_info; + struct spdk_io_channel *base_ch; + struct longhorn_base_io_channel *base_channel; + + + if (longhorn_io->base_bdev_io_remaining == 0) { + longhorn_io->base_bdev_io_remaining = longhorn_bdev->num_base_bdevs; + } + + 
TAILQ_FOREACH(base_channel, &longhorn_ch->base_channels, channels) { + //while (longhorn_io->base_bdev_io_submitted < longhorn_bdev->num_base_bdevs) { + base_ch = base_channel->base_channel; + base_info = base_channel->base_info; + + ret = spdk_bdev_reset(base_info->desc, base_ch, + longhorn_base_bdev_reset_complete, longhorn_io); + if (ret == 0) { + longhorn_io->base_bdev_io_submitted++; + } else if (ret == -ENOMEM) { + longhorn_bdev_queue_io_wait(longhorn_io, base_info->bdev, base_ch, + _longhorn_bdev_submit_reset_request); + return; + } else { + SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); + assert(false); + longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + } +} + +/* + * brief: + * Callback function to spdk_bdev_io_get_buf. + * params: + * ch - pointer to longhorn bdev io channel + * bdev_io - pointer to parent bdev_io on longhorn bdev device + * success - True if buffer is allocated or false otherwise. + * returns: + * none + */ +static void +longhorn_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, + bool success) +{ + struct longhorn_bdev_io *longhorn_io = (struct longhorn_bdev_io *)bdev_io->driver_ctx; + + if (!success) { + longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + longhorn_submit_rw_request(longhorn_io); +} + +/* + * brief: + * longhorn_bdev_submit_request function is the submit_request function pointer of + * longhorn bdev function table. This is used to submit the io on longhorn_bdev to below + * layers. 
+ * params: + * ch - pointer to longhorn bdev io channel + * bdev_io - pointer to parent bdev_io on longhorn bdev device + * returns: + * none + */ +static void +longhorn_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct longhorn_bdev_io *longhorn_io = (struct longhorn_bdev_io *)bdev_io->driver_ctx; + + longhorn_io->longhorn_bdev = bdev_io->bdev->ctxt; + longhorn_io->longhorn_ch = spdk_io_channel_get_ctx(ch); + longhorn_io->base_bdev_io_remaining = 0; + longhorn_io->base_bdev_io_submitted = 0; + longhorn_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + spdk_bdev_io_get_buf(bdev_io, longhorn_bdev_get_buf_cb, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + break; + case SPDK_BDEV_IO_TYPE_WRITE: + longhorn_submit_rw_request(longhorn_io); + break; + + case SPDK_BDEV_IO_TYPE_RESET: + longhorn_bdev_submit_reset_request(longhorn_io); + break; + + case SPDK_BDEV_IO_TYPE_FLUSH: + case SPDK_BDEV_IO_TYPE_UNMAP: + longhorn_submit_null_payload_request(longhorn_io); + break; + + default: + SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type); + longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_FAILED); + break; + } +} + +/* + * brief: + * _longhorn_bdev_io_type_supported checks whether io_type is supported in + * all base bdev modules of longhorn bdev module. 
If anyone among the base_bdevs + * doesn't support, the longhorn device doesn't supports + * params: + * longhorn_bdev - pointer to longhorn bdev context + * io_type - io type + * returns: + * true - io_type is supported + * false - io_type is not supported + */ +inline static bool +_longhorn_bdev_io_type_supported(struct longhorn_bdev *longhorn_bdev, enum spdk_bdev_io_type io_type) +{ + struct longhorn_base_bdev_info *base_info; + +#if 0 + if (io_type == SPDK_BDEV_IO_TYPE_FLUSH || + io_type == SPDK_BDEV_IO_TYPE_UNMAP) { + return false; + } + +#endif + //LONGHORN_FOR_EACH_BASE_BDEV(longhorn_bdev, base_info) { + TAILQ_FOREACH(base_info, &longhorn_bdev->base_bdevs_head, infos) { + if (base_info->bdev == NULL) { + assert(false); + continue; + } + + if (spdk_bdev_io_type_supported(base_info->bdev, io_type) == false) { + return false; + } + } + + return true; +} + +/* + * brief: + * longhorn_bdev_io_type_supported is the io_supported function for bdev function + * table which returns whether the particular io type is supported or not by + * longhorn bdev module + * params: + * ctx - pointer to longhorn bdev context + * type - io type + * returns: + * true - io_type is supported + * false - io_type is not supported + */ +static bool +longhorn_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + switch (io_type) { + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + return true; + + case SPDK_BDEV_IO_TYPE_FLUSH: + case SPDK_BDEV_IO_TYPE_RESET: + case SPDK_BDEV_IO_TYPE_UNMAP: + return _longhorn_bdev_io_type_supported(ctx, io_type); + + default: + return false; + } + + return false; +} + +/* + * brief: + * longhorn_bdev_get_io_channel is the get_io_channel function table pointer for + * longhorn bdev. 
This is used to return the io channel for this longhorn bdev + * params: + * ctxt - pointer to longhorn_bdev + * returns: + * pointer to io channel for longhorn bdev + */ +static struct spdk_io_channel * +longhorn_bdev_get_io_channel(void *ctxt) +{ + struct longhorn_bdev *longhorn_bdev = ctxt; + + return spdk_get_io_channel(longhorn_bdev); +} + +/* + * brief: + * longhorn_bdev_dump_info_json is the function table pointer for longhorn bdev + * params: + * ctx - pointer to longhorn_bdev + * w - pointer to json context + * returns: + * 0 - success + * non zero - failure + */ +static int +longhorn_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct longhorn_bdev *longhorn_bdev = ctx; + struct longhorn_base_bdev_info *base_info; + + SPDK_DEBUGLOG(bdev_longhorn, "longhorn_bdev_dump_config_json\n"); + assert(longhorn_bdev != NULL); + + /* Dump the longhorn bdev configuration related information */ + spdk_json_write_named_object_begin(w, "longhorn"); + spdk_json_write_named_uint32(w, "state", longhorn_bdev->state); + spdk_json_write_named_uint32(w, "destruct_called", longhorn_bdev->destruct_called); + spdk_json_write_named_uint32(w, "num_base_bdevs", longhorn_bdev->num_base_bdevs); + spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", longhorn_bdev->num_base_bdevs_discovered); + spdk_json_write_name(w, "base_bdevs_list"); + spdk_json_write_array_begin(w); + //LONGHORN_FOR_EACH_BASE_BDEV(longhorn_bdev, base_info) { + TAILQ_FOREACH(base_info, &longhorn_bdev->base_bdevs_head, infos) { + if (base_info->bdev) { + spdk_json_write_string(w, base_info->bdev->name); + } else { + spdk_json_write_null(w); + } + } + spdk_json_write_array_end(w); + spdk_json_write_object_end(w); + + return 0; +} + +/* + * brief: + * longhorn_bdev_write_config_json is the function table pointer for longhorn bdev + * params: + * bdev - pointer to spdk_bdev + * w - pointer to json context + * returns: + * none + */ +static void +longhorn_bdev_write_config_json(struct 
spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + struct longhorn_bdev *longhorn_bdev = bdev->ctxt; + struct longhorn_base_bdev_info *base_info; + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "bdev_longhorn_create"); + + spdk_json_write_named_object_begin(w, "params"); + spdk_json_write_named_string(w, "name", bdev->name); + + spdk_json_write_named_array_begin(w, "base_bdevs"); + //LONGHORN_FOR_EACH_BASE_BDEV(longhorn_bdev, base_info) { + TAILQ_FOREACH(base_info, &longhorn_bdev->base_bdevs_head, infos) { + if (base_info->bdev) { + spdk_json_write_string(w, base_info->bdev->name); + } + } + spdk_json_write_array_end(w); + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + +/* g_longhorn_bdev_fn_table is the function table for longhorn bdev */ +static const struct spdk_bdev_fn_table g_longhorn_bdev_fn_table = { + .destruct = longhorn_bdev_destruct, + .submit_request = longhorn_bdev_submit_request, + .io_type_supported = longhorn_bdev_io_type_supported, + .get_io_channel = longhorn_bdev_get_io_channel, + .dump_info_json = longhorn_bdev_dump_info_json, + .write_config_json = longhorn_bdev_write_config_json, +}; + +/* + * brief: + * longhorn_bdev_config_cleanup function is used to free memory for one longhorn_bdev in configuration + * params: + * longhorn_cfg - pointer to longhorn_bdev_config structure + * returns: + * none + */ +void +longhorn_bdev_config_cleanup(struct longhorn_bdev_config *longhorn_cfg) +{ + uint8_t i; + + TAILQ_REMOVE(&g_longhorn_config.longhorn_bdev_config_head, longhorn_cfg, link); + g_longhorn_config.total_longhorn_bdev--; + + if (longhorn_cfg->base_bdev) { + for (i = 0; i < longhorn_cfg->num_base_bdevs; i++) { + free(longhorn_cfg->base_bdev[i].name); + } + free(longhorn_cfg->base_bdev); + } + free(longhorn_cfg->name); + free(longhorn_cfg); +} + +/* + * brief: + * longhorn_bdev_free is the longhorn bdev function table function pointer. 
This is + * called on bdev free path + * params: + * none + * returns: + * none + */ +static void +longhorn_bdev_free(void) +{ + struct longhorn_bdev_config *longhorn_cfg, *tmp; + + SPDK_DEBUGLOG(bdev_longhorn, "longhorn_bdev_free\n"); + TAILQ_FOREACH_SAFE(longhorn_cfg, &g_longhorn_config.longhorn_bdev_config_head, link, tmp) { + longhorn_bdev_config_cleanup(longhorn_cfg); + } +} + +/* brief + * longhorn_bdev_config_find_by_name is a helper function to find longhorn bdev config + * by name as key. + * + * params: + * longhorn_name - name for longhorn bdev. + */ +struct longhorn_bdev_config * +longhorn_bdev_config_find_by_name(const char *longhorn_name) +{ + struct longhorn_bdev_config *longhorn_cfg; + + TAILQ_FOREACH(longhorn_cfg, &g_longhorn_config.longhorn_bdev_config_head, link) { + if (!strcmp(longhorn_cfg->name, longhorn_name)) { + return longhorn_cfg; + } + } + + return longhorn_cfg; +} + +/* brief + * longhorn_bdev_find_by_name is a helper function to find longhorn bdev + * by name as key. + * + * params: + * longhorn_name - name for longhorn bdev. + */ +struct longhorn_bdev * +longhorn_bdev_find_by_name(const char *longhorn_name) +{ + struct longhorn_bdev *longhorn_bdev; + + TAILQ_FOREACH(longhorn_bdev, &g_longhorn_bdev_list, global_link) { + if (!strcmp(longhorn_bdev->bdev.name, longhorn_name)) { + return longhorn_bdev; + } + } + + return NULL; +} + + +/* + * brief + * longhorn_bdev_config_add function adds config for newly created longhorn bdev. + * + * params: + * longhorn_name - name for longhorn bdev. + * strip_size - strip size in KB + * num_base_bdevs - number of base bdevs. 
+ * _longhorn_cfg - Pointer to newly added configuration + */ +int +longhorn_bdev_config_add(const char *longhorn_name, uint8_t num_base_bdevs, + struct longhorn_bdev_config **_longhorn_cfg) +{ + struct longhorn_bdev_config *longhorn_cfg; + + longhorn_cfg = longhorn_bdev_config_find_by_name(longhorn_name); + if (longhorn_cfg != NULL) { + SPDK_ERRLOG("Duplicate longhorn bdev name found in config file %s\n", + longhorn_name); + return -EEXIST; + } + + + if (num_base_bdevs == 0) { + SPDK_ERRLOG("Invalid base device count %u\n", num_base_bdevs); + return -EINVAL; + } + + longhorn_cfg = calloc(1, sizeof(*longhorn_cfg)); + if (longhorn_cfg == NULL) { + SPDK_ERRLOG("unable to allocate memory\n"); + return -ENOMEM; + } + + longhorn_cfg->name = strdup(longhorn_name); + if (!longhorn_cfg->name) { + free(longhorn_cfg); + SPDK_ERRLOG("unable to allocate memory\n"); + return -ENOMEM; + } + longhorn_cfg->num_base_bdevs = num_base_bdevs; + + longhorn_cfg->base_bdev = calloc(num_base_bdevs, sizeof(*longhorn_cfg->base_bdev)); + if (longhorn_cfg->base_bdev == NULL) { + free(longhorn_cfg->name); + free(longhorn_cfg); + SPDK_ERRLOG("unable to allocate memory\n"); + return -ENOMEM; + } + + TAILQ_INSERT_TAIL(&g_longhorn_config.longhorn_bdev_config_head, longhorn_cfg, link); + g_longhorn_config.total_longhorn_bdev++; + + *_longhorn_cfg = longhorn_cfg; + return 0; +} + +/* + * brief: + * longhorn_bdev_config_add_base_bdev function add base bdev to longhorn bdev config. 
+ * + * params: + * longhorn_cfg - pointer to longhorn bdev configuration + * base_bdev_name - name of base bdev + * slot - Position to add base bdev + */ +int +longhorn_bdev_config_add_base_bdev(struct longhorn_bdev_config *longhorn_cfg, const char *base_bdev_name, + uint8_t slot) +{ + uint8_t i; + struct longhorn_bdev_config *tmp; + char *bdev_name; + + if (slot >= longhorn_cfg->num_base_bdevs) { + return -EINVAL; + } + + bdev_name = spdk_sprintf_alloc("%s/%s", base_bdev_name, longhorn_cfg->name); + +#if 0 + TAILQ_FOREACH(tmp, &g_longhorn_config.longhorn_bdev_config_head, link) { + for (i = 0; i < tmp->num_base_bdevs; i++) { + if (tmp->base_bdev[i].name != NULL) { + if (!strcmp(tmp->base_bdev[i].name, bdev_name)) { + SPDK_ERRLOG("duplicate base bdev name %s mentioned\n", + base_bdev_name); + return -EEXIST; + } + } + } + } +#endif + + longhorn_cfg->base_bdev[slot].name = bdev_name; + if (longhorn_cfg->base_bdev[slot].name == NULL) { + SPDK_ERRLOG("unable to allocate memory\n"); + return -ENOMEM; + } + + return 0; +} + +/* + * brief: + * longhorn_bdev_fini_start is called when bdev layer is starting the + * shutdown process + * params: + * none + * returns: + * none + */ +static void +longhorn_bdev_fini_start(void) +{ + SPDK_DEBUGLOG(bdev_longhorn, "longhorn_bdev_fini_start\n"); + g_shutdown_started = true; +} + +/* + * brief: + * longhorn_bdev_exit is called on longhorn bdev module exit time by bdev layer + * params: + * none + * returns: + * none + */ +static void +longhorn_bdev_exit(void) +{ + SPDK_DEBUGLOG(bdev_longhorn, "longhorn_bdev_exit\n"); + longhorn_bdev_free(); +} + +/* + * brief: + * longhorn_bdev_get_ctx_size is used to return the context size of bdev_io for longhorn + * module + * params: + * none + * returns: + * size of spdk_bdev_io context for longhorn + */ +static int +longhorn_bdev_get_ctx_size(void) +{ + SPDK_DEBUGLOG(bdev_longhorn, "longhorn_bdev_get_ctx_size\n"); + return sizeof(struct longhorn_bdev_io); +} + +/* + * brief: + * 
longhorn_bdev_can_claim_bdev is the function to check if this base_bdev can be + * claimed by longhorn bdev or not. + * params: + * bdev_name - represents base bdev name + * _longhorn_cfg - pointer to longhorn bdev config parsed from config file + * base_bdev_slot - if bdev can be claimed, it represents the base_bdev correct + * slot. This field is only valid if return value of this function is true + * returns: + * true - if bdev can be claimed + * false - if bdev can't be claimed + */ +static bool +longhorn_bdev_can_claim_bdev(const char *bdev_name, struct longhorn_bdev_config **_longhorn_cfg, + uint8_t *base_bdev_slot) +{ + struct longhorn_bdev_config *longhorn_cfg; + uint8_t i; + + TAILQ_FOREACH(longhorn_cfg, &g_longhorn_config.longhorn_bdev_config_head, link) { + for (i = 0; i < longhorn_cfg->num_base_bdevs; i++) { + /* + * Check if the base bdev name is part of longhorn bdev configuration. + * If match is found then return true and the slot information where + * this base bdev should be inserted in longhorn bdev + */ + if (!strcmp(bdev_name, longhorn_cfg->base_bdev[i].name)) { + *_longhorn_cfg = longhorn_cfg; + *base_bdev_slot = i; + return true; + } + } + } + + return false; +} + + +static struct spdk_bdev_module g_longhorn_if = { + .name = "longhorn", + .module_init = longhorn_bdev_init, + .fini_start = longhorn_bdev_fini_start, + .module_fini = longhorn_bdev_exit, + .get_ctx_size = longhorn_bdev_get_ctx_size, + .examine_config = longhorn_bdev_examine, + .async_init = false, + .async_fini = false, +}; +SPDK_BDEV_MODULE_REGISTER(longhorn, &g_longhorn_if) + +/* + * brief: + * longhorn_bdev_init is the initialization function for longhorn bdev module + * params: + * none + * returns: + * 0 - success + * non zero - failure + */ +static int +longhorn_bdev_init(void) +{ + return 0; +} + +/* + * brief: + * longhorn_bdev_create allocates longhorn bdev based on passed configuration + * params: + * longhorn_cfg - configuration of longhorn bdev + * returns: + * 0 - 
success + * non zero - failure + */ +int +//longhorn_bdev_create(struct longhorn_bdev_config *longhorn_cfg) +longhorn_bdev_create(const char *name, uint8_t num_base_bdevs) +{ + struct longhorn_bdev *longhorn_bdev; + struct spdk_bdev *longhorn_bdev_gen; + + + longhorn_bdev = calloc(1, sizeof(*longhorn_bdev)); + if (!longhorn_bdev) { + SPDK_ERRLOG("Unable to allocate memory for longhorn bdev\n"); + return -ENOMEM; + } + + longhorn_bdev->io_ops = 0; + longhorn_bdev->num_base_bdevs = num_base_bdevs; + //longhorn_bdev->base_bdev_info = calloc(longhorn_bdev->num_base_bdevs, + // sizeof(struct longhorn_base_bdev_info)); + //if (!longhorn_bdev->base_bdev_info) { +// SPDK_ERRLOG("Unable able to allocate base bdev info\n"); +// free(longhorn_bdev); +// return -ENOMEM; +// } +// + + pthread_mutex_init(&longhorn_bdev->base_bdevs_mutex, NULL); + + + TAILQ_INIT(&longhorn_bdev->pause_cbs); + TAILQ_INIT(&longhorn_bdev->base_bdevs_head); + TAILQ_INIT(&longhorn_bdev->io_channel_head); + + longhorn_bdev->state = RAID_BDEV_STATE_CONFIGURING; + + longhorn_bdev_gen = &longhorn_bdev->bdev; + + longhorn_bdev_gen->name = strdup(name); + if (!longhorn_bdev_gen->name) { + SPDK_ERRLOG("Unable to allocate name for longhorn\n"); + //free(longhorn_bdev->base_bdev_info); + free(longhorn_bdev); + return -ENOMEM; + } + + longhorn_bdev_gen->product_name = "Longhorn Volume"; + longhorn_bdev_gen->ctxt = longhorn_bdev; + longhorn_bdev_gen->fn_table = &g_longhorn_bdev_fn_table; + longhorn_bdev_gen->module = &g_longhorn_if; + longhorn_bdev_gen->write_cache = 0; + + TAILQ_INSERT_TAIL(&g_longhorn_bdev_configuring_list, longhorn_bdev, state_link); + TAILQ_INSERT_TAIL(&g_longhorn_bdev_list, longhorn_bdev, global_link); + + + return 0; + +} + +/* + * brief + * longhorn_bdev_alloc_base_bdev_resource allocates resource of base bdev. 
+ * params: + * longhorn_bdev - pointer to longhorn bdev + * bdev_name - base bdev name + * base_bdev_slot - position to add base bdev + * returns: + * 0 - success + * non zero - failure + */ +static int +longhorn_bdev_alloc_base_bdev_resource(struct longhorn_bdev *longhorn_bdev, const char *bdev_name) +{ + struct spdk_bdev_desc *desc; + struct spdk_bdev *bdev; + struct longhorn_base_bdev_info *base_info; + int rc; + + rc = spdk_bdev_open_ext(bdev_name, true, longhorn_bdev_event_base_bdev, NULL, &desc); + if (rc != 0) { + if (rc != -ENODEV) { + SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", bdev_name); + } + return rc; + } + + bdev = spdk_bdev_desc_get_bdev(desc); + + rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_longhorn_if); + if (rc != 0) { + SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n"); + spdk_bdev_close(desc); + return rc; + } + + SPDK_DEBUGLOG(bdev_longhorn, "bdev %s is claimed\n", bdev_name); + + assert(longhorn_bdev->state != RAID_BDEV_STATE_ONLINE); + //assert(base_bdev_slot < longhorn_bdev->num_base_bdevs); + + + base_info = calloc(sizeof (struct longhorn_base_bdev_info), 1); + + + base_info->thread = spdk_get_thread(); + base_info->bdev = bdev; + base_info->desc = desc; + + longhorn_bdev->num_base_bdevs_discovered++; + assert(longhorn_bdev->num_base_bdevs_discovered <= longhorn_bdev->num_base_bdevs); + + TAILQ_INSERT_TAIL(&longhorn_bdev->base_bdevs_head, base_info, infos); + + return 0; +} + +static int +longhorn_bdev_configure_base_info(struct longhorn_bdev *longhorn_bdev, + struct longhorn_base_bdev_info *base_info) +{ + struct spdk_bdev_desc *desc; + struct spdk_bdev *bdev; + int rc; + + rc = spdk_bdev_open_ext(base_info->bdev_name, true, longhorn_bdev_event_base_bdev, NULL, &desc); + if (rc != 0) { + if (rc != -ENODEV) { + SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", base_info->bdev_name); + } + return rc; + } + + bdev = spdk_bdev_desc_get_bdev(desc); + + rc = spdk_bdev_module_claim_bdev(bdev, NULL, 
/*
 * brief:
 * If longhorn bdev config is complete, then only register the longhorn bdev to
 * bdev layer and remove this longhorn bdev from configuring list and
 * insert the longhorn bdev to configured list. After registration the bdev is
 * handed to longhorn_publish_nvmf with address "127.0.0.1" and port 4420.
 * params:
 * longhorn_bdev - pointer to longhorn bdev; must be in the configuring state with
 * all of its base bdevs discovered (both are asserted)
 * returns:
 * 0 - success
 * non zero - failure (-EINVAL when base bdev block lengths differ, otherwise the
 * error returned by longhorn_start or spdk_bdev_register)
 */
static int
longhorn_bdev_configure(struct longhorn_bdev *longhorn_bdev)
{
	uint32_t blocklen = 0;
	struct spdk_bdev *longhorn_bdev_gen;
	struct longhorn_base_bdev_info *base_info;
	int rc = 0;
	char *nqn;

	assert(longhorn_bdev->state == RAID_BDEV_STATE_CONFIGURING);
	assert(longhorn_bdev->num_base_bdevs_discovered == longhorn_bdev->num_base_bdevs);

	TAILQ_FOREACH(base_info, &longhorn_bdev->base_bdevs_head, infos) {

		/* Check blocklen for all base bdevs that it should be same */
		if (blocklen == 0) {
			/* First base bdev seen: its block length becomes the reference */
			blocklen = base_info->bdev->blocklen;
		} else if (blocklen != base_info->bdev->blocklen) {
			/*
			 * Assumption is that all the base bdevs for any longhorn bdev should
			 * have same blocklen
			 */
			SPDK_ERRLOG("Blocklen of various bdevs not matching\n");
			return -EINVAL;
		}
	}
	assert(blocklen > 0);

	/* Cache log2 of the block length so later code can shift instead of divide */
	longhorn_bdev->blocklen_shift = spdk_u32log2(blocklen);

	longhorn_bdev_gen = &longhorn_bdev->bdev;
	longhorn_bdev_gen->blocklen = blocklen;

	/* NOTE(review): blockcnt is not set here; presumably longhorn_start fills it in - confirm */
	rc = longhorn_start(longhorn_bdev);
	if (rc != 0) {
		SPDK_ERRLOG("longhorn module startup callback failed\n");
		return rc;
	}
	/* Marked online before registration; reverted below if registration fails */
	longhorn_bdev->state = RAID_BDEV_STATE_ONLINE;
	SPDK_DEBUGLOG(bdev_longhorn, "io device register %p\n", longhorn_bdev);
	SPDK_DEBUGLOG(bdev_longhorn, "blockcnt %" PRIu64 ", blocklen %u\n",
		      longhorn_bdev_gen->blockcnt, longhorn_bdev_gen->blocklen);
	/* The io device is registered before the bdev itself is registered */
	spdk_io_device_register(longhorn_bdev, longhorn_bdev_create_cb, longhorn_bdev_destroy_cb,
				sizeof(struct longhorn_bdev_io_channel),
				longhorn_bdev->bdev.name);
	rc = spdk_bdev_register(longhorn_bdev_gen);
	if (rc != 0) {
		SPDK_ERRLOG("Unable to register longhorn bdev and stay at configuring state\n");
		/* Roll back the io device and the state change made above */
		spdk_io_device_unregister(longhorn_bdev, NULL);
		longhorn_bdev->state = RAID_BDEV_STATE_CONFIGURING;
		return rc;
	}
	SPDK_DEBUGLOG(bdev_longhorn, "longhorn bdev generic %p\n", longhorn_bdev_gen);
	/* Move from the configuring list to the configured list */
	TAILQ_REMOVE(&g_longhorn_bdev_configuring_list, longhorn_bdev, state_link);
	TAILQ_INSERT_TAIL(&g_longhorn_bdev_configured_list, longhorn_bdev, state_link);
	SPDK_DEBUGLOG(bdev_longhorn, "longhorn bdev is created with name %s, longhorn_bdev %p\n",
		      longhorn_bdev_gen->name, longhorn_bdev);

	/*
	 * NOTE(review): the result of spdk_sprintf_alloc is not checked for NULL and is
	 * never freed in this function; confirm whether longhorn_publish_nvmf copies
	 * the string or takes ownership of it.
	 */
	nqn = spdk_sprintf_alloc(VOLUME_FORMAT, longhorn_bdev_gen->name);
	longhorn_publish_nvmf(longhorn_bdev_gen->name, nqn, "127.0.0.1",
			      4420, longhorn_bdev_nvmf_cb, NULL);


	return 0;
}
Queue this longhorn device + * in configuring list + * params: + * longhorn_bdev - pointer to longhorn bdev + * cb_fn - callback function + * cb_arg - argument to callback function + * returns: + * none + */ +static void +longhorn_bdev_deconfigure(struct longhorn_bdev *longhorn_bdev, longhorn_bdev_destruct_cb cb_fn, + void *cb_arg) +{ + if (longhorn_bdev->state != RAID_BDEV_STATE_ONLINE) { + if (cb_fn) { + cb_fn(cb_arg, 0); + } + return; + } + + assert(longhorn_bdev->num_base_bdevs == longhorn_bdev->num_base_bdevs_discovered); + TAILQ_REMOVE(&g_longhorn_bdev_configured_list, longhorn_bdev, state_link); + longhorn_bdev->state = RAID_BDEV_STATE_OFFLINE; + assert(longhorn_bdev->num_base_bdevs_discovered); + TAILQ_INSERT_TAIL(&g_longhorn_bdev_offline_list, longhorn_bdev, state_link); + SPDK_DEBUGLOG(bdev_longhorn, "longhorn bdev state chaning from online to offline\n"); + + spdk_bdev_unregister(&longhorn_bdev->bdev, cb_fn, cb_arg); +} + +/* + * brief: + * longhorn_bdev_find_by_base_bdev function finds the longhorn bdev which has + * claimed the base bdev. + * params: + * base_bdev - pointer to base bdev pointer + * _longhorn_bdev - Reference to pointer to longhorn bdev + * _base_info - Reference to the longhorn base bdev info. + * returns: + * true - if the longhorn bdev is found. + * false - if the longhorn bdev is not found. 
+ */ +static bool +longhorn_bdev_find_by_base_bdev(struct spdk_bdev *base_bdev, struct longhorn_bdev **_longhorn_bdev, + struct longhorn_base_bdev_info **_base_info) +{ + struct longhorn_bdev *longhorn_bdev; + struct longhorn_base_bdev_info *base_info; + + TAILQ_FOREACH(longhorn_bdev, &g_longhorn_bdev_list, global_link) { + TAILQ_FOREACH(base_info, &longhorn_bdev->base_bdevs_head, infos) { + if (base_info->bdev == base_bdev) { + *_longhorn_bdev = longhorn_bdev; + *_base_info = base_info; + return true; + } + } + } + + return false; +} + +/* + * brief: + * longhorn_bdev_remove_base_bdev function is called by below layers when base_bdev + * is removed. This function checks if this base bdev is part of any longhorn bdev + * or not. If yes, it takes necessary action on that particular longhorn bdev. + * params: + * base_bdev - pointer to base bdev pointer which got removed + * returns: + * none + */ +static void +longhorn_bdev_remove_base_bdev(struct spdk_bdev *base_bdev) +{ + struct longhorn_bdev *longhorn_bdev = NULL; + struct longhorn_base_bdev_info *base_info; + + SPDK_DEBUGLOG(bdev_longhorn, "longhorn_bdev_remove_base_bdev\n"); + + /* Find the longhorn_bdev which has claimed this base_bdev */ + if (!longhorn_bdev_find_by_base_bdev(base_bdev, &longhorn_bdev, &base_info)) { + SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name); + return; + } + + assert(base_info->desc); + base_info->remove_scheduled = true; + + if (longhorn_bdev->destruct_called == true || + longhorn_bdev->state == RAID_BDEV_STATE_CONFIGURING) { + /* + * As longhorn bdev is not registered yet or already unregistered, + * so cleanup should be done here itself. + */ + longhorn_bdev_free_base_bdev_resource(longhorn_bdev, base_info); + if (longhorn_bdev->num_base_bdevs_discovered == 0) { + /* There is no base bdev for this longhorn, so free the longhorn device. 
*/ + longhorn_bdev_cleanup(longhorn_bdev); + return; + } + } + + longhorn_bdev_deconfigure(longhorn_bdev, NULL, NULL); +} + +/* + * brief: + * longhorn_bdev_event_base_bdev function is called by below layers when base_bdev + * triggers asynchronous event. + * params: + * type - event details. + * bdev - bdev that triggered event. + * event_ctx - context for event. + * returns: + * none + */ +static void +longhorn_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, + void *event_ctx) +{ + switch (type) { + case SPDK_BDEV_EVENT_REMOVE: + longhorn_bdev_remove_base_bdev(bdev); + break; + default: + SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); + break; + } +} + +/* + * brief: + * Remove base bdevs from the longhorn bdev one by one. Skip any base bdev which + * doesn't exist. + * params: + * longhorn_cfg - pointer to longhorn bdev config. + * cb_fn - callback function + * cb_ctx - argument to callback function + */ +void +longhorn_bdev_remove_base_devices(struct longhorn_bdev_config *longhorn_cfg, + longhorn_bdev_destruct_cb cb_fn, void *cb_arg) +{ + struct longhorn_bdev *longhorn_bdev; + struct longhorn_base_bdev_info *base_info; + + SPDK_DEBUGLOG(bdev_longhorn, "longhorn_bdev_remove_base_devices\n"); + + longhorn_bdev = longhorn_cfg->longhorn_bdev; + if (longhorn_bdev == NULL) { + SPDK_DEBUGLOG(bdev_longhorn, "longhorn bdev %s doesn't exist now\n", longhorn_cfg->name); + if (cb_fn) { + cb_fn(cb_arg, 0); + } + return; + } + + if (longhorn_bdev->destroy_started) { + SPDK_DEBUGLOG(bdev_longhorn, "destroying longhorn bdev %s is already started\n", + longhorn_cfg->name); + if (cb_fn) { + cb_fn(cb_arg, -EALREADY); + } + return; + } + + longhorn_bdev->destroy_started = true; + + //LONGHORN_FOR_EACH_BASE_BDEV(longhorn_bdev, base_info) { + TAILQ_FOREACH(base_info, &longhorn_bdev->base_bdevs_head, infos) { + if (base_info->bdev == NULL) { + continue; + } + + assert(base_info->desc); + base_info->remove_scheduled = true; + + if 
(longhorn_bdev->destruct_called == true || + longhorn_bdev->state == RAID_BDEV_STATE_CONFIGURING) { + /* + * As longhorn bdev is not registered yet or already unregistered, + * so cleanup should be done here itself. + */ + longhorn_bdev_free_base_bdev_resource(longhorn_bdev, base_info); + if (longhorn_bdev->num_base_bdevs_discovered == 0) { + /* There is no base bdev for this longhorn, so free the longhorn device. */ + longhorn_bdev_cleanup(longhorn_bdev); + if (cb_fn) { + cb_fn(cb_arg, 0); + } + return; + } + } + } + + longhorn_bdev_deconfigure(longhorn_bdev, cb_fn, cb_arg); +} + +/* + * brief: + * longhorn_bdev_add_base_device function is the actual function which either adds + * the nvme base device to existing longhorn bdev or create a new longhorn bdev. It also claims + * the base device and keep the open descriptor. + * params: + * longhorn_cfg - pointer to longhorn bdev config + * bdev - pointer to base bdev + * base_bdev_slot - position to add base bdev + * returns: + * 0 - success + * non zero - failure + */ +int +//longhorn_bdev_add_base_device(struct longhorn_bdev_config *longhorn_cfg, const char *bdev_name, +//longhorn_bdev_add_base_device(const char *name, const char *bdev_name) +longhorn_bdev_add_base_device(struct longhorn_bdev *longhorn_bdev, + struct longhorn_base_bdev_info *base_info) +{ + int rc; + + + rc = longhorn_bdev_configure_base_info(longhorn_bdev, base_info); + if (rc != 0) { + if (rc != -ENODEV) { + SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", base_info->bdev_name); + } + return rc; + } + + assert(longhorn_bdev->num_base_bdevs_discovered <= longhorn_bdev->num_base_bdevs); + + if (longhorn_bdev->num_base_bdevs_discovered == longhorn_bdev->num_base_bdevs) { + rc = longhorn_bdev_configure(longhorn_bdev); + if (rc != 0) { + SPDK_ERRLOG("Failed to configure longhorn bdev\n"); + return rc; + } + } + + return 0; +} + +struct replica_add_ctx { + struct longhorn_base_bdev_info *base_info; +}; + +int +longhorn_bdev_add_replica(const 
char *name, char *lvs, char *addr, uint16_t nvmf_port, uint16_t comm_port) { + struct longhorn_bdev *longhorn_bdev; + struct longhorn_base_bdev_info *base_info; + + longhorn_bdev = longhorn_bdev_find_by_name(name); + if (!longhorn_bdev) { + SPDK_ERRLOG("Longhorn bdev '%s' is not created yet\n", name); + return -ENODEV; + } + + + base_info = calloc(1, sizeof(*base_info)); + base_info->lvs = strdup(lvs); + + if ((!addr || addr[0] == '\0')) { + base_info->is_local = true; + base_info->bdev_name = spdk_sprintf_alloc("%s/%s", lvs, name); + + longhorn_bdev_add_base_device(longhorn_bdev, base_info); + } else { + base_info->remote_addr = strdup(addr); + base_info->nvmf_port = nvmf_port; + base_info->comm_port = comm_port; + } +} + + + +/* + * brief: + * Add base bdevs to the longhorn bdev one by one. Skip any base bdev which doesn't + * exist or fails to add. If all base bdevs are successfully added, the longhorn bdev + * moves to the configured state and becomes available. Otherwise, the longhorn bdev + * stays at the configuring state with added base bdevs. + * params: + * longhorn_cfg - pointer to longhorn bdev config + * returns: + * 0 - The longhorn bdev moves to the configured state or stays at the configuring + * state with added base bdevs due to any nonexistent base bdev. + * non zero - Failed to add any base bdev and stays at the configuring state with + * added base bdevs. 
+ */ +#if 0 +int +longhorn_bdev_add_base_devices(struct longhorn_bdev_config *longhorn_cfg) +{ + uint8_t i; + int rc = 0, _rc; + + for (i = 0; i < longhorn_cfg->num_base_bdevs; i++) { + _rc = longhorn_bdev_add_base_device(longhorn_cfg, longhorn_cfg->base_bdev[i].name, i); + if (_rc == -ENODEV) { + SPDK_DEBUGLOG(bdev_longhorn, "base bdev %s doesn't exist now\n", + longhorn_cfg->base_bdev[i].name); + } else if (_rc != 0) { + SPDK_ERRLOG("Failed to add base bdev %s to RAID bdev %s: %s\n", + longhorn_cfg->base_bdev[i].name, longhorn_cfg->name, + spdk_strerror(-_rc)); + if (rc == 0) { + rc = _rc; + } + } + } + + return rc; +} +#endif + +/* + * brief: + * longhorn_bdev_examine function is the examine function call by the below layers + * like bdev_nvme layer. This function will check if this base bdev can be + * claimed by this longhorn bdev or not. + * params: + * bdev - pointer to base bdev + * returns: + * none + */ +static void +longhorn_bdev_examine(struct spdk_bdev *bdev) +{ + struct longhorn_bdev_config *longhorn_cfg; + uint8_t base_bdev_slot; + +#if 0 + if (longhorn_bdev_can_claim_bdev(bdev->name, &longhorn_cfg, &base_bdev_slot)) { + longhorn_bdev_add_base_device(longhorn_cfg, bdev->name, base_bdev_slot); + } else { + SPDK_DEBUGLOG(bdev_longhorn, "bdev %s can't be claimed\n", + bdev->name); + } +#endif + + spdk_bdev_module_examine_done(&g_longhorn_if); +} + + + +static struct longhorn_base_bdev_info * +longhorn_bdev_find_base_bdev(struct longhorn_bdev *longhorn_bdev, char *lvs, char *addr, uint16_t nvmf_port, uint16_t comm_port) { + struct longhorn_base_bdev_info *base_info; + + TAILQ_FOREACH(base_info, &longhorn_bdev->base_bdevs_head, infos) { + if (!base_info->lvs) { + SPDK_ERRLOG("base bdev lvs is null\n"); + continue; + } + + if (strcmp(base_info->lvs, lvs) == 0) { + return base_info; + } + } + + return NULL; +} + +struct io_channel_remove_ctx { + struct longhorn_base_bdev_info *base_info; + struct longhorn_bdev_io_channel *io_channel; +}; + +static struct 
longhorn_base_io_channel * +longhorn_io_find_channel(struct longhorn_base_bdev_info *base_info, + struct longhorn_bdev_io_channel *io_channel) +{ + struct longhorn_base_io_channel *base_channel; + + TAILQ_FOREACH(base_channel, &io_channel->base_channels, channels) { + if (base_channel->base_info == base_info) { + return base_channel; + } + } + + return NULL; +} + +static void longhorn_io_channel_remove_bdev(void *arg) { + struct io_channel_remove_ctx *ctx = arg; + struct longhorn_base_io_channel *base_channel; + + base_channel = longhorn_io_find_channel(ctx->base_info, ctx->io_channel); + + if (base_channel != NULL) { + + SPDK_ERRLOG("removing %p\n", base_channel); + TAILQ_REMOVE(&ctx->io_channel->base_channels, base_channel, channels); + //1TAILQ_INSERT_TAIL(&longhorn_ch->base_channels, base_channel, channels); + free(base_channel); + + } + + ctx->io_channel->last_read_io_ch = NULL; + + TAILQ_FOREACH(base_channel, &ctx->io_channel->base_channels, channels) { + SPDK_ERRLOG("Longhorn base bdev '%s' remaining %p\n", base_channel->base_info->lvs, base_channel); + } + + /* TODO If this is the last io_channel to remove, + * * unclaim the bdev + * * free the base_info */ + + + free(ctx); +} + + + +int longhorn_bdev_remove_replica(char *name, char *lvs, char *addr, uint16_t nvmf_port, uint16_t comm_port) { + struct longhorn_bdev *longhorn_bdev; + struct longhorn_base_bdev_info *base_info; + struct longhorn_bdev_io_channel *io_channel; + struct io_channel_remove_ctx *ctx; + int rc; + + longhorn_bdev = longhorn_bdev_find_by_name(name); + if (!longhorn_bdev) { + SPDK_ERRLOG("Longhorn bdev '%s' is not created yet\n", name); + return -ENODEV; + } + + rc = pthread_mutex_trylock(&longhorn_bdev->base_bdevs_mutex); + + if (rc != 0) { + if (errno == EBUSY) { + SPDK_ERRLOG("Longhorn bdev '%s' is busy\n", name); + } + + + return -errno; + } + + base_info = longhorn_bdev_find_base_bdev(longhorn_bdev, lvs, addr, nvmf_port, comm_port); + + if (base_info == NULL) { + SPDK_ERRLOG("replica 
in longhorn bdev '%s' is not found\n", name); + pthread_mutex_unlock(&longhorn_bdev->base_bdevs_mutex); + + return -ENODEV; + + } + + + longhorn_bdev->num_base_bdevs_discovered--; + longhorn_bdev->num_base_bdevs--; + TAILQ_REMOVE(&longhorn_bdev->base_bdevs_head, base_info, infos); + + /* signal each longhorn_io to stop using the bdev */ + SPDK_ERRLOG("num io channels %u\n", longhorn_bdev->num_io_channels); + + TAILQ_FOREACH(io_channel, &longhorn_bdev->io_channel_head, channels) { + ctx = calloc(1, sizeof (*ctx)); + + ctx->base_info = base_info; + ctx->io_channel = io_channel; + + if (!io_channel->deleted) { + spdk_thread_send_msg(io_channel->thread, longhorn_io_channel_remove_bdev, ctx); + } + + + } + + pthread_mutex_unlock(&longhorn_bdev->base_bdevs_mutex); + + + + return 0; +} + + +int longhorn_volume_add_replica(char *name, char *lvs, char *addr, uint16_t nvmf_port, uint16_t comm_port) { + struct longhorn_bdev *longhorn_bdev; + struct longhorn_base_bdev_info *base_info; + struct longhorn_bdev_io_channel *io_channel; + struct io_channel_remove_ctx *ctx; + int rc; + + longhorn_bdev = longhorn_bdev_find_by_name(name); + if (!longhorn_bdev) { + SPDK_ERRLOG("Longhorn bdev '%s' is not created yet\n", name); + return -ENODEV; + } + + rc = pthread_mutex_trylock(&longhorn_bdev->base_bdevs_mutex); + + if (rc != 0) { + if (errno == EBUSY) { + SPDK_ERRLOG("Longhorn bdev '%s' is busy\n", name); + } + + + return -errno; + } +} + +void longhorn_unpause(struct longhorn_bdev *longhorn_bdev) +{ + int rc; + struct longhorn_bdev_io_channel *io_channel; + + + rc = pthread_mutex_trylock(&longhorn_bdev->base_bdevs_mutex); + + if (rc != 0) { + return -errno; + } + + TAILQ_FOREACH(io_channel, &longhorn_bdev->io_channel_head, channels) { + spdk_thread_send_msg(io_channel->thread, bdev_longhorn_unpause_io, io_channel); + + } + + pthread_mutex_unlock(&longhorn_bdev->base_bdevs_mutex); + SPDK_ERRLOG("UNPAUSE COMPLETE \n"); + + return 0; +} + +/* Log component for bdev longhorn bdev module 
*/ +SPDK_LOG_REGISTER_COMPONENT(bdev_longhorn) diff --git a/module/bdev/longhorn/bdev_longhorn.h b/module/bdev/longhorn/bdev_longhorn.h new file mode 100644 index 000000000..2c056cab6 --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn.h @@ -0,0 +1,325 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*
 * longhorn_base_bdev_info contains information for the base bdevs which are part of some
 * longhorn. This structure contains the per base bdev information. Whatever is
 * required per base device for longhorn bdev will be kept here
 */
struct longhorn_base_bdev_info {
	/* pointer to base spdk bdev */
	struct spdk_bdev *bdev;

	/* pointer to base bdev descriptor opened by longhorn bdev */
	struct spdk_bdev_desc *desc;


	//struct spdk_io_channel *base_channel;

	/*
	 * When underlying base device calls the hot plug function on drive removal,
	 * this flag will be set and later after doing some processing, base device
	 * descriptor will be closed
	 */
	bool remove_scheduled;

	/* thread where base device is opened */
	struct spdk_thread *thread;

	/* state of this base bdev, one of the LONGHORN_BASE_BDEV_* values */
	enum longhorn_base_bdev_state state;

	/* true for a replica added without a remote address */
	bool is_local;

	/* heap copy of the lvs name given when the replica was added; used as the lookup key */
	char *lvs;

	/* heap copy of the remote address; only set for replicas added with an address */
	char *remote_addr;

	/* ports given for a remote replica */
	uint16_t nvmf_port;
	uint16_t comm_port;

	/* name of the base bdev to open; "<lvs>/<longhorn bdev name>" for a local replica */
	char *bdev_name;

	/* linkage in the owning longhorn_bdev's base_bdevs_head list */
	TAILQ_ENTRY(longhorn_base_bdev_info) infos;
};
It contains the information
+ * related to bdev_io for a longhorn bdev
+ */
+struct longhorn_bdev_io {
+	/* The longhorn bdev associated with this IO */
+	struct longhorn_bdev *longhorn_bdev;
+
+	/* WaitQ entry, used only in waitq logic */
+	struct spdk_bdev_io_wait_entry waitq_entry;
+
+	/* Context of the original channel for this IO */
+	struct longhorn_bdev_io_channel *longhorn_ch;
+
+	/*
+	 * Used for tracking progress on io requests sent to member disks.
+	 * The write and null-payload paths set base_bdev_io_remaining to the
+	 * number of base bdevs when it is still zero.
+	 */
+	uint64_t base_bdev_io_remaining;
+	uint8_t base_bdev_io_submitted;
+	uint8_t base_bdev_io_status;
+
+	/* Set to true by the write path once a base write has been issued. */
+	bool submitted;
+
+};
+
+/* List head type for the base bdev infos of one longhorn bdev. */
+TAILQ_HEAD(base_bdevs, longhorn_base_bdev_info);
+
+/* Pairs one base bdev's I/O channel with the info of that base bdev. */
+struct longhorn_base_io_channel {
+	/* I/O channel used when submitting to the base bdev */
+	struct spdk_io_channel *base_channel;
+
+	/* base bdev this channel belongs to (supplies desc and bdev) */
+	struct longhorn_base_bdev_info *base_info;
+
+	/* link on longhorn_bdev_io_channel::base_channels */
+	TAILQ_ENTRY(longhorn_base_io_channel) channels;
+};
+
+/*
+ * longhorn_bdev_io_channel is the context of spdk_io_channel for longhorn bdev device. It
+ * contains the relationship of longhorn bdev io channel with base bdev io channels.
+ */
+struct longhorn_bdev_io_channel {
+	struct longhorn_bdev *longhorn_bdev;
+	/* Array of IO channels of base bdevs */
+	//struct spdk_io_channel **base_channel;
+
+	/* Number of IO channels */
+	uint8_t num_channels;
+
+	struct spdk_thread *thread;
+	/* While set, the write path queues the I/O instead of submitting it. */
+	bool paused;
+	bool pause_complete;
+	bool deleted;
+
+	/* Incremented by the write path for base writes issued on this channel. */
+	atomic_int io_ops;
+
+	/* Per-base-bdev channels; iterated by the write and null-payload paths. */
+	TAILQ_HEAD(, longhorn_base_io_channel) base_channels;
+
+	/* link on longhorn_bdev::io_channel_head */
+	TAILQ_ENTRY(longhorn_bdev_io_channel) channels;
+
+	/* Base channel used by the previous read; the read path advances it per read. */
+	struct longhorn_base_io_channel *last_read_io_ch;
+};
+
+/* List head type for the I/O channels of one longhorn bdev. */
+TAILQ_HEAD(io_channels, longhorn_bdev_io_channel);
+
+/* Callback registered through longhorn_volume_add_pause_cb(). */
+typedef void (*longhorn_pause_cb)(struct longhorn_bdev *bdev, void *arg);
+struct longhorn_pause_cb_entry {
+	longhorn_pause_cb cb_fn;
+	void *cb_arg;
+
+	/* link on longhorn_bdev::pause_cbs */
+	TAILQ_ENTRY(longhorn_pause_cb_entry) link;
+};
+
+/*
+ * longhorn_bdev is the single entity structure which contains SPDK block device
+ * and the information related to any longhorn bdev either configured or
+ * in configuring list. io device is created on this.
+ */ +struct longhorn_bdev { + /* longhorn bdev device, this will get registered in bdev layer */ + struct spdk_bdev bdev; + + /* link of longhorn bdev to link it to configured, configuring or offline list */ + TAILQ_ENTRY(longhorn_bdev) state_link; + + /* link of longhorn bdev to link it to global longhorn bdev list */ + TAILQ_ENTRY(longhorn_bdev) global_link; + + TAILQ_HEAD(, longhorn_pause_cb_entry) pause_cbs; + + /* pointer to config file entry */ + struct longhorn_bdev_config *config; + + + /* array of base bdev info */ + struct longhorn_base_bdev_info *base_bdev_info; + + pthread_mutex_t base_bdevs_mutex; + struct base_bdevs base_bdevs_head; + struct io_channels io_channel_head; + + uint32_t num_io_channels; + + struct longhorn_base_bdev_info *last_read_info; + + /* block length bit shift for optimized calculation */ + uint32_t blocklen_shift; + + /* state of longhorn bdev */ + enum longhorn_bdev_state state; + + /* number of base bdevs comprising longhorn bdev */ + uint8_t num_base_bdevs; + + /* number of base bdevs discovered */ + uint8_t num_base_bdevs_discovered; + + /* Set to true if destruct is called for this longhorn bdev */ + bool destruct_called; + + /* Set to true if destroy of this longhorn bdev is started. 
*/ + bool destroy_started; + + atomic_int io_ops; + atomic_int channels_to_pause; + +}; + +#define LONGHORN_FOR_EACH_BASE_BDEV(r, i) \ + for (i = r->base_bdev_info; i < r->base_bdev_info + r->num_base_bdevs; i++) + +/* + * longhorn_base_bdev_config is the per base bdev data structure which contains + * information w.r.t to per base bdev during parsing config + */ +struct longhorn_base_bdev_config { + /* base bdev name from config file */ + char *name; +}; + +/* + * longhorn_bdev_config contains the longhorn bdev config related information after + * parsing the config file + */ +struct longhorn_bdev_config { + /* base bdev config per underlying bdev */ + struct longhorn_base_bdev_config *base_bdev; + + /* Points to already created longhorn bdev */ + struct longhorn_bdev *longhorn_bdev; + + char *name; + + /* number of base bdevs */ + uint8_t num_base_bdevs; + + TAILQ_ENTRY(longhorn_bdev_config) link; +}; + +/* + * longhorn_config is the top level structure representing the longhorn bdev config as read + * from config file for all longhorns + */ +struct longhorn_config { + /* longhorn bdev context from config file */ + TAILQ_HEAD(, longhorn_bdev_config) longhorn_bdev_config_head; + + /* total longhorn bdev from config file */ + uint8_t total_longhorn_bdev; +}; + +/* TAIL heads for various longhorn bdev lists */ +TAILQ_HEAD(longhorn_configured_tailq, longhorn_bdev); +TAILQ_HEAD(longhorn_configuring_tailq, longhorn_bdev); +TAILQ_HEAD(longhorn_all_tailq, longhorn_bdev); +TAILQ_HEAD(longhorn_offline_tailq, longhorn_bdev); + +extern struct longhorn_configured_tailq g_longhorn_bdev_configured_list; +extern struct longhorn_configuring_tailq g_longhorn_bdev_configuring_list; +extern struct longhorn_all_tailq g_longhorn_bdev_list; +extern struct longhorn_offline_tailq g_longhorn_bdev_offline_list; +extern struct longhorn_config g_longhorn_config; + +typedef void (*longhorn_bdev_destruct_cb)(void *cb_ctx, int rc); + +//int longhorn_bdev_create(struct longhorn_bdev_config 
*longhorn_cfg);
+/* Longhorn bdev creation and configuration management. */
+int longhorn_bdev_create(const char *name, uint8_t num_base_bdevs);
+int longhorn_bdev_add_base_devices(struct longhorn_bdev_config *longhorn_cfg);
+void longhorn_bdev_remove_base_devices(struct longhorn_bdev_config *longhorn_cfg,
+				   longhorn_bdev_destruct_cb cb_fn, void *cb_ctx);
+int longhorn_bdev_config_add(const char *longhorn_name, uint8_t num_base_bdevs,
+			 struct longhorn_bdev_config **_longhorn_cfg);
+int longhorn_bdev_config_add_base_bdev(struct longhorn_bdev_config *longhorn_cfg,
+				   const char *base_bdev_name, uint8_t slot);
+void longhorn_bdev_config_cleanup(struct longhorn_bdev_config *longhorn_cfg);
+struct longhorn_bdev_config *longhorn_bdev_config_find_by_name(const char *longhorn_name);
+
+
+/*
+ * I/O completion helpers used by the submission code in bdev_longhorn_impl.c:
+ *  - longhorn_bdev_io_complete_part() is called once per finished base I/O
+ *    (with completed == 1) together with that base I/O's status;
+ *  - longhorn_bdev_queue_io_wait() is called when a base submission returned
+ *    -ENOMEM, with the function that should retry the submission;
+ *  - longhorn_bdev_io_complete() completes the parent I/O with one status.
+ */
+bool
+longhorn_bdev_io_complete_part(struct longhorn_bdev_io *longhorn_io, uint64_t completed,
+			   enum spdk_bdev_io_status status);
+void
+longhorn_bdev_queue_io_wait(struct longhorn_bdev_io *longhorn_io, struct spdk_bdev *bdev,
+			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn);
+void
+longhorn_bdev_io_complete(struct longhorn_bdev_io *longhorn_io, enum spdk_bdev_io_status status);
+
+/* Replica (base bdev) management. */
+//int longhorn_bdev_add_base_device(const char *name, const char *bdev_name);
+int longhorn_bdev_add_base_device(struct longhorn_bdev *longhorn_bdev, struct longhorn_base_bdev_info *base_info);
+int longhorn_bdev_remove_replica(char *name, char *lvs, char *addr, uint16_t nvmf_port, uint16_t comm_port);
+int
+longhorn_bdev_add_replica(const char *name, char *lvs, char *addr, uint16_t nvmf_port, uint16_t comm_port);
+
+/* Pause / unpause support. */
+void bdev_longhorn_pause_io(void *cb_arg);
+void bdev_longhorn_unpause_io(void *cb_arg);
+struct longhorn_bdev *longhorn_bdev_find_by_name(const char *longhorn_name);
+
+void longhorn_unpause(struct longhorn_bdev *longhorn_bdev);
+void longhorn_volume_add_pause_cb(struct longhorn_bdev *longhorn_dev,
+			      longhorn_pause_cb cb_fn,
+			      void *cb_arg);
+
+#endif /* bdev_longhorn.h include guard */
diff --git a/module/bdev/longhorn/bdev_longhorn_impl.c
b/module/bdev/longhorn/bdev_longhorn_impl.c new file mode 100644 index 000000000..69d0e10c4 --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn_impl.c @@ -0,0 +1,352 @@ +/*- + * BSD LICENSE + * + * Copyright (c) SUSE + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of SUSE nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "bdev_longhorn.h" +#include "bdev_longhorn_impl.h" + +#include "spdk/env.h" +#include "spdk/thread.h" +#include "spdk/string.h" +#include "spdk/util.h" + +#include "spdk/log.h" + +/* + * brief: + * longhorn_bdev_io_completion function is called by lower layers to notify longhorn + * module that particular bdev_io is completed. + * params: + * bdev_io - pointer to bdev io submitted to lower layers, like child io + * success - bdev_io status + * cb_arg - function callback context (parent longhorn_bdev_io) + * returns: + * none + */ +static void +longhorn_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct longhorn_bdev_io *longhorn_io = cb_arg; + + spdk_bdev_free_io(bdev_io); + + if (success) { + SPDK_ERRLOG("io op success\n"); + longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_SUCCESS); + } else { + SPDK_ERRLOG("io op failure\n"); + longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static void +longhorn_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct longhorn_bdev_io *longhorn_io = cb_arg; + + longhorn_bdev_io_complete_part(longhorn_io, 1, success ? 
+ SPDK_BDEV_IO_STATUS_SUCCESS : + SPDK_BDEV_IO_STATUS_FAILED); + + spdk_bdev_free_io(bdev_io); +} + +static void +_longhorn_submit_rw_request(void *_longhorn_io) +{ + struct longhorn_bdev_io *longhorn_io = _longhorn_io; + + longhorn_submit_rw_request(longhorn_io); +} + +static void +longhorn_submit_read_request(struct longhorn_bdev_io *longhorn_io) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(longhorn_io); + struct longhorn_bdev_io_channel *longhorn_ch = longhorn_io->longhorn_ch; + struct longhorn_bdev *longhorn_bdev = longhorn_io->longhorn_bdev; + int ret = 0; + struct longhorn_base_bdev_info *base_info; + struct spdk_io_channel *base_ch; + struct longhorn_base_io_channel *base_channel; + + SPDK_ERRLOG("longhorn_submit_read_request\n"); + assert(longhorn_ch != NULL); + SPDK_ERRLOG("longhorn_submit_read_request\n"); + assert(longhorn_ch->base_channel); + SPDK_ERRLOG("longhorn_submit_read_request\n"); + + if (longhorn_ch->last_read_io_ch) { + + SPDK_ERRLOG("last_read_io_char not null\n"); + longhorn_ch->last_read_io_ch = TAILQ_NEXT(longhorn_ch->last_read_io_ch, channels); + base_channel = longhorn_ch->last_read_io_ch; + } + + + if (!longhorn_ch->last_read_io_ch) { + SPDK_ERRLOG("last_read_io_char null\n"); + longhorn_ch->last_read_io_ch = TAILQ_FIRST(&longhorn_ch->base_channels); + base_channel = longhorn_ch->last_read_io_ch; + } + + + if (!base_channel) { + SPDK_ERRLOG("bdev io submit with no base devices, it should not happen\n"); + return; + } + + base_ch = base_channel->base_channel; + base_info = base_channel->base_info; + + SPDK_ERRLOG("longhorn_submit_read_request base_info %p\n", base_info); + + ret = spdk_bdev_readv_blocks(base_info->desc, base_ch, + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks, longhorn_bdev_io_completion, + longhorn_io); + + if (ret == -ENOMEM) { + longhorn_bdev_queue_io_wait(longhorn_io, base_info->bdev, base_ch, + _longhorn_submit_rw_request); + } else if (ret != 
0) { + SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); + assert(false); + longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static void +longhorn_submit_write_request(struct longhorn_bdev_io *longhorn_io) +{ + + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(longhorn_io); + struct longhorn_bdev_io_channel *longhorn_ch = longhorn_io->longhorn_ch; + struct longhorn_bdev *longhorn_bdev = longhorn_io->longhorn_bdev; + //uint8_t pd_idx; + int ret = 0; + struct longhorn_base_bdev_info *base_info; + struct spdk_io_channel *base_ch; + struct longhorn_base_io_channel *base_channel; + + assert(longhorn_ch != NULL); + assert(longhorn_ch->base_channel); + + if (longhorn_io->base_bdev_io_remaining == 0) { + longhorn_io->base_bdev_io_remaining = longhorn_bdev->num_base_bdevs; + } + + TAILQ_FOREACH(base_channel, &longhorn_ch->base_channels, channels) { + //for (pd_idx = 0; pd_idx < longhorn_bdev->num_base_bdevs; pd_idx++) { + //base_ch = longhorn_ch->base_channel[pd_idx]; + //base_info = &longhorn_bdev->base_bdev_info[pd_idx]; + base_ch = base_channel->base_channel; + base_info = base_channel->base_info; + + if (!longhorn_ch->paused) { + ret = spdk_bdev_writev_blocks(base_info->desc, base_ch, + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks, longhorn_base_io_complete, + longhorn_io); + + if (ret == -ENOMEM) { + SPDK_ERRLOG("enqueuing bdev io submit due to ENOMEM\n"); + longhorn_bdev_queue_io_wait(longhorn_io, base_info->bdev, base_ch, + _longhorn_submit_rw_request); + } else if (ret != 0) { + SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); + assert(false); + longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_FAILED); + } + + atomic_fetch_add(&longhorn_bdev->io_ops, 1); + atomic_fetch_add(&longhorn_ch->io_ops, 1); + longhorn_io->submitted = true; + } else { + longhorn_bdev_queue_io_wait(longhorn_io, + base_info->bdev, + 
base_ch, + _longhorn_submit_rw_request); + } + + } + +} +/* + * brief: + * longhorn_submit_rw_request function is used to submit I/O to the correct + * member disk for longhorn bdevs. + * params: + * longhorn_io + * returns: + * none + */ +void +longhorn_submit_rw_request(struct longhorn_bdev_io *longhorn_io) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(longhorn_io); + + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { + longhorn_submit_read_request(longhorn_io); + } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { + longhorn_submit_write_request(longhorn_io); + } else { + SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type); + assert(0); + } +} + + +static void +_longhorn_submit_null_payload_request(void *_longhorn_io) +{ + struct longhorn_bdev_io *longhorn_io = _longhorn_io; + + longhorn_submit_null_payload_request(longhorn_io); +} + + +/* + * brief: + * longhorn_submit_null_payload_request function submits the next batch of + * io requests with range but without payload, like FLUSH and UNMAP, to member disks; + * it will submit as many as possible unless one base io request fails with -ENOMEM, + * in which case it will queue itself for later submission. 
+ * params: + * bdev_io - pointer to parent bdev_io on longhorn bdev device + * returns: + * none + */ +void +longhorn_submit_null_payload_request(struct longhorn_bdev_io *longhorn_io) +{ + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(longhorn_io); + struct longhorn_bdev_io_channel *longhorn_ch = longhorn_io->longhorn_ch; + struct longhorn_bdev *longhorn_bdev = longhorn_io->longhorn_bdev; + uint8_t pd_idx; + int ret = 0; + struct longhorn_base_bdev_info *base_info; + struct spdk_io_channel *base_ch; + struct longhorn_base_io_channel *base_channel; + + assert(longhorn_ch != NULL); + assert(longhorn_ch->base_channel); + + if (longhorn_io->base_bdev_io_remaining == 0) { + longhorn_io->base_bdev_io_remaining = longhorn_bdev->num_base_bdevs; + } + + TAILQ_FOREACH(base_channel, &longhorn_ch->base_channels, channels) { + //for (pd_idx = 0; pd_idx < longhorn_bdev->num_base_bdevs; pd_idx++) { + //base_ch = longhorn_ch->base_channel[pd_idx]; + //base_info = &longhorn_bdev->base_bdev_info[pd_idx]; + base_ch = base_channel->base_channel; + base_info = base_channel->base_info; + + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_UNMAP: + SPDK_ERRLOG("unmap\n"); + ret = spdk_bdev_unmap_blocks(base_info->desc, base_ch, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks, + longhorn_base_io_complete, longhorn_io); + break; + + case SPDK_BDEV_IO_TYPE_FLUSH: + SPDK_ERRLOG("flush\n"); + ret = spdk_bdev_flush_blocks(base_info->desc, base_ch, + bdev_io->u.bdev.offset_blocks, + bdev_io->u.bdev.num_blocks, + longhorn_base_io_complete, longhorn_io); + break; + + default: + SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type); + assert(false); + ret = -EIO; + } + + + + + if (ret == -ENOMEM) { + longhorn_bdev_queue_io_wait(longhorn_io, base_info->bdev, base_ch, + _longhorn_submit_null_payload_request); + } else if (ret != 0) { + SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n"); + assert(false); + 
longhorn_bdev_io_complete(longhorn_io, SPDK_BDEV_IO_STATUS_FAILED); + } else { + SPDK_ERRLOG("success\n"); + } + + } +} + +int longhorn_start(struct longhorn_bdev *longhorn_bdev) +{ + uint64_t min_blockcnt = UINT64_MAX; + uint64_t min_blocklen = UINT64_MAX; + struct longhorn_base_bdev_info *base_info; + + TAILQ_FOREACH(base_info, &longhorn_bdev->base_bdevs_head, infos) { + /* Calculate minimum block count and length from all base bdevs */ + + min_blockcnt = spdk_min(min_blockcnt, base_info->bdev->blockcnt); + min_blocklen = spdk_min(min_blocklen, base_info->bdev->blocklen); + } + + TAILQ_FOREACH(base_info, &longhorn_bdev->base_bdevs_head, infos) { + if (base_info->bdev->blockcnt != min_blockcnt) { + SPDK_ERRLOG("Not all disks on RAID 1 has same block count"); + return -EINVAL; + } + + if (base_info->bdev->blocklen != min_blocklen) { + SPDK_ERRLOG("Not all disks on RAID 1 has same block length"); + return -EINVAL; + } + } + + + longhorn_bdev->bdev.blockcnt = min_blockcnt; + + if (longhorn_bdev->num_base_bdevs > 1) { + longhorn_bdev->bdev.split_on_optimal_io_boundary = true; + } else { + /* Do not need to split reads/writes on single bdev RAID modules. 
*/ + longhorn_bdev->bdev.split_on_optimal_io_boundary = false; + } + + return 0; +} + diff --git a/module/bdev/longhorn/bdev_longhorn_impl.h b/module/bdev/longhorn/bdev_longhorn_impl.h new file mode 100644 index 000000000..7adf0cd5b --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn_impl.h @@ -0,0 +1,13 @@ +#ifndef SPDK__BDEV_LONGHORN_IMPL_H +#define SPDK__BDEV_LONGHORN_IMPL_H + +int longhorn_start(struct longhorn_bdev *longhorn_bdev); + +void +longhorn_submit_rw_request(struct longhorn_bdev_io *raid_io); + +void +longhorn_submit_null_payload_request(struct longhorn_bdev_io *raid_io); + + +#endif /* SPDK__BDEV_LONGHORN_IMPL_H */ diff --git a/module/bdev/longhorn/bdev_longhorn_lvol.c b/module/bdev/longhorn/bdev_longhorn_lvol.c new file mode 100644 index 000000000..95b6a74a4 --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn_lvol.c @@ -0,0 +1,205 @@ +#include "spdk/rpc.h" +#include "spdk/bdev.h" +#include "spdk/util.h" +#include "spdk/string.h" +#include "spdk/log.h" +#include "spdk/env.h" +#include "spdk_internal/lvolstore.h" +#include "../lvol/vbdev_lvol.h" +#include "lib/blob/blobstore.h" +#include "bdev_longhorn_lvol.h" + +#define ALIGN_4K 4096 + +struct spdk_lvol_store * +longhorn_get_lvol_store_by_name(const char *name) +{ + struct spdk_lvol_store *lvs = NULL; + struct lvol_store_bdev *lvs_bdev = vbdev_lvol_store_first(); + + while (lvs_bdev != NULL) { + lvs = lvs_bdev->lvs; + printf("lvs->name = %s\n", lvs->name); + if (strncmp(lvs->name, name, sizeof(lvs->name)) == 0) { + return lvs; + } + lvs_bdev = vbdev_lvol_store_next(lvs_bdev); + } + return NULL; +} + +enum longhorn_lvol_state { + LVOL_SEND_NAME, + LVOL_SEND_HEADER, + LVOL_SEND_TABLE, + LVOL_SEND_CLUSTER +}; + +struct longhorn_lvol_context { + int fd; + bool *busy; + spdk_blob_id blob_id; + char name[256]; + + struct spdk_blob_store *bs; + struct spdk_blob *blob; + struct spdk_io_channel *channel; + uint32_t *table; + struct longhorn_lvol_header header; + + uint64_t io_units_per_cluster; + + 
uint8_t *cluster; + + enum longhorn_lvol_state state; + + size_t pos; + +}; + + + +uint64_t longhorn_get_allocated_clusters(struct spdk_blob *blob) { + uint64_t allocated_clusters = 0; + uint64_t i = 0; + + for (i = 0; i < blob->active.num_clusters; ++i) { + if (blob->active.clusters[i] != 0) { + ++allocated_clusters; + } + } + + return allocated_clusters; +} + +void longhorn_export_allocated_clusters(struct spdk_blob *blob, uint32_t *table) { + uint64_t i = 0; + uint64_t pos = 0; + + for (i = 0; i < blob->active.num_clusters; ++i) { + if (blob->active.clusters[i] != 0) { + table[pos++] = i; + } + } + +} + +static uint64_t longhorn_get_cluster_offset(struct longhorn_lvol_context *ctx) { + uint64_t offset = ctx->table[ctx->pos++] * ctx->io_units_per_cluster; + + return offset; +} + +static void longhorn_cluster_read_cb(void *arg1, int bserrno) { + struct longhorn_lvol_context *ctx = arg1; + + + if (bserrno) { + return; + } + + printf("writing cluster %u\n", ctx->table[ctx->pos]); + write(ctx->fd, ctx->cluster, ctx->header.cluster_size); + printf("wrote cluster %u\n", ctx->table[ctx->pos]); + + if (ctx->pos < ctx->header.allocated_clusters) { + spdk_blob_io_read(ctx->blob, ctx->channel, ctx->cluster, + longhorn_get_cluster_offset(ctx), + ctx->io_units_per_cluster, + longhorn_cluster_read_cb, + ctx); + } else { + /* Complete */ + //ctx->busy = 0; + } +} + +static void async_write(void *ptr, size_t size, + struct longhorn_lvol_context *ctx, + void (*next)(struct longhorn_lvol_context *arg) + ) { + + +} + +static void longhorn_blob_header(void *arg) { + struct longhorn_lvol_context *ctx = arg; +} + + + + +static void longhorn_blob_opened(void *arg, struct spdk_blob *blob, int bserrno) { + struct longhorn_lvol_context *ctx = arg; + char *name; + size_t len; + + ctx->blob = blob; + + spdk_blob_get_xattr_value(blob, "name", &name, &len); + + printf("name = %s\n", name); + + strncpy(ctx->name, name, len); + + write(ctx->fd, ctx->name, sizeof(ctx->name)); + + 
ctx->header.num_clusters = blob->active.num_clusters; + ctx->header.allocated_clusters = longhorn_get_allocated_clusters(blob); + ctx->header.cluster_size = ctx->bs->cluster_sz; + ctx->header.io_unit_size = ctx->bs->io_unit_size; + + write(ctx->fd, &ctx->header, sizeof(ctx->header)); + + ctx->table = calloc(1, sizeof(uint32_t) * ctx->header.allocated_clusters); + longhorn_export_allocated_clusters(blob, ctx->table); + + printf("writing table\n"); + write(ctx->fd, ctx->table, sizeof(uint32_t) * ctx->header.allocated_clusters); + printf("wrote table\n"); + + + ctx->cluster = spdk_malloc(ctx->header.cluster_size, ALIGN_4K, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + + ctx->io_units_per_cluster = ctx->header.cluster_size / ctx->header.io_unit_size; + ctx->channel = spdk_bs_alloc_io_channel(ctx->bs); + + if (ctx->pos < ctx->header.allocated_clusters) { + spdk_blob_io_read(ctx->blob, ctx->channel, ctx->cluster, + longhorn_get_cluster_offset(ctx), + ctx->io_units_per_cluster, + longhorn_cluster_read_cb, + ctx); + } + +} + +static void longhorn_lvol_handle_state(struct longhorn_lvol_context *ctx) { + switch (ctx->state) { + case LVOL_SEND_NAME: + break; + case LVOL_SEND_HEADER: + break; + case LVOL_SEND_TABLE: + break; + case LVOL_SEND_CLUSTER: + break; + } +} + + + +void longhorn_lvol_transmit(int fd, uint64_t blob_id, struct spdk_blob_store *bs, bool *busy) { + struct longhorn_lvol_context *ctx; + + ctx = calloc(1, sizeof(struct longhorn_lvol_context)); + + ctx->fd = fd; + ctx->blob_id = (spdk_blob_id) blob_id; + ctx->bs = bs; + + spdk_bs_open_blob(bs, ctx->blob_id, longhorn_blob_opened, ctx); +} + + diff --git a/module/bdev/longhorn/bdev_longhorn_lvol.h b/module/bdev/longhorn/bdev_longhorn_lvol.h new file mode 100644 index 000000000..cc0ebc527 --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn_lvol.h @@ -0,0 +1,22 @@ +#ifndef SPDK_BDEV_LONGHORN_LVOL__H +#define SPDK_BDEV_LONGHORN_LVOL__H + +struct longhorn_lvol_header { + uint64_t num_clusters; + uint64_t 
allocated_clusters; + uint32_t cluster_size; + uint32_t io_unit_size; +}; + +struct longhorn_transmit_context; + +struct spdk_lvol_store * +longhorn_get_lvol_store_by_name(const char *name); + +uint64_t longhorn_get_allocated_clusters(struct spdk_blob *blob); +void longhorn_export_allocated_clusters(struct spdk_blob *blob, uint32_t *table); + +//struct longhorn_transmit_context *longhorn_transmit_context_create +void longhorn_lvol_transmit(int fd, uint64_t blob_id, struct spdk_blob_store *bs, bool *busy); +#endif /* SPDK_BDEV_LONGHORN_LVOL__H */ + diff --git a/module/bdev/longhorn/bdev_longhorn_nvmf.c b/module/bdev/longhorn/bdev_longhorn_nvmf.c new file mode 100644 index 000000000..79a24a817 --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn_nvmf.c @@ -0,0 +1,272 @@ +#include "spdk/nvmf.h" +#include "spdk/util.h" + +#include "bdev_longhorn_nvmf.h" + + +static bool tcp_transport_created = false; + +static void +longhorn_tgt_add_transport_done(void *cb_arg, int status) +{ + + tcp_transport_created = true; + +} + + + +static void +longhorn_subsystem_add_done(struct spdk_nvmf_subsystem *subsystem, + void *cb_arg, int status) { +} + +void longhorn_nvmf_create_transport(spdk_nvmf_tgt_add_transport_done_fn cb_fn, + void *cb_arg) { + struct spdk_nvmf_transport_opts opts; + struct spdk_nvmf_tgt *tgt; + struct spdk_nvmf_transport *transport; + + spdk_nvmf_transport_opts_init("tcp", &opts, sizeof(opts)); + tgt = spdk_nvmf_get_tgt(NULL); + + transport = spdk_nvmf_transport_create("tcp", &opts); + + if (cb_fn != NULL) { + spdk_nvmf_tgt_add_transport(tgt, transport, cb_fn, cb_arg); + } else { + spdk_nvmf_tgt_add_transport(tgt, transport, + longhorn_tgt_add_transport_done, + NULL); + } +} + + +void longhorn_nvmf_create_subsystem(const char *nqn) { + struct spdk_nvmf_tgt *tgt; + struct spdk_nvmf_subsystem *subsystem; + + + tgt = spdk_nvmf_get_tgt(NULL); + + subsystem = spdk_nvmf_subsystem_create(tgt, nqn, SPDK_NVMF_SUBTYPE_NVME, + 0); + + 
spdk_nvmf_subsystem_set_allow_any_host(subsystem, true); + + spdk_nvmf_subsystem_start(subsystem, longhorn_subsystem_add_done, NULL); + +} + +static void populate_tcp_trid(struct spdk_nvme_transport_id *trid, const char *addr, uint16_t port) { + snprintf(trid->trstring, SPDK_NVMF_TRSTRING_MAX_LEN, "TCP"); + + trid->trtype = SPDK_NVME_TRANSPORT_TCP; trid->adrfam = SPDK_NVMF_ADRFAM_IPV4; + + snprintf(trid->traddr, SPDK_NVMF_TRADDR_MAX_LEN, "%s", addr); + snprintf(trid->trsvcid, SPDK_NVMF_TRSVCID_MAX_LEN, "%"PRIu16, port); + +} + + +static void add_listener_cb(void *cb_arg, int status) { + struct spdk_nvme_transport_id *trid = cb_arg; + + free(trid); +} + +static void add_listener_resume_cb(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status) { +} + +static void add_listener_pause_cb(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status) { + struct spdk_nvme_transport_id *trid = cb_arg; + + spdk_nvmf_subsystem_add_listener(subsystem, trid, add_listener_cb, trid); + + spdk_nvmf_subsystem_resume(subsystem, add_listener_resume_cb, NULL); +} + + +void longhorn_nvmf_subsystem_add_listener(const char *nqn, const char *addr, uint16_t port) { + struct spdk_nvmf_tgt *tgt; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvme_transport_id *trid; + + tgt = spdk_nvmf_get_tgt(NULL); + + subsystem = spdk_nvmf_tgt_find_subsystem(tgt, nqn); + + trid = calloc(1, sizeof(*trid)); + populate_tcp_trid(trid, addr, port); + + spdk_nvmf_subsystem_pause(subsystem, 0, add_listener_pause_cb, trid); +} + +static void add_ns_resume_cb(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status) { +} + +static void add_ns_pause_cb(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status) { + char *bdev_name = cb_arg; + struct spdk_nvmf_ns_opts ns_opts; + + spdk_nvmf_ns_opts_get_defaults(&ns_opts, sizeof(ns_opts)); + + spdk_nvmf_subsystem_add_ns_ext(subsystem, bdev_name, &ns_opts, sizeof(ns_opts), NULL); + + free(bdev_name); + + 
spdk_nvmf_subsystem_resume(subsystem, add_ns_resume_cb, NULL); +} + +void longhorn_nvmf_subsystem_add_ns(const char *nqn, const char *bdev_name) { + struct spdk_nvmf_tgt *tgt; + struct spdk_nvmf_subsystem *subsystem; + + subsystem = spdk_nvmf_tgt_find_subsystem(tgt, nqn); + + spdk_nvmf_subsystem_pause(subsystem, 0, add_ns_pause_cb, bdev_name); +} + +struct longhorn_publish_nvmf_ctx { + longhorn_publish_nvmf_cb cb_fn; + void *cb_arg; +}; + +void longhorn_publish_nvmf(const char *bdev_name, const char *nqn, const char *addr, uint16_t port, longhorn_publish_nvmf_cb cb_fn, void *cb_arg) { + struct spdk_nvmf_tgt *tgt; + struct spdk_nvmf_subsystem *subsystem; + struct spdk_nvmf_ns_opts ns_opts; + struct spdk_nvmf_listen_opts listen_opts; + struct spdk_nvme_transport_id *trid; + + tgt = spdk_nvmf_get_tgt(NULL); + + subsystem = spdk_nvmf_subsystem_create(tgt, nqn, SPDK_NVMF_SUBTYPE_NVME, + 0); + + spdk_nvmf_subsystem_set_allow_any_host(subsystem, true); + + + spdk_nvmf_ns_opts_get_defaults(&ns_opts, sizeof(ns_opts)); + + spdk_nvmf_subsystem_add_ns_ext(subsystem, bdev_name, &ns_opts, sizeof(ns_opts), NULL); + + trid = calloc(1, sizeof(*trid)); + populate_tcp_trid(trid, addr, port); + + spdk_nvmf_listen_opts_init(&listen_opts, sizeof(listen_opts)); + spdk_nvmf_tgt_listen_ext(tgt, trid, &listen_opts); + spdk_nvmf_subsystem_add_listener(subsystem, trid, add_listener_cb, trid); + + spdk_nvmf_subsystem_start(subsystem, longhorn_subsystem_add_done, NULL); + +} + +#define NVME_MAX_BDEVS_PER_RPC 128 + +struct longhorn_attach_nvmf_ctx { + uint32_t count; + size_t bdev_cnt; + const char *names[NVME_MAX_BDEVS_PER_RPC]; + struct spdk_nvme_ctrlr_opts opts; + longhorn_attach_nvmf_cb cb_fn; + void *cb_arg; +}; + + +static void longhorn_wait_for_examine_cb(void *cb_ctx) { + struct longhorn_attach_nvmf_ctx *ctx = cb_ctx; + + ctx->cb_fn(ctx->names, ctx->bdev_cnt, 0, ctx->cb_arg); + + free(ctx); +} + + +static void longhorn_nvme_create_cb(void *cb_ctx, size_t bdev_cnt, int rc) { + struct 
longhorn_attach_nvmf_ctx *ctx = cb_ctx; + + if (rc < 0) { + ctx->cb_fn(NULL, 0, rc, ctx->cb_arg); + free(ctx); + } else { + ctx->bdev_cnt = bdev_cnt; + spdk_bdev_wait_for_examine(longhorn_wait_for_examine_cb, ctx); + } +} + +void longhorn_attach_nvmf(const char *bdev_name_prefix, const char *nqn, + const char *addr, uint16_t port, + longhorn_attach_nvmf_cb cb_fn, void *cb_arg) { + struct spdk_nvme_transport_id *trid; + size_t len; + struct spdk_nvme_host_id hostid = {}; + uint32_t prchk_flags = 0; + struct longhorn_attach_nvmf_ctx *ctx; + + + trid = calloc(1, sizeof(*trid)); + populate_tcp_trid(trid, addr, port); + + len = strlen(nqn); + memcpy(trid->subnqn, nqn, len + 1); + + + ctx = calloc(1, sizeof(*ctx)); + + ctx->count = NVME_MAX_BDEVS_PER_RPC; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + bdev_nvme_create(&trid, &hostid, bdev_name_prefix, ctx->names, ctx->count, + prchk_flags, longhorn_nvme_create_cb, ctx, &ctx->opts); + +} + +static char *external_addr = NULL; + +struct longhorn_set_external_addr_ctx { + char *addr; + longhorn_set_external_addr_cb cb_fn; + void *cb_arg; +}; + + +static void +longhorn_external_addr_cb(void *cb_arg, int status) +{ + struct longhorn_set_external_addr_ctx *ctx = cb_arg; + + tcp_transport_created = true; + + ctx->cb_fn(ctx->addr, ctx->cb_arg); + free(ctx); +} + + +void longhorn_set_external_addr(const char *addr, + longhorn_set_external_addr_cb cb_fn, + void *cb_arg) +{ + external_addr = strdup(addr); + + if (tcp_transport_created) { + cb_fn(external_addr, cb_arg); + } else { + struct longhorn_set_external_addr_ctx *ctx = + calloc(1, sizeof(struct longhorn_set_external_addr_ctx)); + + ctx->addr = external_addr; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + longhorn_nvmf_create_transport(longhorn_external_addr_cb, + ctx); + } + + +} + + + + diff --git a/module/bdev/longhorn/bdev_longhorn_nvmf.h b/module/bdev/longhorn/bdev_longhorn_nvmf.h new file mode 100644 index 000000000..cf4a82cdd --- /dev/null +++ 
b/module/bdev/longhorn/bdev_longhorn_nvmf.h @@ -0,0 +1,28 @@ +#ifndef _BDEV_LONGHORN_NVMF_H_ +#define _BDEV_LONGHORN_NVMF_H_ + +#include "spdk/nvmf.h" + + +#define VOLUME_FORMAT "nqn.2021-12.io.longhorn.volume:%s" +#define REPLICA_FORMAT "nqn.2021-12.io.longhorn.replica:%s/%s" +#define SNAPSHOT_FORMAT "nqn.2021-12.io.longhorn.snapshot:%s" + +void longhorn_nvmf_create_transport(spdk_nvmf_tgt_add_transport_done_fn cb_fn, + void *cb_arg); +void longhorn_nvmf_create_subsystem(const char *nqn); + +typedef void (*longhorn_publish_nvmf_cb)(void *arg); +void longhorn_publish_nvmf(const char *bdev, const char *nqn, const char *addr, uint16_t port, longhorn_publish_nvmf_cb cb_fn, void *cb_arg); + +typedef void (*longhorn_set_external_addr_cb)(const char *addr, void *arg); + +void longhorn_set_external_addr(const char *addr, + longhorn_set_external_addr_cb cb_fn, + void *cb_arg); + +typedef void (*longhorn_attach_nvmf_cb)(const char **bdev_names, size_t bdev_cnt, int status, void *arg); +void longhorn_attach_nvmf(const char *bdev_name_prefix, const char *nqn, const char *addr, uint16_t port, longhorn_attach_nvmf_cb cb_fn, void *cb_arg); + +#endif /* _BDEV_LONGHORN_NVMF_H_ */ + diff --git a/module/bdev/longhorn/bdev_longhorn_rebuild.c b/module/bdev/longhorn/bdev_longhorn_rebuild.c new file mode 100644 index 000000000..c6f628fe2 --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn_rebuild.c @@ -0,0 +1,634 @@ +#include "spdk/rpc.h" +#include "spdk/bdev.h" +#include "bdev_longhorn.h" +#include "spdk/util.h" +#include "spdk/string.h" +#include "spdk/log.h" +#include "spdk/env.h" +#include "spdk_internal/lvolstore.h" +#include "../lvol/vbdev_lvol.h" +#include "lib/blob/blobstore.h" +#include "bdev_longhorn_rebuild.h" + +#define ALIGN_4K 4096 + +struct longhorn_blob_info_context { + struct spdk_blob_store *bs; + void (*callback)(struct longhorn_blob_info *info, void *cb_arg); + void *cb_arg; +}; + + +static void longhorn_blob_opened(void *arg, struct spdk_blob *blob, int bserrno) 
{ + struct longhorn_blob_info_context *ctx = arg; + struct longhorn_blob_info info; + size_t len; + + if (!blob) { + (*ctx->callback)(NULL, ctx->cb_arg); + free(ctx); + return; + } + + spdk_blob_get_xattr_value(blob, "name", &(info.name), &len); + + info.num_clusters = blob->active.num_clusters; + info.allocated_clusters = longhorn_get_allocated_clusters(blob); + + info.table = calloc(1, sizeof(uint32_t) * info.allocated_clusters); + + longhorn_export_allocated_clusters(blob, info.table); + + (*ctx->callback)(&info, ctx->cb_arg); + + free(info.table); + + if (blob->parent_id) { + spdk_bs_open_blob(ctx->bs, blob->parent_id, longhorn_blob_opened, ctx); + } else { + (*ctx->callback)(NULL, ctx->cb_arg); + free(ctx); + } +} + +void longhorn_get_blob_info(struct spdk_blob_store *bs, uint64_t blob_id, void (*callback)(struct longhorn_blob_info *info, void *cb_arg), void *cb_arg) { + struct longhorn_blob_info_context *ctx; + + ctx = calloc(1, sizeof(*ctx)); + ctx->bs = bs; + ctx->callback = callback; + ctx->cb_arg = cb_arg; + + spdk_bs_open_blob(ctx->bs, blob_id, longhorn_blob_opened, ctx); +} + + +int bdev_longhorn_lookup_name(const char *name, spdk_blob_op_with_handle_complete cb_fn, void *cb_arg) { + struct spdk_lvol_store *lvs; + struct lvol_store_bdev *lvs_bdev; + + lvs_bdev = vbdev_lvol_store_first(); + + while (lvs_bdev != NULL) { + printf("lvs: %s\n", lvs_bdev->lvs->name); + + lvs_bdev = vbdev_lvol_store_next(lvs_bdev); + } + + return 0; +} + +struct lvs_name *lvs_get_parent(const char *name) +{ + struct spdk_bdev *bdev = NULL; + struct spdk_lvol *lvol = NULL; + spdk_blob_id parent_id; + + bdev = spdk_bdev_get_by_name(name); + + if (bdev != NULL) { + lvol = vbdev_lvol_get_from_bdev(bdev); + + if (lvol != NULL) { + parent_id = lvol->blob->parent_id; + } + } + + + return NULL; +} + +struct longhorn_import_context { + char *name; + char *lvs; + char *file; + struct lvol_store_bdev *lvs_bdev; + + FILE *fp; + + uint64_t blob_id; + struct spdk_blob *blob; + struct 
spdk_io_channel *channel; + + + uint64_t num_clusters; + uint32_t cluster_size; + uint32_t io_unit_size; + uint64_t current_cluster; + uint64_t allocated_clusters; + + uint32_t *cluster_table; + uint8_t *cluster; + uint32_t current; + +}; + +static void free_longhorn_import_context(struct longhorn_import_context *ctx) { + if (ctx) { + if (ctx->file) { + free(ctx->file); + } + + if (ctx->lvs) { + free(ctx->lvs); + } + + if (ctx->name) { + free(ctx->name); + } + + if (ctx->fp) { + fclose(ctx->fp); + } + + free(ctx); + } +} + +static void +write_next_cluster(void *arg1, int bserrno) { + struct longhorn_import_context *ctx = arg1; + ssize_t nread; + uint64_t offset; + + + if (bserrno) { + printf("error: %d\n", bserrno); + fclose(ctx->fp); + + return; + } + + if (ctx->current >= ctx->allocated_clusters) { + free_longhorn_import_context(ctx); + + printf("Import complete\n"); + return; + } + + + nread = fread(ctx->cluster, 1, ctx->cluster_size, ctx->fp); + + if (nread > 0) { + offset = ctx->cluster_table[ctx->current] * ctx->cluster_size / ctx->io_unit_size; + + ctx->current++; + + spdk_blob_io_write(ctx->blob, ctx->channel, ctx->cluster, offset, + ctx->cluster_size / ctx->io_unit_size, write_next_cluster, ctx); + } +} + + +static void +longhorn_import_blob(struct spdk_blob *blob, + struct longhorn_import_context *ctx) { + uint64_t blob_id = spdk_blob_get_id(blob); + long offset; + + fread(&ctx->num_clusters, sizeof (uint64_t), 1, ctx->fp); + fread(&ctx->allocated_clusters, sizeof (uint64_t), 1, ctx->fp); + fread(&ctx->cluster_size, sizeof (uint32_t), 1, ctx->fp); + fread(&ctx->io_unit_size, sizeof (uint32_t), 1, ctx->fp); + + + ctx->cluster_table = calloc(sizeof (uint32_t), ctx->allocated_clusters); + ctx->cluster = spdk_malloc(ctx->cluster_size, ALIGN_4K, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + + fread(ctx->cluster_table, sizeof(uint32_t), ctx->allocated_clusters, ctx->fp); + + offset = ftell(ctx->fp); + + if (offset % ctx->io_unit_size != 0) { + 
fseek(ctx->fp, ctx->io_unit_size - offset % ctx->io_unit_size, SEEK_CUR); + } + + ctx->channel = spdk_bs_alloc_io_channel(ctx->blob->bs); + write_next_cluster(ctx, 0); + + printf("here\n"); + +} + +static void +blob_import_iterator_cb(void *arg1, struct spdk_blob *blob, int bserrno) { + struct longhorn_import_context *ctx = arg1; + struct spdk_xattr_names *names; + char *xattr_name = NULL; + uint64_t blob_id; + + const void *value; + size_t value_len; + unsigned int i; + + + if (bserrno) { + if (blob_id != 0) { + //longhorn_import_context(blob_id, ctx); + } else { + free_longhorn_import_context(ctx); + } + return; + } + + blob_id = spdk_blob_get_id(blob); + spdk_blob_get_xattr_names(blob, &names); + + for (i = 0; i < spdk_xattr_names_get_count(names); i++) { + xattr_name = spdk_xattr_names_get_name(names, i); + + if (strcmp(xattr_name, "name") == 0) { + spdk_blob_get_xattr_value(blob, xattr_name, + &value, &value_len); + + if (strncmp(value, ctx->name, strlen(ctx->name)) == 0) { + /* Found our blob. 
*/ + printf("found blob %s\n", ctx->name); + ctx->blob_id = blob_id; + + ctx->blob = blob; + longhorn_import_blob(blob, ctx); + return; + + } else { + printf("%s != %s\n", (char *)value, ctx->name); + } + } + } + + + spdk_bs_iter_next(ctx->lvs_bdev->lvs->blobstore, + blob, + blob_import_iterator_cb, + ctx); +} + + + + + + +int bdev_longhorn_import(const char *name, const char *lvs, const char *file) { + struct lvol_store_bdev *lvs_bdev = NULL;; + struct longhorn_import_context *ctx = NULL; + FILE *fp; + + lvs_bdev = vbdev_lvol_store_first(); + + while (lvs_bdev != NULL) { + if (strcmp(lvs_bdev->lvs->name, lvs) != 0) { + lvs_bdev = vbdev_lvol_store_next(lvs_bdev); + } else { + printf("found lvs %s\n", lvs); + break; + } + } + + if (lvs_bdev != NULL) { + fp = fopen(file, "r"); + + if (fp == NULL) return -1; + + ctx = calloc(1, sizeof(struct longhorn_import_context)); + + ctx->name = strdup(name); + ctx->lvs = strdup(lvs); + ctx->file = strdup(file); + + ctx->lvs_bdev = lvs_bdev; + + ctx->fp = fp; + + spdk_bs_iter_first(ctx->lvs_bdev->lvs->blobstore, + blob_import_iterator_cb, ctx); + + return 0; + } + + + return -1; +} + +static void reopen_blob_cb(void *arg, struct spdk_blob *blob, int bserrno) { + struct spdk_lvol *parent_lvol = arg; + + if (blob != NULL) { + //parent_lvol->blob = blob; + } +} + +void bdev_longhorn_md_sync_complete(void *cb_arg, int bserrno) +{ + struct spdk_lvol *parent_lvol = cb_arg; + + if (bserrno != 0) { + printf("metadata sync failed: %s\n", strerror(bserrno)); + } else { + //spdk_bs_open_blob(parent_lvol->lvol_store->blobstore, parent_lvol->blob->id, reopen_blob_cb, parent_lvol); + + + printf("metadata sync succeeded\n"); + } +} + + +int bdev_longhorn_link(const char *child, const char *parent) +{ + struct spdk_lvol_store *lvs = NULL; + struct spdk_bdev *parent_bdev = NULL; + struct spdk_bdev *child_bdev = NULL; + struct spdk_lvol *parent_lvol = NULL; + struct spdk_lvol *child_lvol = NULL; + int bserrno; + + + parent_bdev = 
spdk_bdev_get_by_name(parent); + child_bdev = spdk_bdev_get_by_name(child); + + if (parent_bdev == NULL) { + printf("can't find bdev for %s\n", parent); + return; + } + + if (child_bdev == NULL) { + printf("can't find bdev for %s\n", child); + return; + } + + parent_lvol = vbdev_lvol_get_from_bdev(parent_bdev); + child_lvol = vbdev_lvol_get_from_bdev(child_bdev); + + if (parent_lvol == NULL) { + printf("can't find lvol for %s\n", parent); + return; + } + + if (child_lvol == NULL) { + printf("can't find lvol for %s\n", child); + return; + } + + bserrno = spdk_blob_set_internal_xattr(parent_lvol->blob, BLOB_SNAPSHOT, &child_lvol->blob->id, sizeof(spdk_blob_id)); + + printf("syncing metadata\n"); + spdk_blob_sync_md(parent_lvol->blob, bdev_longhorn_md_sync_complete, parent_lvol); + + + return 0; +} + +struct snapshot_rpc { + char *name; + uint64_t num_clusters; + uint32_t allocated_clusters; + uint32_t *active_clusters; +}; + +#define MAX_SNAPSHOTS 256 +struct snapshots_rpc { + size_t num_snapshots; + struct snapshot_rpc snapshots[MAX_SNAPSHOTS]; +}; + + +struct children_rpc { + char *name; + uint64_t cluster_size; + uint32_t io_unit_size; + + struct snapshots_rpc snapshots; + +}; + +static int json_decode_clusters(const struct spdk_json_val *val, void *out) { + uint32_t *clusters = out; + struct snapshot_rpc *snapshot = SPDK_CONTAINEROF(clusters, struct snapshot_rpc, active_clusters); + size_t dummy; + int error; + uint32_t i; + + printf("name = %s\n", snapshot->name); + printf("num_clusters = %lu\n", snapshot->num_clusters); + printf("allocated_clusters = %u\n", snapshot->allocated_clusters); + snapshot->active_clusters = calloc(sizeof(uint32_t), snapshot->allocated_clusters); + + error = spdk_json_decode_array(val, spdk_json_decode_uint32, snapshot->active_clusters, snapshot->allocated_clusters, &dummy, sizeof(uint32_t)); + + + for (int i = 0; i < snapshot->allocated_clusters; ++i) { + printf("%u\n", snapshot->active_clusters[i]); + } + + return error; +} + + 
+static const struct spdk_json_object_decoder rpc_snapshot_decoders[] = { + {"name", offsetof(struct snapshot_rpc, name), spdk_json_decode_string}, + {"num_clusters", offsetof(struct snapshot_rpc, num_clusters), spdk_json_decode_uint64}, + {"allocated_clusters", offsetof(struct snapshot_rpc, allocated_clusters), spdk_json_decode_uint32}, + {"active_clusters", offsetof(struct snapshot_rpc, active_clusters), json_decode_clusters}, +}; + + +static int json_decode_snapshot(const struct spdk_json_val *val, void *out) { + int error; + + error = spdk_json_decode_object(val, rpc_snapshot_decoders, + SPDK_COUNTOF(rpc_snapshot_decoders), + out); + + return error; +} + +static int json_decode_snapshots(const struct spdk_json_val *val, void *out) { + struct snapshots_rpc *snapshots = out; + int error = 0; + + error = spdk_json_decode_array(val, json_decode_snapshot, snapshots->snapshots, MAX_SNAPSHOTS, &snapshots->num_snapshots, sizeof(struct snapshot_rpc)); + + return error; +} + +static const struct spdk_json_object_decoder rpc_replica_decoders[] = { + {"name", offsetof(struct children_rpc, name), spdk_json_decode_string}, + {"cluster_size", offsetof(struct children_rpc, cluster_size), spdk_json_decode_uint64}, + {"io_unit_size", offsetof(struct children_rpc, io_unit_size), spdk_json_decode_uint32}, + {"snapshots", offsetof(struct children_rpc, snapshots), json_decode_snapshots}, +}; + + +static void receive_children(const char *addr, + const char *command, + int32_t id, + struct spdk_json_val *result, + struct spdk_json_val *error, + void *arg) { + int i = 0; + char *data = (char *)result->start; + uint64_t blob_id; + struct spdk_json_val *value; + struct children_rpc children = {}; + + printf("received response. 
%ld, %s\n", result->len, data); + + if (spdk_json_decode_object(result, rpc_replica_decoders, + SPDK_COUNTOF(rpc_replica_decoders), + &children)) { + printf("error decoding\n"); + } + + + + +#if 0 + if (result->type == SPDK_JSON_VAL_OBJECT_BEGIN) { + value = spdk_json_object_first(result); + + while (value != NULL) { + if (spdk_json_decode_uint64(value, &blob_id) == 0) { + printf("%016lx %lu\n", blob_id, blob_id); + } + + value = spdk_json_next(value); + } + } +#endif + + +} + +struct rebuild_context { + struct spdk_lvol_store *lvs; + char *prefix; +}; + +static void receive_replicas(const char *addr, + const char *command, + int32_t id, + struct spdk_json_val *result, + struct spdk_json_val *error, + void *arg) { + int i = 0; + char *data = (char *)result->start; + uint64_t blob_id; + struct spdk_json_val *value; + struct children_rpc children = {}; + struct rebuild_context *ctx = arg; + char *bdev_name; + char *last_bdev_name = NULL;; + + printf("receive_replicas"); + + printf("received response. 
%ld, %s\n", result->len, data); + + if (spdk_json_decode_object(result, rpc_replica_decoders, + SPDK_COUNTOF(rpc_replica_decoders), + &children)) { + printf("error decoding\n"); + } + + printf("num of snapshots %d\n", children.snapshots.num_snapshots); + + for (i = children.snapshots.num_snapshots - 1; i >= 0; --i) { + bdev_name = spdk_sprintf_alloc("%s%s", ctx->prefix, children.snapshots.snapshots[i].name); + + printf("syncing %s\n", children.snapshots.snapshots[i].name); + + longhorn_snapshot_bdev_sync(bdev_name, + children.snapshots.snapshots[i].name, + ctx->lvs, + children.snapshots.snapshots[i].num_clusters, + children.snapshots.snapshots[i].allocated_clusters, + children.cluster_size, + children.io_unit_size, + children.snapshots.snapshots[i].active_clusters); + + if (last_bdev_name) { + bdev_longhorn_link(bdev_name, last_bdev_name); + free(last_bdev_name); + } + + last_bdev_name = bdev_name; + + } + + if (last_bdev_name) { + free(last_bdev_name); + } +} + + + +void bdev_longhorn_get_children_remote(const char *address, + uint16_t port, + const char *name) { + char *addr = NULL; + struct spdk_json_write_ctx *w; + struct spdk_jsonrpc_client_request *request; + + addr = spdk_sprintf_alloc("%s:%d", address, port); + + printf("%s:%d:%s\n", address, port, name); + + json_remote_client(addr); + + request = spdk_jsonrpc_client_create_request(); + + w = spdk_jsonrpc_begin_request(request, 1, "lvol_list_children"); + spdk_json_write_name(w, "params"); + spdk_json_write_object_begin(w); + spdk_json_write_name(w, "name"); + spdk_json_write_string(w, name); + spdk_json_write_object_end(w); + + spdk_jsonrpc_end_request(request, w); + + json_remote_client_send_command(addr, "lvol_list_children", + 1, request, receive_children, NULL); + + + free(addr); +} + + +void bdev_longhorn_rebuild_remote(const char *address, + uint16_t port, + const char *name, + char *remote_prefix, + struct spdk_lvol_store *lvs) { + char *addr = NULL; + struct spdk_json_write_ctx *w; + struct 
spdk_jsonrpc_client_request *request; + struct rebuild_context *ctx; + struct spdk_lvol_store *store; + + addr = spdk_sprintf_alloc("%s:%d", address, port); + + printf("%s:%d:%s\n", address, port, name); + + json_remote_client(addr); + + request = spdk_jsonrpc_client_create_request(); + + w = spdk_jsonrpc_begin_request(request, 1, "lvol_list_children"); + spdk_json_write_name(w, "params"); + spdk_json_write_object_begin(w); + spdk_json_write_name(w, "name"); + spdk_json_write_string(w, name); + spdk_json_write_object_end(w); + + spdk_jsonrpc_end_request(request, w); + + ctx = calloc(1, sizeof(*ctx)); + ctx->prefix = strdup(remote_prefix); + ctx->lvs = lvs; + + json_remote_client_send_command(addr, "lvol_list_children", + 1, request, receive_replicas, ctx); + + + free(addr); +} + diff --git a/module/bdev/longhorn/bdev_longhorn_rebuild.h b/module/bdev/longhorn/bdev_longhorn_rebuild.h new file mode 100644 index 000000000..b3f2f3218 --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn_rebuild.h @@ -0,0 +1,32 @@ +#ifndef BDEV_LONGHORN_REBUILD_H +#define BDEV_LONGHORN_REBUILD_H + +#include "lib/blob/blobstore.h" +#include "spdk/queue.h" + +struct lvs_name { + spdk_blob_id id; + +}; + +struct longhorn_blob_info { + char *name; + uint64_t num_clusters; + uint64_t allocated_clusters; + uint32_t *table; +}; + +void longhorn_get_blob_info(struct spdk_blob_store *bs, uint64_t blob_id, void (*callback)(struct longhorn_blob_info *info, void *cb_arg), void *cb_arg); + +int bdev_longhorn_lookup_name(const char *name, spdk_blob_op_with_handle_complete cb_fn, void *cb_arg); +int bdev_longhorn_import(const char *name, const char *lvs, const char *file); +int bdev_longhorn_link(const char *child, const char *parent); +void bdev_longhorn_get_children_remote(const char *address, + uint16_t port, + const char *name); +void bdev_longhorn_rebuilt_remote(const char *address, + uint16_t port, + const char *name, + char *remote_prefix, + struct spdk_lvol_store *lvs); +#endif /* 
BDEV_LONGHORN_REBUILD_H */ diff --git a/module/bdev/longhorn/bdev_longhorn_rebuild_rpc.c b/module/bdev/longhorn/bdev_longhorn_rebuild_rpc.c new file mode 100644 index 000000000..f63e1a968 --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn_rebuild_rpc.c @@ -0,0 +1,446 @@ +#include +#include +#include + +#include "spdk/stdinc.h" + +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/uuid.h" +#include "spdk/string.h" +#include "spdk/log.h" +#include "spdk/jsonrpc.h" +#include "spdk/env.h" +#include "spdk/init.h" +#include "spdk/thread.h" +#include "bdev_longhorn_rebuild.h" +#include "bdev_longhorn_remote_sync.h" +#include "bdev_longhorn_sync_client.h" +#include "bdev_longhorn_lvol.h" +#include "spdk_internal/lvolstore.h" +#include "../lvol/vbdev_lvol.h" + +struct rpc_lvol_list_children { + char *name; +}; + +static const struct spdk_json_object_decoder rpc_lvol_list_children_decoders[] = { + {"name", offsetof(struct rpc_lvol_list_children, name), spdk_json_decode_string, true}, +}; + +struct longhorn_child_blob_context { + struct spdk_json_write_ctx *w; + struct spdk_jsonrpc_request *request; +}; + + +static void longhorn_child_blob_info(struct longhorn_blob_info *info, void *cb_arg) { + struct longhorn_child_blob_context *ctx = cb_arg; + uint64_t i; + + if (info) { + spdk_json_write_object_begin(ctx->w); + + spdk_json_write_named_string(ctx->w, "name", info->name); + spdk_json_write_named_uint64(ctx->w, "num_clusters", info->num_clusters); + spdk_json_write_named_uint32(ctx->w, "allocated_clusters", info->allocated_clusters); + + spdk_json_write_named_array_begin(ctx->w, "active_clusters"); + for (i = 0; i < info->allocated_clusters; ++i) { + spdk_json_write_uint32(ctx->w, info->table[i]); + } + spdk_json_write_array_end(ctx->w); + + spdk_json_write_object_end(ctx->w); + } else { + spdk_json_write_array_end(ctx->w); + spdk_json_write_object_end(ctx->w); + + spdk_jsonrpc_end_result(ctx->request, ctx->w); + free(ctx); + } + +} + + +static void 
+rpc_bdev_lvol_list_children(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_lvol_list_children req = {NULL}; + struct spdk_json_write_ctx *w; + struct spdk_bdev *bdev = NULL; + struct spdk_lvol *lvol = NULL; + struct longhorn_child_blob_context *ctx = NULL; + //spdk_blob_id parent_id; + + + + if (spdk_json_decode_object(params, rpc_lvol_list_children_decoders, + SPDK_COUNTOF(rpc_lvol_list_children_decoders), + &req)) { + SPDK_DEBUGLOG(bdev_malloc, "spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "spdk_json_decode_object failed"); + //free_rpc_construct_malloc(&req); + return; + } + + + + if (!req.name) { + spdk_jsonrpc_send_error_response(request, -EINVAL, + "Name must be provided"); + } + + + + w = spdk_jsonrpc_begin_result(request); + + spdk_json_write_object_begin(w); + bdev = spdk_bdev_get_by_name(req.name); + + if (bdev != NULL) { + lvol = vbdev_lvol_get_from_bdev(bdev); + + if (lvol != NULL) { + + spdk_json_write_named_string(w, "name", req.name); + spdk_json_write_named_uint64(w, "cluster_size", lvol->lvol_store->blobstore->cluster_sz); + spdk_json_write_named_uint32(w, "io_unit_size", lvol->lvol_store->blobstore->io_unit_size); + + + spdk_json_write_named_array_begin(w, "snapshots"); + + ctx = calloc(1, sizeof(*ctx)); + ctx->w = w; + ctx->request = request; + longhorn_get_blob_info(lvol->lvol_store->blobstore, lvol->blob_id, longhorn_child_blob_info, ctx); +#if 0 + + + + while (parent_id != SPDK_BLOBID_INVALID) { + spdk_json_write_uint64(w, parent_id); + parent_id = spdk_blob_get_parent_snapshot(lvol->lvol_store->blobstore, parent_id); + } +#endif + //spdk_json_write_array_end(w); + } + } + +#if 0 + spdk_json_write_object_end(w); + + + + spdk_jsonrpc_end_result(request, w); +#endif + +} +SPDK_RPC_REGISTER("lvol_list_children", rpc_bdev_lvol_list_children, SPDK_RPC_RUNTIME) + + +struct rpc_lvol_list_children_remote { + char *address; + uint16_t 
*port; + char *name; +}; +static const struct spdk_json_object_decoder rpc_lvol_list_children_remote_decoders[] = { + {"address", offsetof(struct rpc_lvol_list_children_remote, address), spdk_json_decode_string, true}, + {"port", offsetof(struct rpc_lvol_list_children_remote, port), spdk_json_decode_uint16, true}, + {"name", offsetof(struct rpc_lvol_list_children_remote, name), spdk_json_decode_string, true}, +}; + +static void +rpc_bdev_lvol_list_children_remote(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_lvol_list_children_remote req = {NULL}; + struct spdk_json_write_ctx *w; + + + if (spdk_json_decode_object(params, rpc_lvol_list_children_remote_decoders, + SPDK_COUNTOF(rpc_lvol_list_children_remote_decoders), + &req)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "spdk_json_decode_object failed"); + return; + } + + bdev_longhorn_get_children_remote(req.address, req.port, req.name); +} + +struct rpc_lvol_rebuild_remote { + char *address; + uint16_t *port; + char *name; + char *remote_prefix; + char *lvs; + +}; + + +static const struct spdk_json_object_decoder rpc_lvol_rebuild_remote_decoders[] = { + {"address", offsetof(struct rpc_lvol_rebuild_remote, address), spdk_json_decode_string, true}, + {"port", offsetof(struct rpc_lvol_rebuild_remote, port), spdk_json_decode_uint16, true}, + {"name", offsetof(struct rpc_lvol_rebuild_remote, name), spdk_json_decode_string, true}, + {"remote_prefix", offsetof(struct rpc_lvol_rebuild_remote, remote_prefix), spdk_json_decode_string, true}, + {"lvs", offsetof(struct rpc_lvol_rebuild_remote, lvs), spdk_json_decode_string, true}, +}; + +static void +rpc_bdev_lvol_rebuild_remote(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_lvol_rebuild_remote req = {NULL}; + struct spdk_json_write_ctx *w; + struct spdk_lvol_store *lvs; + + + if (spdk_json_decode_object(params, rpc_lvol_rebuild_remote_decoders, + 
SPDK_COUNTOF(rpc_lvol_rebuild_remote_decoders), + &req)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "spdk_json_decode_object failed"); + return; + } + + lvs = longhorn_get_lvol_store_by_name(req.lvs); + + bdev_longhorn_rebuild_remote(req.address, req.port, req.name, req.remote_prefix, lvs); +} + + +SPDK_RPC_REGISTER("lvol_rebuild_remote", rpc_bdev_lvol_rebuild_remote, SPDK_RPC_RUNTIME) + + +struct rpc_lvol_import { + char *name; + char *lvs; + char *file; +}; + +static const struct spdk_json_object_decoder rpc_lvol_import_decoders[] = { + {"name", offsetof(struct rpc_lvol_import, name), spdk_json_decode_string, true}, + {"lvs", offsetof(struct rpc_lvol_import, lvs), spdk_json_decode_string, true}, + {"file", offsetof(struct rpc_lvol_import, file), spdk_json_decode_string, true}, +}; + +static void +rpc_bdev_lvol_import(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_lvol_import req = {NULL}; + struct spdk_json_write_ctx *w; + + + if (spdk_json_decode_object(params, rpc_lvol_import_decoders, + SPDK_COUNTOF(rpc_lvol_import_decoders), + &req)) { + SPDK_DEBUGLOG(bdev_malloc, "spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "spdk_json_decode_object failed"); + //free_rpc_construct_malloc(&req); + return; + } + + + if (!req.name) { + spdk_jsonrpc_send_error_response(request, -EINVAL, + "Name must be provided"); + } + + if (!req.file) { + spdk_jsonrpc_send_error_response(request, -EINVAL, + "Name must be provided"); + } + + + + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_string(w, req.name); + spdk_json_write_string(w, req.file); + spdk_json_write_string(w, req.lvs); + + + spdk_jsonrpc_end_result(request, w); + + + bdev_longhorn_import(req.name, req.lvs, req.file); +} + +SPDK_RPC_REGISTER("lvol_import", rpc_bdev_lvol_import, SPDK_RPC_RUNTIME) + +struct rpc_tcp_json_server { + char *address; + uint16_t port; 
+}; + +static const struct spdk_json_object_decoder rpc_tcp_json_server_decoders[] = { + {"address", offsetof(struct rpc_tcp_json_server, address), spdk_json_decode_string, true}, + {"port", offsetof(struct rpc_tcp_json_server, port), spdk_json_decode_uint16, true}, +}; + +struct tcp_server_entry { + struct sockaddr_in addr; + struct spdk_jsonrpc_server *server; + struct spdk_poller *poller; + + TAILQ_ENTRY(tcp_server_entry) entries; + +}; + +static TAILQ_HEAD(, tcp_server_entry) tcp_servers = TAILQ_HEAD_INITIALIZER(tcp_servers); + +static int +tcp_server_poll(void *arg) +{ + struct tcp_server_entry *entry = arg; + spdk_jsonrpc_server_poll(entry->server); + return SPDK_POLLER_BUSY; +} + +static void +rpc_create_tcp_json_server(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_tcp_json_server req = {NULL}; + struct tcp_server_entry *entry; + + if (spdk_json_decode_object(params, rpc_tcp_json_server_decoders, + SPDK_COUNTOF(rpc_tcp_json_server_decoders), + &req)) { + SPDK_DEBUGLOG(bdev_malloc, "spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "spdk_json_decode_object failed"); + return; + } + + printf("%s:%d\n", req.address, req.port); + + + entry = calloc(1, sizeof(struct tcp_server_entry)); + + inet_aton(req.address, &entry->addr.sin_addr); + entry->addr.sin_port = htons(req.port); + entry->addr.sin_family = AF_INET; + + entry->server = spdk_jsonrpc_server_listen(AF_INET, 0, &entry->addr, + sizeof(struct sockaddr_in), + spdk_rpc_handler); + entry->poller = SPDK_POLLER_REGISTER(tcp_server_poll, entry, 4000); + + TAILQ_INSERT_TAIL(&tcp_servers, entry, entries); + + spdk_jsonrpc_send_bool_response(request, true); +} + +SPDK_RPC_REGISTER("tcp_json_server", rpc_create_tcp_json_server, SPDK_RPC_RUNTIME) + +struct rpc_link_lvols { + char *child; + char *parent; +}; + +static const struct spdk_json_object_decoder rpc_link_lvols_decoder[] = { + {"child", 
offsetof(struct rpc_link_lvols, child), spdk_json_decode_string, true}, + {"parent", offsetof(struct rpc_link_lvols, parent), spdk_json_decode_string, true}, +}; + +static void +rpc_lvol_link(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params) +{ + struct rpc_link_lvols req = {NULL}; + + if (spdk_json_decode_object(params, rpc_link_lvols_decoder, + SPDK_COUNTOF(rpc_link_lvols_decoder), + &req)) { + SPDK_DEBUGLOG(bdev_malloc, "spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "spdk_json_decode_object failed"); + return; + } + + printf("%s:%s\n", req.child, req.parent); + + bdev_longhorn_link(req.child, req.parent); +} + +SPDK_RPC_REGISTER("link_lvols", rpc_lvol_link, SPDK_RPC_RUNTIME) + +struct rpc_tcp_sync_server { + char *address; + uint16_t port; + char *lvs; +}; + +static const struct spdk_json_object_decoder rpc_tcp_sync_server_decoders[] = { + {"address", offsetof(struct rpc_tcp_sync_server, address), spdk_json_decode_string, true}, + {"port", offsetof(struct rpc_tcp_sync_server, port), spdk_json_decode_uint16, true}, + {"lvs", offsetof(struct rpc_tcp_sync_server, lvs), spdk_json_decode_string, true}, +}; + +static void +rpc_create_tcp_sync_server(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_tcp_sync_server req = {NULL}; + struct spdk_lvol_store *lvs; + + if (spdk_json_decode_object(params, rpc_tcp_sync_server_decoders, + SPDK_COUNTOF(rpc_tcp_sync_server_decoders), + &req)) { + SPDK_DEBUGLOG(bdev_malloc, "spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "spdk_json_decode_object failed"); + return; + } + + + lvs = longhorn_get_lvol_store_by_name(req.lvs); + + longhorn_remote_sync_server(req.address, req.port, lvs); + + spdk_jsonrpc_send_bool_response(request, true); +} + +SPDK_RPC_REGISTER("tcp_sync_server", rpc_create_tcp_sync_server, SPDK_RPC_RUNTIME) + 
+/* Parameters of the "sync_client" RPC. */
+struct rpc_sync_client {
+	char *address;
+	uint16_t port;
+	uint64_t blob_id;
+	char *lvs;
+};
+
+/* Every field is flagged optional here, so the handler validates the strings itself. */
+static const struct spdk_json_object_decoder rpc_sync_client_decoders[] = {
+	{"address", offsetof(struct rpc_sync_client, address), spdk_json_decode_string, true},
+	{"port", offsetof(struct rpc_sync_client, port), spdk_json_decode_uint16, true},
+	{"blob_id", offsetof(struct rpc_sync_client, blob_id), spdk_json_decode_uint64, true},
+	{"lvs", offsetof(struct rpc_sync_client, lvs), spdk_json_decode_string, true},
+};
+
+/*
+ * RPC handler for "sync_client": resolves the named lvol store and calls
+ * longhorn_sync_client() for blob_id against address:port.
+ *
+ * A response is always sent, so the JSON-RPC request is never left pending:
+ * an error response when decoding fails, a required string is missing or the
+ * lvol store is unknown, and "true" once longhorn_sync_client() was called.
+ */
+static void
+rpc_create_sync_client(struct spdk_jsonrpc_request *request,
+		       const struct spdk_json_val *params)
+{
+	struct rpc_sync_client req = {NULL};
+	struct spdk_lvol_store *lvs;
+
+	if (spdk_json_decode_object(params, rpc_sync_client_decoders,
+				    SPDK_COUNTOF(rpc_sync_client_decoders),
+				    &req)) {
+		SPDK_ERRLOG("spdk_json_decode_object failed\n");
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "spdk_json_decode_object failed");
+		goto cleanup;
+	}
+
+	/* Both strings are optional for the decoder but are dereferenced below. */
+	if (req.address == NULL || req.lvs == NULL) {
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
+						 "address and lvs are required");
+		goto cleanup;
+	}
+
+	lvs = longhorn_get_lvol_store_by_name(req.lvs);
+	if (lvs == NULL) {
+		spdk_jsonrpc_send_error_response_fmt(request, -ENODEV,
+						     "cannot find lvs: %s", req.lvs);
+		goto cleanup;
+	}
+
+	longhorn_sync_client(req.address, req.port, req.blob_id, lvs);
+	spdk_jsonrpc_send_bool_response(request, true);
+
+	/*
+	 * NOTE(review): req.address is deliberately not freed on this path because
+	 * it is not visible here whether longhorn_sync_client() keeps the pointer.
+	 * Free it here if the callee copies the string.
+	 */
+	free(req.lvs);
+	return;
+
+cleanup:
+	free(req.address);
+	free(req.lvs);
+}
+
+SPDK_RPC_REGISTER("sync_client", rpc_create_sync_client, SPDK_RPC_RUNTIME)
diff --git a/module/bdev/longhorn/bdev_longhorn_remote.c b/module/bdev/longhorn/bdev_longhorn_remote.c
new file mode 100644
index 000000000..f39ff5874
--- /dev/null
+++ b/module/bdev/longhorn/bdev_longhorn_remote.c
@@ -0,0 +1,166 @@
+#include "spdk/stdinc.h"
+#include "spdk/env.h"
+#include "spdk/jsonrpc.h"
+#include "spdk/thread.h"
+#include "bdev_longhorn_remote.h"
+
+/* One outstanding request: the response carrying the same id is routed to fn. */
+struct tcp_client_handler_entry {
+	char *command;
+	int32_t id;
+
+	void *arg;
+
+	json_remote_response_handler_fn fn;
+
+	TAILQ_ENTRY(tcp_client_handler_entry) entries;
+};
+
+/* One JSON-RPC client connection, keyed by the address string it was opened with. */
+struct tcp_client_entry {
+	const char *addr;
+	struct spdk_jsonrpc_client *client;
+
+	TAILQ_HEAD(, tcp_client_handler_entry) handlers;
+
+	TAILQ_ENTRY(tcp_client_entry) entries;
+};
+
+static TAILQ_HEAD(, tcp_client_entry) tcp_clients = TAILQ_HEAD_INITIALIZER(tcp_clients);
+static struct spdk_poller *poller = NULL;
+
+/* Release a handler entry and the command string it owns; NULL is accepted. */
+static void handler_free(struct tcp_client_handler_entry *handler)
+{
+	if (handler) {
+		free(handler->command);
+		free(handler);
+	}
+}
+
+/*
+ * Fetch one response from the client and hand it to the handler registered
+ * under the same id. Responses without a matching handler are dropped.
+ */
+static void remote_client_handle(struct tcp_client_entry *entry)
+{
+	struct spdk_jsonrpc_client_response *response;
+	struct tcp_client_handler_entry *handler;
+	int32_t id = 0;
+
+	response = spdk_jsonrpc_client_get_response(entry->client);
+	if (response == NULL) {
+		/* Nothing to process; the original code dereferenced NULL here. */
+		return;
+	}
+
+	if (response->id == NULL || spdk_json_number_to_int32(response->id, &id) != 0) {
+		printf("Unable to decode TCP client message.\n");
+		spdk_jsonrpc_client_free_response(response);
+		return;
+	}
+
+	TAILQ_FOREACH(handler, &entry->handlers, entries) {
+		if (handler->id == id) {
+			break;
+		}
+	}
+
+	if (handler != NULL) {
+		/* Unlink first so a callback that queues a new command sees a consistent list. */
+		TAILQ_REMOVE(&entry->handlers, handler, entries);
+
+		if (handler->fn != NULL) {
+			handler->fn(entry->addr, handler->command, id,
+				    response->result, response->error,
+				    handler->arg);
+		}
+
+		handler_free(handler);
+	}
+
+	spdk_jsonrpc_client_free_response(response);
+}
+
+/* Poller callback: polls every known client and dispatches a response when one is ready. */
+static int remote_client_poll(void *arg)
+{
+	struct tcp_client_entry *entry;
+	struct tcp_client_entry *next;
+
+	for (entry = TAILQ_FIRST(&tcp_clients); entry != NULL; entry = next) {
+		next = TAILQ_NEXT(entry, entries);
+
+		/*
+		 * NOTE(review): negative results (e.g. -EIO) are ignored, exactly as
+		 * before; a failed connection is never removed from the list.
+		 */
+		if (spdk_jsonrpc_client_poll(entry->client, 0) > 0) {
+			remote_client_handle(entry);
+		}
+	}
+
+	return SPDK_POLLER_BUSY;
+}
+
+/* Return the client entry opened for addr, or NULL when there is none. */
+static struct tcp_client_entry *
+json_client_lookup(const char *addr)
+{
+	struct tcp_client_entry *entry = NULL;
+
+	TAILQ_FOREACH(entry, &tcp_clients, entries) {
+		if (strcmp(addr, entry->addr) == 0) {
+			return entry;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Ensure a JSON-RPC client connection to addr exists and that the poller
+ * servicing all connections is running.
+ *
+ * Returns 0 when the connection exists or was created, -ECONNREFUSED when
+ * spdk_jsonrpc_client_connect() fails, and -ENOMEM when bookkeeping cannot
+ * be allocated (both failure cases previously returned 0 or crashed).
+ */
+int json_remote_client(const char *addr)
+{
+	struct spdk_jsonrpc_client *client;
+	struct tcp_client_entry *entry;
+	char *addr_copy;
+
+	if (json_client_lookup(addr) != NULL) {
+		return 0;
+	}
+
+	client = spdk_jsonrpc_client_connect(addr, AF_INET);
+	if (client == NULL) {
+		return -ECONNREFUSED;
+	}
+
+	entry = calloc(1, sizeof(*entry));
+	addr_copy = strdup(addr);
+	if (entry == NULL || addr_copy == NULL) {
+		free(addr_copy);
+		free(entry);
+		spdk_jsonrpc_client_close(client);
+		return -ENOMEM;
+	}
+
+	entry->addr = addr_copy;
+	entry->client = client;
+	TAILQ_INIT(&entry->handlers);
+
+	TAILQ_INSERT_TAIL(&tcp_clients, entry, entries);
+
+	if (poller == NULL) {
+		poller = SPDK_POLLER_REGISTER(remote_client_poll,
+					      NULL, 4000);
+	}
+
+	return 0;
+}
+
+/*
+ * Queue request on the connection previously opened for addr and register fn
+ * to be called with the response whose id equals id.
+ *
+ * Returns 0 on success, -ENOENT when no connection exists for addr, -ENOMEM
+ * on allocation failure, or the error from spdk_jsonrpc_client_send_request().
+ * NOTE(review): on failure the request is assumed to stay owned by the
+ * caller; confirm against the jsonrpc client API.
+ */
+int json_remote_client_send_command(const char *addr,
+				    const char *command,
+				    int32_t id,
+				    struct spdk_jsonrpc_client_request *request,
+				    json_remote_response_handler_fn fn,
+				    void *arg)
+{
+	struct tcp_client_entry *entry = json_client_lookup(addr);
+	struct tcp_client_handler_entry *handler;
+	int rc;
+
+	if (entry == NULL) {
+		return -ENOENT;
+	}
+
+	handler = calloc(1, sizeof(*handler));
+	if (handler == NULL) {
+		return -ENOMEM;
+	}
+
+	handler->command = strdup(command);
+	if (handler->command == NULL) {
+		handler_free(handler);
+		return -ENOMEM;
+	}
+
+	handler->id = id;
+	handler->fn = fn;
+	handler->arg = arg;
+
+	TAILQ_INSERT_TAIL(&entry->handlers, handler, entries);
+
+	rc = spdk_jsonrpc_client_send_request(entry->client, request);
+	if (rc != 0) {
+		/* Nothing was sent, so no response will ever match this handler. */
+		TAILQ_REMOVE(&entry->handlers, handler, entries);
+		handler_free(handler);
+		return rc;
+	}
+
+	return 0;
+}
diff --git a/module/bdev/longhorn/bdev_longhorn_remote.h b/module/bdev/longhorn/bdev_longhorn_remote.h
new file mode 100644
index 000000000..01180d1eb
--- /dev/null
+++ b/module/bdev/longhorn/bdev_longhorn_remote.h
@@ -0,0 +1,21 @@
+#ifndef SPDK_BDEV_LONGHORN_REMOTE__H
+#define SPDK_BDEV_LONGHORN_REMOTE__H
+
+typedef void (*json_remote_response_handler_fn)(const char *addr,
+						const char *command,
+						int32_t id,
+						struct spdk_json_val *result,
+						struct spdk_json_val *error,
+						void *arg);
+
+int json_remote_client(const char *addr);
+int json_remote_client_send_command(const char *addr,
+				    const char *command,
+				    int32_t id,
+				    struct spdk_jsonrpc_client_request *request,
+				    json_remote_response_handler_fn fn,
+				    void *arg);
+
+
+
+#endif /* SPDK_BDEV_LONGHORN_REMOTE__H */
diff --git a/module/bdev/longhorn/bdev_longhorn_remote_sync.c b/module/bdev/longhorn/bdev_longhorn_remote_sync.c
new file mode 100644
index 
000000000..c18843530
--- /dev/null
+++ b/module/bdev/longhorn/bdev_longhorn_remote_sync.c
@@ -0,0 +1,190 @@
+/* NOTE(review): the header name of the next #include was lost; restore it from the original patch. */
+#include
+#include "spdk/stdinc.h"
+#include "spdk/env.h"
+#include "spdk/jsonrpc.h"
+#include "spdk/thread.h"
+#include "spdk_internal/lvolstore.h"
+#include "../lvol/vbdev_lvol.h"
+#include "lib/blob/blobstore.h"
+#include "bdev_longhorn_lvol.h"
+
+
+#ifndef MAX
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#endif
+
+#ifndef MIN
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
+
+/* One accepted peer connection of a sync server. */
+struct longhorn_server_connection_entry {
+	int fd;
+
+	/* Non-zero while a transmit started on this connection is in flight. */
+	int busy;
+	struct spdk_lvol_store *lvs;
+
+	TAILQ_ENTRY(longhorn_server_connection_entry) entries;
+};
+
+/* One listening socket together with the connections accepted on it. */
+struct longhorn_server_entry {
+	struct sockaddr_in addr;
+	int fd;
+	struct spdk_lvol_store *lvs;
+
+	TAILQ_HEAD(, longhorn_server_connection_entry) connections;
+
+	TAILQ_ENTRY(longhorn_server_entry) entries;
+};
+
+static TAILQ_HEAD(, longhorn_server_entry) sync_servers = TAILQ_HEAD_INITIALIZER(sync_servers);
+
+/*
+ * Read a blob id from an idle connection and start transmitting that blob.
+ *
+ * Returns 0 when the connection should be kept (busy, nothing to read yet, or
+ * a transmit was started) and a negative errno when it should be dropped:
+ * -ECONNRESET when the peer closed, -EIO on a short read, -errno otherwise.
+ */
+static int sync_connection_readable(struct longhorn_server_connection_entry *entry)
+{
+	uint64_t blob_id = 0;
+	ssize_t nread;
+
+	printf("fd readable\n");
+
+	if (entry->busy) {
+		return 0;
+	}
+
+	nread = read(entry->fd, &blob_id, sizeof(blob_id));
+	if (nread < 0) {
+		if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
+			return 0;
+		}
+		return -errno;
+	}
+
+	if (nread != (ssize_t)sizeof(blob_id)) {
+		/* Never act on a partially filled blob id. */
+		return nread == 0 ? -ECONNRESET : -EIO;
+	}
+
+	printf("%" PRIx64 " %" PRIu64 "\n", blob_id, blob_id);
+
+	entry->busy = 1;
+
+	longhorn_lvol_transmit(entry->fd, blob_id, entry->lvs->blobstore, &entry->busy);
+
+	return 0;
+}
+
+/* Put fd into non-blocking mode. Returns 0 on success or -errno. */
+static int set_nonblocking(int fd)
+{
+	int fdflags = fcntl(fd, F_GETFL);
+
+	if (fdflags < 0) {
+		return -errno;
+	}
+
+	if (fcntl(fd, F_SETFL, fdflags | O_NONBLOCK) < 0) {
+		return -errno;
+	}
+
+	return 0;
+}
+
+/*
+ * Accept one pending connection on the listening socket fd.
+ *
+ * Returns a newly allocated connection entry owned by the caller, or NULL
+ * when nothing could be accepted or set up. The accepted socket is closed on
+ * every failure path; fd 0 is treated as a valid descriptor.
+ */
+struct longhorn_server_connection_entry *longhorn_new_connection(int fd, struct spdk_lvol_store *lvs)
+{
+	struct longhorn_server_connection_entry *entry;
+	struct sockaddr_in remote_addr = {0};
+	socklen_t addrlen = sizeof(remote_addr);
+	int remote_fd;
+
+	remote_fd = accept(fd, (struct sockaddr *)&remote_addr, &addrlen);
+	if (remote_fd < 0) {
+		return NULL;
+	}
+
+	/* The poller uses select(), which cannot handle descriptors >= FD_SETSIZE. */
+	if (remote_fd >= FD_SETSIZE || set_nonblocking(remote_fd) != 0) {
+		close(remote_fd);
+		return NULL;
+	}
+
+	entry = calloc(1, sizeof(*entry));
+	if (entry == NULL) {
+		close(remote_fd);
+		return NULL;
+	}
+
+	entry->fd = remote_fd;
+	entry->lvs = lvs;
+
+	return entry;
+}
+
+
+/*
+ * Poller callback: a zero-timeout select() over every listening socket and
+ * accepted connection. Accepts new connections, services readable ones and
+ * drops connections whose read failed or whose peer went away.
+ */
+static int longhorn_sync_poll(void *arg)
+{
+	fd_set rdset;
+	fd_set errset;
+	struct longhorn_server_entry *entry;
+	struct longhorn_server_connection_entry *connection;
+	struct longhorn_server_connection_entry *next_connection;
+	struct longhorn_server_connection_entry *new_connection;
+	int max_fd = 0;
+	struct timeval timeout = {0, 0};
+
+	FD_ZERO(&rdset);
+	FD_ZERO(&errset);
+
+	TAILQ_FOREACH(entry, &sync_servers, entries) {
+		max_fd = MAX(max_fd, entry->fd);
+
+		FD_SET(entry->fd, &rdset);
+		FD_SET(entry->fd, &errset);
+		TAILQ_FOREACH(connection, &entry->connections, entries) {
+			max_fd = MAX(max_fd, connection->fd);
+
+			FD_SET(connection->fd, &rdset);
+			FD_SET(connection->fd, &errset);
+		}
+	}
+
+	if (select(max_fd + 1, &rdset, NULL, &errset, &timeout) > 0) {
+		TAILQ_FOREACH(entry, &sync_servers, entries) {
+			if (FD_ISSET(entry->fd, &rdset)) {
+				new_connection = longhorn_new_connection(entry->fd, entry->lvs);
+
+				if (new_connection != NULL) {
+					TAILQ_INSERT_TAIL(&entry->connections, new_connection, entries);
+				}
+			}
+
+			/* The successor is saved first because the current entry may be freed. */
+			for (connection = TAILQ_FIRST(&entry->connections); connection != NULL;
+			     connection = next_connection) {
+				next_connection = TAILQ_NEXT(connection, entries);
+
+				if (!FD_ISSET(connection->fd, &rdset)) {
+					continue;
+				}
+
+				if (sync_connection_readable(connection) < 0) {
+					TAILQ_REMOVE(&entry->connections, connection, entries);
+					close(connection->fd);
+					free(connection);
+				}
+			}
+		}
+	}
+
+	return SPDK_POLLER_BUSY;
+}
+
+static struct spdk_poller *poller = NULL;
+
+/*
+ * Start listening on addr:port (IPv4 dotted quad) for sync requests against
+ * lvs and make sure the poller that services all sync servers is registered.
+ *
+ * Returns 0 on success, -ENOMEM, -EINVAL for an unparsable address, -EMFILE
+ * when the socket cannot be used with select(), or -errno from
+ * socket()/bind()/listen()/fcntl(). Nothing is left allocated or open on failure.
+ */
+int longhorn_remote_sync_server(const char *addr, uint16_t port, struct spdk_lvol_store *lvs)
+{
+	struct longhorn_server_entry *entry;
+	int rc;
+
+	entry = calloc(1, sizeof(*entry));
+	if (entry == NULL) {
+		return -ENOMEM;
+	}
+	entry->fd = -1;
+
+	if (inet_aton(addr, &entry->addr.sin_addr) == 0) {
+		rc = -EINVAL;
+		goto err;
+	}
+	entry->addr.sin_port = htons(port);
+	entry->addr.sin_family = AF_INET;
+
+	entry->fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (entry->fd < 0) {
+		rc = -errno;
+		goto err;
+	}
+
+	if (entry->fd >= FD_SETSIZE) {
+		rc = -EMFILE;
+		goto err;
+	}
+
+	if (bind(entry->fd, (struct sockaddr *) &entry->addr, sizeof(struct sockaddr_in)) != 0 ||
+	    listen(entry->fd, 10) != 0) {
+		rc = -errno;
+		goto err;
+	}
+
+	rc = set_nonblocking(entry->fd);
+	if (rc != 0) {
+		goto err;
+	}
+
+	entry->lvs = lvs;
+
+	TAILQ_INIT(&entry->connections);
+	TAILQ_INSERT_TAIL(&sync_servers, entry, entries);
+
+	if (poller == NULL) {
+		poller = SPDK_POLLER_REGISTER(longhorn_sync_poll, NULL, 4000);
+	}
+
+	return 0;
+
+err:
+	if (entry->fd >= 0) {
+		close(entry->fd);
+	}
+	free(entry);
+	return rc;
+}
diff --git a/module/bdev/longhorn/bdev_longhorn_remote_sync.h b/module/bdev/longhorn/bdev_longhorn_remote_sync.h
new file mode 100644
index 000000000..1965ad1d5
--- /dev/null
+++ b/module/bdev/longhorn/bdev_longhorn_remote_sync.h
@@ -0,0 +1,7 @@
+#ifndef SPDK_BDEV_LONGHORN_REMOTE_SYNC__H
+#define SPDK_BDEV_LONGHORN_REMOTE_SYNC__H
+
+int longhorn_remote_sync_server(const char *addr, uint16_t port, struct spdk_lvol_store *lvs);
+
+#endif /* SPDK_BDEV_LONGHORN_REMOTE_SYNC__H */
+
diff --git a/module/bdev/longhorn/bdev_longhorn_replica.c b/module/bdev/longhorn/bdev_longhorn_replica.c
new file mode 100644
index 000000000..88e5d5518
--- /dev/null
+++ b/module/bdev/longhorn/bdev_longhorn_replica.c
@@ -0,0 +1,131 @@
+#include "spdk/stdinc.h"
+#include "spdk/string.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/log.h"
+
+#include "bdev_longhorn_lvol.h"
+#include "bdev_longhorn_replica.h"
+#include "bdev_longhorn_nvmf.h"
+#include "spdk_internal/lvolstore.h"
+#include "../lvol/vbdev_lvol.h"
+#include "lib/blob/blobstore.h"
+
+
+/* Placeholder: looks up the lvol store by name and does nothing with it yet. */
+void bdev_longhorn_replica_detect(const char *name) {
+	struct spdk_lvol_store *lvs = longhorn_get_lvol_store_by_name(name);
+
+	(void)lvs;
+}
+
+/* Placeholder: resolves name to a bdev and its lvol; no information is returned yet. */
+void bdev_longhorn_replica_get_info(const char *name) {
+	struct spdk_bdev *bdev = NULL;
+	struct spdk_lvol *lvol = NULL;
+
+	bdev = spdk_bdev_get_by_name(name);
+
+	if (bdev != NULL) {
+		lvol = vbdev_lvol_get_from_bdev(bdev);
+	}
+
+	(void)lvol;
+}
+
+/* State carried from bdev_longhorn_replica_create() to its completion callbacks. */
+struct longhorn_replica_create_context {
+	char *name;
+	char *bdev_name;
+
+	struct spdk_lvol_store *lvs;
+
+	char *addr;
+	uint16_t port;
+	char *nqn;
+
+	longhorn_replica_create_cb cb_fn;
+	void *cb_arg;
+};
+
+/* Free the context and every string it owns, including bdev_name (previously leaked). */
+static void longhorn_replica_create_context_free(struct longhorn_replica_create_context *ctx)
+{
+	if (ctx == NULL) {
+		return;
+	}
+
+	free(ctx->name);
+	free(ctx->bdev_name);
+	free(ctx->addr);
+	free(ctx->nqn);
+	free(ctx);
+}
+
+/*
+ * Complete a failed creation. The callback type carries no status, so the
+ * failure is signalled by passing a NULL nqn.
+ * NOTE(review): callers that ignore nqn still report success; consider adding
+ * a status argument to longhorn_replica_create_cb.
+ */
+static void longhorn_replica_create_failed(struct longhorn_replica_create_context *ctx, int rc)
+{
+	SPDK_ERRLOG("failed to create replica %s: %d\n", ctx->name ? ctx->name : "(unknown)", rc);
+
+	ctx->cb_fn(ctx->lvs, ctx->name, NULL, ctx->cb_arg);
+	longhorn_replica_create_context_free(ctx);
+}
+
+/* Called once the NVMe-oF publish step finished: report completion and release the context. */
+static void longhorn_replica_publish_complete_cb(void *arg) {
+	struct longhorn_replica_create_context *ctx = arg;
+
+	ctx->cb_fn(ctx->lvs, ctx->name, ctx->nqn, ctx->cb_arg);
+	longhorn_replica_create_context_free(ctx);
+}
+
+/*
+ * Completion of the lvol creation (or direct continuation when the bdev
+ * already exists). Publishes the lvol bdev over NVMe-oF when an address was
+ * given, otherwise completes right away.
+ */
+static void longhorn_replica_create_complete_cb(void *arg,
+						struct spdk_lvol *lvol,
+						int volerrno)
+{
+	struct longhorn_replica_create_context *ctx = arg;
+
+	if (volerrno != 0) {
+		longhorn_replica_create_failed(ctx, volerrno);
+		return;
+	}
+
+	if (ctx->addr && ctx->addr[0] != '\0') {
+		/* Publishing needs the bdev name; do not dereference a missing lvol. */
+		if (lvol == NULL || lvol->bdev == NULL) {
+			longhorn_replica_create_failed(ctx, -ENODEV);
+			return;
+		}
+
+		longhorn_publish_nvmf(lvol->bdev->name, ctx->nqn, ctx->addr, ctx->port,
+				      longhorn_replica_publish_complete_cb, ctx);
+	} else {
+		ctx->cb_fn(ctx->lvs, ctx->name, ctx->nqn, ctx->cb_arg);
+		longhorn_replica_create_context_free(ctx);
+	}
+}
+
+/*
+ * Create (or reuse) the thin-provisioned lvol "<lvs name>/<name>" of the given
+ * size and, when addr is a non-empty string, publish it over NVMe-oF at
+ * addr:port. cb_fn is always invoked exactly once; its nqn argument is NULL
+ * when the operation failed. name and addr are copied, so the caller keeps
+ * ownership of its strings. addr may be NULL.
+ */
+void bdev_longhorn_replica_create(struct spdk_lvol_store *lvs,
+				  const char *name,
+				  uint64_t size,
+				  const char *addr,
+				  uint16_t port,
+				  longhorn_replica_create_cb cb_fn,
+				  void *cb_arg)
+{
+	struct longhorn_replica_create_context *ctx;
+	struct spdk_bdev *bdev;
+	int rc;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (ctx == NULL) {
+		SPDK_ERRLOG("failed to create replica %s: %d\n", name, -ENOMEM);
+		cb_fn(lvs, name, NULL, cb_arg);
+		return;
+	}
+
+	ctx->cb_fn = cb_fn;
+	ctx->cb_arg = cb_arg;
+	ctx->lvs = lvs;
+	ctx->port = port;
+	ctx->name = strdup(name);
+	ctx->addr = addr != NULL ? strdup(addr) : NULL;
+	ctx->bdev_name = spdk_sprintf_alloc("%s/%s", lvs->name, name);
+	ctx->nqn = spdk_sprintf_alloc(REPLICA_FORMAT, lvs->name, name);
+
+	if (ctx->name == NULL || (addr != NULL && ctx->addr == NULL) ||
+	    ctx->bdev_name == NULL || ctx->nqn == NULL) {
+		longhorn_replica_create_failed(ctx, -ENOMEM);
+		return;
+	}
+
+	bdev = spdk_bdev_get_by_name(ctx->bdev_name);
+	if (bdev != NULL) {
+		/* The lvol already exists: skip creation and continue with publishing. */
+		longhorn_replica_create_complete_cb(ctx,
+						    vbdev_lvol_get_from_bdev(bdev),
+						    0);
+		return;
+	}
+
+	rc = vbdev_lvol_create(lvs, name, size, true, LVOL_CLEAR_WITH_DEFAULT,
+			       longhorn_replica_create_complete_cb, ctx);
+	if (rc != 0) {
+		/* The completion callback is not invoked when the call itself fails. */
+		longhorn_replica_create_failed(ctx, rc);
+	}
+}
+
+/* Placeholder: snapshot creation is not implemented yet. */
+void bdev_longhorn_replica_snapshot(struct spdk_lvol_store *lvs,
+				    const char *name,
+				    const char *snapshot)
+{
+	//vbdev_lvol_create_snapshot(struct spdk_lvol *lvol, const char *snapshot_name,
+	//		spdk_lvol_op_with_handle_complete cb_fn, void *cb_arg
+}
diff --git a/module/bdev/longhorn/bdev_longhorn_replica.h b/module/bdev/longhorn/bdev_longhorn_replica.h
new file mode 100644
index 000000000..31455daf0
--- /dev/null
+++ b/module/bdev/longhorn/bdev_longhorn_replica.h
@@ -0,0 +1,23 @@
+#ifndef SPDK__BDEV_LONGHORN_REPLICA_H
+#define SPDK__BDEV_LONGHORN_REPLICA_H
+
+struct bdev_longhorn_replica {
+};
+
+struct bdev_longhorn_replica_info {
+	char *name;
+	uint64_t id;
+};
+
+typedef void (*longhorn_replica_create_cb) (struct spdk_lvol_store *lvs, const char *name, const char *nqn, void *arg);
+
+void bdev_longhorn_replica_create(struct spdk_lvol_store *lvs,
+				  const char *name,
+				  uint64_t size,
+				  const char *addr,
+				  uint16_t port,
+				  longhorn_replica_create_cb cb_fn,
+				  void *cb_arg);
+
+
+#endif /* SPDK__BDEV_LONGHORN_REPLICA_H */
diff --git a/module/bdev/longhorn/bdev_longhorn_replica_rpc.c b/module/bdev/longhorn/bdev_longhorn_replica_rpc.c
new file mode 100644
index 000000000..235e2368f
--- /dev/null
+++ b/module/bdev/longhorn/bdev_longhorn_replica_rpc.c
@@ -0,0 +1,129 @@
+#include "spdk/stdinc.h"
+#include "spdk/string.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/log.h"
+#include "spdk_internal/lvolstore.h"
+#include "bdev_longhorn_replica.h"
+#include "bdev_longhorn_lvol.h"
+
+/* Parameters of the "longhorn_replica_create" RPC. */
+struct rpc_longhorn_replica {
+	char *name;
+	uint64_t size;
+	char *lvs;
+	char *addr;
+	uint16_t port;
+};
+
+static const struct spdk_json_object_decoder rpc_longhorn_replica_create_decoders[] = {
+	{"name", offsetof(struct rpc_longhorn_replica, name), spdk_json_decode_string, false},
+	{"size", offsetof(struct rpc_longhorn_replica, size), spdk_json_decode_uint64, false},
+	{"lvs", offsetof(struct rpc_longhorn_replica, lvs), spdk_json_decode_string, false},
+	{"addr", offsetof(struct rpc_longhorn_replica, addr), spdk_json_decode_string, false},
+	{"port", offsetof(struct rpc_longhorn_replica, port), spdk_json_decode_uint16, false},
+};
+
+/* Release the strings decoded into a rpc_longhorn_replica. */
+static void
+free_rpc_longhorn_replica(struct rpc_longhorn_replica *req)
+{
+	free(req->name);
+	free(req->lvs);
+	free(req->addr);
+}
+
+/* Completion of bdev_longhorn_replica_create(): answers the pending request with "true". */
+static void
+rpc_longhorn_replica_create_cb(struct spdk_lvol_store *lvs,
+			       const char *name, const char *nqn, void *arg) {
+	struct spdk_jsonrpc_request *request = arg;
+
+	spdk_jsonrpc_send_bool_response(request, true);
+}
+
+/*
+ * RPC handler for "longhorn_replica_create". Every early exit sends an error
+ * response; returning silently would leave the client waiting forever.
+ */
+static void
+rpc_longhorn_replica_create(struct spdk_jsonrpc_request *request,
+			    const struct spdk_json_val *params)
+{
+	struct rpc_longhorn_replica req = {};
+	struct spdk_lvol_store *lvs = NULL;
+
+	if (spdk_json_decode_object(params, rpc_longhorn_replica_create_decoders,
+				    SPDK_COUNTOF(rpc_longhorn_replica_create_decoders),
+				    &req)) {
+		SPDK_ERRLOG("spdk_json_decode_object failed\n");
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "spdk_json_decode_object failed");
+		goto cleanup;
+	}
+
+	lvs = longhorn_get_lvol_store_by_name(req.lvs);
+
+	if (lvs == NULL) {
+		SPDK_ERRLOG("cannot find lvs: %s\n", req.lvs);
+		spdk_jsonrpc_send_error_response_fmt(request, -ENODEV,
+						     "cannot find lvs: %s", req.lvs);
+		goto cleanup;
+	}
+
+	/* The callee duplicates name and addr, so the decoded copies can be freed below. */
+	bdev_longhorn_replica_create(lvs, req.name, req.size, req.addr, req.port,
+				     rpc_longhorn_replica_create_cb, request);
+
+cleanup:
+	free_rpc_longhorn_replica(&req);
+}
+
+SPDK_RPC_REGISTER("longhorn_replica_create", rpc_longhorn_replica_create, SPDK_RPC_RUNTIME)
+
+
+
+/* Parameters of the "longhorn_replica_snapshot" RPC. */
+struct rpc_longhorn_replica_snapshot {
+	char *name;
+	char *snapshot;
+	char *lvs;
+};
+
+static const struct spdk_json_object_decoder rpc_longhorn_replica_snapshot_decoders[] = {
+	{"name", offsetof(struct rpc_longhorn_replica_snapshot, name), spdk_json_decode_string, false},
+	{"snapshot", offsetof(struct rpc_longhorn_replica_snapshot, snapshot), spdk_json_decode_string, false},
+	{"lvs", offsetof(struct rpc_longhorn_replica_snapshot, lvs), spdk_json_decode_string, false},
+};
+
+
+/*
+ * RPC handler for "longhorn_replica_snapshot". Taking the snapshot is not
+ * implemented yet; the request is decoded and then answered with an explicit
+ * error so the caller is not left without a reply.
+ */
+static void
+rpc_longhorn_replica_do_snapshot(struct spdk_jsonrpc_request *request,
+				 const struct spdk_json_val *params)
+{
+	struct rpc_longhorn_replica_snapshot req = {};
+
+	if (spdk_json_decode_object(params, rpc_longhorn_replica_snapshot_decoders,
+				    SPDK_COUNTOF(rpc_longhorn_replica_snapshot_decoders),
+				    &req)) {
+		SPDK_ERRLOG("spdk_json_decode_object failed\n");
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "spdk_json_decode_object failed");
+	} else {
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "longhorn_replica_snapshot is not implemented");
+	}
+
+	free(req.name);
+	free(req.snapshot);
+	free(req.lvs);
+}
+
+SPDK_RPC_REGISTER("longhorn_replica_snapshot", rpc_longhorn_replica_do_snapshot, SPDK_RPC_RUNTIME)
+
+
+/* Parameters of the "longhorn_set_external_address" RPC. */
+struct rpc_longhorn_set_external_addr {
+	char *addr;
+};
+
+static const struct spdk_json_object_decoder rpc_longhorn_set_external_addr_decoders[] = {
+	{"addr", offsetof(struct rpc_longhorn_set_external_addr, addr), spdk_json_decode_string, false},
+};
+
+/* Completion of longhorn_set_external_addr(): answers the pending request with "true". */
+static void _longhorn_set_external_addr_cmd_cb(const char *addr, void *arg) {
+	struct spdk_jsonrpc_request *request = arg;
+
+	spdk_jsonrpc_send_bool_response(request, true);
+}
+
+/* RPC handler for "longhorn_set_external_address". */
+static void
+rpc_longhorn_set_external_addr_cmd(struct spdk_jsonrpc_request *request,
+				   const struct spdk_json_val *params)
+{
+	struct rpc_longhorn_set_external_addr req = {};
+
+	if (spdk_json_decode_object(params, rpc_longhorn_set_external_addr_decoders,
+				    SPDK_COUNTOF(rpc_longhorn_set_external_addr_decoders),
+				    &req)) {
+		SPDK_ERRLOG("spdk_json_decode_object failed\n");
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "spdk_json_decode_object failed");
+		free(req.addr);
+		return;
+	}
+
+	/*
+	 * NOTE(review): req.addr is not freed here because it is not visible
+	 * whether longhorn_set_external_addr() copies or keeps the pointer.
+	 */
+	longhorn_set_external_addr(req.addr, _longhorn_set_external_addr_cmd_cb, request);
+}
+
+
+SPDK_RPC_REGISTER("longhorn_set_external_address", rpc_longhorn_set_external_addr_cmd, SPDK_RPC_RUNTIME)
diff --git a/module/bdev/longhorn/bdev_longhorn_rpc.c b/module/bdev/longhorn/bdev_longhorn_rpc.c
new file mode 100644
index 000000000..f53cdedc5
--- /dev/null
+++ 
b/module/bdev/longhorn/bdev_longhorn_rpc.c @@ -0,0 +1,931 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "spdk/rpc.h"
+#include "spdk/bdev.h"
+#include "bdev_longhorn.h"
+#include "spdk/util.h"
+#include "spdk/string.h"
+#include "spdk/log.h"
+#include "spdk/env.h"
+#include "spdk_internal/lvolstore.h"
+#include "../lvol/vbdev_lvol.h"
+#include "lib/blob/blobstore.h"
+
+#define RPC_MAX_BASE_BDEVS 255
+#define BUFSIZE 255
+#define ALIGN_4K 4096
+
+/*
+ * Decoded parameters of the bdev_longhorn_get_bdevs RPC.
+ */
+struct rpc_bdev_longhorn_get_bdevs {
+	/* One of "all", "online", "configuring" or "offline". */
+	char *category;
+};
+
+/*
+ * brief:
+ * Releases the string owned by a decoded bdev_longhorn_get_bdevs request.
+ * params:
+ * req - request whose members are freed (the struct itself is not)
+ * returns:
+ * none
+ */
+static void
+free_rpc_bdev_longhorn_get_bdevs(struct rpc_bdev_longhorn_get_bdevs *req)
+{
+	free(req->category);
+}
+
+/*
+ * JSON decoder table for bdev_longhorn_get_bdevs
+ */
+static const struct spdk_json_object_decoder rpc_bdev_longhorn_get_bdevs_decoders[] = {
+	{"category", offsetof(struct rpc_bdev_longhorn_get_bdevs, category), spdk_json_decode_string},
+};
+
+/*
+ * brief:
+ * RPC handler that answers with a JSON array holding the names of the
+ * longhorn bdevs in the requested category:
+ *   "all"         - every entry of g_longhorn_bdev_list
+ *   "online"      - entries of g_longhorn_bdev_configured_list
+ *   "configuring" - entries of g_longhorn_bdev_configuring_list
+ *   "offline"     - entries of g_longhorn_bdev_offline_list
+ * Any other category is rejected with -EINVAL.
+ * params:
+ * request - pointer to json rpc request
+ * params - pointer to request parameters
+ * returns:
+ * none
+ */
+static void
+rpc_bdev_longhorn_get_bdevs(struct spdk_jsonrpc_request *request,
+			    const struct spdk_json_val *params)
+{
+	struct rpc_bdev_longhorn_get_bdevs req = {};
+	struct spdk_json_write_ctx *writer;
+	struct longhorn_bdev *cur;
+	bool want_all;
+	bool want_online;
+	bool want_configuring;
+	bool want_offline;
+
+	if (spdk_json_decode_object(params, rpc_bdev_longhorn_get_bdevs_decoders,
+				    SPDK_COUNTOF(rpc_bdev_longhorn_get_bdevs_decoders),
+				    &req) != 0) {
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "spdk_json_decode_object failed");
+		free_rpc_bdev_longhorn_get_bdevs(&req);
+		return;
+	}
+
+	/* Classify the category once; the flags drive both validation and output. */
+	want_all = strcmp(req.category, "all") == 0;
+	want_online = strcmp(req.category, "online") == 0;
+	want_configuring = strcmp(req.category, "configuring") == 0;
+	want_offline = strcmp(req.category, "offline") == 0;
+
+	if (!want_all && !want_online && !want_configuring && !want_offline) {
+		spdk_jsonrpc_send_error_response(request, -EINVAL, spdk_strerror(EINVAL));
+		free_rpc_bdev_longhorn_get_bdevs(&req);
+		return;
+	}
+
+	writer = spdk_jsonrpc_begin_result(request);
+	spdk_json_write_array_begin(writer);
+
+	if (want_all) {
+		TAILQ_FOREACH(cur, &g_longhorn_bdev_list, global_link) {
+			spdk_json_write_string(writer, cur->bdev.name);
+		}
+	} else if (want_online) {
+		TAILQ_FOREACH(cur, &g_longhorn_bdev_configured_list, state_link) {
+			spdk_json_write_string(writer, cur->bdev.name);
+		}
+	} else if (want_configuring) {
+		TAILQ_FOREACH(cur, &g_longhorn_bdev_configuring_list, state_link) {
+			spdk_json_write_string(writer, cur->bdev.name);
+		}
+	} else {
+		TAILQ_FOREACH(cur, &g_longhorn_bdev_offline_list, state_link) {
+			spdk_json_write_string(writer, cur->bdev.name);
+		}
+	}
+
+	spdk_json_write_array_end(writer);
+	spdk_jsonrpc_end_result(request, writer);
+
+	free_rpc_bdev_longhorn_get_bdevs(&req);
+}
+SPDK_RPC_REGISTER("bdev_longhorn_get_bdevs", rpc_bdev_longhorn_get_bdevs, SPDK_RPC_RUNTIME)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(bdev_longhorn_get_bdevs, get_longhorn_bdevs)
+
+/* One replica as described in the RPC parameters. */
+struct longhorn_replica {
+	char *addr;
+	char *lvs;
+	uint16_t nvmf_port;
+	uint16_t comm_port;
+};
+
+/*
+ * Replica list in RPC bdev_longhorn_create
+ */
+struct rpc_bdev_longhorn_create_base_bdevs {
+	/* Number of decoded entries in replicas[] */
+	size_t num_base_bdevs;
+
+	struct longhorn_replica replicas[RPC_MAX_BASE_BDEVS];
+};
+
+/*
+ * Input structure for RPC rpc_bdev_longhorn_create
+ */
+struct rpc_bdev_longhorn_create {
+	/* Longhorn bdev name */
+	char *name;
+
+	/* Replica information */
+	struct rpc_bdev_longhorn_create_base_bdevs base_bdevs;
+};
+
+/*
+ * brief:
+ * free_rpc_bdev_longhorn_create function is to free RPC bdev_longhorn_create related parameters
+ * params:
+ * req - pointer to RPC request
+ * returns:
+ * none
+ */
+static void
+free_rpc_bdev_longhorn_create(struct rpc_bdev_longhorn_create *req)
+{
+	size_t i;
+
+	free(req->name);
+	for (i = 0; i < req->base_bdevs.num_base_bdevs; i++) {
+		free(req->base_bdevs.replicas[i].addr);
+		/*
+		 * NOTE(review): replicas[i].lvs is also heap-allocated by the decoder
+		 * and is not released here. Free it once it is confirmed that
+		 * longhorn_bdev_add_replica() does not keep the pointer.
+		 */
+	}
+}
+
+
+/*
+ * Decoder object for one replica of RPC bdev_longhorn_create; everything but
+ * "lvs" is optional.
+ */
+static const struct spdk_json_object_decoder rpc_bdev_longhorn_create_replica_decoders[] = {
+
+	{"lvs", offsetof(struct longhorn_replica, lvs), spdk_json_decode_string},
+	{"addr", offsetof(struct longhorn_replica, addr), spdk_json_decode_string, true},
+
+	{"nvmf_port", offsetof(struct longhorn_replica, nvmf_port), spdk_json_decode_uint16, true},
+	{"comm_port", offsetof(struct longhorn_replica, comm_port), spdk_json_decode_uint16, true}
+};
+
+/*
+ * Decode one replica object into the struct longhorn_replica at out and
+ * print the decoded fields for debugging. Returns the result of
+ * spdk_json_decode_object().
+ */
+static int json_decode_replica(const struct spdk_json_val *val, void *out) {
+	int error;
+	struct longhorn_replica *replica = out;
+	printf("starting json_decode_replica\n");
+	printf("type %d\n", val->type);
+
+	error = spdk_json_decode_object(val, rpc_bdev_longhorn_create_replica_decoders,
+					SPDK_COUNTOF(rpc_bdev_longhorn_create_replica_decoders), out);
+	printf("return json_decode_replica: %d\n", error);
+	/* lvs and addr stay NULL when absent or when decoding failed; "%s" must not see NULL. */
+	printf("replica lvs: %s\n", replica->lvs != NULL ? replica->lvs : "(null)");
+	printf("replica addr: %s\n", replica->addr != NULL ? replica->addr : "(null)");
+	printf("replica nvmf port: %u\n", replica->nvmf_port);
+	printf("replica comm port: %u\n", replica->comm_port);
+	return error;
+
+}
+/*
+ * Decoder function for RPC bdev_longhorn_create to decode the replica list
+ * (at most RPC_MAX_BASE_BDEVS entries).
+ */
+static int
+decode_base_bdevs(const struct spdk_json_val *val, void *out)
+{
+	struct rpc_bdev_longhorn_create_base_bdevs *base_bdevs = out;
+	int error = 0;
+
+	printf("starting decode_base_bdevs\n");
+	printf("type %d\n", val->type);
+	error = spdk_json_decode_array(val, json_decode_replica, base_bdevs->replicas,
+				       RPC_MAX_BASE_BDEVS, &base_bdevs->num_base_bdevs, sizeof(struct longhorn_replica));
+	printf("return decode_base_bdevs: %d\n", error);
+
+	return error;
+}
+
+/*
+ * Decoder object for RPC bdev_longhorn_create
+ */
+static const struct spdk_json_object_decoder rpc_bdev_longhorn_create_decoders[] = {
+	{"name", offsetof(struct rpc_bdev_longhorn_create, name), spdk_json_decode_string},
+	{"replicas", offsetof(struct rpc_bdev_longhorn_create, base_bdevs), decode_base_bdevs},
+};
+
+/*
+ * brief:
+ * rpc_bdev_longhorn_create function is the RPC for creating longhorn bdevs. It takes
+ * input as longhorn bdev name and list of replicas.
+ * params:
+ * request - pointer to json rpc request
+ * params - pointer to request parameters
+ * returns:
+ * none
+ */
+static void
+rpc_bdev_longhorn_create(struct spdk_jsonrpc_request *request,
+			 const struct spdk_json_val *params)
+{
+	struct rpc_bdev_longhorn_create req = {};
+	int rc;
+	size_t i;
+
+	/* params is NULL when the caller sent none; the decoder below rejects that case. */
+	if (params != NULL) {
+		printf("type %d\n", params->type);
+	}
+
+	if (spdk_json_decode_object(params, rpc_bdev_longhorn_create_decoders,
+				    SPDK_COUNTOF(rpc_bdev_longhorn_create_decoders),
+				    &req)) {
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "longhorn spdk_json_decode_object failed");
+		goto cleanup;
+	}
+
+	rc = longhorn_bdev_create(req.name, req.base_bdevs.num_base_bdevs);
+	if (rc != 0) {
+		spdk_jsonrpc_send_error_response_fmt(request, rc,
+						     "Failed to create RAID bdev %s: %s",
+						     req.name, spdk_strerror(-rc));
+		goto cleanup;
+	}
+
+	/*
+	 * Attach every decoded replica. The previous version also called free()
+	 * on a never-initialized pointer in this loop, which is undefined behavior.
+	 */
+	for (i = 0; i < req.base_bdevs.num_base_bdevs; i++) {
+		const struct longhorn_replica *replica = &req.base_bdevs.replicas[i];
+
+		longhorn_bdev_add_replica(req.name, replica->lvs, replica->addr,
+					  replica->nvmf_port, replica->comm_port);
+	}
+
+	spdk_jsonrpc_send_bool_response(request, true);
+
+cleanup:
+	free_rpc_bdev_longhorn_create(&req);
+}
+SPDK_RPC_REGISTER("longhorn_volume_create", rpc_bdev_longhorn_create, SPDK_RPC_RUNTIME)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(longhorn_volume_create, bdev_longhorn_create)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(longhorn_volume_create, construct_longhorn_bdev)
+
+
+/*
+ * Input structure for the replica add/remove RPCs
+ */
+struct rpc_bdev_longhorn_add_replica {
+	/* Longhorn bdev name */
+	char *name;
+
+	/* Replica to add or remove */
+	struct longhorn_replica replica;
+};
+
+/*
+ * Decoder object for the replica add/remove RPCs
+ */
+static const struct spdk_json_object_decoder rpc_bdev_longhorn_add_replica_decoders[] = {
+	{"name", offsetof(struct rpc_bdev_longhorn_add_replica, name), spdk_json_decode_string},
+	{"replica", offsetof(struct rpc_bdev_longhorn_add_replica, replica), json_decode_replica},
+};
+
+/*
+ * brief:
+ * rpc_bdev_longhorn_add_replica_cmd function is the RPC for adding a replica to a
+ * longhorn bdev. It takes the longhorn bdev name and one replica description as input.
+ * params:
+ * request - pointer to json rpc request
+ * params - pointer to request parameters
+ * returns:
+ * none
+ */
+static void
+rpc_bdev_longhorn_add_replica_cmd(struct spdk_jsonrpc_request *request,
+				  const struct spdk_json_val *params)
+{
+	struct rpc_bdev_longhorn_add_replica req = {};
+
+	/* params is NULL when the caller sent none; the decoder below rejects that case. */
+	if (params != NULL) {
+		printf("type %d\n", params->type);
+	}
+
+	if (spdk_json_decode_object(params, rpc_bdev_longhorn_add_replica_decoders,
+				    SPDK_COUNTOF(rpc_bdev_longhorn_add_replica_decoders),
+				    &req)) {
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "longhorn spdk_json_decode_object failed");
+		free(req.name);
+		free(req.replica.addr);
+		free(req.replica.lvs);
+		return;
+	}
+
+	/*
+	 * Same call the longhorn_volume_create RPC makes for each replica. The
+	 * previous version decoded the request and then neither added the
+	 * replica nor sent a response.
+	 */
+	longhorn_bdev_add_replica(req.name, req.replica.lvs, req.replica.addr,
+				  req.replica.nvmf_port, req.replica.comm_port);
+
+	spdk_jsonrpc_send_bool_response(request, true);
+
+	/*
+	 * name and addr are released like the create RPC does after the same
+	 * call. NOTE(review): replica.lvs is left alone, as there, until it is
+	 * confirmed that longhorn_bdev_add_replica() does not keep the pointer.
+	 */
+	free(req.name);
+	free(req.replica.addr);
+}
+
+SPDK_RPC_REGISTER("longhorn_volume_add_replica", rpc_bdev_longhorn_add_replica_cmd, SPDK_RPC_RUNTIME)
+
+/*
+ * RPC for removing a replica from a longhorn bdev. Takes the same parameters
+ * as longhorn_volume_add_replica and answers "true" on success or an error
+ * response when longhorn_bdev_remove_replica() reports a failure.
+ */
+static void
+rpc_bdev_longhorn_remove_replica_cmd(struct spdk_jsonrpc_request *request,
+				     const struct spdk_json_val *params)
+{
+	struct rpc_bdev_longhorn_add_replica req = {};
+	int rc;
+
+	if (params != NULL) {
+		printf("type %d\n", params->type);
+	}
+
+	if (spdk_json_decode_object(params, rpc_bdev_longhorn_add_replica_decoders,
+				    SPDK_COUNTOF(rpc_bdev_longhorn_add_replica_decoders),
+				    &req)) {
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "longhorn spdk_json_decode_object failed");
+		goto cleanup;
+	}
+
+	rc = longhorn_bdev_remove_replica(req.name, req.replica.lvs, req.replica.addr, req.replica.nvmf_port, req.replica.comm_port);
+
+	if (rc != 0) {
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "failed to remove replica");
+	} else {
+		spdk_jsonrpc_send_bool_response(request, true);
+	}
+
+cleanup:
+	/* NOTE(review): assumes longhorn_bdev_remove_replica() only compares these strings and keeps none of them. */
+	free(req.name);
+	free(req.replica.addr);
+	free(req.replica.lvs);
+}
+
+SPDK_RPC_REGISTER("longhorn_volume_remove_replica", rpc_bdev_longhorn_remove_replica_cmd, SPDK_RPC_RUNTIME)
+
+
+/*
+ * Input structure for RPC deleting a longhorn bdev
+ */
+struct rpc_bdev_longhorn_delete {
+	/* longhorn bdev name */
+	char *name;
+};
+
+/*
+ * brief:
+ * free_rpc_bdev_longhorn_delete releases the members owned by a decoded
+ * bdev_longhorn_delete request
+ * params:
+ * req - pointer to RPC request
+ * returns:
+ * none
+ */
+static void
+free_rpc_bdev_longhorn_delete(struct rpc_bdev_longhorn_delete *req)
+{
+	free(req->name);
+}
+
+/*
+ * JSON decoder table for the longhorn bdev delete RPC
+ */
+static const struct spdk_json_object_decoder rpc_bdev_longhorn_delete_decoders[] = {
+	{"name", offsetof(struct rpc_bdev_longhorn_delete, name), spdk_json_decode_string},
+};
+
+/* Everything the asynchronous delete needs until its completion callback runs. */
+struct rpc_bdev_longhorn_delete_ctx {
+	struct rpc_bdev_longhorn_delete req;
+	struct longhorn_bdev_config *longhorn_cfg;
+	struct spdk_jsonrpc_request *request;
+};
+
+/*
+ * brief:
+ * Completion callback of the base device removal started by the delete RPC.
+ * Sends the JSON-RPC response, cleans up the config on success and always
+ * releases the callback context.
+ * params:
+ * cb_arg - pointer to the callback context.
+ * rc - return code of the deletion of the longhorn bdev.
+ * returns:
+ * none
+ */
+static void
+bdev_longhorn_delete_done(void *cb_arg, int rc)
+{
+	struct rpc_bdev_longhorn_delete_ctx *ctx = cb_arg;
+	struct spdk_jsonrpc_request *request = ctx->request;
+
+	if (rc == 0) {
+		struct longhorn_bdev_config *cfg = ctx->longhorn_cfg;
+
+		assert(cfg->longhorn_bdev == NULL);
+
+		longhorn_bdev_config_cleanup(cfg);
+
+		spdk_jsonrpc_send_bool_response(request, true);
+	} else {
+		SPDK_ERRLOG("Failed to delete longhorn bdev %s (%d): %s\n",
+			    ctx->req.name, rc, spdk_strerror(-rc));
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 spdk_strerror(-rc));
+	}
+
+	free_rpc_bdev_longhorn_delete(&ctx->req);
+	free(ctx);
+}
+
+/*
+ * brief:
+ * rpc_bdev_longhorn_delete function is the RPC for deleting a longhorn bdev. It takes longhorn
+ * name as input and delete that longhorn bdev including freeing the base bdev
+ * resources.
+ * params:
+ * request - pointer to json rpc request
+ * params - pointer to request parameters
+ * returns:
+ * none
+ */
+static void
+rpc_bdev_longhorn_delete(struct spdk_jsonrpc_request *request,
+			 const struct spdk_json_val *params)
+{
+	struct rpc_bdev_longhorn_delete_ctx *delete_ctx = calloc(1, sizeof(*delete_ctx));
+
+	if (delete_ctx == NULL) {
+		spdk_jsonrpc_send_error_response(request, -ENOMEM, spdk_strerror(ENOMEM));
+		return;
+	}
+
+	if (spdk_json_decode_object(params, rpc_bdev_longhorn_delete_decoders,
+				    SPDK_COUNTOF(rpc_bdev_longhorn_delete_decoders),
+				    &delete_ctx->req) != 0) {
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "spdk_json_decode_object failed");
+	} else {
+		delete_ctx->longhorn_cfg = longhorn_bdev_config_find_by_name(delete_ctx->req.name);
+
+		if (delete_ctx->longhorn_cfg != NULL) {
+			delete_ctx->request = request;
+
+			/*
+			 * Detach all base bdevs first; bdev_longhorn_delete_done() answers
+			 * the request and releases delete_ctx when that has finished.
+			 */
+			longhorn_bdev_remove_base_devices(delete_ctx->longhorn_cfg,
+							  bdev_longhorn_delete_done, delete_ctx);
+			return;
+		}
+
+		spdk_jsonrpc_send_error_response_fmt(request, ENODEV,
+						     "longhorn bdev %s is not found in config",
+						     delete_ctx->req.name);
+	}
+
+	/* Only the two failure paths above reach this point. */
+	free_rpc_bdev_longhorn_delete(&delete_ctx->req);
+	free(delete_ctx);
+}
+SPDK_RPC_REGISTER("bdev_longhorn_delete", rpc_bdev_longhorn_delete, SPDK_RPC_RUNTIME)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(bdev_longhorn_delete, destroy_longhorn_bdev)
+
+/* List node holding the index of one cluster. */
+struct cluster_entry {
+	int cluster;
+
+	TAILQ_ENTRY(cluster_entry) entries;
+};
+
+/* State of dumping one blob's clusters into a file. */
+struct read_sparse_context {
+	struct spdk_blob_store *blobstore;
+	struct spdk_blob *blob;
+	struct spdk_io_channel *channel;
+	const char *name;
+	uint64_t id;
+	uint64_t num_clusters;
+	uint32_t cluster_size;
+	uint32_t io_unit_size;
+	uint64_t current_cluster;
+	uint64_t allocated_clusters;
+	uint64_t start_offset;
+
+	FILE *fp;
+	uint8_t *cluster;
+
+	TAILQ_HEAD(, cluster_entry) cluster_head;
+	struct cluster_entry *current;
+};
+
+static void
+read_cluster_cb(void *arg1, int bserrno);
+static void blob_it_cb(void *arg1, struct spdk_blob *blob, int bserrno); + +static void +read_next_allocated_cluster(struct read_sparse_context *ctx) { + uint64_t offset; + + if (ctx->current) { + offset = ctx->current->cluster * ctx->cluster_size / ctx->io_unit_size; + + printf("reading at %" PRIu64 ":%" PRIu64 "\n", offset, ctx->cluster_size); + + spdk_blob_io_read(ctx->blob, ctx->channel, ctx->cluster, offset, + ctx->cluster_size / ctx->io_unit_size, read_cluster_cb, ctx); + } else { + fclose(ctx->fp); + } +} + +static void +read_cluster_cb(void *arg1, int bserrno) +{ + struct read_sparse_context *ctx = arg1; + uint32_t nwritten; + + if (bserrno) { + printf("error: %d\n", bserrno); + fclose(ctx->fp); + spdk_bs_iter_next(ctx->blobstore, ctx->blob, blob_it_cb, ctx->blobstore); + return; + } + + printf("successful read\n"); + + nwritten = fwrite(ctx->cluster, 1, ctx->cluster_size, ctx->fp); + printf("nwritten %u ? %u\n", nwritten, ctx->cluster_size); + + if (nwritten != ctx->cluster_size) { + printf("nwritten not euqal to cluster size\n"); + fclose(ctx->fp); + spdk_bs_iter_next(ctx->blobstore, ctx->blob, blob_it_cb, ctx->blobstore); + return; + } + + + if (++ctx->current_cluster < ctx->allocated_clusters && ctx->current != NULL) { + + struct cluster_entry *tmp = TAILQ_NEXT(ctx->current, entries); + printf("next cluster = %d\n", tmp->cluster); + ctx->current = tmp; + + read_next_allocated_cluster(ctx); + } else { + + printf("complete\n"); + fclose(ctx->fp); + //spdk_blob_close(ctx->blob, close_cb, ctx); + spdk_bs_iter_next(ctx->blobstore, ctx->blob, blob_it_cb, ctx->blobstore); + + } + + +} + + +static void open_file(struct read_sparse_context *ctx) +{ + char filename[BUFSIZE] = {'\0'}; + uint64_t blocks; + struct cluster_entry *entry; + long offset; + + if (ctx->name) { + snprintf(filename, BUFSIZE - 1, "%s.kdat", ctx->name); + } else { + snprintf(filename, BUFSIZE - 1, "%" PRIx64 ".kdat", ctx->id); + } + + printf("opening %s\n", filename); + ctx->fp = 
fopen(filename, "w"); + + if (ctx->fp) { + fwrite(&ctx->num_clusters, sizeof (uint64_t), 1, ctx->fp); + fwrite(&ctx->allocated_clusters, sizeof (uint64_t), 1, ctx->fp); + fwrite(&ctx->cluster_size, sizeof (uint32_t), 1, ctx->fp); + fwrite(&ctx->io_unit_size, sizeof (uint32_t), 1, ctx->fp); + + TAILQ_FOREACH(entry, &ctx->cluster_head, entries) { + fwrite(&entry->cluster, sizeof (int), 1, ctx->fp); + } + + + offset = ftell(ctx->fp); + + ctx->start_offset = offset; + + if (offset % ctx->io_unit_size != 0) { + ctx->start_offset = ((offset / ctx->io_unit_size) + 1) * + ctx->io_unit_size; + } + + fseek(ctx->fp, ctx->start_offset, SEEK_SET); + + ctx->cluster = spdk_malloc(ctx->cluster_size, ALIGN_4K, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + + ctx->channel = spdk_bs_alloc_io_channel(ctx->blobstore); + + ctx->current = TAILQ_FIRST(&ctx->cluster_head); + + read_next_allocated_cluster(ctx); + } + +} + + +static void blob_it_cb(void *arg1, struct spdk_blob *blob, int bserrno) +{ + struct spdk_blob_store *blobstore = arg1; + uint64_t val; + struct spdk_xattr_names *names; + const void *value; + size_t value_len; + unsigned int i; + struct cluster_entry *entry; + struct read_sparse_context *ctx = NULL; + char *xattr_name = NULL; + + if (bserrno) { + if (bserrno == -ENOENT) { + printf("last blob\n"); + } else { + printf("error blob: %d\n", bserrno); + } + return; + } + ctx = calloc(1, sizeof(struct read_sparse_context)); + + printf("Blob ID: %" PRIx64 "\n", spdk_blob_get_id(blob)); + printf("Blob Parent ID: %" PRIx64 "\n", blob->parent_id); + + val = spdk_blob_get_num_clusters(blob); + printf("# of clusters: %" PRIu64 "\n", val); + printf("cluster size: %d\n", blobstore->cluster_sz); + printf("io unit size: %d\n", blobstore->io_unit_size); + + ctx->id = spdk_blob_get_id(blob); + ctx->num_clusters = blob->active.num_clusters; + ctx->cluster_size = blobstore->cluster_sz; + ctx->io_unit_size = blobstore->io_unit_size; + ctx->blobstore = blobstore; + ctx->blob = blob; + + 
val = spdk_blob_get_num_pages(blob); + printf("# of pages: %" PRIu64 "\n", val); + printf("# of pages per cluster: %" PRIu64 "\n", blobstore->pages_per_cluster); + + spdk_blob_get_xattr_names(blob, &names); + + printf("# of xattrs: %d\n", spdk_xattr_names_get_count(names)); + printf("xattrs:\n"); + for (i = 0; i < spdk_xattr_names_get_count(names); i++) { + xattr_name = spdk_xattr_names_get_name(names, i); + + spdk_blob_get_xattr_value(blob, xattr_name, + &value, &value_len); + if (value_len > BUFSIZE) { + printf("FYI: adjusting size of xattr due to CLI limits.\n"); + value_len = BUFSIZE + 1; + } + + if (strcmp(xattr_name, "name") == 0) { + ctx->name = strdup(value); + } + + printf("\n(%d) Name:%s\n", i, xattr_name); + printf("(%d) Value:\n", i); + spdk_log_dump(stdout, "", value, value_len - 1); + + } + + + TAILQ_INIT(&ctx->cluster_head); + for (i = 0; i < blob->active.num_clusters; ++i) { + if (blob->active.clusters[i] != 0) { + entry = malloc(sizeof(struct cluster_entry)); + entry->cluster = i; + TAILQ_INSERT_TAIL(&ctx->cluster_head, entry, entries); + printf("Cluster %d LBA: %" PRIu64 "\n", i, blob->active.clusters[i]); + ++ctx->allocated_clusters; + } + } + + printf("Allocated clusters: %d\n", ctx->allocated_clusters); + TAILQ_FOREACH(entry, &ctx->cluster_head, entries) { + printf("Cluster %d\n", entry->cluster); + + } + + if (ctx->allocated_clusters > 0) { + open_file(ctx); + } else { + free(ctx); + spdk_bs_iter_next(blobstore, blob, blob_it_cb, blobstore); + } + + + +} + +struct rpc_lvol_show_blobs_param { + char *lvs; +}; + +static const struct spdk_json_object_decoder rpc_lvol_show_blobs_decoders[] = { + {"lvs", offsetof(struct rpc_lvol_show_blobs_param, lvs), spdk_json_decode_string}, +}; + +static void spdk_bsdump_done(void *arg, int bserrno) { +} +static void +bsdump_print_xattr(FILE *fp, const char *bstype, const char *name, const void *value, + size_t value_len) +{ + if (strncmp(bstype, "BLOBFS", SPDK_BLOBSTORE_TYPE_LENGTH) == 0) { + if (strcmp(name, 
"name") == 0) { + fprintf(fp, "%.*s", (int)value_len, (char *)value); + } else if (strcmp(name, "length") == 0 && value_len == sizeof(uint64_t)) { + uint64_t length; + + memcpy(&length, value, sizeof(length)); + fprintf(fp, "%" PRIu64, length); + } else { + fprintf(fp, "?"); + } + } else if (strncmp(bstype, "LVOLSTORE", SPDK_BLOBSTORE_TYPE_LENGTH) == 0) { + if (strcmp(name, "name") == 0) { + fprintf(fp, "%s", (char *)value); + } else if (strcmp(name, "uuid") == 0 && value_len == sizeof(struct spdk_uuid)) { + char uuid[SPDK_UUID_STRING_LEN]; + + spdk_uuid_fmt_lower(uuid, sizeof(uuid), (struct spdk_uuid *)value); + fprintf(fp, "%s", uuid); + } else { + fprintf(fp, "?"); + } + } else { + fprintf(fp, "?"); + } +} + +static void rpc_lvol_show_blobs(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct spdk_lvol_store *lvs; + struct lvol_store_bdev *lvs_bdev; + struct rpc_lvol_show_blobs_param req; + +#if 0 + + if (spdk_json_decode_object(params, rpc_lvol_show_blobs_decoders, + SPDK_COUNTOF(rpc_lvol_show_blobs_decoders), + &req)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "spdk_json_decode_object failed"); + return; + } +#endif + + //lvs = vbdev_get_lvol_store_by_name(req.lvs); + lvs_bdev = vbdev_lvol_store_first(); + lvs = lvs_bdev->lvs; + + + while (lvs_bdev != NULL) { + lvs = lvs_bdev->lvs; + + spdk_bs_iter_first(lvs->blobstore, blob_it_cb, lvs->blobstore); + lvs_bdev = vbdev_lvol_store_next(lvs_bdev); + //spdk_bs_dump(lvs->blobstore->dev, stdout, bsdump_print_xattr, spdk_bsdump_done, NULL); + } + + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "egg"); +} + +SPDK_RPC_REGISTER("lvol_show_blobs", rpc_lvol_show_blobs, SPDK_RPC_RUNTIME) + +struct rpc_bdev_longhorn_snapshot { + char *name; + char *snapshot; +}; + +static void +free_rpc_bdev_longhorn_snapshot(struct rpc_bdev_longhorn_snapshot *req) +{ + free(req->name); + free(req->snapshot); +} + +/* + * Decoder 
object for RPC longhorn_bdev_delete + */ +static const struct spdk_json_object_decoder rpc_bdev_longhorn_snapshot_decoders[] = { + {"name", offsetof(struct rpc_bdev_longhorn_snapshot, name), spdk_json_decode_string}, + {"snapshot", offsetof(struct rpc_bdev_longhorn_snapshot, snapshot), spdk_json_decode_string}, +}; + +struct rpc_bdev_longhorn_snapshot_ctx { + char *name; + char *snapshot; + struct spdk_jsonrpc_request *request; +}; + +static void rpc_longhorn_snapshot_complete(void *cb_arg) { + struct rpc_bdev_longhorn_snapshot_ctx *ctx = cb_arg; +} + + + +static void rpc_longhorn_snapshot_cmd(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_bdev_longhorn_snapshot req; + struct rpc_bdev_longhorn_snapshot_ctx *ctx; + struct longhorn_bdev_config *longhorn_cfg; + + if (spdk_json_decode_object(params, rpc_bdev_longhorn_snapshot_decoders, + SPDK_COUNTOF(rpc_bdev_longhorn_snapshot_decoders), + &req)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "spdk_json_decode_object failed"); + return; + } + + + longhorn_cfg = longhorn_bdev_config_find_by_name(req.name); + if (longhorn_cfg == NULL) { + spdk_jsonrpc_send_error_response_fmt(request, ENODEV, + "longhorn bdev %s is not found in config", + req.name); +//goto cleanup; + } + + + ctx = calloc(1, sizeof(*ctx)); + + + + + +} + +SPDK_RPC_REGISTER("bdev_longhorn_snapshot", rpc_longhorn_snapshot_cmd, SPDK_RPC_RUNTIME) + + diff --git a/module/bdev/longhorn/bdev_longhorn_snapshot.c b/module/bdev/longhorn/bdev_longhorn_snapshot.c new file mode 100644 index 000000000..8888eb5b4 --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn_snapshot.c @@ -0,0 +1,348 @@ +#include "spdk/stdinc.h" +#include "spdk/env.h" +#include "spdk/jsonrpc.h" +#include "spdk/thread.h" +#include "spdk/lvol.h" +#include "spdk/log.h" +#include "spdk/bdev.h" +#include "spdk/util.h" +#include "spdk/string.h" +#include "spdk_internal/lvolstore.h" +#include "../lvol/vbdev_lvol.h" 
+#include "bdev_longhorn.h" +#include "bdev_longhorn_remote.h" +#include "bdev_longhorn_snapshot.h" + +struct remote_snapshot_context { + spdk_lvol_op_complete cb_fn; + void *cb_arg; +}; + +static void remote_snapshot_complete(const char *addr, + const char *command, + int32_t id, + struct spdk_json_val *result, + struct spdk_json_val *error, + void *arg) +{ + struct remote_snapshot_context *ctx = arg; + + if (error != NULL) { + printf("error not null\n"); + }else { + printf("error null\n"); + } + + ctx->cb_fn(ctx->cb_arg, 0); + + free(ctx); +} + +void bdev_longhorn_snapshot_remote(const char *addr, + const char *name, + const char *lvs, + const char *snapshot, + spdk_lvol_op_complete cb_fn, + void *cb_arg) { + char *remote_name; + + struct spdk_json_write_ctx *w; + struct spdk_jsonrpc_client_request *request; + struct remote_snapshot_context *ctx; + struct spdk_lvol_store *store; + + remote_name = spdk_sprintf_alloc("%s/%s", lvs, name); + + json_remote_client(addr); + + request = spdk_jsonrpc_client_create_request(); + + w = spdk_jsonrpc_begin_request(request, 1, "bdev_lvol_snapshot"); + spdk_json_write_name(w, "params"); + spdk_json_write_object_begin(w); + spdk_json_write_name(w, "lvol_name"); + spdk_json_write_string(w, remote_name); + spdk_json_write_name(w, "snapshot_name"); + spdk_json_write_string(w, snapshot); + spdk_json_write_object_end(w); + + spdk_jsonrpc_end_request(request, w); + + free(remote_name); + + ctx = calloc(1, sizeof(*ctx)); + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + json_remote_client_send_command(addr, "bdev_lvol_snapshot", + 1, request, + remote_snapshot_complete, ctx); + + +} + +struct compare_ctx { + struct block_diff diff; + + compare_bdev_cb cb_fn; + void *cb_arg; + + struct spdk_bdev *bdev1; + struct spdk_bdev *bdev2; + + struct spdk_bdev_desc *desc1; + struct spdk_bdev_desc *desc2; + + struct spdk_io_channel *channel1; + struct spdk_io_channel *channel2; + + uint8_t *block1; + uint8_t *block2; + + uint64_t block_num; + + 
uint64_t size1; + uint64_t size2; + + uint64_t total_blocks; +}; + +void compare_bdev_event_cb(enum spdk_bdev_event_type type, + struct spdk_bdev *bdev, + void *event_ctx) +{ + +} + +static void compare_free_ctx(struct compare_ctx *ctx) { + free(ctx); +} + +static void read_bdev1(struct compare_ctx *ctx); + +static void read_bdev2_cb(struct spdk_bdev_io *bdev_io, + bool success, + void *cb_arg) { + + struct compare_ctx *ctx = cb_arg; + struct block *bad_block; + + if (success) { + if (memcpy(ctx->block1, ctx->block2, ctx->diff.blocksize) != 0) { + bad_block = calloc(1, sizeof(*bad_block)); + bad_block->block = ctx->block_num; + + TAILQ_INSERT_TAIL(&ctx->diff.blocks, bad_block, next); + + ctx->diff.num_diff++; + } + + ctx->block_num++; + + if (ctx->block_num >= ctx->total_blocks) { + ctx->cb_fn(ctx->diff.num_diff == 0, + &ctx->diff, ctx->cb_arg); + compare_free_ctx(ctx); + } else { + read_bdev1(ctx); + } + } else { + ctx->cb_fn(-1, &ctx->diff, ctx->cb_arg); + compare_free_ctx(ctx); + } + +} + +static void read_bdev1_cb(struct spdk_bdev_io *bdev_io, + bool success, + void *cb_arg) { + struct compare_ctx *ctx = cb_arg; + + if (success) { + spdk_bdev_read(ctx->desc2, ctx->channel2, ctx->block2, + ctx->block_num * ctx->diff.blocksize, + ctx->diff.blocksize, read_bdev2_cb, ctx); + } else { + ctx->cb_fn(-1, &ctx->diff, ctx->cb_arg); + compare_free_ctx(ctx); + } +} + +static void read_bdev1(struct compare_ctx *ctx) { + spdk_bdev_read(ctx->desc1, ctx->channel1, ctx->block1, + ctx->block_num * ctx->diff.blocksize, + ctx->diff.blocksize, read_bdev1_cb, ctx); +} + +static uint64_t bdev_get_size(struct spdk_bdev *bdev) { + return spdk_bdev_get_block_size(bdev) * spdk_bdev_get_num_blocks(bdev); +} + + +void bdev_longhorn_compare(const char *bdev_name1, + const char *bdev_name2, + uint64_t blocksize, + compare_bdev_cb cb_fn, + void *cb_arg) { + struct compare_ctx *ctx; + int rc; + + + ctx = calloc(1, sizeof (*ctx)); + + TAILQ_INIT(&ctx->diff.blocks); + ctx->diff.blocksize = 
blocksize; + ctx->cb_fn = cb_fn; + ctx->cb_arg = cb_arg; + + rc = spdk_bdev_open_ext(bdev_name1, true, compare_bdev_event_cb, + ctx, &ctx->desc1); + + rc = spdk_bdev_open_ext(bdev_name2, true, compare_bdev_event_cb, + ctx, &ctx->desc2); + + ctx->bdev1 = spdk_bdev_desc_get_bdev(ctx->desc1); + ctx->bdev2 = spdk_bdev_desc_get_bdev(ctx->desc2); + + ctx->channel1 = spdk_bdev_get_io_channel(ctx->desc1); + ctx->channel2 = spdk_bdev_get_io_channel(ctx->desc2); + + + ctx->block1 = spdk_malloc(blocksize, 4096, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + ctx->block2 = spdk_malloc(blocksize, 4096, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + + + ctx->size1 = bdev_get_size(ctx->bdev1); + ctx->size2 = bdev_get_size(ctx->bdev2); + + ctx->total_blocks = spdk_min(ctx->size1, ctx->size2) / blocksize; + +} + +struct longhorn_bdev_snapshot_ctx { + uint32_t num_to_snapshot; + uint32_t snapshots_complete; + struct longhorn_bdev *longhorn_bdev; +}; + +static void longhorn_bdev_snapshot_complete(void *cb_arg, + struct spdk_lvol *lvol, + int lvolerrno) +{ + struct longhorn_bdev_snapshot_ctx *ctx = cb_arg; + + ctx->snapshots_complete++; + + SPDK_ERRLOG("%d snapshots complete %d %s\n", ctx->snapshots_complete, lvolerrno, strerror(-lvolerrno)); + + if (ctx->snapshots_complete >= ctx->num_to_snapshot) { + longhorn_unpause(ctx->longhorn_bdev); + + free(ctx); + } +} + +static void longhorn_bdev_snapshot(struct longhorn_bdev *longhorn_bdev, + const char *snapshot) +{ + + struct longhorn_base_bdev_info *base_info; + struct longhorn_bdev_snapshot_ctx *ctx; + struct spdk_bdev *bdev; + struct spdk_lvol *lvol; + + + SPDK_DEBUGLOG(bdev_longhorn, "longhorn_bdev_remove_base_devices\n"); + + ctx = calloc(1, sizeof (*ctx)); + ctx->num_to_snapshot = longhorn_bdev->num_base_bdevs; + ctx->snapshots_complete = 0; + ctx->longhorn_bdev = longhorn_bdev; + + TAILQ_FOREACH(base_info, &longhorn_bdev->base_bdevs_head, infos) { + if (base_info->bdev == NULL) { + continue; + } + + if (base_info->is_local) 
{ + //bdev = base_info->bdev; + bdev = spdk_bdev_get_by_name(base_info->bdev_name); + lvol = vbdev_lvol_get_from_bdev(bdev); + + if (lvol == NULL) { + SPDK_ERRLOG("lvol null\n"); + } + + spdk_lvol_create_snapshot(lvol, snapshot, + longhorn_bdev_snapshot_complete, + ctx); + + + } else { + ctx->snapshots_complete++; + } + } +} + + + +struct longhorn_snapshot_ctx { + const char *name; + const char *snapshot_name; +}; + +static void longhorn_snapshot_pause_complete(struct longhorn_bdev *bdev, + void *arg) { + struct longhorn_snapshot_ctx *ctx = arg; + + longhorn_bdev_snapshot(bdev, ctx->snapshot_name); + + free(ctx->name); + free(ctx->snapshot_name); + free(ctx); +} + +int +longhorn_volume_snapshot(const char *name, const char *snapshot_name) { + struct longhorn_bdev *longhorn_bdev; + struct longhorn_bdev_io_channel *io_channel; + int rc; + struct longhorn_snapshot_ctx *ctx; + + longhorn_bdev = longhorn_bdev_find_by_name(name); + if (!longhorn_bdev) { + SPDK_ERRLOG("Longhorn bdev '%s' is not created yet\n", name); + return -ENODEV; + } + + ctx = calloc(1, sizeof(*ctx)); + ctx->name = strdup(name); + ctx->snapshot_name = strdup(snapshot_name); + + rc = pthread_mutex_trylock(&longhorn_bdev->base_bdevs_mutex); + + longhorn_volume_add_pause_cb(longhorn_bdev, + longhorn_snapshot_pause_complete, + ctx); + + if (rc != 0) { + if (errno == EBUSY) { + SPDK_ERRLOG("Longhorn bdev '%s' is busy\n", name); + } + + + return -errno; + } + + TAILQ_FOREACH(io_channel, &longhorn_bdev->io_channel_head, channels) { + spdk_thread_send_msg(io_channel->thread, bdev_longhorn_pause_io, io_channel); + } + + pthread_mutex_unlock(&longhorn_bdev->base_bdevs_mutex); + + return 0; +} + + diff --git a/module/bdev/longhorn/bdev_longhorn_snapshot.h b/module/bdev/longhorn/bdev_longhorn_snapshot.h new file mode 100644 index 000000000..0c9c6f3db --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn_snapshot.h @@ -0,0 +1,37 @@ +#ifndef _BDEV_LONGHORN_SNAPSHOT__H_ +#define _BDEV_LONGHORN_SNAPSHOT__H_ 
+#include "spdk/lvol.h" + + +void bdev_longhorn_snapshot_remote(const char *addr, + const char *name, + const char *lvs, + const char *snapshot, + spdk_lvol_op_complete cb_fn, + void *cb_arg); + +struct block { + uint64_t block; + TAILQ_ENTRY(block) next; +}; + +struct block_diff { + TAILQ_HEAD(, block) blocks; + + uint64_t blocksize; + + uint64_t num_diff; +}; + + +typedef void (*compare_bdev_cb)(int status, struct block_diff *diff, void *arg); + +void bdev_longhorn_compare(const char *bdev_name1, + const char *bdev_name2, + uint64_t blocksize, + compare_bdev_cb cb_fn, + void *cb_arg); +int +longhorn_volume_snapshot(const char *name, const char *snapshot_name); + +#endif /* _BDEV_LONGHORN_SNAPSHOT__H_ */ diff --git a/module/bdev/longhorn/bdev_longhorn_snapshot_rpc.c b/module/bdev/longhorn/bdev_longhorn_snapshot_rpc.c new file mode 100644 index 000000000..cf7c5456d --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn_snapshot_rpc.c @@ -0,0 +1,51 @@ +#include "spdk/rpc.h" +#include "spdk/bdev.h" +#include "bdev_longhorn.h" +#include "spdk/util.h" +#include "spdk/string.h" +#include "spdk/log.h" +#include "spdk/env.h" +#include "bdev_longhorn_snapshot.h" + +struct rpc_longhorn_volume_snapshot { + char *name; + char *snapshot_name; +}; + +static void +free_rpc_longhorn_volume_snapshot(struct rpc_longhorn_volume_snapshot *req) { + free(req->name); + free(req->snapshot_name); +} + +static const struct spdk_json_object_decoder rpc_longhorn_volume_snapshot_decoders[] = { + {"name", offsetof(struct rpc_longhorn_volume_snapshot, name), spdk_json_decode_string}, + {"snapshot_name", offsetof(struct rpc_longhorn_volume_snapshot, snapshot_name), spdk_json_decode_string}, +}; + +static void +rpc_longhorn_volume_snapshot_cmd(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_longhorn_volume_snapshot req = {}; + + if (spdk_json_decode_object(params, rpc_longhorn_volume_snapshot_decoders, + 
SPDK_COUNTOF(rpc_longhorn_volume_snapshot_decoders), + &req)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "longhorn spdk_json_decode_object failed"); + return; + } + + if (longhorn_volume_snapshot(req.name, req.snapshot_name)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "unable to perform snapshot"); + } else { + spdk_jsonrpc_send_bool_response(request, true); + } + + free_rpc_longhorn_volume_snapshot(&req); +} + +SPDK_RPC_REGISTER("longhorn_volume_snapshot", rpc_longhorn_volume_snapshot_cmd, SPDK_RPC_RUNTIME) + diff --git a/module/bdev/longhorn/bdev_longhorn_sync.c b/module/bdev/longhorn/bdev_longhorn_sync.c new file mode 100644 index 000000000..edb6dd820 --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn_sync.c @@ -0,0 +1,146 @@ +#include "spdk/stdinc.h" +#include "spdk/env.h" +#include "spdk/jsonrpc.h" +#include "spdk/thread.h" +#include "spdk_internal/lvolstore.h" +#include "../lvol/vbdev_lvol.h" +#include "lib/blob/blobstore.h" + +#define ALIGN_4K 4096 + + +struct sync_context { + struct spdk_bdev *snapshot_bdev; + struct spdk_bdev_desc *snapshot_desc; + struct spdk_io_channel *snapshot_channel; + + uint64_t num_clusters; + uint64_t allocated_clusters; + uint32_t cluster_size; + uint32_t io_unit_size; + uint32_t *table; + + uint64_t io_units_per_cluster; + + uint8_t *cluster; + uint64_t pos; + struct iovec iov; + + int iovcnt; + + struct spdk_lvol *lvol; + struct spdk_io_channel *channel; + +}; + +static void longhorn_snapshot_read_cluster(struct sync_context *ctx); + +static uint64_t longhorn_get_cluster_offset(struct sync_context *ctx) { + uint64_t offset = ctx->table[ctx->pos] * ctx->io_units_per_cluster; + + return offset; +} + +static void lvol_close_cb(void *cb_arg, int lvolerrno) { +} + +static void longhorn_snapshot_write_cluster_cb(void *cb_arg, int bserrno) { + struct sync_context *ctx = cb_arg; + + if (++ctx->pos < ctx->allocated_clusters) { + 
longhorn_snapshot_read_cluster(ctx); + } else { + /* done */ + spdk_bdev_close(ctx->snapshot_desc); + spdk_lvol_close(ctx->lvol, lvol_close_cb, NULL); + free(ctx->table); + free(ctx); + } +} + +static void longhorn_snapshot_read_cluster_cb(struct spdk_bdev_io *bdev_io, + bool success, + void *cb_arg) +{ + struct sync_context *ctx = cb_arg; + + + spdk_blob_io_write(ctx->lvol->blob, ctx->channel, + ctx->cluster, longhorn_get_cluster_offset(ctx), + ctx->io_units_per_cluster, + longhorn_snapshot_write_cluster_cb, + ctx); + +} + +static void longhorn_snapshot_read_cluster(struct sync_context *ctx) { + + + printf("reading %lu (cluster %u (%u)) (size = %u \n", ctx->table[ctx->pos] * ctx->cluster_size, ctx->table[ctx->pos], ctx->pos, ctx->cluster_size); + + spdk_bdev_read(ctx->snapshot_desc, ctx->snapshot_channel, + ctx->cluster, + ctx->table[ctx->pos] * ctx->cluster_size, + ctx->cluster_size, + longhorn_snapshot_read_cluster_cb, + ctx); + +} + + +static void longhorn_snapshot_lvol_create_complete_cb(void *arg, + struct spdk_lvol *lvol, + int lvolerrno) { + struct sync_context *ctx = arg; + + ctx->lvol = lvol; + + ctx->pos = 0; + printf("lvol created\n"); + + longhorn_snapshot_read_cluster(ctx); +} + +void snapshot_bdev_event_cb(enum spdk_bdev_event_type type, + struct spdk_bdev *bdev, + void *event_ctx) +{ +} + +void longhorn_snapshot_bdev_sync(const char *snapshot_bdev_name, + const char *name, + struct spdk_lvol_store *lvs, + uint64_t num_clusters, + uint64_t allocated_clusters, + uint32_t cluster_size, + uint32_t io_unit_size, + uint32_t *table) +{ + struct sync_context *ctx; + + ctx = calloc(1, sizeof(struct sync_context)); + + spdk_bdev_open_ext(snapshot_bdev_name, false, snapshot_bdev_event_cb, + ctx, &ctx->snapshot_desc); + + ctx->snapshot_bdev = spdk_bdev_desc_get_bdev(ctx->snapshot_desc); + ctx->snapshot_channel = spdk_bdev_get_io_channel(ctx->snapshot_desc); + + ctx->cluster = spdk_malloc(cluster_size, ALIGN_4K, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + 
+ + ctx->channel = spdk_bs_alloc_io_channel(lvs->blobstore); + + ctx->num_clusters = num_clusters; + ctx->allocated_clusters = allocated_clusters; + ctx->cluster_size = cluster_size; + ctx->io_unit_size = io_unit_size; + ctx->table = table; + + ctx->io_units_per_cluster = ctx->cluster_size / ctx->io_unit_size; + + vbdev_lvol_create(lvs, name, ctx->num_clusters * ctx->cluster_size, true, LVOL_CLEAR_WITH_DEFAULT, longhorn_snapshot_lvol_create_complete_cb, ctx); +} + + diff --git a/module/bdev/longhorn/bdev_longhorn_sync.h b/module/bdev/longhorn/bdev_longhorn_sync.h new file mode 100644 index 000000000..0a57229d5 --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn_sync.h @@ -0,0 +1,12 @@ +#ifndef SPDK__BDEV_LONGHORN_SYNC_H +#define SPDK__BDEV_LONGHORN_SYNC_H +void longhorn_snapshot_bdev_sync(const char *snapshot_bdev_name, + const char *name, + struct spdk_lvol_store *lvs, + uint64_t num_clusters, + uint64_t allocated_clusters, + uint32_t cluster_size, + uint32_t io_unit_size, + uint32_t *table); + +#endif /* SPDK__BDEV_LONGHORN_SYNC_H */ diff --git a/module/bdev/longhorn/bdev_longhorn_sync_client.c b/module/bdev/longhorn/bdev_longhorn_sync_client.c new file mode 100644 index 000000000..72e1cde8e --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn_sync_client.c @@ -0,0 +1,248 @@ +#include "spdk/stdinc.h" +#include "spdk/env.h" +#include "spdk/jsonrpc.h" +#include "spdk/thread.h" +#include "spdk_internal/lvolstore.h" +#include "../lvol/vbdev_lvol.h" +#include "lib/blob/blobstore.h" + +#include "bdev_longhorn_lvol.h" + +#include +#include +#include + +#define ALIGN_4K 4096 + +enum state { + NAME, + HEADER, + TABLE, + DATA, + DONE +}; + +struct longhorn_sync_client_context { + uint64_t blob_id; + int fd; + + struct spdk_lvol_store *lvs; + struct spdk_lvol *lvol; + struct spdk_io_channel *channel; + + + + enum state state; + size_t remaining; + char name[256]; + struct longhorn_lvol_header header; + + uint32_t *table; + + uint8_t *current; + + uint64_t 
io_units_per_cluster; + + uint8_t *cluster; + uint64_t pos; + + bool write_in_progress; + + struct spdk_poller *poller; +}; + +static int longhorn_read(struct longhorn_sync_client_context *ctx) { + ssize_t nread; + + if (ctx->remaining == 0) return 1; + + printf("reading %d\n", ctx->remaining); + nread = read(ctx->fd, ctx->current, ctx->remaining); + printf("read %d\n", nread); + + if (nread > 0) { + ctx->remaining -= nread; + ctx->current += nread; + } else if (nread < 0) { + perror("read"); + return -1; + } + + return ctx->remaining == 0; +} + +static int longhorn_handle_name(struct longhorn_sync_client_context *ctx) { + if (longhorn_read(ctx)) { + printf("received name = %s\n", ctx->name); + + ctx->state = HEADER; + ctx->remaining = sizeof(ctx->header); + ctx->current = &ctx->header; + } + + return 0; +} + +static void longhorn_lvol_create_complete_cb(void *arg, struct spdk_lvol *lvol, int lvolerrno) { + struct longhorn_sync_client_context *ctx = arg; + + ctx->channel = spdk_bs_alloc_io_channel(ctx->lvs->blobstore); + ctx->lvol = lvol; +} + + + +static int longhorn_handle_header(struct longhorn_sync_client_context *ctx) { + if (longhorn_read(ctx)) { + printf("allocated clusters = %lu\n", ctx->header.allocated_clusters); + printf("num cluster = %lu\n", ctx->header.num_clusters); + + vbdev_lvol_create(ctx->lvs, ctx->name, ctx->header.num_clusters * ctx->header.cluster_size, true, LVOL_CLEAR_WITH_DEFAULT, longhorn_lvol_create_complete_cb, ctx); + + ctx->cluster = spdk_malloc(ctx->header.cluster_size, ALIGN_4K, NULL, + SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); + + ctx->io_units_per_cluster = ctx->header.cluster_size / ctx->header.io_unit_size; + + + ctx->state = TABLE; + ctx->remaining = sizeof(uint32_t) * ctx->header.num_clusters; + ctx->table = calloc(1, ctx->remaining); + ctx->current = ctx->table; + + } + + return 0; +} + +static int longhorn_handle_table(struct longhorn_sync_client_context *ctx) { + + if (longhorn_read(ctx)) { + printf("read table\n"); + 
ctx->state = DATA; + ctx->remaining = ctx->header.cluster_size; + ctx->current = ctx->cluster; + + + } + + return 0; +} + + +static void longhorn_write_cb(void *arg, int bserrno) { + struct longhorn_sync_client_context *ctx = arg; + + if (bserrno) { + ctx->state = DONE; + return; + } + + if (++ctx->pos >= ctx->header.allocated_clusters) { + printf("copy complete\n"); + ctx->state = DONE; + return; + } + + ctx->current = ctx->cluster; + ctx->remaining = ctx->header.cluster_size; + ctx->write_in_progress = false; +} + + +static int longhorn_handle_data(struct longhorn_sync_client_context *ctx) { + uint64_t offset = ctx->table[ctx->pos] * ctx->io_units_per_cluster; + + if (longhorn_read(ctx)) { + if (ctx->lvol != NULL && !ctx->write_in_progress) { + printf("writing cluster %lu\n", ctx->table[ctx->pos]); + ctx->write_in_progress = true; + + spdk_blob_io_write(ctx->lvol->blob, ctx->channel, + ctx->cluster, offset, + ctx->io_units_per_cluster, + longhorn_write_cb, ctx); + } + } + + return 0; +} + + + + +static int longhorn_sync_client_poll(void *arg) { + struct longhorn_sync_client_context *context = arg; + struct timeval timeout = {0, 0}; + fd_set rdset; + + FD_ZERO(&rdset); + + FD_SET(context->fd, &rdset); + + if (select(context->fd + 1, &rdset, NULL, NULL, &timeout) > 0) { + printf("client readable\n"); + switch (context->state) { + case NAME: + longhorn_handle_name(context); + break; + case HEADER: + longhorn_handle_header(context); + break; + case TABLE: + longhorn_handle_table(context); + break; + case DATA: + longhorn_handle_data(context); + break; + case DONE: + break; + } + + } + + return SPDK_POLLER_BUSY; +} + +static void set_nonblocking(int fd) { + int fdflags = fcntl(fd, F_GETFL); + + fdflags |= O_NONBLOCK; + fcntl(fd, F_SETFL, fdflags); +} + + +int longhorn_sync_client(const char *addr, uint16_t port, uint64_t blob_id, struct spdk_lvol_store *lvs) { + struct sockaddr_in sockaddr = {'\0'}; + int sockfd; + struct longhorn_sync_client_context *ctx; + + 
inet_aton(addr, &sockaddr.sin_addr); + sockaddr.sin_port = htons(port); + sockaddr.sin_family = AF_INET; + + sockfd = socket(AF_INET, SOCK_STREAM, 0); + + if (sockfd <= 0) { + return -errno; + } + + if (connect(sockfd, (struct sockaddr *) &sockaddr, sizeof(sockaddr)) < 0) { + return -errno; + } + + ctx = calloc(1, sizeof(struct longhorn_sync_client_context)); + ctx->fd = sockfd; + ctx->state = NAME; + ctx->remaining = sizeof(ctx->name); + ctx->current = ctx->name; + ctx->lvs = lvs; + + + set_nonblocking(ctx->fd); + + ctx->poller = SPDK_POLLER_REGISTER(longhorn_sync_client_poll, ctx, 4000); + + write(sockfd, &blob_id, sizeof (uint64_t)); + +} diff --git a/module/bdev/longhorn/bdev_longhorn_sync_client.h b/module/bdev/longhorn/bdev_longhorn_sync_client.h new file mode 100644 index 000000000..c96c3330f --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn_sync_client.h @@ -0,0 +1,7 @@ +#ifndef SPDK_BDEV_LONGHORN_REBUILD_RPC__H +#define SPDK_BDEV_LONGHORN_REBUILD_RPC__H + +int longhorn_sync_client(const char *addr, uint16_t port, uint64_t blob_id, struct spdk_lvol_store *lvs); + +#endif /* SPDK_BDEV_LONGHORN_REBUILD_RPC__H */ + diff --git a/module/bdev/longhorn/bdev_longhorn_volume.c b/module/bdev/longhorn/bdev_longhorn_volume.c new file mode 100644 index 000000000..f30f1d31b --- /dev/null +++ b/module/bdev/longhorn/bdev_longhorn_volume.c @@ -0,0 +1 @@ +void bdev_longhorn_volume_create diff --git a/scripts/rpc.py b/scripts/rpc.py index f8ad88a2c..20a489722 100755 --- a/scripts/rpc.py +++ b/scripts/rpc.py @@ -1720,6 +1720,15 @@ Format: 'user:u1 secret:s1 muser:mu1 msecret:ms1,user:u2 secret:s2 muser:mu2 mse p.add_argument('-l', '--lvs-name', help='lvol store name', required=False) p.set_defaults(func=bdev_lvol_get_lvstores) + def lvol_show_blobs(args): + rpc.bdev.lvol_show_blobs(args.client, + lvs=args.lvs) + + p = subparsers.add_parser('lvol_show_blobs', aliases=['show_blobs'], + help='show blob') + p.add_argument('lvs', help='lvs name') + 
def lvol_show_blobs(client, lvs):
    """Invoke the 'lvol_show_blobs' RPC.

    Args:
        client: RPC client object; its call(method, params) is used once
        lvs: value sent as the 'lvs' parameter (the lvol store name)

    Returns:
        Whatever client.call() returns for the 'lvol_show_blobs' method.
    """
    return client.call('lvol_show_blobs', {'lvs': lvs})
CU_ASSERT(g_rpc_err == SPDK_JSONRPC_ERROR_INVALID_STATE); SLIST_REMOVE_HEAD(&g_rpc_methods, slist);